Exemple #1
0
def _file_type_to_output_file_type(file_type, index):
    """Wrap ``file_type`` in an OutputFileType whose label carries ``index``.

    The label is ``"Label <file_type_id>_<index>"``; the display name is
    ``repr(file_type)``, the description is derived from the file type's
    string form, and the default name is taken from the file type itself.
    """
    type_id = file_type.file_type_id
    # Suffix that distinguishes several outputs of the same file type.
    indexed_id = "_".join((type_id, str(index)))
    label = "Label " + indexed_id
    display_name = repr(file_type)
    description = "description for {f}".format(f=file_type)
    return OutputFileType(type_id, label, display_name, description,
                          file_type.default_name)
def _to_output(i, file_type):
    """Build the ``i``-th OutputFileType for ``file_type``.

    The default file name is ``<file_type_id>_<base_name>_<i>``; the label
    is ``label_<file_type_id>``, the display name is ``repr(file_type)``
    and the description is ``"File <file_type>"``.
    """
    type_id = file_type.file_type_id
    indexed_base = file_type.base_name + "_" + str(i)
    default_name = "_".join((type_id, indexed_base))
    label = "label_" + type_id
    description = "File {f}".format(f=file_type)
    display_name = repr(file_type)
    return OutputFileType(type_id, label, display_name, description,
                          default_name)
Exemple #3
0
def FT(file_type, basename, title):
    """Shorthand constructor for an OutputFileType.

    ``basename`` is used as the default file name and, with an ``_id``
    suffix, as the label; ``title`` becomes the display name. The
    description is generated from the string form of ``file_type``.
    """
    type_id = file_type.file_type_id
    label = basename + '_id'
    description = "description for {f}".format(f=file_type)
    # Positional order:
    # (file_type_id, label, display_name, description, default_name)
    return OutputFileType(type_id, label, title, description, basename)
Exemple #4
0
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Namespace and driver command line handed to registry_builder() below.
TOOL_NAMESPACE = 'pbcoretools'
DRIVER_BASE = "python -m pbcoretools.tasks.filters "

# Registry object built from the namespace and driver command above.
registry = registry_builder(TOOL_NAMESPACE, DRIVER_BASE)

# Option describing the minimum subread length.
# NOTE(review): the leading 0 is presumably the default value; QuickOpt's
# signature is not visible here -- confirm.
rl_opt = QuickOpt(0, "Minimum subread length", "Minimum length of subreads")

# Option describing extra filters, given as a comma separated list.
# NOTE(review): the leading "" is presumably the default value -- confirm.
filters_opt = QuickOpt(
    "", "Filters to add to the DataSet",
    "A comma separated list of other filters to add to the DataSet")

# Output file type for the filtered SubreadSet XML.
# NOTE(review): positional arguments appear to be
# (file_type_id, label, display_name, description, default_name) -- confirm
# against OutputFileType.
subreads_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id,
                                    "SubreadSet", "Filtered SubreadSet XML",
                                    "Filtered SubreadSet XML", "filtered")


def sanitize_read_length(read_length):
    """Convert a read-length filter value to an int.

    Args:
        read_length: The raw value (number or string). Falsy values
            (``None``, ``0``, ``""``) mean "no filter".

    Returns:
        ``None`` when ``read_length`` is falsy, otherwise the value as an
        int. Fractional values are truncated via ``int(float(...))``.

    Raises:
        ValueError: If the value is not an optionally signed decimal number
            containing at least one digit.
    """
    if not read_length:
        return None
    text = str(read_length).strip()
    # Raw string avoids invalid-escape warnings. At least one digit is
    # required so that inputs such as ".", "-" or whitespace are reported
    # with the descriptive message below rather than failing in float().
    if not re.search(r'^-?(\d+(\.\d*)?|\.\d+)$', text):
        raise ValueError('read_length filter value "{v}" is not a '
                         'number'.format(v=read_length))
    try:
        return int(read_length)
    except ValueError:
        # Strings such as "100.5" are rejected by int(); go through float.
        return int(float(read_length))


def run_filter_dataset(in_file, out_file, read_length, other_filters):
Exemple #5
0
# Short module-level aliases for FileTypes members.
FT_DUMMY = FileTypes.TXT  # plain-text type, bound under the name "dummy"
FT_SUBREADS = FileTypes.DS_SUBREADS
FT_CONTIGS = FileTypes.DS_CONTIG
FT_FASTA = FileTypes.FASTA
FT_REPORT = FileTypes.REPORT
FT_LOG = FileTypes.LOG


def FT(file_type, basename, title):
    """Create an OutputFileType from a file type, a base name and a title.

    The label is ``basename + '_id'``, the display name is ``title``, the
    description is generated from ``file_type`` and the default file name
    is ``basename``.
    """
    # Field order expected by OutputFileType:
    # (file_type_id, label, display_name, description, default_name)
    fields = (
        file_type.file_type_id,
        basename + '_id',
        title,
        "description for {f}".format(f=file_type),
        basename,
    )
    return OutputFileType(*fields)


# Output file type declarations. Positional arguments to OutputFileType are
# (file_type_id, label, display_name, description, default_name).

# Built through the FT() helper on top of the plain-text FT_DUMMY type.
FT_DB = FT(FT_DUMMY, 'dazzler.db', "DAZZ_DB (implies dot-files too)")
# Generic JSON output.
FT_JSON_OUT = OutputFileType(FileTypes.JSON.file_type_id, "json_id", "JSON",
                             "Generic JSON file", "file")
# FASTA sequences output.
FT_FASTA_OUT = OutputFileType(FileTypes.FASTA.file_type_id, "fasta_id",
                              "FASTA", "FASTA sequences", "reads")
# ContigSet of polished FASTA sequences.
FT_CONTIGS_OUT = OutputFileType(FileTypes.DS_CONTIG.file_type_id, "contig_id",
                                "contigset",
                                "Contigset of polished FASTA sequences",
                                "polished.contigset")

# File-of-file-names listing the fasta inputs for daligner.
FT_FOFN_OUT = OutputFileType(
    FileTypes.FOFN.file_type_id, "fofn_id",
    "FOFN of daligner input (.fasta paths, possibly relative)",
    "file of file names of fasta input", "file")


@registry('task_falcon_config_get_fasta',
          '0.0.0', [FT_CFG], [FT_FOFN_OUT],
Exemple #6
0
def _file_type_to_output_file_type(file_type):
    """Wrap ``file_type`` in an OutputFileType with generated metadata.

    The label is ``"Label <file_type_id>"``, the display name is
    ``repr(file_type)``, the description is derived from the file type's
    string form, and the default name comes from the file type itself.
    """
    type_id = file_type.file_type_id
    label = "Label " + type_id
    display_name = repr(file_type)
    description = "description for {f}".format(f=file_type)
    return OutputFileType(type_id, label, display_name, description,
                          file_type.default_name)
Exemple #7
0
    ref_file = op.join(output_dir_name, reference_name, "referenceset.xml")
    assert op.isfile(ref_file)
    with ReferenceSet(ref_file, strict=True) as ds_ref:
        ds_ref.makePathsAbsolute()
        log.info("saving final ReferenceSet to {f}".format(f=output_file_name))
        ds_ref.write(output_file_name)
    return 0


# _run_bam_to_fastx with its first three positional arguments pre-bound:
# a program name string plus the matching reader and writer classes.
run_bam_to_fasta = functools.partial(_run_bam_to_fastx, "bam2fasta",
                                     FastaReader, FastaWriter)
run_bam_to_fastq = functools.partial(_run_bam_to_fastx, "bam2fastq",
                                     FastqReader, FastqWriter)

# Output file types for SubreadSet-producing tasks.
# NOTE(review): positional arguments appear to be
# (file_type_id, label, display_name, description, default_name) -- confirm
# against OutputFileType.
subreads_from_h5_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id,
                                            "SubreadSet", "SubreadSet",
                                            "Imported SubreadSet", "subreads")
subreads_barcoded_file_type = OutputFileType(
    FileTypes.DS_SUBREADS.file_type_id, "SubreadSet", "Barcoded SubreadSet",
    "Barcoded SubreadSet", "subreads_barcoded")


@registry(
    "h5_subreads_to_subread", "0.1.0",
    FileTypes.DS_SUBREADS_H5,
    subreads_from_h5_file_type,
    is_distributed=True, nproc=1)
def run_bax2bam(rtc):
    # Task entry point: pass the first input file and the first output file
    # of the resolved task to run_bax_to_bam and return its result.
    # NOTE(review): documented with comments rather than a docstring in case
    # the registry decorator reads __doc__ -- confirm before adding one.
    task = rtc.task
    source_path = task.input_files[0]
    target_path = task.output_files[0]
    return run_bax_to_bam(source_path, target_path)
Exemple #8
0

# __run_fasta_to_reference with its first two positional arguments
# pre-bound: a program name string and the dataset class to use.
run_fasta_to_reference = functools.partial(__run_fasta_to_reference,
    "fasta-to-reference", ReferenceSet)
run_fasta_to_gmap_reference = functools.partial(__run_fasta_to_reference,
    "fasta-to-gmap-reference", GmapReferenceSet)


# _run_bam_to_fastx with its first three positional arguments pre-bound:
# a program name string plus the matching reader and writer classes.
run_bam_to_fasta = functools.partial(_run_bam_to_fastx, "bam2fasta",
    FastaReader, FastaWriter)
run_bam_to_fastq = functools.partial(_run_bam_to_fastx, "bam2fastq",
    FastqReader, FastqWriter)


# Output file types for SubreadSet-producing tasks.
# NOTE(review): positional arguments appear to be
# (file_type_id, label, display_name, description, default_name) -- confirm
# against OutputFileType.
subreads_from_h5_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id,
                                            "Subreads", "Subread data in XML dataset",
                                            "Imported SubreadSet", "subreads")
subreads_barcoded_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id,
                                             "SubreadSet",
                                             "Barcoded Subreads",
                                             "Barcoded Subreads DataSet XML",
                                             "subreads_barcoded")

@registry(
    "h5_subreads_to_subread",
    "0.1.0",
    FileTypes.DS_SUBREADS_H5,
    subreads_from_h5_file_type,
    is_distributed=True,
    nproc=1)
def run_bax2bam(rtc):
    # Task entry point: hand the first input and first output path of the
    # resolved task to run_bax_to_bam and return its result.
    # NOTE(review): documented with comments rather than a docstring in case
    # the registry decorator reads __doc__ -- confirm before adding one.
    inputs = rtc.task.input_files
    outputs = rtc.task.output_files
    return run_bax_to_bam(inputs[0], outputs[0])


@registry("bam2bam_barcode", "0.1.0",