def _file_type_to_output_file_type(file_type, index):
    """Build an OutputFileType for *file_type*, disambiguated by *index*.

    The label embeds ``<file_type_id>_<index>`` so that several outputs of
    the same file type carry distinct labels.
    """
    suffixed_id = "{t}_{i}".format(t=file_type.file_type_id, i=index)
    label = "Label " + suffixed_id
    description = "description for {f}".format(f=file_type)
    return OutputFileType(file_type.file_type_id, label, repr(file_type),
                          description, file_type.default_name)
def _to_output(i, file_type):
    """Return an OutputFileType for the *i*-th output of *file_type*.

    The default file name is ``<file_type_id>_<base_name>_<i>`` and the
    label is ``label_<file_type_id>``.
    """
    name_parts = [file_type.file_type_id,
                  "{b}_{n}".format(b=file_type.base_name, n=i)]
    return OutputFileType(
        file_type.file_type_id,
        "label_" + file_type.file_type_id,
        repr(file_type),
        "File {f}".format(f=file_type),
        "_".join(name_parts))
def FT(file_type, basename, title):
    """Shorthand builder for an OutputFileType.

    Arguments map onto (file_type_id, label, display_name, description,
    default_name); the label is derived as ``<basename>_id`` and *basename*
    doubles as the default output name.
    """
    label = basename + '_id'
    description = "description for {f}".format(f=file_type)
    return OutputFileType(file_type.file_type_id, label, title, description,
                          basename)
log = logging.getLogger(__name__) TOOL_NAMESPACE = 'pbcoretools' DRIVER_BASE = "python -m pbcoretools.tasks.filters " registry = registry_builder(TOOL_NAMESPACE, DRIVER_BASE) rl_opt = QuickOpt(0, "Minimum subread length", "Minimum length of subreads") filters_opt = QuickOpt( "", "Filters to add to the DataSet", "A comma separated list of other filters to add to the DataSet") subreads_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id, "SubreadSet", "Filtered SubreadSet XML", "Filtered SubreadSet XML", "filtered") def sanitize_read_length(read_length): if read_length: if not re.search('^-?\d*(\.\d*)?$', str(read_length).strip()): raise ValueError('read_length filter value "{v}" is not a ' 'number'.format(v=read_length)) try: return int(read_length) except ValueError: return int(float(read_length)) def run_filter_dataset(in_file, out_file, read_length, other_filters):
# Short aliases for the registry file types used below.
FT_DUMMY = FileTypes.TXT
FT_SUBREADS = FileTypes.DS_SUBREADS
FT_CONTIGS = FileTypes.DS_CONTIG
FT_FASTA = FileTypes.FASTA
FT_REPORT = FileTypes.REPORT
FT_LOG = FileTypes.LOG


def FT(file_type, basename, title):
    """Shorthand builder for an OutputFileType.

    Arguments map onto (file_type_id, label, display_name, description,
    default_name); the label is derived as ``<basename>_id`` and *basename*
    doubles as the default output name.
    """
    # (file_type_id, label, display_name, description, default_name)
    return OutputFileType(file_type.file_type_id, basename + '_id', title,
                          "description for {f}".format(f=file_type), basename)


# Concrete output file types for the falcon task registry below.
FT_DB = FT(FT_DUMMY, 'dazzler.db', "DAZZ_DB (implies dot-files too)")
FT_JSON_OUT = OutputFileType(FileTypes.JSON.file_type_id, "json_id", "JSON",
                             "Generic JSON file", "file")
FT_FASTA_OUT = OutputFileType(FileTypes.FASTA.file_type_id, "fasta_id",
                              "FASTA", "FASTA sequences", "reads")
FT_CONTIGS_OUT = OutputFileType(FileTypes.DS_CONTIG.file_type_id, "contig_id",
                                "contigset",
                                "Contigset of polished FASTA sequences",
                                "polished.contigset")
FT_FOFN_OUT = OutputFileType(
    FileTypes.FOFN.file_type_id,
    "fofn_id",
    "FOFN of daligner input (.fasta paths, possibly relative)",
    "file of file names of fasta input",
    "file")


# NOTE(review): FT_CFG is not defined anywhere in this chunk — presumably
# declared elsewhere in the file; confirm before relocating this block.
# The decorator call below is truncated in this view.
@registry('task_falcon_config_get_fasta', '0.0.0', [FT_CFG], [FT_FOFN_OUT],
def _file_type_to_output_file_type(file_type):
    """Wrap *file_type* in an OutputFileType, labelled by its own id."""
    type_id = file_type.file_type_id
    label = "Label " + type_id
    description = "description for {f}".format(f=file_type)
    return OutputFileType(type_id, label, repr(file_type), description,
                          file_type.default_name)
# NOTE(review): the statements below are the tail of a function whose `def`
# line falls outside this chunk; indentation reconstructed accordingly.
    ref_file = op.join(output_dir_name, reference_name, "referenceset.xml")
    assert op.isfile(ref_file)
    # Re-open strictly to validate, then rewrite with absolute paths so the
    # final XML does not depend on the current working directory.
    with ReferenceSet(ref_file, strict=True) as ds_ref:
        ds_ref.makePathsAbsolute()
        log.info("saving final ReferenceSet to {f}".format(f=output_file_name))
        ds_ref.write(output_file_name)
    return 0


# Concrete converters derived from the generic bam2fastx runner.
run_bam_to_fasta = functools.partial(_run_bam_to_fastx, "bam2fasta",
                                     FastaReader, FastaWriter)
run_bam_to_fastq = functools.partial(_run_bam_to_fastx, "bam2fastq",
                                     FastqReader, FastqWriter)

subreads_from_h5_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id,
                                            "SubreadSet", "SubreadSet",
                                            "Imported SubreadSet", "subreads")
subreads_barcoded_file_type = OutputFileType(
    FileTypes.DS_SUBREADS.file_type_id, "SubreadSet", "Barcoded SubreadSet",
    "Barcoded SubreadSet", "subreads_barcoded")


@registry("h5_subreads_to_subread", "0.1.0", FileTypes.DS_SUBREADS_H5,
          subreads_from_h5_file_type, is_distributed=True, nproc=1)
def run_bax2bam(rtc):
    # Convert bax.h5 subreads to a BAM-based SubreadSet via bax2bam.
    return run_bax_to_bam(rtc.task.input_files[0], rtc.task.output_files[0])
# Concrete reference builders derived from the generic fasta-to-reference
# runner, one per dataset flavor.
run_fasta_to_reference = functools.partial(__run_fasta_to_reference,
                                           "fasta-to-reference", ReferenceSet)
run_fasta_to_gmap_reference = functools.partial(__run_fasta_to_reference,
                                                "fasta-to-gmap-reference",
                                                GmapReferenceSet)

# Concrete converters derived from the generic bam2fastx runner.
run_bam_to_fasta = functools.partial(_run_bam_to_fastx, "bam2fasta",
                                     FastaReader, FastaWriter)
run_bam_to_fastq = functools.partial(_run_bam_to_fastx, "bam2fastq",
                                     FastqReader, FastqWriter)

subreads_from_h5_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id,
                                            "Subreads",
                                            "Subread data in XML dataset",
                                            "Imported SubreadSet", "subreads")
subreads_barcoded_file_type = OutputFileType(FileTypes.DS_SUBREADS.file_type_id,
                                             "SubreadSet", "Barcoded Subreads",
                                             "Barcoded Subreads DataSet XML",
                                             "subreads_barcoded")


@registry("h5_subreads_to_subread", "0.1.0", FileTypes.DS_SUBREADS_H5,
          subreads_from_h5_file_type, is_distributed=True, nproc=1)
def run_bax2bam(rtc):
    # Convert bax.h5 subreads to a BAM-based SubreadSet via bax2bam.
    return run_bax_to_bam(rtc.task.input_files[0], rtc.task.output_files[0])


# NOTE(review): the decorator call below is truncated in this view; the
# decorated task definition continues beyond this chunk.
@registry("bam2bam_barcode", "0.1.0",