def get_contract_parser():
    """Build the scatter tool contract parser for chunking cluster pickles.

    Inputs: SubreadSet, ContigSet, cluster pickle, non-full-length pickle.
    Output: chunked JSON file ("pickles.chunked").
    """
    p = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                             "Scatter ContigSet", __doc__,
                             Constants.DRIVER_EXE,
                             chunk_keys=Constants.CHUNK_KEYS,
                             is_distributed=True)
    # FIX: description previously said "PacBio ContigSet" for this
    # SubreadSet input (copy-paste error).
    p.add_input_file_type(FileTypes.DS_SUBREADS, "subreads_in",
                          "SubreadSet In", "PacBio SubreadSet")
    p.add_input_file_type(FileTypes.DS_CONTIG, "fasta_in",
                          "ContigSet", "PacBio ContigSet")
    p.add_input_file_type(FileTypes.PICKLE, "pickle_in",
                          "Pickle", "Cluster pickle file")
    p.add_input_file_type(FileTypes.PICKLE, "nfl_pickle_in",
                          "Pickle", "Non-full-length pickle file")
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk JSON Filtered Fasta",
                           "Chunked JSON ContigSet", "pickles.chunked")
    # max nchunks for this specific task
    p.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
              "max_nchunks", default=Constants.DEFAULT_NCHUNKS,
              name="Max NChunks", description="Maximum number of Chunks")
    return p
def get_contract_parser():
    """Build the scatter tool contract parser for Filter Fasta chunking.

    Input: FASTA file. Output: chunked JSON file ("fasta.chunked").
    """
    driver = "python -m pbcoretools.tasks.scatter_filter_fasta --resolved-tool-contract "
    chunk_keys = ("$chunk.fasta_id", )
    p = get_scatter_pbparser(TOOL_ID, "0.1.3", "Scatter Filter Fasta",
                             "Scatter Filter Fasta", driver, chunk_keys,
                             is_distributed=False)
    p.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta In",
                          "Pac Bio Fasta format")
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk JSON Filtered Fasta",
                           "Chunked JSON Filtered Fasta", "fasta.chunked")
    # max nchunks for this specific task
    p.add_int("pbcoretools.task_options.dev_scatter_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str("pbcoretools.task_options.dev_scatter_chunk_key", "chunk_key",
              Constants.CHUNK_KEY, "Chunk key",
              "Chunk key to use (format $chunk:{chunk-key})")
    return p
def get_contract_parser():
    """Build the scatter tool contract parser for Ice Polish chunk tasks.

    input idx 0: polish_chunk_pickle_id
    input idx 1: sentinel.txt
    input idx 2: *.subreadset.xml
    output idx 0: chunk json
    """
    p = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                             "Scatter Ice Polish Chunks", __doc__,
                             Constants.DRIVER_EXE,
                             chunk_keys=Constants.CHUNK_KEYS,
                             is_distributed=True)
    p.add_input_file_type(FileTypes.PICKLE, "polish_chunk_pickle", "PICKLE",
                          "Polish Chunk Tasks Pickle")  # input idx 0
    # FIX: corrected "Setinel" typo in the description
    p.add_input_file_type(FileTypes.TXT, "sentinel_in", "Sentinel In",
                          "Sentinel file")  # input idx 1
    p.add_input_file_type(FileTypes.DS_SUBREADS, "subreads_in",
                          "SubreadSet In", "PacBio SubreadSet")  # input idx 2
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk JSON Polish Tasks",
                           "Chunked JSON Polish Tasks", "ice_polish.chunked")
    # max nchunks for this specific task
    p.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    return p
def get_contract_parser(tool_id=TOOL_ID, module_name=MODULE_NAME):
    """Return the scatter tool contract parser for AlignmentSet chunking.

    The driver executable is derived from *module_name*; chunk keys and
    option ids come from ``Constants``.
    """
    driver_exe = Constants.DRIVER_BASE.format(module=module_name)
    parser = get_scatter_pbparser(tool_id, "0.1.3", "Scatter AlignmentSet",
                                  "Pacbio DataSet AlignmentSet", driver_exe,
                                  Constants.CHUNK_KEYS, is_distributed=False)
    parser.add_input_file_type(FileTypes.DS_ALIGN, "alignment_ds",
                               "AlignmentSet", "Pacbio DataSet AlignmentSet")
    parser.add_input_file_type(FileTypes.DS_REF, "ds_reference",
                               "ReferenceSet", "Pac Bio Fasta format")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Filtered Fasta",
                                "Chunked JSON Filtered Fasta",
                                "alignments_reference.chunked.json")
    # Upper bound on the number of chunks this task may emit.
    parser.add_int(Constants.OPT_MAX_NCHUNKS, "max_nchunks",
                   Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ConsensusReadSet scatter.

    Inputs: ConsensusReadSet, ReferenceSet. Output: chunked JSON file.
    """
    driver = "python -m pbsmrtpipe.tools_dev.scatter_ccs_reference --resolved-tool-contract "
    p = get_scatter_pbparser(TOOL_ID, "0.1.3", "ConsensusReadSet scatter",
                             "Scatter ConsensusRead DataSet", driver,
                             Constants.CHUNK_KEYS, is_distributed=False)
    p.add_input_file_type(FileTypes.DS_CCS, "ccsset", "ConsensusReadSet",
                          "Pac Bio Fasta format")
    p.add_input_file_type(FileTypes.DS_REF, "ds_reference", "ReferenceSet",
                          "Pac Bio Fasta format")
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk ConsensusReadSet",
                           "PacBio Chunked JSON ConsensusReadSet",
                           "ccsset_chunked.json")
    # max nchunks for this specific task
    # FIXME using same option names as scatter_subread_reference.py - it would
    # be nice if these were more generic
    p.add_int("pbsmrtpipe.task_options.scatter_subread_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str("pbsmrtpipe.task_options.scatter_subreadset_chunk_key",
              "chunk_key", "$chunk:fasta_id", "Chunk key",
              "Chunk key to use (format $chunk:{chunk-key})")
    return p
def get_contract_parser():
    """Return the scatter tool contract parser.

    input:
        idx 0: json_id
        idx 1: txt_id
    output:
        idx 0: chunk json
    """
    parser = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                                  "Scatter json with Scripts into Chunks",
                                  __doc__, Constants.DRIVER_EXE,
                                  chunk_keys=Constants.CHUNK_KEYS,
                                  is_distributed=True)
    # input idx 0
    parser.add_input_file_type(FileTypes.JSON, "json_with_scripts", "JSON",
                               "Json containing scripts")
    # input idx 1
    parser.add_input_file_type(FileTypes.TXT, "sentinel_txt", "TXT",
                               "Sentinel txt")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out", "Chunk JSON",
                                "Chunked JSON", "json_with_scripts.chunked")
    # Upper bound on the number of chunks this specific task may emit.
    parser.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_contract_parser_impl(C):
    """Build a scatter tool contract parser for barcoding from constants *C*.

    :param C: constants class providing TOOL_ID, READ_TYPE, DATASET_TYPE,
        DRIVER_EXE, CHUNK_KEYS and READ_TYPE_ABBREV
    """
    p = get_scatter_pbparser(C.TOOL_ID, "0.1.3",
                             "%sSet ZMW scatter" % C.READ_TYPE,
                             "Scatter %s DataSet for barcoding" % C.READ_TYPE,
                             C.DRIVER_EXE, C.CHUNK_KEYS, is_distributed=False)
    p.add_input_file_type(C.DATASET_TYPE, "dataset", "%sSet" % C.READ_TYPE,
                          "Pac Bio Fasta format")
    p.add_input_file_type(FileTypes.DS_BARCODE, "barcodes", "BarcodeSet",
                          "Pac Bio Barcode Dataset XML")
    p.add_output_file_type(FileTypes.CHUNK, "chunk_report_json",
                           "Chunk %sSet" % C.READ_TYPE,
                           "PacBio Chunked JSON %sSet" % C.READ_TYPE,
                           "%sset_chunked" % C.READ_TYPE_ABBREV)
    # max nchunks for this specific task
    p.add_int("pbcoretools.task_options.scatter_subread_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str("pbcoretools.task_options.scatter_subreadset_chunk_key",
              "chunk_key", "$chunk:subreadset_id", "Chunk key",
              "Chunk key to use (format $chunk:{chunk-key})")
    return p
def get_contract_parser():
    """Build the scatter tool contract parser for Ice Partial chunk tasks.

    input:
        idx 0: partial_chunk_pickle_id
        idx 1: sentinel txt file
        idx 2: ccs_id
    output:
        idx 0: chunk json
    """
    p = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                             "Scatter Ice Partial Chunks", __doc__,
                             Constants.DRIVER_EXE,
                             chunk_keys=Constants.CHUNK_KEYS,
                             is_distributed=True)
    p.add_input_file_type(FileTypes.PICKLE, "partial_chunk_pickle", "PICKLE",
                          "Partial Chunk Tasks Pickle")  # input idx 0
    # FIX: corrected "Setinel" typo in the description
    p.add_input_file_type(FileTypes.TXT, "partial_sentinel_in", "Sentinel In",
                          "Sentinel file")  # input idx 1
    p.add_input_file_type(FileTypes.DS_CCS, "ccs_in", "ConsensusReadSet In",
                          "PacBio ConsensusReadSet")  # input idx 2
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk JSON Partial Tasks",
                           "Chunked JSON Partial Tasks",
                           "ice_partial.chunked")
    # max nchunks for this specific task
    p.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    return p
def get_contract_parser():
    """Get scatter tool contract parser.

    Input:
        idx 0 - cluster_chunks.pickle
        idx 1 - ccs
    Output:
        idx 0 - chunk.json
    """
    parser = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                                  "Scatter Cluster Bins", __doc__,
                                  Constants.DRIVER_EXE,
                                  chunk_keys=Constants.CHUNK_KEYS,
                                  is_distributed=True)
    # input 0
    parser.add_input_file_type(FileTypes.PICKLE, "cluster_chunks_pickle",
                               "Pickle In", "Cluster chunks pickle file")
    # input 1
    parser.add_input_file_type(FileTypes.DS_CCS, "ccs_in",
                               "ConsensusReadSet In",
                               "PacBio ConsensusReadSet")
    # output 0
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Cluster Bins",
                                "Chunked JSON Cluster Bins",
                                "ice_cluster.chunked")
    # Upper bound on the number of chunks this specific task may emit.
    parser.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for Filter Fasta chunking.

    Input: FASTA file. Output: chunked JSON file ("fasta.chunked").
    """
    driver = "python -m pbcoretools.tasks.scatter_filter_fasta --resolved-tool-contract "
    chunk_keys = ("$chunk.fasta_id",)
    p = get_scatter_pbparser(
        TOOL_ID, "0.1.3", "Scatter Filter Fasta", "Scatter Filter Fasta",
        driver, chunk_keys, is_distributed=False
    )
    p.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta In",
                          "Pac Bio Fasta format")
    p.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk JSON Filtered Fasta",
        "Chunked JSON Filtered Fasta", "fasta.chunked"
    )
    # max nchunks for this specific task
    p.add_int(
        "pbcoretools.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str(
        "pbcoretools.task_options.dev_scatter_chunk_key",
        "chunk_key",
        Constants.CHUNK_KEY,
        "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key})",
    )
    return p
def get_contract_parser():
    """Return the scatter tool contract parser.

    input:
        idx 0: fastq_id
        idx 1: gmap_ref_id
    output:
        idx 0: chunk json
    """
    parser = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                                  "Scatter Map Isoforms Chunks", __doc__,
                                  Constants.DRIVER_EXE,
                                  chunk_keys=Constants.CHUNK_KEYS,
                                  is_distributed=True)
    # input idx 0
    parser.add_input_file_type(FileTypes.FASTQ, "fastq_in", "FASTQ In",
                               "HQ isoforms FASTQ file")
    # input 1
    parser.add_input_file_type(FileTypes.DS_GMAP_REF, "gmap_referenceset",
                               "GmapReferenceSet In",
                               "Gmap reference set file")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Map Isoforms Tasks",
                                "Chunked JSON Map Isoforms Tasks",
                                "map_isoforms_to_genome.chunked")
    # Upper bound on the number of chunks this specific task may emit.
    parser.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ConsensusReadSet scatter.

    Inputs: ConsensusReadSet, ReferenceSet. Output: chunked JSON file.
    """
    driver = "python -m pbcoretools.tasks.scatter_ccs_reference --resolved-tool-contract "
    p = get_scatter_pbparser(TOOL_ID, "0.1.3", "ConsensusReadSet scatter",
                             "Scatter ConsensusRead DataSet", driver,
                             Constants.CHUNK_KEYS, is_distributed=True)
    p.add_input_file_type(FileTypes.DS_CCS, "ccsset", "ConsensusReadSet",
                          "Pac Bio Fasta format")
    p.add_input_file_type(FileTypes.DS_REF, "ds_reference", "ReferenceSet",
                          "Pac Bio Fasta format")
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk ConsensusReadSet",
                           "PacBio Chunked JSON ConsensusReadSet",
                           "ccsset_chunked")
    # max nchunks for this specific task
    # FIXME using same option names as scatter_subread_reference.py - it would
    # be nice if these were more generic
    p.add_int("pbcoretools.task_options.scatter_subread_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str("pbcoretools.task_options.scatter_subreadset_chunk_key",
              "chunk_key", "$chunk:fasta_id", "Chunk key",
              "Chunk key to use (format $chunk:{chunk-key})")
    return p
def get_contract_parser():
    """Return the scatter tool contract parser for SubreadSet + reference."""
    driver = "python -m pbcoretools.tasks.scatter_subread_reference --resolved-tool-contract "
    # These Keys are expected to be PipelineChunks produced by this tool
    chunk_keys = ("$chunk.reference_id", "$chunk.subreadset_id")
    parser = get_scatter_pbparser(TOOL_ID, "0.1.3", "SubreadSet scatter",
                                  "Scatter Subread DataSet", driver,
                                  chunk_keys, is_distributed=True)
    parser.add_input_file_type(FileTypes.DS_SUBREADS, "subreads",
                               "SubreadSet", "Pac Bio Fasta format")
    parser.add_input_file_type(FileTypes.DS_REF, "ds_reference",
                               "ReferenceSet", "Pac Bio Fasta format")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk SubreadSet",
                                "PacBio Chunked JSON SubreadSet",
                                "subreadset_chunked")
    # Upper bound on the number of chunks this specific task may emit.
    parser.add_int("pbcoretools.task_options.scatter_subread_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    # NOTE: a chunk-key option is deliberately NOT registered here; per the
    # original FIXME it should only be added at the argparse level and should
    # support --reference-chunk-key and --subread-key.
    return parser
def get_contract_parser_impl(C):
    """Build a scatter tool contract parser for barcoding from constants *C*.

    :param C: constants class providing TOOL_ID, READ_TYPE, DATASET_TYPE,
        DRIVER_EXE, CHUNK_KEYS and READ_TYPE_ABBREV
    """
    p = get_scatter_pbparser(C.TOOL_ID, "0.1.3",
                             "%sSet ZMW scatter" % C.READ_TYPE,
                             "Scatter %s DataSet for barcoding" % C.READ_TYPE,
                             C.DRIVER_EXE, C.CHUNK_KEYS, is_distributed=True)
    p.add_input_file_type(C.DATASET_TYPE, "dataset", "%sSet" % C.READ_TYPE,
                          "Pac Bio Fasta format")
    p.add_input_file_type(FileTypes.DS_BARCODE, "barcodes", "BarcodeSet",
                          "Pac Bio Barcode Dataset XML")
    p.add_output_file_type(FileTypes.CHUNK, "chunk_report_json",
                           "Chunk %sSet" % C.READ_TYPE,
                           "PacBio Chunked JSON %sSet" % C.READ_TYPE,
                           "%sset_chunked" % C.READ_TYPE_ABBREV)
    # max nchunks for this specific task
    p.add_int("pbcoretools.task_options.scatter_subread_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str("pbcoretools.task_options.scatter_subreadset_chunk_key",
              "chunk_key", "$chunk:subreadset_id", "Chunk key",
              "Chunk key to use (format $chunk:{chunk-key})")
    return p
def get_contract_parser():
    """Build the scatter tool contract parser for HdfSubreadSet chunking."""
    p = get_scatter_pbparser(TOOL_ID, "0.1.3", "H5 SubreadSet scatter",
                             "Scatter Hdf5 Subread DataSet",
                             Constants.DRIVER, Constants.CHUNK_KEYS,
                             is_distributed=True,
                             nchunks=SymbolTypes.MAX_NCHUNKS)
    p.add_input_file_type(FileTypes.DS_SUBREADS_H5, "h5_subreads",
                          "HdfSubreadSet", "Pac Bio Fasta format")
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk HdfSubreadSet",
                           "PacBio Chunked JSON HdfSubread Set",
                           "hdfsubreadset_chunked")
    # max nchunks for this specific task
    p.add_int("pbcoretools.task_options.scatter_hdfsubread_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str("pbcoretools.task_options.dev_scatter_chunk_key", "chunk_key",
              "$chunk:fasta_id", "Chunk key",
              "Chunk key to use (format $chunk:{chunk-key})")
    return p
def get_contract_parser():
    """Build the scatter tool contract parser for SubreadSet ZMW scatter."""
    driver = "python -m pbsmrtpipe.tools_dev.scatter_subread_zmws --resolved-tool-contract "
    chunk_keys = ("$chunk.subreadset_id", )
    p = get_scatter_pbparser(TOOL_ID, "0.1.3", "SubreadSet ZMW scatter",
                             "Scatter Subread DataSet by ZMWs", driver,
                             chunk_keys, is_distributed=False)
    p.add_input_file_type(FileTypes.DS_SUBREADS, "subreadset", "SubreadSet",
                          "Pac Bio Fasta format")
    p.add_output_file_type(FileTypes.CHUNK, "chunk_report_json",
                           "Chunk SubreadSet",
                           "PacBio Chunked JSON SubreadSet",
                           "subreadset_chunked.json")
    # max nchunks for this specific task
    p.add_int("pbsmrtpipe.task_options.scatter_subread_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str("pbsmrtpipe.task_options.scatter_subreadset_chunk_key",
              "chunk_key", "$chunk:subreadset_id", "Chunk key",
              "Chunk key to use (format $chunk:{chunk-key})")
    return p
def get_contract_parser(tool_id=TOOL_ID, module_name=MODULE_NAME):
    """Return the scatter tool contract parser for AlignmentSet chunking.

    The driver executable is derived from *module_name*; chunk keys and
    option ids come from ``Constants``.
    """
    driver_exe = Constants.DRIVER_BASE.format(module=module_name)
    parser = get_scatter_pbparser(tool_id, "0.1.3", "Scatter AlignmentSet",
                                  "Pacbio DataSet AlignmentSet", driver_exe,
                                  Constants.CHUNK_KEYS, is_distributed=True)
    parser.add_input_file_type(FileTypes.DS_ALIGN, "alignment_ds",
                               "AlignmentSet", "Pacbio DataSet AlignmentSet")
    parser.add_input_file_type(FileTypes.DS_REF, "ds_reference",
                               "ReferenceSet", "Pac Bio Fasta format")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Filtered Fasta",
                                "Chunked JSON Filtered Fasta",
                                "alignments_reference.chunked.json")
    # Upper bound on the number of chunks this task may emit.
    parser.add_int(Constants.OPT_MAX_NCHUNKS, "max_nchunks",
                   Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_parser():
    """Return the scatter tool contract parser for the dev fasta scatter app."""
    driver = "python -m pbcommand.cli.examples.dev_scatter_fasta_app --resolved-tool-contract "
    desc = "Scatter a single fasta file to create chunk.json file"
    # chunk keys that **will** be written to the file
    chunk_keys = (Constants.FA_CHUNK_KEY, )
    parser = get_scatter_pbparser(TOOL_ID, __version__, "Fasta Scatter",
                                  desc, driver, chunk_keys,
                                  is_distributed=False)
    parser.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta In",
                               "Fasta file to scatter")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson", "Chunk JSON",
                                "Scattered/Chunked Fasta Chunk.json",
                                "fasta.chunks")
    # Advisory chunk count; the pipeline's $max_nchunks may override it.
    parser.add_int("pbcommand.task_options.dev_scatter_fa_nchunks",
                   "nchunks", 10, "Number of chunks",
                   "Suggested number of chunks. May be overridden by $max_nchunks")
    return parser
def get_contract_parser():
    """Return the scatter tool contract parser for stage-0 daligner jobs."""
    driver = "python -m pbfalcon.tasks.scatter0_run_daligner_jobs --resolved-tool-contract "
    parser = get_scatter_pbparser(TOOL_ID, "0.1.3", "Scatter Daligner",
                                  "Scatter Daligner Jobs", driver,
                                  Constants.CHUNK_KEYS, is_distributed=False)
    parser.add_input_file_type(FileTypes.JSON, "config", "Config",
                               "Pac Bio ???")
    parser.add_input_file_type(FileTypes.TXT, "bash", "Bash", "Pac Bio ???")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk of .fasta for daligner, stage-0",
                                "Chunked JSON Filtered Fasta",
                                "fasta.chunked")
    return parser
def get_contract_parser():
    """Return the scatter tool contract parser for stage-1 daligner jobs."""
    driver = "python -m pbfalcon.tasks.scatter1_run_daligner_jobs --resolved-tool-contract "
    parser = get_scatter_pbparser(TOOL_ID, "0.1.3", "Scatter Daligner",
                                  "Scatter Daligner Jobs", driver,
                                  Constants.CHUNK_KEYS, is_distributed=False)
    parser.add_input_file_type(FileTypes.JSON, "config", "Config",
                               "Pac Bio ???")
    parser.add_input_file_type(FileTypes.TXT, "bash", "Bash", "Pac Bio ???")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk of .fasta for daligner, stage-1",
                                "Chunked JSON Filtered Fasta",
                                "fasta.chunked")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ContigSet chunking."""
    driver = "python -m pbsmrtpipe.tools_dev.scatter_contigset --resolved-tool-contract "
    chunk_keys = ("$chunk.contigset_id", )
    p = get_scatter_pbparser(TOOL_ID, "0.1.3", "Scatter ContigSet",
                             "Scatter ContigSet", driver, chunk_keys,
                             is_distributed=False)
    p.add_input_file_type(FileTypes.DS_CONTIG, "dataset_in", "ContigSet In",
                          "PacBio ContigSet")
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk JSON Filtered Fasta",
                           "Chunked JSON ContigSet", "fasta.chunked.json")
    # max nchunks for this specific task
    p.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
              "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
              "Maximum number of Chunks")
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str("pbsmrtpipe.task_options.dev_scatter_chunk_key", "chunk_key",
              Constants.CHUNK_KEY, "Chunk key",
              "Chunk key to use (format $chunk:{chunk-key})")
    return p
def get_contract_parser():
    """Return the scatter tool contract parser for ContigSet + GMAP reference."""
    parser = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                                  "Scatter ContigSet for GMAP", __doc__,
                                  Constants.DRIVER_EXE,
                                  chunk_keys=Constants.CHUNK_KEYS,
                                  is_distributed=True)
    parser.add_input_file_type(FileTypes.DS_CONTIG, "dataset_in",
                               "ContigSet In", "PacBio ContigSet")
    parser.add_input_file_type(FileTypes.DS_REF, "ref_in", "ReferenceSet",
                               "PacBio ReferenceSet")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Filtered Fasta",
                                "Chunked JSON ContigSet", "fasta.chunked")
    # Upper bound on the number of chunks this specific task may emit.
    parser.add_int("pbcoretools.task_options.dev_scatter_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for HdfSubreadSet chunking."""
    p = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "H5 SubreadSet scatter",
        "Scatter Hdf5 Subread DataSet",
        Constants.DRIVER,
        Constants.CHUNK_KEYS,
        is_distributed=False,
        nchunks=SymbolTypes.MAX_NCHUNKS,
    )
    p.add_input_file_type(FileTypes.DS_SUBREADS_H5, "h5_subreads",
                          "HdfSubreadSet", "Pac Bio Fasta format")
    p.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk HdfSubreadSet",
        "PacBio Chunked JSON HdfSubread Set",
        "hdfsubreadset_chunked.json",
    )
    # max nchunks for this specific task
    p.add_int(
        "pbsmrtpipe.task_options.scatter_hdfsubread_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    # FIX: closed the unbalanced parenthesis in the option description
    p.add_str(
        "pbsmrtpipe.task_options.dev_scatter_chunk_key",
        "chunk_key",
        "$chunk:fasta_id",
        "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key})",
    )
    return p
def get_contract_parser():
    """Return the scatter tool contract parser for SubreadSet + reference."""
    driver = "python -m pbsmrtpipe.tools_dev.scatter_subread_reference --resolved-tool-contract "
    # These Keys are expected to be PipelineChunks produced by this tool
    chunk_keys = ("$chunk.reference_id", "$chunk.subreadset_id")
    parser = get_scatter_pbparser(TOOL_ID, "0.1.3", "SubreadSet scatter",
                                  "Scatter Subread DataSet", driver,
                                  chunk_keys, is_distributed=True)
    parser.add_input_file_type(FileTypes.DS_SUBREADS, "subreads",
                               "SubreadSet", "Pac Bio Fasta format")
    parser.add_input_file_type(FileTypes.DS_REF, "ds_reference",
                               "ReferenceSet", "Pac Bio Fasta format")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk SubreadSet",
                                "PacBio Chunked JSON SubreadSet",
                                "subreadset_chunked.json")
    # Upper bound on the number of chunks this specific task may emit.
    parser.add_int("pbsmrtpipe.task_options.scatter_subread_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    # NOTE: a chunk-key option is deliberately NOT registered here; per the
    # original FIXME it should only be added at the argparse level and should
    # support --reference-chunk-key and --subread-key.
    return parser