Example #1
0
def get_contract_parser():
    """Build the scatter tool contract parser for this task.

    Inputs registered, in order:
      idx 0: SubreadSet ("subreads_in")
      idx 1: ContigSet ("fasta_in")
      idx 2: cluster pickle ("pickle_in")
      idx 3: non-full-length pickle ("nfl_pickle_in")
    Output registered:
      idx 0: chunk JSON ("cjson_out")

    Returns the parser created by ``get_scatter_pbparser`` after the file
    types and the ``max_nchunks`` option have been added to it.
    """
    p = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                             "Scatter ContigSet",
                             __doc__, Constants.DRIVER_EXE,
                             chunk_keys=Constants.CHUNK_KEYS,
                             is_distributed=True)

    # This description used to read "PacBio ContigSet" (copied from the
    # ContigSet input registered right after it); the file type is a
    # SubreadSet, so describe it as one.
    p.add_input_file_type(FileTypes.DS_SUBREADS, "subreads_in", "SubreadSet In",
                          "PacBio SubreadSet")
    p.add_input_file_type(FileTypes.DS_CONTIG, "fasta_in", "ContigSet",
                          "PacBio ContigSet")
    p.add_input_file_type(FileTypes.PICKLE, "pickle_in", "Pickle",
                          "Cluster pickle file")
    p.add_input_file_type(FileTypes.PICKLE, "nfl_pickle_in", "Pickle",
                          "Non-full-length pickle file")
    p.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                           "Chunk JSON Filtered Fasta",
                           "Chunked JSON ContigSet",
                           "pickles.chunked")
    # max nchunks for this specific task
    p.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks", "max_nchunks",
              default=Constants.DEFAULT_NCHUNKS,
              name="Max NChunks",
              description="Maximum number of Chunks")
    return p
def get_contract_parser():
    """Build the scatter tool contract parser for the filter-fasta task.

    Registers one FASTA input, one chunk JSON output, and the
    ``max_nchunks`` and ``chunk_key`` task options, then returns the parser.
    """
    driver_exe = "python -m pbcoretools.tasks.scatter_filter_fasta --resolved-tool-contract "
    keys = ("$chunk.fasta_id", )

    parser = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "Scatter Filter Fasta",
        "Scatter Filter Fasta",
        driver_exe,
        keys,
        is_distributed=False,
    )

    parser.add_input_file_type(
        FileTypes.FASTA, "fasta_in", "Fasta In", "Pac Bio Fasta format")
    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON Filtered Fasta",
        "Chunked JSON Filtered Fasta",
        "fasta.chunked",
    )

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbcoretools.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    parser.add_str(
        "pbcoretools.task_options.dev_scatter_chunk_key",
        "chunk_key",
        Constants.CHUNK_KEY,
        "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key}",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ICE polish chunks.

    Inputs, in registration order:
      idx 0: polish chunk tasks pickle
      idx 1: sentinel txt file
      idx 2: SubreadSet
    Outputs:
      idx 0: chunk json
    """
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter Ice Polish Chunks",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    # The position in this tuple is the input index of the contract.
    input_specs = (
        (FileTypes.PICKLE, "polish_chunk_pickle", "PICKLE",
         "Polish Chunk Tasks Pickle"),
        (FileTypes.TXT, "sentinel_in", "Sentinel In", "Setinel file"),
        (FileTypes.DS_SUBREADS, "subreads_in", "SubreadSet In",
         "PacBio SubreadSet"),
    )
    for spec in input_specs:
        parser.add_input_file_type(*spec)

    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON Polish Tasks",
        "Chunked JSON Polish Tasks",
        "ice_polish.chunked",
    )

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
def get_contract_parser(tool_id=TOOL_ID, module_name=MODULE_NAME):
    """Build the scatter tool contract parser for an AlignmentSet task.

    ``tool_id`` is passed through as the tool id and ``module_name`` is
    substituted into ``Constants.DRIVER_BASE`` to form the driver command.
    Registers an AlignmentSet and a ReferenceSet input, one chunk JSON
    output, and the max-nchunks option, then returns the parser.
    """
    driver_exe = Constants.DRIVER_BASE.format(module=module_name)
    parser = get_scatter_pbparser(tool_id, "0.1.3", "Scatter AlignmentSet",
                                  "Pacbio DataSet AlignmentSet", driver_exe,
                                  Constants.CHUNK_KEYS, is_distributed=False)

    parser.add_input_file_type(FileTypes.DS_ALIGN, "alignment_ds",
                               "AlignmentSet", "Pacbio DataSet AlignmentSet")
    parser.add_input_file_type(FileTypes.DS_REF, "ds_reference",
                               "ReferenceSet", "Pac Bio Fasta format")

    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Filtered Fasta",
                                "Chunked JSON Filtered Fasta",
                                "alignments_reference.chunked.json")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(Constants.OPT_MAX_NCHUNKS, "max_nchunks",
                   Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ConsensusReadSet scatter.

    Registers a ConsensusReadSet and a ReferenceSet input, one chunk JSON
    output, and the ``max_nchunks`` and ``chunk_key`` options, then returns
    the parser.
    """
    driver_exe = "python -m pbsmrtpipe.tools_dev.scatter_ccs_reference --resolved-tool-contract "

    parser = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "ConsensusReadSet scatter",
        "Scatter ConsensusRead DataSet",
        driver_exe,
        Constants.CHUNK_KEYS,
        is_distributed=False,
    )

    parser.add_input_file_type(
        FileTypes.DS_CCS, "ccsset", "ConsensusReadSet", "Pac Bio Fasta format")
    parser.add_input_file_type(
        FileTypes.DS_REF, "ds_reference", "ReferenceSet", "Pac Bio Fasta format")

    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk ConsensusReadSet",
        "PacBio Chunked JSON ConsensusReadSet",
        "ccsset_chunked.json",
    )

    # Task-specific upper bound on the number of chunks.
    # FIXME: the option ids are the same ones scatter_subread_reference.py
    # uses; more generic names would be nicer.
    parser.add_int(
        "pbsmrtpipe.task_options.scatter_subread_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    parser.add_str(
        "pbsmrtpipe.task_options.scatter_subreadset_chunk_key",
        "chunk_key",
        "$chunk:fasta_id",
        "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key}",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for a JSON-with-scripts task.

    Inputs, in registration order:
      idx 0: JSON containing scripts
      idx 1: sentinel txt
    Outputs:
      idx 0: chunk json
    """
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter json with Scripts into Chunks",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    # input idx 0
    parser.add_input_file_type(
        FileTypes.JSON, "json_with_scripts", "JSON", "Json containing scripts")
    # input idx 1
    parser.add_input_file_type(
        FileTypes.TXT, "sentinel_txt", "TXT", "Sentinel txt")
    # output idx 0
    parser.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk JSON", "Chunked JSON",
        "json_with_scripts.chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
def get_contract_parser_impl(C):
    """Build a ZMW scatter tool contract parser from the constants in ``C``.

    Reads ``TOOL_ID``, ``READ_TYPE``, ``READ_TYPE_ABBREV``, ``DRIVER_EXE``,
    ``CHUNK_KEYS`` and ``DATASET_TYPE`` from ``C``. The default for the
    ``max_nchunks`` option is taken from the module-level ``Constants``.
    Returns the configured parser.
    """
    read_type = C.READ_TYPE
    display_name = "%sSet ZMW scatter" % read_type
    summary = "Scatter %s DataSet for barcoding" % read_type

    parser = get_scatter_pbparser(C.TOOL_ID, "0.1.3", display_name, summary,
                                  C.DRIVER_EXE, C.CHUNK_KEYS,
                                  is_distributed=False)

    parser.add_input_file_type(C.DATASET_TYPE, "dataset",
                               "%sSet" % read_type, "Pac Bio Fasta format")
    parser.add_input_file_type(FileTypes.DS_BARCODE, "barcodes", "BarcodeSet",
                               "Pac Bio Barcode Dataset XML")
    parser.add_output_file_type(FileTypes.CHUNK, "chunk_report_json",
                                "Chunk %sSet" % read_type,
                                "PacBio Chunked JSON %sSet" % read_type,
                                "%sset_chunked" % C.READ_TYPE_ABBREV)

    # Task-specific upper bound on the number of chunks.
    parser.add_int("pbcoretools.task_options.scatter_subread_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")

    parser.add_str("pbcoretools.task_options.scatter_subreadset_chunk_key",
                   "chunk_key", "$chunk:subreadset_id", "Chunk key",
                   "Chunk key to use (format $chunk:{chunk-key}")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ICE polish chunks.

    Inputs, in registration order:
      idx 0: polish chunk tasks pickle
      idx 1: sentinel txt file
      idx 2: SubreadSet
    Outputs:
      idx 0: chunk json
    """
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter Ice Polish Chunks",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    # input idx 0
    parser.add_input_file_type(
        FileTypes.PICKLE, "polish_chunk_pickle", "PICKLE",
        "Polish Chunk Tasks Pickle")
    # input idx 1
    parser.add_input_file_type(
        FileTypes.TXT, "sentinel_in", "Sentinel In", "Setinel file")
    # input idx 2
    parser.add_input_file_type(
        FileTypes.DS_SUBREADS, "subreads_in", "SubreadSet In",
        "PacBio SubreadSet")
    # output idx 0
    parser.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk JSON Polish Tasks",
        "Chunked JSON Polish Tasks", "ice_polish.chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ICE partial chunks.

    Inputs, in registration order:
      idx 0: partial chunk tasks pickle
      idx 1: sentinel txt file
      idx 2: ConsensusReadSet
    Outputs:
      idx 0: chunk json
    """
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter Ice Partial Chunks",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    # The position in this tuple is the input index of the contract.
    input_specs = (
        (FileTypes.PICKLE, "partial_chunk_pickle", "PICKLE",
         "Partial Chunk Tasks Pickle"),
        (FileTypes.TXT, "partial_sentinel_in", "Sentinel In", "Setinel file"),
        (FileTypes.DS_CCS, "ccs_in", "ConsensusReadSet In",
         "PacBio ConsensusReadSet"),
    )
    for spec in input_specs:
        parser.add_input_file_type(*spec)

    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON Partial Tasks",
        "Chunked JSON Partial Tasks",
        "ice_partial.chunked",
    )

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for cluster bins.

    Inputs, in registration order:
        idx 0 - cluster chunks pickle
        idx 1 - ConsensusReadSet
    Outputs:
        idx 0 - chunk json
    """
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter Cluster Bins",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    # input 0
    parser.add_input_file_type(
        FileTypes.PICKLE, "cluster_chunks_pickle", "Pickle In",
        "Cluster chunks pickle file")
    # input 1
    parser.add_input_file_type(
        FileTypes.DS_CCS, "ccs_in", "ConsensusReadSet In",
        "PacBio ConsensusReadSet")
    # output 0
    parser.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk JSON Cluster Bins",
        "Chunked JSON Cluster Bins", "ice_cluster.chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for a JSON-with-scripts task.

    Inputs, in registration order:
      idx 0: JSON containing scripts
      idx 1: sentinel txt
    Outputs:
      idx 0: chunk json
    """
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter json with Scripts into Chunks",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    parser.add_input_file_type(
        FileTypes.JSON,
        "json_with_scripts",
        "JSON",
        "Json containing scripts",
    )  # input idx 0
    parser.add_input_file_type(
        FileTypes.TXT,
        "sentinel_txt",
        "TXT",
        "Sentinel txt",
    )  # input idx 1
    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON",
        "Chunked JSON",
        "json_with_scripts.chunked",
    )

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for the filter-fasta task.

    Registers one FASTA input, one chunk JSON output, and the
    ``max_nchunks`` and ``chunk_key`` task options, then returns the parser.
    """
    driver_exe = "python -m pbcoretools.tasks.scatter_filter_fasta --resolved-tool-contract "
    keys = ("$chunk.fasta_id",)

    parser = get_scatter_pbparser(TOOL_ID, "0.1.3", "Scatter Filter Fasta",
                                  "Scatter Filter Fasta", driver_exe, keys,
                                  is_distributed=False)

    parser.add_input_file_type(FileTypes.FASTA, "fasta_in", "Fasta In",
                               "Pac Bio Fasta format")
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Filtered Fasta",
                                "Chunked JSON Filtered Fasta",
                                "fasta.chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int("pbcoretools.task_options.dev_scatter_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    parser.add_str("pbcoretools.task_options.dev_scatter_chunk_key",
                   "chunk_key", Constants.CHUNK_KEY, "Chunk key",
                   "Chunk key to use (format $chunk:{chunk-key}")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for map-isoforms chunks.

    Inputs, in registration order:
      idx 0: HQ isoforms FASTQ
      idx 1: GmapReferenceSet
    Outputs:
      idx 0: chunk json
    """
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter Map Isoforms Chunks",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    # input idx 0
    parser.add_input_file_type(
        FileTypes.FASTQ, "fastq_in", "FASTQ In", "HQ isoforms FASTQ file")
    # input idx 1
    parser.add_input_file_type(
        FileTypes.DS_GMAP_REF, "gmap_referenceset", "GmapReferenceSet In",
        "Gmap reference set file")
    # output idx 0
    parser.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk JSON Map Isoforms Tasks",
        "Chunked JSON Map Isoforms Tasks", "map_isoforms_to_genome.chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ConsensusReadSet scatter.

    Registers a ConsensusReadSet and a ReferenceSet input, one chunk JSON
    output, and the ``max_nchunks`` and ``chunk_key`` options, then returns
    the parser.
    """
    driver_exe = "python -m pbcoretools.tasks.scatter_ccs_reference --resolved-tool-contract "

    parser = get_scatter_pbparser(
        TOOL_ID, "0.1.3", "ConsensusReadSet scatter",
        "Scatter ConsensusRead DataSet", driver_exe, Constants.CHUNK_KEYS,
        is_distributed=True)

    parser.add_input_file_type(
        FileTypes.DS_CCS, "ccsset", "ConsensusReadSet", "Pac Bio Fasta format")
    parser.add_input_file_type(
        FileTypes.DS_REF, "ds_reference", "ReferenceSet",
        "Pac Bio Fasta format")

    parser.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk ConsensusReadSet",
        "PacBio Chunked JSON ConsensusReadSet", "ccsset_chunked")

    # Task-specific upper bound on the number of chunks.
    # FIXME: the option ids are the same ones scatter_subread_reference.py
    # uses; more generic names would be nicer.
    parser.add_int(
        "pbcoretools.task_options.scatter_subread_max_nchunks",
        "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
        "Maximum number of Chunks")

    parser.add_str(
        "pbcoretools.task_options.scatter_subreadset_chunk_key",
        "chunk_key", "$chunk:fasta_id", "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key}")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for map-isoforms chunks.

    Inputs, in registration order:
      idx 0: HQ isoforms FASTQ
      idx 1: GmapReferenceSet
    Outputs:
      idx 0: chunk json
    """
    parser = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                                  "Scatter Map Isoforms Chunks", __doc__,
                                  Constants.DRIVER_EXE,
                                  chunk_keys=Constants.CHUNK_KEYS,
                                  is_distributed=True)

    # input idx 0
    parser.add_input_file_type(FileTypes.FASTQ, "fastq_in", "FASTQ In",
                               "HQ isoforms FASTQ file")
    # input idx 1
    parser.add_input_file_type(FileTypes.DS_GMAP_REF, "gmap_referenceset",
                               "GmapReferenceSet In",
                               "Gmap reference set file")
    # output idx 0
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Map Isoforms Tasks",
                                "Chunked JSON Map Isoforms Tasks",
                                "map_isoforms_to_genome.chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for SubreadSet scatter.

    Registers a SubreadSet and a ReferenceSet input, one chunk JSON output,
    and the ``max_nchunks`` option, then returns the parser. No
    ``chunk_key`` option is registered here.
    """
    driver_exe = "python -m pbcoretools.tasks.scatter_subread_reference --resolved-tool-contract "

    # Keys expected on the PipelineChunks produced by this tool.
    keys = ("$chunk.reference_id", "$chunk.subreadset_id")

    parser = get_scatter_pbparser(
        TOOL_ID, "0.1.3", "SubreadSet scatter", "Scatter Subread DataSet",
        driver_exe, keys, is_distributed=True)

    parser.add_input_file_type(
        FileTypes.DS_SUBREADS, "subreads", "SubreadSet",
        "Pac Bio Fasta format")
    parser.add_input_file_type(
        FileTypes.DS_REF, "ds_reference", "ReferenceSet",
        "Pac Bio Fasta format")

    parser.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk SubreadSet",
        "PacBio Chunked JSON SubreadSet", "subreadset_chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbcoretools.task_options.scatter_subread_max_nchunks",
        "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
        "Maximum number of Chunks")

    # FIXME: the "pbcoretools.task_options.scatter_subreadset_chunk_key"
    # string option ("chunk_key", default "$chunk:fasta_id") is disabled for
    # now. It should only be added at the argparse level, and should support
    # --reference-chunk-key and --subread-key.
    return parser
Example #17
0
def get_contract_parser_impl(C):
    """Build a ZMW scatter tool contract parser from the constants in ``C``.

    Reads ``TOOL_ID``, ``READ_TYPE``, ``READ_TYPE_ABBREV``, ``DRIVER_EXE``,
    ``CHUNK_KEYS`` and ``DATASET_TYPE`` from ``C``. The default for the
    ``max_nchunks`` option is taken from the module-level ``Constants``.
    Returns the configured parser.
    """
    read_type = C.READ_TYPE

    parser = get_scatter_pbparser(
        C.TOOL_ID,
        "0.1.3",
        "%sSet ZMW scatter" % read_type,
        "Scatter %s DataSet for barcoding" % read_type,
        C.DRIVER_EXE,
        C.CHUNK_KEYS,
        is_distributed=True,
    )

    parser.add_input_file_type(
        C.DATASET_TYPE, "dataset", "%sSet" % read_type,
        "Pac Bio Fasta format")
    parser.add_input_file_type(
        FileTypes.DS_BARCODE, "barcodes", "BarcodeSet",
        "Pac Bio Barcode Dataset XML")
    parser.add_output_file_type(
        FileTypes.CHUNK,
        "chunk_report_json",
        "Chunk %sSet" % read_type,
        "PacBio Chunked JSON %sSet" % read_type,
        "%sset_chunked" % C.READ_TYPE_ABBREV,
    )

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbcoretools.task_options.scatter_subread_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    parser.add_str(
        "pbcoretools.task_options.scatter_subreadset_chunk_key",
        "chunk_key",
        "$chunk:subreadset_id",
        "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key}",
    )
    return parser
Example #18
0
def get_contract_parser():
    """Build the scatter tool contract parser for HdfSubreadSet scatter.

    Registers one HdfSubreadSet input, one chunk JSON output, and the
    ``max_nchunks`` and ``chunk_key`` options, then returns the parser.
    """
    parser = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "H5 SubreadSet scatter",
        "Scatter Hdf5 Subread DataSet",
        Constants.DRIVER,
        Constants.CHUNK_KEYS,
        is_distributed=True,
        nchunks=SymbolTypes.MAX_NCHUNKS,
    )

    parser.add_input_file_type(
        FileTypes.DS_SUBREADS_H5, "h5_subreads", "HdfSubreadSet",
        "Pac Bio Fasta format")

    parser.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk HdfSubreadSet",
        "PacBio Chunked JSON HdfSubread Set", "hdfsubreadset_chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbcoretools.task_options.scatter_hdfsubread_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    parser.add_str(
        "pbcoretools.task_options.dev_scatter_chunk_key",
        "chunk_key",
        "$chunk:fasta_id",
        "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key}",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for cluster bins.

    Inputs, in registration order:
        idx 0 - cluster chunks pickle
        idx 1 - ConsensusReadSet
    Outputs:
        idx 0 - chunk json
    """
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter Cluster Bins",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    parser.add_input_file_type(
        FileTypes.PICKLE,
        "cluster_chunks_pickle",
        "Pickle In",
        "Cluster chunks pickle file",
    )  # input 0
    parser.add_input_file_type(
        FileTypes.DS_CCS,
        "ccs_in",
        "ConsensusReadSet In",
        "PacBio ConsensusReadSet",
    )  # input 1
    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON Cluster Bins",
        "Chunked JSON Cluster Bins",
        "ice_cluster.chunked",
    )  # output 0

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
Example #20
0
def get_contract_parser():
    """Build the scatter tool contract parser for SubreadSet ZMW scatter.

    Registers one SubreadSet input, one chunk JSON output, and the
    ``max_nchunks`` and ``chunk_key`` options, then returns the parser.
    """
    driver_exe = "python -m pbsmrtpipe.tools_dev.scatter_subread_zmws --resolved-tool-contract "
    keys = ("$chunk.subreadset_id", )

    parser = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "SubreadSet ZMW scatter",
        "Scatter Subread DataSet by ZMWs",
        driver_exe,
        keys,
        is_distributed=False,
    )

    parser.add_input_file_type(
        FileTypes.DS_SUBREADS, "subreadset", "SubreadSet",
        "Pac Bio Fasta format")

    parser.add_output_file_type(
        FileTypes.CHUNK, "chunk_report_json", "Chunk SubreadSet",
        "PacBio Chunked JSON SubreadSet", "subreadset_chunked.json")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.scatter_subread_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    parser.add_str(
        "pbsmrtpipe.task_options.scatter_subreadset_chunk_key",
        "chunk_key",
        "$chunk:subreadset_id",
        "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key}",
    )
    return parser
Example #21
0
def get_contract_parser():
    """Build the scatter tool contract parser for SubreadSet ZMW scatter.

    Registers one SubreadSet input, one chunk JSON output, and the
    ``max_nchunks`` and ``chunk_key`` options, then returns the parser.
    """
    driver_exe = "python -m pbsmrtpipe.tools_dev.scatter_subread_zmws --resolved-tool-contract "
    keys = ("$chunk.subreadset_id", )

    parser = get_scatter_pbparser(TOOL_ID, "0.1.3", "SubreadSet ZMW scatter",
                                  "Scatter Subread DataSet by ZMWs",
                                  driver_exe, keys, is_distributed=False)

    parser.add_input_file_type(FileTypes.DS_SUBREADS,
                               "subreadset",
                               "SubreadSet",
                               "Pac Bio Fasta format")

    parser.add_output_file_type(FileTypes.CHUNK,
                                "chunk_report_json",
                                "Chunk SubreadSet",
                                "PacBio Chunked JSON SubreadSet",
                                "subreadset_chunked.json")

    # Task-specific upper bound on the number of chunks.
    parser.add_int("pbsmrtpipe.task_options.scatter_subread_max_nchunks",
                   "max_nchunks",
                   Constants.DEFAULT_NCHUNKS,
                   "Max NChunks",
                   "Maximum number of Chunks")

    parser.add_str("pbsmrtpipe.task_options.scatter_subreadset_chunk_key",
                   "chunk_key",
                   "$chunk:subreadset_id",
                   "Chunk key",
                   "Chunk key to use (format $chunk:{chunk-key}")
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for ICE partial chunks.

    Inputs, in registration order:
      idx 0: partial chunk tasks pickle
      idx 1: sentinel txt file
      idx 2: ConsensusReadSet
    Outputs:
      idx 0: chunk json
    """
    parser = get_scatter_pbparser(Constants.TOOL_ID, Constants.VERSION,
                                  "Scatter Ice Partial Chunks", __doc__,
                                  Constants.DRIVER_EXE,
                                  chunk_keys=Constants.CHUNK_KEYS,
                                  is_distributed=True)

    # input idx 0
    parser.add_input_file_type(FileTypes.PICKLE, "partial_chunk_pickle",
                               "PICKLE", "Partial Chunk Tasks Pickle")
    # input idx 1
    parser.add_input_file_type(FileTypes.TXT, "partial_sentinel_in",
                               "Sentinel In", "Setinel file")
    # input idx 2
    parser.add_input_file_type(FileTypes.DS_CCS, "ccs_in",
                               "ConsensusReadSet In",
                               "PacBio ConsensusReadSet")
    # output idx 0
    parser.add_output_file_type(FileTypes.CHUNK, "cjson_out",
                                "Chunk JSON Partial Tasks",
                                "Chunked JSON Partial Tasks",
                                "ice_partial.chunked")

    # Task-specific upper bound on the number of chunks.
    parser.add_int("pbsmrtpipe.task_options.dev_scatter_max_nchunks",
                   "max_nchunks", Constants.DEFAULT_NCHUNKS, "Max NChunks",
                   "Maximum number of Chunks")
    return parser
def get_contract_parser(tool_id=TOOL_ID, module_name=MODULE_NAME):
    """Build the scatter tool contract parser for an AlignmentSet task.

    ``tool_id`` is passed through as the tool id and ``module_name`` is
    substituted into ``Constants.DRIVER_BASE`` to form the driver command.
    Registers an AlignmentSet and a ReferenceSet input, one chunk JSON
    output, and the max-nchunks option, then returns the parser.
    """
    driver_exe = Constants.DRIVER_BASE.format(module=module_name)

    parser = get_scatter_pbparser(
        tool_id,
        "0.1.3",
        "Scatter AlignmentSet",
        "Pacbio DataSet AlignmentSet",
        driver_exe,
        Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    parser.add_input_file_type(
        FileTypes.DS_ALIGN, "alignment_ds", "AlignmentSet",
        "Pacbio DataSet AlignmentSet")
    parser.add_input_file_type(
        FileTypes.DS_REF, "ds_reference", "ReferenceSet",
        "Pac Bio Fasta format")

    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON Filtered Fasta",
        "Chunked JSON Filtered Fasta",
        "alignments_reference.chunked.json",
    )

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        Constants.OPT_MAX_NCHUNKS,
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
def get_parser():
    """Build the scatter tool contract parser for the dev fasta scatter app.

    Registers one FASTA input, one chunk JSON output, and an ``nchunks``
    integer option with a default of 10, then returns the parser.
    """
    driver_exe = "python -m pbcommand.cli.examples.dev_scatter_fasta_app --resolved-tool-contract "
    summary = "Scatter a single fasta file to create chunk.json file"
    # Chunk keys that **will** be written to the file.
    keys = (Constants.FA_CHUNK_KEY, )

    parser = get_scatter_pbparser(
        TOOL_ID,
        __version__,
        "Fasta Scatter",
        summary,
        driver_exe,
        keys,
        is_distributed=False,
    )
    parser.add_input_file_type(
        FileTypes.FASTA, "fasta_in", "Fasta In", "Fasta file to scatter")
    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson",
        "Chunk JSON",
        "Scattered/Chunked Fasta Chunk.json",
        "fasta.chunks",
    )
    parser.add_int(
        "pbcommand.task_options.dev_scatter_fa_nchunks",
        "nchunks",
        10,
        "Number of chunks",
        "Suggested number of chunks. May be overridden by $max_nchunks",
    )
    return parser
def get_contract_parser():
    """Build the scatter tool contract parser for stage-0 daligner jobs.

    Registers a JSON config input, a TXT bash input, and one chunk JSON
    output, then returns the parser. No task options are registered.
    """
    driver_exe = "python -m pbfalcon.tasks.scatter0_run_daligner_jobs --resolved-tool-contract "

    parser = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "Scatter Daligner",
        "Scatter Daligner Jobs",
        driver_exe,
        Constants.CHUNK_KEYS,
        is_distributed=False,
    )

    parser.add_input_file_type(
        FileTypes.JSON, "config", "Config", "Pac Bio ???")
    parser.add_input_file_type(
        FileTypes.TXT, "bash", "Bash", "Pac Bio ???")
    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk of .fasta for daligner, stage-0",
        "Chunked JSON Filtered Fasta",
        "fasta.chunked",
    )
    return parser
Example #26
0
def get_contract_parser():
    """Build the scatter tool contract parser for stage-1 daligner jobs.

    Registers a JSON config input, a TXT bash input, and one chunk JSON
    output, then returns the parser. No task options are registered.
    """
    driver_exe = "python -m pbfalcon.tasks.scatter1_run_daligner_jobs --resolved-tool-contract "

    parser = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "Scatter Daligner",
        "Scatter Daligner Jobs",
        driver_exe,
        Constants.CHUNK_KEYS,
        is_distributed=False,
    )

    parser.add_input_file_type(
        FileTypes.JSON, "config", "Config", "Pac Bio ???")
    parser.add_input_file_type(
        FileTypes.TXT, "bash", "Bash", "Pac Bio ???")
    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk of .fasta for daligner, stage-1",
        "Chunked JSON Filtered Fasta",
        "fasta.chunked",
    )
    return parser
Example #27
0
def get_contract_parser():
    """Build the scatter tool contract parser for ContigSet scatter.

    Registers one ContigSet input, one chunk JSON output, and the
    ``max_nchunks`` and ``chunk_key`` options, then returns the parser.
    """
    driver_exe = "python -m pbsmrtpipe.tools_dev.scatter_contigset --resolved-tool-contract "
    keys = ("$chunk.contigset_id", )

    parser = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "Scatter ContigSet",
        "Scatter ContigSet",
        driver_exe,
        keys,
        is_distributed=False,
    )

    parser.add_input_file_type(
        FileTypes.DS_CONTIG, "dataset_in", "ContigSet In", "PacBio ContigSet")
    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON Filtered Fasta",
        "Chunked JSON ContigSet",
        "fasta.chunked.json",
    )

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    parser.add_str(
        "pbsmrtpipe.task_options.dev_scatter_chunk_key",
        "chunk_key",
        Constants.CHUNK_KEY,
        "Chunk key",
        "Chunk key to use (format $chunk:{chunk-key}",
    )
    return parser
def get_contract_parser():
    """Create the tool-contract parser for the GMAP ContigSet scatter task.

    Registers a ContigSet input (``dataset_in``), a ReferenceSet input
    (``ref_in``), a CHUNK output (``cjson_out``) and the ``max_nchunks``
    integer option.

    :return: the parser obtained from ``get_scatter_pbparser`` with the
        file types and option registered on it.
    """
    # ``__doc__`` below is the module-level docstring, not this function's.
    parser = get_scatter_pbparser(
        Constants.TOOL_ID,
        Constants.VERSION,
        "Scatter ContigSet for GMAP",
        __doc__,
        Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True,
    )

    parser.add_input_file_type(
        FileTypes.DS_CONTIG, "dataset_in", "ContigSet In", "PacBio ContigSet")
    parser.add_input_file_type(
        FileTypes.DS_REF, "ref_in", "ReferenceSet", "PacBio ReferenceSet")

    parser.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON Filtered Fasta",
        "Chunked JSON ContigSet",
        "fasta.chunked",
    )

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbcoretools.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )
    return parser
Example #29
0
def get_contract_parser():
    """Build the tool-contract parser for the HdfSubreadSet scatter task.

    One HdfSubreadSet input (``h5_subreads``) and one CHUNK output
    (``cjson_out``) are declared, plus two task options: ``max_nchunks``
    and ``chunk_key``.

    :return: the parser obtained from ``get_scatter_pbparser`` with the
        file types and options registered on it.
    """
    parser = get_scatter_pbparser(TOOL_ID, "0.1.3",
                                  "H5 SubreadSet scatter",
                                  "Scatter Hdf5 Subread DataSet",
                                  Constants.DRIVER,
                                  Constants.CHUNK_KEYS,
                                  is_distributed=False,
                                  nchunks=SymbolTypes.MAX_NCHUNKS)

    # NOTE(review): the description string mentions Fasta although the file
    # type is an HdfSubreadSet -- looks copy-pasted; confirm before changing.
    parser.add_input_file_type(FileTypes.DS_SUBREADS_H5,
                               "h5_subreads",
                               "HdfSubreadSet",
                               "Pac Bio Fasta format")

    parser.add_output_file_type(FileTypes.CHUNK,
                                "cjson_out",
                                "Chunk HdfSubreadSet",
                                "PacBio Chunked JSON HdfSubread Set",
                                "hdfsubreadset_chunked.json")

    # Task-specific upper bound on the number of chunks.
    parser.add_int("pbsmrtpipe.task_options.scatter_hdfsubread_max_nchunks",
                   "max_nchunks",
                   Constants.DEFAULT_NCHUNKS,
                   "Max NChunks",
                   "Maximum number of Chunks")

    parser.add_str("pbsmrtpipe.task_options.dev_scatter_chunk_key",
                   "chunk_key",
                   "$chunk:fasta_id",
                   "Chunk key",
                   "Chunk key to use (format $chunk:{chunk-key}")
    return parser
def get_contract_parser():
    """Return the scatter tool-contract parser for ContigSet-for-GMAP.

    The parser is given two inputs -- a ContigSet (``dataset_in``) and a
    ReferenceSet (``ref_in``) -- one CHUNK output (``cjson_out``) and an
    integer ``max_nchunks`` option.

    :return: the parser obtained from ``get_scatter_pbparser`` with the
        file types and option registered on it.
    """
    # The description passed here is the module docstring (global __doc__).
    contract = get_scatter_pbparser(
        Constants.TOOL_ID, Constants.VERSION,
        "Scatter ContigSet for GMAP",
        __doc__, Constants.DRIVER_EXE,
        chunk_keys=Constants.CHUNK_KEYS,
        is_distributed=True)

    contract.add_input_file_type(
        FileTypes.DS_CONTIG,
        "dataset_in",
        "ContigSet In",
        "PacBio ContigSet")
    contract.add_input_file_type(
        FileTypes.DS_REF,
        "ref_in",
        "ReferenceSet",
        "PacBio ReferenceSet")
    contract.add_output_file_type(
        FileTypes.CHUNK,
        "cjson_out",
        "Chunk JSON Filtered Fasta",
        "Chunked JSON ContigSet",
        "fasta.chunked")

    # Per-task limit on how many chunks may be produced.
    contract.add_int(
        "pbcoretools.task_options.dev_scatter_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks")
    return contract
def get_contract_parser():
    """Build the tool-contract parser for the SubreadSet/ReferenceSet scatter.

    Declares a SubreadSet input (``subreads``), a ReferenceSet input
    (``ds_reference``), a CHUNK output (``cjson_out``) and the integer
    ``max_nchunks`` option.

    :return: the parser obtained from ``get_scatter_pbparser`` with the
        file types and option registered on it.
    """
    driver_exe = ("python -m pbsmrtpipe.tools_dev.scatter_subread_reference "
                  "--resolved-tool-contract ")

    # Keys the PipelineChunks written by this tool are expected to carry.
    keys = ("$chunk.reference_id", "$chunk.subreadset_id")

    parser = get_scatter_pbparser(
        TOOL_ID,
        "0.1.3",
        "SubreadSet scatter",
        "Scatter Subread DataSet",
        driver_exe,
        keys,
        is_distributed=True,
    )

    parser.add_input_file_type(
        FileTypes.DS_SUBREADS, "subreads", "SubreadSet",
        "Pac Bio Fasta format")
    parser.add_input_file_type(
        FileTypes.DS_REF, "ds_reference", "ReferenceSet",
        "Pac Bio Fasta format")

    parser.add_output_file_type(
        FileTypes.CHUNK, "cjson_out", "Chunk SubreadSet",
        "PacBio Chunked JSON SubreadSet", "subreadset_chunked.json")

    # Task-specific upper bound on the number of chunks.
    parser.add_int(
        "pbsmrtpipe.task_options.scatter_subread_max_nchunks",
        "max_nchunks",
        Constants.DEFAULT_NCHUNKS,
        "Max NChunks",
        "Maximum number of Chunks",
    )

    # FIXME: a chunk-key option is intentionally disabled for now. It should
    # only be added at the argparse level and should support both
    # --reference-chunk-key and --subread-key. The disabled call was:
    # p.arg_parser.add_str("pbsmrtpipe.task_options.scatter_subreadset_chunk_key",
    #                      "chunk_key",
    #                      "$chunk:fasta_id", "Chunk key", "Chunk key to use (format $chunk:{chunk-key}")
    return parser