Example #1
0
def parseOptions(program_version_message, argv=None):
    """Build the Oncotator command-line parser and parse the arguments.

    :param program_version_message: version banner.  It is prepended to the
        help description and printed by -V/--version.
    :param argv: optional list of argument strings to parse.  When None (the
        default), argparse reads sys.argv[1:], exactly as the original
        single-argument form of this function did.
    :return: the argparse.Namespace produced by ArgumentParser.parse_args.
    """
    # Setup argument parser
    description = program_version_message + '''

    Oncotator is a tool for annotating human genomic point mutations and indels with data relevant to cancer researchers.

    '''
    # The description and epilog are rendered verbatim (RawDescriptionHelpFormatter),
    # so the indentation and blank lines below are part of the --help output.
    epilog = '''
    Example usage
    -------------
    oncotator -v --input_format=MAFLITE --output_format=TCGAMAF myInputFile.maflite myOutputFile.maf.annotated hg19

    IMPORTANT NOTE:  hg19 is the only supported genome build for now.

    Default values specified by -d or --annotate-default are used when an annotation does not exist or is populated with an empty string ("")

    Both default and override config files and command line specifications stack.

    Example of an override_config or default_config file:

    # Create center, source, sequencer, and score annotations, with the values broad.mit.edu, WXS, Illumina GAIIx, and <blank> for all mutations.
    #  This will overwrite all mutations.
    [manual_annotations]
    override:center=broad.mit.edu,source=WXS,sequencer=Illumina GAIIx,score=

    Example of cache urls:

    # Use a file (/home/user/myfile.cache) ... note the three forward slashes after "file:" for absolute path.
    -u file:///home/user/myfile.cache
    -u file://relative_file.cache

    # memcache
    -u memcache://localhost:11211

    Please note that only VCF input will populate the alt_allele_seen annotation.  All other inputs assume that the alternate is present if it appears at all.
        This feature is to allow users to include or exclude GT of 0/0 or ./. variants when converting VCFs to MAF.

        If --skip-no-alt is specified, VCF input processing will remove mutations with alt_allele_seen of False entirely (the mutations will not even be seen when output format is SIMPLE_TSV).

    -----
    Copyright 2012 Broad Institute. All rights reserved.  Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied.
    Oncotator is free for non-profit use.  See LICENSE for complete licensing information.
    '''
    parser = ArgumentParser(description=description,
                            formatter_class=RawDescriptionHelpFormatter,
                            epilog=epilog)

    # NOTE: help strings use argparse's own %(default)s specifier instead of
    # pre-formatting with "%s" % value.  argparse %-expands every help string,
    # so a default containing a literal '%' (e.g. in a path) would otherwise
    # break --help.

    # action="count" starts counting from the default, so each -v raises the
    # level above 5.
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="count",
                        help="set verbosity level [default: %(default)s]",
                        default=5)
    parser.add_argument('-V',
                        '--version',
                        action='version',
                        version=program_version_message)
    parser.add_argument(
        '-i',
        '--input_format',
        type=str,
        default="MAFLITE",
        choices=OncotatorCLIUtils.getSupportedInputFormats(),
        help=
        'Input format.  Note that MAFLITE will work for any tsv file with appropriate headers, so long as all of the required headers (or an alias -- see maflite.config) are present.  [default: %(default)s]'
    )
    parser.add_argument(
        '--db-dir',
        dest='dbDir',
        default=DEFAULT_DB_DIR,
        help='Main annotation database directory. [default: %(default)s]')
    parser.add_argument('-o',
                        '--output_format',
                        type=str,
                        default="TCGAMAF",
                        choices=OncotatorCLIUtils.getSupportedOutputFormats(),
                        help='Output format. [default: %(default)s]')
    parser.add_argument(
        '--override_config',
        type=str,
        help=
        "File path to manual annotations in a config file format (section is 'manual_annotations' and annotation:value pairs)."
    )
    parser.add_argument(
        '--default_config',
        type=str,
        help=
        "File path to default annotation values in a config file format (section is 'manual_annotations' and annotation:value pairs)."
    )
    parser.add_argument('--no-multicore',
                        dest="noMulticore",
                        action='store_true',
                        default=False,
                        help="Disables all multicore functionality.")

    # Positional arguments, in the order they must appear on the command line.
    parser.add_argument(
        'input_file',
        type=str,
        help='Input file to be annotated.  Type is specified through options.')
    parser.add_argument('output_file',
                        type=str,
                        help='Output file name of annotated file.')
    parser.add_argument('genome_build',
                        metavar='genome_build',
                        type=str,
                        help="Genome build.  For example: hg19",
                        choices=["hg19"])

    # Repeatable options: every occurrence is appended to the destination list.
    parser.add_argument(
        '-a',
        '--annotate-manual',
        dest="override_cli",
        type=str,
        action='append',
        default=[],
        help=
        "Specify annotations to override.  Can be specified multiple times.  E.g. -a 'name1:value1' -a 'name2:value2' "
    )
    parser.add_argument(
        '-d',
        '--annotate-default',
        dest="default_cli",
        type=str,
        action='append',
        default=[],
        help=
        "Specify default values for annotations.  Can be specified multiple times.  E.g. -d 'name1:value1' -d 'name2:value2' "
    )
    parser.add_argument('-u',
                        '--cache-url',
                        dest="cache_url",
                        type=str,
                        default=None,
                        help=" URL to use for cache.  See help for examples.")
    parser.add_argument('-r',
                        '--read_only_cache',
                        action='store_true',
                        dest="read_only_cache",
                        default=False,
                        help="Makes the cache read-only")
    parser.add_argument(
        '--tx-mode',
        dest="tx_mode",
        default=DEFAULT_TX_MODE,
        choices=TranscriptProvider.TX_MODE_CHOICES,
        help=
        "Specify transcript mode for transcript providing datasources that support multiple modes.  [default: %(default)s]"
    )
    # Kept as a string choice (not a store_true flag); the value is returned
    # to the caller unconverted.
    parser.add_argument(
        '--infer_genotypes',
        dest='infer_genotypes',
        default="false",
        choices=["yes", "true", "t", "1", "y", "no", "false", "f", "0", "n"],
        help=
        "Forces the VCF output renderer to populate the output genotypes as heterozygous.  This option should only be used when converting a MAFLITE to a VCF; otherwise, the option has no effect.  [default: %(default)s]"
    )
    parser.add_argument(
        '--skip-no-alt',
        dest="skip_no_alt",
        action='store_true',
        help=
        "If specified, any mutation with annotation alt_allele_seen of 'False' will not be annotated or rendered.  Do not use if output format is a VCF.  If alt_allele_seen annotation is missing, render the mutation."
    )
    parser.add_argument(
        '--log_name',
        dest='log_name',
        default="oncotator.log",
        help="Specify log output location.  Default: %(default)s")
    parser.add_argument(
        '--prepend',
        dest="prepend",
        action='store_true',
        help=
        "If specified for TCGAMAF output, will put a 'i_' in front of fields that are not directly rendered in Oncotator TCGA MAFs"
    )
    parser.add_argument(
        '--infer-onps',
        dest="infer_onps",
        action='store_true',
        help=
        "Will merge adjacent SNPs,DNPs,TNPs,etc if they are in the same sample.  This assumes that the input file is position sorted.  This may cause problems with VCF -> VCF conversion, and does not guarantee input order is maintained."
    )
    parser.add_argument(
        '-c',
        '--canonical-tx-file',
        dest="canonical_tx_file",
        type=str,
        help=
        "Simple text file with list of transcript IDs (one per line) to always select where possible for variants.  Transcript IDs must match the ones used by the transcript provider in your datasource (e.g. gencode ENST00000123456).  If more than one transcript can be selected for a variant, uses the method defined by --tx-mode to break ties.  Using this list means that a transcript will be selected from this list first, possibly superseding a best-effect.  Note that transcript version number is not considered, whether included in the list or not."
    )

    # Process arguments (argv=None makes argparse read sys.argv[1:]).
    return parser.parse_args(argv)
Example #2
0
def parseOptions(program_version_message, argv=None):
    """Build the Oncotator command-line parser and parse the arguments.

    :param program_version_message: version banner.  It is prepended to the
        help description and printed by -V/--version.
    :param argv: optional list of argument strings to parse.  When None (the
        default), argparse reads sys.argv[1:], exactly as the original
        single-argument form of this function did.
    :return: the argparse.Namespace produced by ArgumentParser.parse_args.
    """
    # Setup argument parser
    description = program_version_message + '''

    Oncotator is a tool for annotating human genomic point mutations and indels with data relevant to cancer researchers.

    '''
    # The description and epilog are rendered verbatim (RawDescriptionHelpFormatter),
    # so the indentation and blank lines below are part of the --help output.
    epilog = '''
    Example usage
    -------------
    oncotator -v --input_format=MAFLITE --output_format=TCGAMAF myInputFile.maflite myOutputFile.maf.annotated hg19

    IMPORTANT NOTE:  hg19 is the only supported genome build for now.

    Default values specified by -d or --annotate-default are used when an annotation does not exist or is populated with an empty string ("")

    Both default and override config files and command line specifications stack.

    Example of an override_config or default_config file:

    # Create center, source, sequencer, and score annotations, with the values broad.mit.edu, WXS, Illumina GAIIx, and <blank> for all mutations.
    #  This will overwrite all mutations.
    [manual_annotations]
    override:center=broad.mit.edu,source=WXS,sequencer=Illumina GAIIx,score=

    Example of cache urls:

    # Use a file (/home/user/myfile.cache) ... note the three forward slashes after "file:" for absolute path.
    -u file:///home/user/myfile.cache
    -u file://relative_file.cache

    # memcache
    -u memcache://localhost:11211

    Please note that only VCF input will populate the alt_allele_seen annotation.  All other inputs assume that the alternate is present if it appears at all.
        This feature is to allow users to include or exclude GT of 0/0 or ./. variants when converting VCFs to MAF.

        If --skip-no-alt is specified, VCF input processing will remove mutations with alt_allele_seen of False entirely (the mutations will not even be seen when output format is SIMPLE_TSV).

    -----
    Copyright 2012 Broad Institute. All rights reserved.  Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied.
    Oncotator is free for non-profit use.  See LICENSE for complete licensing information.
    '''
    parser = ArgumentParser(description=description, formatter_class=RawDescriptionHelpFormatter, epilog=epilog)

    # NOTE: help strings use argparse's own %(default)s specifier instead of
    # pre-formatting with "%s" % value.  argparse %-expands every help string,
    # so a default containing a literal '%' (e.g. in a path) would otherwise
    # break --help.

    # action="count" starts counting from the default, so each -v raises the
    # level above 5.
    parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=5,
                        help="set verbosity level [default: %(default)s]")
    parser.add_argument('-V', '--version', action='version', version=program_version_message)
    parser.add_argument('-i', '--input_format', type=str, default="MAFLITE",
                        choices=OncotatorCLIUtils.getSupportedInputFormats(),
                        help='Input format.  Note that MAFLITE will work for any tsv file with appropriate headers, so long as all of the required headers (or an alias -- see maflite.config) are present.  [default: %(default)s]')
    parser.add_argument('--db-dir', dest='dbDir', default=DEFAULT_DB_DIR,
                        help='Main annotation database directory. [default: %(default)s]')
    parser.add_argument('-o', '--output_format', type=str, default="TCGAMAF",
                        choices=OncotatorCLIUtils.getSupportedOutputFormats(),
                        help='Output format. [default: %(default)s]')
    parser.add_argument('--override_config', type=str,
                        help="File path to manual annotations in a config file format (section is 'manual_annotations' and annotation:value pairs).")
    parser.add_argument('--default_config', type=str,
                        help="File path to default annotation values in a config file format (section is 'manual_annotations' and annotation:value pairs).")
    parser.add_argument('--no-multicore', dest="noMulticore", action='store_true', default=False,
                        help="Disables all multicore functionality.")

    # Positional arguments, in the order they must appear on the command line.
    parser.add_argument('input_file', type=str,
                        help='Input file to be annotated.  Type is specified through options.')
    parser.add_argument('output_file', type=str, help='Output file name of annotated file.')
    parser.add_argument('genome_build', metavar='genome_build', type=str, choices=["hg19"],
                        help="Genome build.  For example: hg19")

    # Repeatable options: every occurrence is appended to the destination list.
    parser.add_argument('-a', '--annotate-manual', dest="override_cli", type=str, action='append', default=[],
                        help="Specify annotations to override.  Can be specified multiple times.  E.g. -a 'name1:value1' -a 'name2:value2' ")
    parser.add_argument('-d', '--annotate-default', dest="default_cli", type=str, action='append', default=[],
                        help="Specify default values for annotations.  Can be specified multiple times.  E.g. -d 'name1:value1' -d 'name2:value2' ")
    parser.add_argument('-u', '--cache-url', dest="cache_url", type=str, default=None,
                        help=" URL to use for cache.  See help for examples.")
    parser.add_argument('-r', '--read_only_cache', action='store_true', dest="read_only_cache", default=False,
                        help="Makes the cache read-only")
    parser.add_argument('--tx-mode', dest="tx_mode", default=DEFAULT_TX_MODE,
                        choices=TranscriptProvider.TX_MODE_CHOICES,
                        help="Specify transcript mode for transcript providing datasources that support multiple modes.  [default: %(default)s]")
    # Kept as a string choice (not a store_true flag); the value is returned
    # to the caller unconverted.
    parser.add_argument('--infer_genotypes', dest='infer_genotypes', default="false",
                        choices=["yes", "true", "t", "1", "y", "no", "false", "f", "0", "n"],
                        help="Forces the VCF output renderer to populate the output genotypes as heterozygous.  This option should only be used when converting a MAFLITE to a VCF; otherwise, the option has no effect.  [default: %(default)s]")
    parser.add_argument('--skip-no-alt', dest="skip_no_alt", action='store_true',
                        help="If specified, any mutation with annotation alt_allele_seen of 'False' will not be annotated or rendered.  Do not use if output format is a VCF.  If alt_allele_seen annotation is missing, render the mutation.")
    parser.add_argument('--log_name', dest='log_name', default="oncotator.log",
                        help="Specify log output location.  Default: %(default)s")
    parser.add_argument('--prepend', dest="prepend", action='store_true',
                        help="If specified for TCGAMAF output, will put a 'i_' in front of fields that are not directly rendered in Oncotator TCGA MAFs")

    # Process arguments (argv=None makes argparse read sys.argv[1:]).
    return parser.parse_args(argv)