예제 #1
0
    def create_run_spec_given_datasources(input_format, output_format, input_filename, output_filename, global_annotations=None,
                        datasource_list=None, genomeBuild="hg19", is_multicore=False, num_cores=4,
                        default_annotations=None, cache_url=None, read_only_cache=True,
                        tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None, annotating_type=None):
        """Build a RunSpecification from datasource instances supplied by the caller.

        Counterpart of create_run_spec for callers that already hold a list of datasource
        instances.  Typically this is only called by automated tests.

        None is replaced by an empty container for global_annotations, default_annotations,
        datasource_list and other_opts.  A caller-supplied other_opts dict is updated in place.

        Raises RunSpecificationException for the first validation message whose level is
        logging.ERROR or logging.CRITICAL.
        """
        if global_annotations is None:
            global_annotations = dict()
        if default_annotations is None:
            default_annotations = dict()
        if datasource_list is None:
            datasource_list = []
        if other_opts is None:
            other_opts = dict()

        # TCGA MAF input: switch the re-annotation option on unless it is already truthy.
        if input_format == "TCGAMAF":
            reannotate_key = OptionConstants.REANNOTATE_TCGA_MAF_COLS
            if not other_opts.get(reannotate_key, False):
                other_opts[reannotate_key] = True

        # The input creator reads this flag from the options dictionary.
        other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

        # Step 0: validate the parameters.  Every message is logged at its own level; an
        # ERROR or CRITICAL message aborts immediately.
        fatal_levels = (logging.ERROR, logging.CRITICAL)
        for message in RunSpecificationFactory._validate_run_spec_parameters(
                input_format, output_format, input_filename, output_filename, global_annotations,
                datasource_list, genomeBuild, is_multicore, num_cores,
                default_annotations, cache_url, read_only_cache,
                tx_mode, is_skip_no_alts, other_opts, annotating_type):
            logging.getLogger(__name__).log(message.level, message.message)
            if message.level in fatal_levels:
                raise RunSpecificationException(message.message)

        # Step 1: create the input and output handlers.
        allow_overwrite = other_opts.get(OptionConstants.ALLOW_ANNOTATION_OVERWRITING, False)
        mutation_factory = MutationDataFactory(allow_overwrite)

        input_creator = OncotatorCLIUtils.create_input_creator(input_filename, input_format, mutation_factory, genomeBuild, other_opts)
        output_renderer = OncotatorCLIUtils.create_output_renderer(output_filename, output_format, other_opts)

        # Step 2: assemble the run specification.
        init_kwargs = dict(
            manualAnnotations=global_annotations,
            datasources=datasource_list,
            isMulticore=is_multicore,
            numCores=num_cores,
            defaultAnnotations=default_annotations,
            cacheUrl=cache_url,
            read_only_cache=read_only_cache,
            is_skip_no_alts=is_skip_no_alts,
            annotating_type=annotating_type,
            is_allow_annotation_overwriting=allow_overwrite,
        )
        run_spec = RunSpecification()
        run_spec.initialize(input_creator, output_renderer, **init_kwargs)
        return run_spec
예제 #2
0
    def create_run_spec_given_datasources(input_format, output_format, input_filename, output_filename, global_annotations=None,
                        datasource_list=None, genomeBuild="hg19", is_multicore=False, num_cores=4,
                        default_annotations=None, cache_url=None, read_only_cache=True,
                        tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None, annotating_type=None):
        """Create a RunSpecification using a ready-made list of datasource instances.

        Works like create_run_spec, but the datasources are passed in as instances.
        Typically only automated tests call this method.

        Arguments left as None (global_annotations, default_annotations, datasource_list,
        other_opts) default to empty containers; an other_opts dict passed by the caller is
        modified in place.

        Raises RunSpecificationException as soon as validation reports a message at level
        logging.ERROR or logging.CRITICAL.
        """
        if global_annotations is None:
            global_annotations = dict()
        if default_annotations is None:
            default_annotations = dict()
        if datasource_list is None:
            datasource_list = []
        if other_opts is None:
            other_opts = dict()

        # For TCGA MAF input, enable re-annotation unless the option is already truthy.
        if input_format == "TCGAMAF":
            tcga_key = OptionConstants.REANNOTATE_TCGA_MAF_COLS
            if not other_opts.get(tcga_key, False):
                other_opts[tcga_key] = True

        # Mirror the skip flag into the options dictionary.
        other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

        # Step 0: parameter validation.  Each message is logged; ERROR/CRITICAL ones abort.
        problems = RunSpecificationFactory._validate_run_spec_parameters(
            input_format, output_format, input_filename, output_filename, global_annotations,
            datasource_list, genomeBuild, is_multicore, num_cores,
            default_annotations, cache_url, read_only_cache,
            tx_mode, is_skip_no_alts, other_opts, annotating_type)
        for problem in problems:
            logging.getLogger(__name__).log(problem.level, problem.message)
            if problem.level in (logging.ERROR, logging.CRITICAL):
                raise RunSpecificationException(problem.message)

        # Step 1: input creator and output renderer.
        overwrite_allowed = other_opts.get(OptionConstants.ALLOW_ANNOTATION_OVERWRITING, False)
        md_factory = MutationDataFactory(overwrite_allowed)

        creator = OncotatorCLIUtils.create_input_creator(input_filename, input_format, md_factory, genomeBuild, other_opts)
        renderer = OncotatorCLIUtils.create_output_renderer(output_filename, output_format, other_opts)

        # Step 2: populate and return the specification.
        spec = RunSpecification()
        spec.initialize(
            creator,
            renderer,
            manualAnnotations=global_annotations,
            datasources=datasource_list,
            isMulticore=is_multicore,
            numCores=num_cores,
            defaultAnnotations=default_annotations,
            cacheUrl=cache_url,
            read_only_cache=read_only_cache,
            is_skip_no_alts=is_skip_no_alts,
            annotating_type=annotating_type,
            is_allow_annotation_overwriting=overwrite_allowed,
        )
        return spec
예제 #3
0
    def _annotateTest(self, inputFilename, outputFilename, datasource_dir, inputFormat="MAFLITE", outputFormat="TCGAMAF", default_annotations=TCGA_MAF_DEFAULTS, override_annotations=None, is_skip_no_alts=False):
        """Annotate inputFilename into outputFilename and return the annotator's result.

        Builds a run specification through OncotatorCLIUtils.create_run_spec, initializes an
        Annotator with it and returns whatever Annotator.annotate() returns.  When
        override_annotations is None, an empty dict is used as the global annotations.
        """
        self.logger.info("Initializing Annotator...")

        global_annotations = dict() if override_annotations is None else override_annotations

        engine = Annotator()
        run_spec = OncotatorCLIUtils.create_run_spec(
            inputFormat,
            outputFormat,
            inputFilename,
            outputFilename,
            defaultAnnotations=default_annotations,
            datasourceDir=datasource_dir,
            globalAnnotations=global_annotations,
            is_skip_no_alts=is_skip_no_alts,
        )
        engine.initialize(run_spec)

        self.logger.info("Annotation starting...")
        return engine.annotate()
예제 #4
0
    def create_run_spec(
        inputFormat,
        outputFormat,
        inputFilename,
        outputFilename,
        globalAnnotations=None,
        datasourceDir=None,
        genomeBuild="hg19",
        isMulticore=False,
        numCores=4,
        defaultAnnotations=None,
        cacheUrl=None,
        read_only_cache=True,
        tx_mode=TranscriptProvider.TX_MODE_CANONICAL,
        is_skip_no_alts=False,
        other_opts=None,
        annotating_type=None,
    ):
        """Simple entry point for starting an Oncotator session.

        Warning: this interface may not be supported in future versions.

        Validates the parameters, creates the input creator and output renderer, creates the
        datasources from datasourceDir, applies tx_mode to every TranscriptProvider datasource
        and returns an initialized RunSpecification.

        If datasourceDir is None, the default location is meant to be used.
        TODO: Define default location.

        IMPORTANT: The current implementation attempts to annotate using a default set of
        datasources.
        TODO: Make sure that the note above is no longer the case.
        TODO: This method may get refactored into a separate class that handles
        run configuration objects.

        Raises RunSpecificationException for the first validation message whose level is
        logging.ERROR or logging.CRITICAL.
        """
        # TODO: Use dependency injection for list of name value pairs?  Otherwise, set it up as an attribute on this class.
        # TODO: Use dependency injection to return instance of the input/output classes

        if globalAnnotations is None:
            globalAnnotations = dict()
        if defaultAnnotations is None:
            defaultAnnotations = dict()
        if other_opts is None:
            other_opts = dict()

        # The input creator reads this flag from the options dictionary.
        other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

        # Step 0: validate the parameters.  Every message is logged at its own level; an
        # ERROR or CRITICAL message aborts immediately.
        fatal_levels = (logging.ERROR, logging.CRITICAL)
        for message in RunSpecificationFactory._validate_run_spec_parameters(
            inputFormat,
            outputFormat,
            inputFilename,
            outputFilename,
            globalAnnotations,
            datasourceDir,
            genomeBuild,
            isMulticore,
            numCores,
            defaultAnnotations,
            cacheUrl,
            read_only_cache,
            tx_mode,
            is_skip_no_alts,
            other_opts,
            annotating_type,
        ):
            logging.getLogger(__name__).log(message.level, message.message)
            if message.level in fatal_levels:
                raise RunSpecificationException(message.message)

        # Step 1: input and output handlers.
        input_creator = OncotatorCLIUtils.create_input_creator(inputFilename, inputFormat, genomeBuild, other_opts)
        output_renderer = OncotatorCLIUtils.create_output_renderer(outputFilename, outputFormat, other_opts)

        # Step 2: datasources.
        datasources = DatasourceFactory.createDatasources(
            datasourceDir, genomeBuild, isMulticore=isMulticore, numCores=numCores, tx_mode=tx_mode
        )

        # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a cleaner way.
        for datasource in datasources:
            if not isinstance(datasource, TranscriptProvider):
                continue
            logging.getLogger(__name__).info(
                "Setting %s %s to tx-mode of %s..." % (datasource.title, datasource.version, tx_mode)
            )
            datasource.set_tx_mode(tx_mode)

        # Step 3: assemble the run specification.
        init_kwargs = dict(
            manualAnnotations=globalAnnotations,
            datasources=datasources,
            isMulticore=isMulticore,
            numCores=numCores,
            defaultAnnotations=defaultAnnotations,
            cacheUrl=cacheUrl,
            read_only_cache=read_only_cache,
            is_skip_no_alts=is_skip_no_alts,
            annotating_type=annotating_type,
        )
        run_spec = RunSpecification()
        run_spec.initialize(input_creator, output_renderer, **init_kwargs)
        return run_spec
예제 #5
0
def main(argv=None):  # IGNORE:C0111
    """Run the Oncotator command line interface.

    Parses the command line, sets up file and console logging, builds a run
    specification from the parsed options and runs the annotator.

    Args:
        argv: Optional list of additional arguments.  When given, it is
            appended to ``sys.argv`` (note: this mutates the process-wide
            list); when ``None``, ``sys.argv`` is left untouched.

    Returns:
        0 when annotation completes, and also 0 when the run is interrupted
        by ``KeyboardInterrupt``.  Any other exception propagates.
    """
    from oncotator.utils.OncotatorCLIUtils import OncotatorCLIUtils
    from oncotator.Annotator import Annotator

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_version = "%s" % __version__
    # The doubled percent sign leaves a literal "%(prog)s" placeholder in the message.
    program_version_message = '%%(prog)s %s' % program_version

    try:
        args = parseOptions(program_version_message)
        verbose = args.verbose
        if verbose > 0:
            print("Verbose mode on")

        logFilename = args.log_name  # 'oncotator.log'

        # Create a basic logger to a file
        loggingFormat = '%(asctime)s %(levelname)s [%(name)s:%(lineno)d] %(message)s'
        logging.basicConfig(filename=logFilename,
                            level=logging.INFO,
                            format=loggingFormat)

        # Add a console handler to the root logger, so every logger also writes to the console.
        #    It uses the same format as the log file; by default only WARNING and above are shown.
        ch = logging.StreamHandler()
        ch.setLevel(logging.WARNING)
        formatter = logging.Formatter(loggingFormat)
        ch.setFormatter(formatter)

        if verbose:
            # Verbose mode lowers the console threshold to INFO and dumps the module search path.
            ch.setLevel(logging.INFO)
            print("Path:")
            print(sys.path)
            print(" ")

        logging.getLogger('').addHandler(ch)

        logger = logging.getLogger(__name__)
        logger.info("Oncotator " + program_version)
        logger.info("Args: " + str(args))
        logger.info('Log file: ' + os.path.abspath(logFilename))

        if DEBUG:
            logger.setLevel(logging.DEBUG)

        if not NGSLIB_INSTALLED:
            # Logger.warning is the documented method; Logger.warn is a deprecated alias.
            logger.warning(
                "ngslib module not installed.  Will be unable to annotate with BigWig datasources."
            )

        # Initiate an Oncotator session.
        inputFilename = os.path.expanduser(args.input_file)
        outputFilename = os.path.expanduser(args.output_file)
        inputFormat = args.input_format.upper()
        outputFormat = args.output_format.upper()

        datasourceDir = os.path.expanduser(args.dbDir)
        cache_url = args.cache_url
        read_only_cache = args.read_only_cache
        tx_mode = args.tx_mode
        is_skip_no_alts = args.skip_no_alt
        genome_build = args.genome_build

        # Parse annotation overrides.  A missing config file is reported and then ignored.
        commandLineManualOverrides = args.override_cli
        overrideConfigFile = args.override_config
        if overrideConfigFile is not None and not os.path.exists(
                overrideConfigFile):
            logger.warning("Could not find " + overrideConfigFile +
                           "   ... proceeding anyway.")
            overrideConfigFile = None
        manualOverrides = OncotatorCLIUtils.determineAllAnnotationValues(
            commandLineManualOverrides, overrideConfigFile)

        # Parse default overrides.  A missing file is only a warning when it is not the
        # built-in DEFAULT_DEFAULT_ANNOTATIONS path; either way it is then ignored.
        commandLineDefaultValues = args.default_cli
        defaultConfigFile = args.default_config
        if defaultConfigFile is not None and not os.path.exists(
                defaultConfigFile):
            if defaultConfigFile != DEFAULT_DEFAULT_ANNOTATIONS:
                logger.warning("Could not find " + defaultConfigFile +
                               "   ... proceeding anyway.")
            else:
                logger.info(
                    "Could not find Broad-specific " + defaultConfigFile +
                    "   ... proceeding without any default annotations.  __UNKNOWN__ may appear in TCGA MAF outputs."
                )
            defaultConfigFile = None
        defaultValues = OncotatorCLIUtils.determineAllAnnotationValues(
            commandLineDefaultValues, defaultConfigFile)

        # Create a run configuration to pass to the Annotator class.
        annotating_type = None
        if inputFormat == "SEG_FILE":
            annotating_type = RunSpecification.ANNOTATE_SEGMENTS
        runConfig = RunSpecificationFactory.create_run_spec(
            inputFormat,
            outputFormat,
            inputFilename,
            outputFilename,
            globalAnnotations=manualOverrides,
            datasourceDir=datasourceDir,
            isMulticore=(not args.noMulticore),
            defaultAnnotations=defaultValues,
            cacheUrl=cache_url,
            read_only_cache=read_only_cache,
            tx_mode=tx_mode,
            is_skip_no_alts=is_skip_no_alts,
            genomeBuild=genome_build,
            other_opts=determineOtherOptions(args),
            annotating_type=annotating_type)

        annotator = Annotator()
        annotator.initialize(runConfig)
        annotator.annotate()

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
예제 #6
0
def parseOptions(program_version_message):
    """Build the Oncotator argument parser and parse the command line.

    Args:
        program_version_message: Version string used both as the first line of
            the help description and as the output of ``-V``/``--version``.

    Returns:
        The namespace returned by ``ArgumentParser.parse_args()``.  It is
        called without arguments, so the options are read from ``sys.argv``.
    """
    # Setup argument parser
    description = program_version_message + '''

    Oncotator is a tool for annotating human genomic point mutations and indels with data relevant to cancer researchers.

    '''
    # NOTE: the epilog names the -d long option as --annotate-default, matching the
    #   option registered below (it previously named a flag that does not exist).
    epilog = '''
    Example usage
    -------------
    oncotator -v --input_format=MAFLITE --output_format=TCGAMAF myInputFile.maflite myOutputFile.maf.annotated hg19
    
    IMPORTANT NOTE:  hg19 is only supported genome build for now.

    Default values specified by -d or --annotate-default are used when an annotation does not exist or is populated with an empty string ("")

    Both default and override config files and command line specifications stack.

    Example of an override_config or default_config file:

    # Create center, source, sequencer, and score annotations, with the values broad.mit.edu, WXS, Illumina GAIIx, and <blank> for all mutations.
    #  This will overwrite all mutations.
    [manual_annotations]
    override:center=broad.mit.edu,source=WXS,sequencer=Illumina GAIIx,score=

    Example of cache urls:

    # Use a file (/home/user/myfile.cache) ... note the three forward slashes after "file:" for absolute path.
    -u file:///home/user/myfile.cache
    -u file://relative_file.cache

    # memcache
    -u memcache://localhost:11211

    Please note that only VCF input will populate the alt_allele_seen annotation.  All other inputs assume that the alternate is present if it appears at all.
        This feature is to allow users to include or exclude GT of 0/0 or ./. variants when converting VCFs to MAF.

        If --skip-no-alt is specified, VCF input processing will remove mutations with alt_allele_seen of False entirely (the mutations will not even seen when output format is SIMPLE_TSV).

    -----
    Copyright 2012 Broad Institute. All rights reserved.  Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied.
    Oncotator is free for non-profit use.  See LICENSE for complete licensing information.
    '''
    # RawDescriptionHelpFormatter keeps the line breaks of the description and epilog as written.
    parser = ArgumentParser(description=description,
                            formatter_class=RawDescriptionHelpFormatter,
                            epilog=epilog)
    # action="count" with default=5: each -v increments the counter starting from 5.
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="count",
                        help="set verbosity level [default: 5]",
                        default=5)
    parser.add_argument('-V',
                        '--version',
                        action='version',
                        version=program_version_message)
    parser.add_argument(
        '-i',
        '--input_format',
        type=str,
        default="MAFLITE",
        choices=OncotatorCLIUtils.getSupportedInputFormats(),
        help=
        'Input format.  Note that MAFLITE will work for any tsv file with appropriate headers, so long as all of the required headers (or an alias -- see maflite.config) are present.  [default: %s]'
        % "MAFLITE")
    parser.add_argument(
        '--db-dir',
        dest='dbDir',
        default=DEFAULT_DB_DIR,
        help='Main annotation database directory. [default: %s]' %
        DEFAULT_DB_DIR)
    parser.add_argument('-o',
                        '--output_format',
                        type=str,
                        default="TCGAMAF",
                        choices=OncotatorCLIUtils.getSupportedOutputFormats(),
                        help='Output format. [default: %s]' % "TCGAMAF")
    parser.add_argument(
        '--override_config',
        type=str,
        help=
        "File path to manual annotations in a config file format (section is 'manual_annotations' and annotation:value pairs)."
    )
    parser.add_argument(
        '--default_config',
        type=str,
        help=
        "File path to default annotation values in a config file format (section is 'manual_annotations' and annotation:value pairs)."
    )
    parser.add_argument('--no-multicore',
                        dest="noMulticore",
                        action='store_true',
                        default=False,
                        help="Disables all multicore functionality.")
    # Positional arguments, in order: input file, output file, genome build.
    parser.add_argument(
        'input_file',
        type=str,
        help='Input file to be annotated.  Type is specified through options.')
    parser.add_argument('output_file',
                        type=str,
                        help='Output file name of annotated file.')
    parser.add_argument('genome_build',
                        metavar='genome_build',
                        type=str,
                        help="Genome build.  For example: hg19",
                        choices=["hg19"])
    parser.add_argument(
        '-a',
        '--annotate-manual',
        dest="override_cli",
        type=str,
        action='append',
        default=[],
        help=
        "Specify annotations to override.  Can be specified multiple times.  E.g. -a 'name1:value1' -a 'name2:value2' "
    )
    parser.add_argument(
        '-d',
        '--annotate-default',
        dest="default_cli",
        type=str,
        action='append',
        default=[],
        help=
        "Specify default values for annotations.  Can be specified multiple times.  E.g. -d 'name1:value1' -d 'name2:value2' "
    )
    parser.add_argument('-u',
                        '--cache-url',
                        dest="cache_url",
                        type=str,
                        default=None,
                        help=" URL to use for cache.  See help for examples.")
    parser.add_argument('-r',
                        '--read_only_cache',
                        action='store_true',
                        dest="read_only_cache",
                        default=False,
                        help="Makes the cache read-only")
    parser.add_argument(
        '--tx-mode',
        dest="tx_mode",
        default=DEFAULT_TX_MODE,
        choices=TranscriptProvider.TX_MODE_CHOICES,
        help=
        "Specify transcript mode for transcript providing datasources that support multiple modes.  [default: %s]"
        % DEFAULT_TX_MODE)
    parser.add_argument(
        '--infer_genotypes',
        dest='infer_genotypes',
        default="false",
        choices=["yes", "true", "t", "1", "y", "no", "false", "f", "0", "n"],
        help=
        "Forces the VCF output renderer to populate the output genotypes as heterozygous.  This option should only be used when converting a MAFLITE to a VCF; otherwise, the option has no effect.  [default: %s]"
        % "false")
    parser.add_argument(
        '--skip-no-alt',
        dest="skip_no_alt",
        action='store_true',
        help=
        "If specified, any mutation with annotation alt_allele_seen of 'False' will not be annotated or rendered.  Do not use if output format is a VCF.  If alt_allele_seen annotation is missing, render the mutation."
    )
    parser.add_argument(
        '--log_name',
        dest='log_name',
        default="oncotator.log",
        help="Specify log output location.  Default: oncotator.log")
    parser.add_argument(
        '--prepend',
        dest="prepend",
        action='store_true',
        help=
        "If specified for TCGAMAF output, will put a 'i_' in front of fields that are not directly rendered in Oncotator TCGA MAFs"
    )
    parser.add_argument(
        '--infer-onps',
        dest="infer_onps",
        action='store_true',
        help=
        "Will merge adjacent SNPs,DNPs,TNPs,etc if they are in the same sample.  This assumes that the input file is position sorted.  This may cause problems with VCF -> VCF conversion, and does not guarantee input order is maintained."
    )
    parser.add_argument(
        '-c',
        '--canonical-tx-file',
        dest="canonical_tx_file",
        type=str,
        help=
        "Simple text file with list of transcript IDs (one per line) to always select where possible for variants.  Transcript IDs must match the ones used by the transcript provider in your datasource (e.g. gencode ENST00000123456).  If more than one transcript can be selected for a variant, uses the method defined by --tx-mode to break ties.  Using this list means that a transcript will be selected from this list first, possibly superseding a best-effect.  Note that transcript version number is not considered, whether included in the list or not."
    )

    # Process arguments
    args = parser.parse_args()

    return args
예제 #7
0
def main(argv=None):  # IGNORE:C0111
    """Run the Oncotator command line interface.

    Parses the command line, sets up file and console logging, builds a run
    specification from the parsed options and runs the annotator.

    Args:
        argv: Optional list of additional arguments.  When given, it is
            appended to ``sys.argv`` (note: this mutates the process-wide
            list); when ``None``, ``sys.argv`` is left untouched.

    Returns:
        0 when annotation completes, and also 0 when the run is interrupted
        by ``KeyboardInterrupt``.  Any other exception propagates.
    """
    from oncotator.utils.OncotatorCLIUtils import OncotatorCLIUtils
    from oncotator.Annotator import Annotator

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_version = "%s" % __version__
    # The doubled percent sign leaves a literal "%(prog)s" placeholder in the message.
    program_version_message = '%%(prog)s %s' % program_version

    try:
        args = parseOptions(program_version_message)
        verbose = args.verbose
        if verbose > 0:
            print("Verbose mode on")

        logFilename = args.log_name  # 'oncotator.log'

        # Create a basic logger to a file
        loggingFormat = '%(asctime)s %(levelname)s [%(name)s:%(lineno)d] %(message)s'
        logging.basicConfig(filename=logFilename, level=logging.INFO, format=loggingFormat)

        # Add a console handler to the root logger, so every logger also writes to the console.
        #    It uses the same format as the log file; by default only WARNING and above are shown.
        ch = logging.StreamHandler()
        ch.setLevel(logging.WARNING)
        formatter = logging.Formatter(loggingFormat)
        ch.setFormatter(formatter)

        if verbose:
            # Verbose mode lowers the console threshold to INFO and dumps the module search path.
            ch.setLevel(logging.INFO)
            print("Path:")
            print(sys.path)
            print(" ")

        logging.getLogger('').addHandler(ch)

        logger = logging.getLogger(__name__)
        logger.info("Oncotator " + program_version)
        logger.info("Args: " + str(args))
        logger.info('Log file: ' + os.path.abspath(logFilename))

        if DEBUG:
            logger.setLevel(logging.DEBUG)

        # Initiate an Oncotator session.
        inputFilename = os.path.expanduser(args.input_file)
        outputFilename = os.path.expanduser(args.output_file)
        inputFormat = args.input_format.upper()
        outputFormat = args.output_format.upper()

        datasourceDir = os.path.expanduser(args.dbDir)
        cache_url = args.cache_url
        read_only_cache = args.read_only_cache
        tx_mode = args.tx_mode
        is_skip_no_alts = args.skip_no_alt
        genome_build = args.genome_build

        # Parse annotation overrides.  A missing config file is reported and then ignored.
        # Logger.warning is the documented method; Logger.warn is a deprecated alias.
        commandLineManualOverrides = args.override_cli
        overrideConfigFile = args.override_config
        if overrideConfigFile is not None and not os.path.exists(overrideConfigFile):
            logger.warning("Could not find " + overrideConfigFile + "   ... proceeding anyway.")
            overrideConfigFile = None
        manualOverrides = OncotatorCLIUtils.determineAllAnnotationValues(commandLineManualOverrides, overrideConfigFile)

        # Parse default overrides.  A missing file is only a warning when it is not the
        # built-in DEFAULT_DEFAULT_ANNOTATIONS path; either way it is then ignored.
        commandLineDefaultValues = args.default_cli
        defaultConfigFile = args.default_config
        if defaultConfigFile is not None and not os.path.exists(defaultConfigFile):
            if defaultConfigFile != DEFAULT_DEFAULT_ANNOTATIONS:
                logger.warning("Could not find " + defaultConfigFile + "   ... proceeding anyway.")
            else:
                logger.info("Could not find Broad-specific " + defaultConfigFile + "   ... proceeding without any default annotations.  __UNKNOWN__ may appear in TCGA MAF outputs.")
            defaultConfigFile = None
        defaultValues = OncotatorCLIUtils.determineAllAnnotationValues(commandLineDefaultValues, defaultConfigFile)

        # Create a run configuration to pass to the Annotator class.
        annotating_type = None
        if inputFormat == "SEG_FILE":
            annotating_type = RunSpecification.ANNOTATE_SEGMENTS
        runConfig = RunSpecificationFactory.create_run_spec(inputFormat, outputFormat, inputFilename, outputFilename,
                                                      globalAnnotations=manualOverrides, datasourceDir=datasourceDir,
                                                      isMulticore=(not args.noMulticore),
                                                      defaultAnnotations=defaultValues, cacheUrl=cache_url,
                                                      read_only_cache=read_only_cache, tx_mode=tx_mode,
                                                      is_skip_no_alts=is_skip_no_alts, genomeBuild=genome_build,
                                                      other_opts=determineOtherOptions(args), annotating_type=annotating_type)

        annotator = Annotator()
        annotator.initialize(runConfig)
        annotator.annotate()

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
예제 #8
0
def parseOptions(program_version_message):
    """Build the Oncotator argument parser and parse the command line.

    Args:
        program_version_message: Version string used both as the first line of
            the help description and as the output of ``-V``/``--version``.

    Returns:
        The namespace returned by ``ArgumentParser.parse_args()``.  It is
        called without arguments, so the options are read from ``sys.argv``.
    """
    # Setup argument parser
    description = program_version_message + '''

    Oncotator is a tool for annotating human genomic point mutations and indels with data relevant to cancer researchers.

    '''
    # NOTE: the epilog names the -d long option as --annotate-default, matching the
    #   option registered below (it previously named a flag that does not exist).
    epilog = '''
    Example usage
    -------------
    oncotator -v --input_format=MAFLITE --output_format=TCGAMAF myInputFile.maflite myOutputFile.maf.annotated hg19
    
    IMPORTANT NOTE:  hg19 is only supported genome build for now.

    Default values specified by -d or --annotate-default are used when an annotation does not exist or is populated with an empty string ("")

    Both default and override config files and command line specifications stack.

    Example of an override_config or default_config file:

    # Create center, source, sequencer, and score annotations, with the values broad.mit.edu, WXS, Illumina GAIIx, and <blank> for all mutations.
    #  This will overwrite all mutations.
    [manual_annotations]
    override:center=broad.mit.edu,source=WXS,sequencer=Illumina GAIIx,score=

    Example of cache urls:

    # Use a file (/home/user/myfile.cache) ... note the three forward slashes after "file:" for absolute path.
    -u file:///home/user/myfile.cache
    -u file://relative_file.cache

    # memcache
    -u memcache://localhost:11211

    Please note that only VCF input will populate the alt_allele_seen annotation.  All other inputs assume that the alternate is present if it appears at all.
        This feature is to allow users to include or exclude GT of 0/0 or ./. variants when converting VCFs to MAF.

        If --skip-no-alt is specified, VCF input processing will remove mutations with alt_allele_seen of False entirely (the mutations will not even seen when output format is SIMPLE_TSV).

    -----
    Copyright 2012 Broad Institute. All rights reserved.  Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied.
    Oncotator is free for non-profit use.  See LICENSE for complete licensing information.
    '''
    # RawDescriptionHelpFormatter keeps the line breaks of the description and epilog as written.
    parser = ArgumentParser(description=description, formatter_class=RawDescriptionHelpFormatter, epilog=epilog)
    # action="count" with default=5: each -v increments the counter starting from 5.
    parser.add_argument("-v", "--verbose", dest="verbose", action="count", help="set verbosity level [default: 5]", default=5)
    parser.add_argument('-V', '--version', action='version', version=program_version_message)
    parser.add_argument('-i', '--input_format', type=str, default="MAFLITE", choices=OncotatorCLIUtils.getSupportedInputFormats(), help='Input format.  Note that MAFLITE will work for any tsv file with appropriate headers, so long as all of the required headers (or an alias -- see maflite.config) are present.  [default: %s]' % "MAFLITE")
    parser.add_argument('--db-dir', dest='dbDir', default=DEFAULT_DB_DIR,
                        help='Main annotation database directory. [default: %s]' % DEFAULT_DB_DIR)
    parser.add_argument('-o', '--output_format', type=str, default="TCGAMAF",choices=OncotatorCLIUtils.getSupportedOutputFormats(), help='Output format. [default: %s]' % "TCGAMAF")
    parser.add_argument('--override_config', type=str,
                        help="File path to manual annotations in a config file format (section is 'manual_annotations' and annotation:value pairs).")
    parser.add_argument('--default_config', type=str,
                        help="File path to default annotation values in a config file format (section is 'manual_annotations' and annotation:value pairs).")
    parser.add_argument('--no-multicore', dest="noMulticore", action='store_true', default=False, help="Disables all multicore functionality.")
    # Positional arguments, in order: input file, output file, genome build.
    parser.add_argument('input_file', type=str, help='Input file to be annotated.  Type is specified through options.')
    parser.add_argument('output_file', type=str, help='Output file name of annotated file.')
    parser.add_argument('genome_build', metavar='genome_build', type=str, help="Genome build.  For example: hg19", choices=["hg19"])
    parser.add_argument('-a', '--annotate-manual', dest="override_cli",type=str, action='append', default=[], help="Specify annotations to override.  Can be specified multiple times.  E.g. -a 'name1:value1' -a 'name2:value2' ")
    parser.add_argument('-d', '--annotate-default', dest="default_cli",type=str, action='append', default=[], help="Specify default values for annotations.  Can be specified multiple times.  E.g. -d 'name1:value1' -d 'name2:value2' ")
    parser.add_argument('-u', '--cache-url', dest="cache_url", type=str, default=None, help=" URL to use for cache.  See help for examples.")
    parser.add_argument('-r', '--read_only_cache', action='store_true', dest="read_only_cache", default=False, help="Makes the cache read-only")
    parser.add_argument('--tx-mode', dest="tx_mode", default=DEFAULT_TX_MODE, choices=TranscriptProvider.TX_MODE_CHOICES, help="Specify transcript mode for transcript providing datasources that support multiple modes.  [default: %s]" % DEFAULT_TX_MODE)
    parser.add_argument('--infer_genotypes', dest='infer_genotypes', default="false", choices=["yes", "true", "t", "1", "y", "no", "false", "f", "0", "n"],
                        help="Forces the VCF output renderer to populate the output genotypes as heterozygous.  This option should only be used when converting a MAFLITE to a VCF; otherwise, the option has no effect.  [default: %s]" % "false")
    parser.add_argument('--skip-no-alt', dest="skip_no_alt", action='store_true', help="If specified, any mutation with annotation alt_allele_seen of 'False' will not be annotated or rendered.  Do not use if output format is a VCF.  If alt_allele_seen annotation is missing, render the mutation.")
    parser.add_argument('--log_name', dest='log_name', default="oncotator.log", help="Specify log output location.  Default: oncotator.log")
    parser.add_argument('--prepend', dest="prepend", action='store_true', help="If specified for TCGAMAF output, will put a 'i_' in front of fields that are not directly rendered in Oncotator TCGA MAFs")

    # Process arguments
    args = parser.parse_args()

    return args
예제 #9
0
def main(argv=None):  # IGNORE:C0111
    """Run the Oncotator command line interface.

    Parses the command line options, sets up file and console logging,
    resolves the manual-override and default annotation values, builds a run
    specification and hands it to an ``Annotator``.

    :param argv: optional list of extra arguments.  When given, the entries
        are appended to ``sys.argv`` before the options are parsed.
    :return: 0 when annotation completes, and also 0 when the run is
        interrupted from the keyboard.
    """
    from oncotator.utils.OncotatorCLIUtils import OncotatorCLIUtils
    from oncotator.Annotator import Annotator

    # Extra arguments are merged into sys.argv before parseOptions runs
    # (presumably it parses sys.argv -- confirm against parseOptions).
    if argv is not None:
        sys.argv.extend(argv)

    program_version = "%s" % __version__
    program_version_message = '%%(prog)s %s' % (program_version)
    program_shortdesc = program_version_message
    program_license = '''%s

    %s

  Copyright 2012 Broad Institute. All rights reserved.
  
  #TODO: License Here
  
  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
''' % (program_shortdesc, str(__date__))

    try:
        args = parseOptions(program_license, program_version_message)
        verbose = args.verbose
        if verbose > 0:
            print("Verbose mode on")

        logFilename = args.log_name  # 'oncotator.log'

        # Create a basic logger to a file
        loggingFormat = '%(asctime)s %(levelname)s [%(name)s:%(lineno)d] %(message)s'
        logging.basicConfig(filename=logFilename, level=logging.INFO, format=loggingFormat)

        # Add a console handler to the root logger, so every logger created
        # afterwards also writes to the console, in the same format as the
        # log file.  The console shows warnings and above unless verbose.
        ch = logging.StreamHandler()
        ch.setLevel(logging.WARN)
        formatter = logging.Formatter(loggingFormat)
        ch.setFormatter(formatter)

        if verbose:
            ch.setLevel(logging.INFO)
            print("Path:")
            print(sys.path)
            print(" ")

        logging.getLogger('').addHandler(ch)

        logger = logging.getLogger(__name__)
        logger.info("Oncotator " + program_version)
        logger.info("Args: " + str(args))
        logger.info('Log file: ' + os.path.abspath(logFilename))

        if DEBUG:
            logger.setLevel(logging.DEBUG)

        # Initiate an Oncotator session.
        inputFilename = os.path.expanduser(args.input_file)
        outputFilename = os.path.expanduser(args.output_file)
        inputFormat = args.input_format.upper()
        outputFormat = args.output_format.upper()

        datasourceDir = os.path.expanduser(args.dbDir)
        cache_url = args.cache_url
        read_only_cache = args.read_only_cache
        tx_mode = args.tx_mode
        is_skip_no_alts = args.skip_no_alt
        genome_build = args.genome_build
        is_no_prepend = not args.prepend

        # Parse annotation overrides.  A missing override config file is not
        # fatal: warn and continue with the command line values only.
        commandLineManualOverrides = args.override_cli
        overrideConfigFile = args.override_config
        if overrideConfigFile is not None and not os.path.exists(overrideConfigFile):
            logger.warning("Could not find " + overrideConfigFile + "   ... proceeding anyway.")
            overrideConfigFile = None
        manualOverrides = OncotatorCLIUtils.determineAllAnnotationValues(commandLineManualOverrides, overrideConfigFile)

        # Parse default overrides.  A missing built-in default file is only
        # logged at info level; a missing user-supplied one is a warning.
        commandLineDefaultValues = args.default_cli
        defaultConfigFile = args.default_config
        if defaultConfigFile is not None and not os.path.exists(defaultConfigFile):
            if defaultConfigFile != DEFAULT_DEFAULT_ANNOTATIONS:
                logger.warning("Could not find " + defaultConfigFile + "   ... proceeding anyway.")
            else:
                logger.info("Could not find Broad-specific " + defaultConfigFile + "   ... proceeding without any default annotations.  __UNKNOWN__ may appear in TCGA MAF outputs.")
            defaultConfigFile = None
        defaultValues = OncotatorCLIUtils.determineAllAnnotationValues(commandLineDefaultValues, defaultConfigFile)

        # Report option combinations that are suspicious or have no effect.
        if is_skip_no_alts and (outputFormat == "VCF"):
            logger.warning("--skip-no-alt specified when output is a VCF.  This is likely to generate errors.")
        if is_skip_no_alts and (inputFormat != "VCF"):
            logger.info("--skip-no-alt specified when input is not VCF.  skip-no-alt is not going to do anything.")
        if is_no_prepend and (outputFormat != "TCGAMAF"):
            logger.info("no prepend specified when output is not TCGAMAF.  Ignoring and proceeding.")

        if outputFormat == "TCGAVCF":
            logger.warning("TCGA VCF output is not supported and should be considered experimental when used outside of the Broad Institute.  Outside of the Broad Institute, use of -o VCF is more likely to be desired by users.")

        # Create a run configuration to pass to the Annotator class.
        runConfig = OncotatorCLIUtils.create_run_spec(inputFormat, outputFormat, inputFilename, outputFilename,
                                                      globalAnnotations=manualOverrides, datasourceDir=datasourceDir,
                                                      isMulticore=(not args.noMulticore),
                                                      defaultAnnotations=defaultValues, cacheUrl=cache_url,
                                                      read_only_cache=read_only_cache, tx_mode=tx_mode,
                                                      is_skip_no_alts=is_skip_no_alts, genomeBuild=genome_build,
                                                      other_opts=determineOtherOptions(args, logger))

        annotator = Annotator()
        annotator.initialize(runConfig)
        annotator.annotate()

        return 0
    except KeyboardInterrupt:
        # A user interrupt is treated as a normal exit.
        return 0
예제 #10
0
    def create_run_spec(inputFormat,
                        outputFormat,
                        inputFilename,
                        outputFilename,
                        globalAnnotations=None,
                        datasourceDir=None,
                        genomeBuild="hg19",
                        isMulticore=False,
                        numCores=4,
                        defaultAnnotations=None,
                        cacheUrl=None,
                        read_only_cache=True,
                        tx_mode=TranscriptProvider.TX_MODE_CANONICAL,
                        is_skip_no_alts=False,
                        other_opts=None,
                        annotating_type=None):
        """Build and return an initialized RunSpecification for an Oncotator session.

        This is a very simple interface to start an Oncotator session.  As a
        warning, this interface may not be supported in future versions.

        The parameters are validated first; every validation message is
        logged at its own level and the first ERROR or CRITICAL message is
        raised as a RunSpecificationException.  Then the input creator and
        output renderer are created, datasources are created from
        datasourceDir (an empty list is used when datasourceDir is
        None/empty), and each TranscriptProvider datasource is given tx_mode
        plus, if configured in other_opts, the custom canonical transcript
        list.

        Note that a caller-supplied other_opts dict is modified in place: the
        IS_SKIP_ALTS entry is set to is_skip_no_alts.

        TODO: Define a default datasource location for datasourceDir=None.
        TODO: This method may get refactored into a separate class that handles RunConfiguration objects.

        :param inputFormat: input format name, passed to create_input_creator
        :param outputFormat: output format name, passed to create_output_renderer
        :param inputFilename: path of the input file
        :param outputFilename: path of the output file
        :param globalAnnotations: dict of manual annotations (default: empty dict)
        :param datasourceDir: directory to create datasources from
        :param genomeBuild: genome build, e.g. "hg19"
        :param isMulticore: passed to datasource creation and the run spec
        :param numCores: passed to datasource creation and the run spec
        :param defaultAnnotations: dict of default annotations (default: empty dict)
        :param cacheUrl: cache URL passed to the run spec
        :param read_only_cache: passed to the run spec
        :param tx_mode: transcript mode set on every TranscriptProvider datasource
        :param is_skip_no_alts: stored in other_opts and passed to the run spec
        :param other_opts: dict of additional options (default: empty dict)
        :param annotating_type: passed to validation and the run spec
        :return: the initialized RunSpecification
        :raises RunSpecificationException: on an ERROR/CRITICAL validation message
        """
        # TODO: Use dependency injection for list of name value pairs?  Otherwise, set it up as an attribute on this class.
        # TODO: Use dependency injection to return instance of the input/output classes

        globalAnnotations = dict() if globalAnnotations is None else globalAnnotations
        defaultAnnotations = dict() if defaultAnnotations is None else defaultAnnotations
        other_opts = dict() if other_opts is None else other_opts

        other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

        logger = logging.getLogger(__name__)

        # Step 0 Validate given parameters and log messages.  If an error or critical is found, throw an exception.
        validation_messages = RunSpecificationFactory._validate_run_spec_parameters(
            inputFormat, outputFormat, inputFilename, outputFilename,
            globalAnnotations, datasourceDir, genomeBuild, isMulticore,
            numCores, defaultAnnotations, cacheUrl, read_only_cache, tx_mode,
            is_skip_no_alts, other_opts, annotating_type)
        for msg in validation_messages:
            logger.log(msg.level, msg.message)
            if (msg.level == logging.ERROR) or (msg.level == logging.CRITICAL):
                raise RunSpecificationException(msg.message)

        # Step 1 Initialize input and output
        inputCreator = OncotatorCLIUtils.create_input_creator(
            inputFilename, inputFormat, genomeBuild, other_opts)
        outputRenderer = OncotatorCLIUtils.create_output_renderer(
            outputFilename, outputFormat, other_opts)

        # Step 2 Datasources
        if datasourceDir:
            datasource_list = DatasourceFactory.createDatasources(
                datasourceDir,
                genomeBuild,
                isMulticore=isMulticore,
                numCores=numCores,
                tx_mode=tx_mode)
        else:
            datasource_list = []

        #TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a cleaner way.
        custom_canonical_tx_file = other_opts.get(
            OptionConstants.CUSTOM_CANONICAL_TX_LIST_FILE, None)
        for ds in datasource_list:
            if not isinstance(ds, TranscriptProvider):
                continue

            logger.info(
                "Setting %s %s to tx-mode of %s..." %
                (ds.title, ds.version, tx_mode))
            ds.set_tx_mode(tx_mode)

            if custom_canonical_tx_file is None:
                logger.info("No custom canonical transcripts specified.")
                continue

            # One transcript id per line.  The context manager closes the
            # file even if reading fails.  Lines are stripped before the
            # version suffix (".N") is dropped so that ids without a version
            # do not keep their trailing newline; blank lines are ignored.
            with open(custom_canonical_tx_file, 'r') as cc_txs_fp:
                cc_txs = [tx.strip().rsplit(".", 1)[0]
                          for tx in cc_txs_fp if tx.strip()]
            ds.set_custom_canonical_txs(cc_txs)
            logger.info(
                str(len(cc_txs)) +
                " custom canonical transcripts specified.")

        result = RunSpecification()
        result.initialize(inputCreator,
                          outputRenderer,
                          manualAnnotations=globalAnnotations,
                          datasources=datasource_list,
                          isMulticore=isMulticore,
                          numCores=numCores,
                          defaultAnnotations=defaultAnnotations,
                          cacheUrl=cacheUrl,
                          read_only_cache=read_only_cache,
                          is_skip_no_alts=is_skip_no_alts,
                          annotating_type=annotating_type)
        return result