def create_run_spec_given_datasources(input_format, output_format, input_filename, output_filename,
                                      global_annotations=None, datasource_list=None, genomeBuild="hg19",
                                      is_multicore=False, num_cores=4, default_annotations=None,
                                      cache_url=None, read_only_cache=True,
                                      tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False,
                                      other_opts=None, annotating_type=None):
    """Build a RunSpecification from a list of datasource instances.

    Same as create_run_spec, except that the caller hands in the datasource
    instances directly.  Typically, this method is only called by automated tests.

    Any of global_annotations, default_annotations, datasource_list and
    other_opts may be None, in which case an empty container is used.  When
    other_opts is supplied, it is updated in place.

    Returns the initialized RunSpecification.

    Raises RunSpecificationException when parameter validation reports a
    message at ERROR or CRITICAL level.
    """
    if global_annotations is None:
        global_annotations = dict()
    if default_annotations is None:
        default_annotations = dict()
    if datasource_list is None:
        datasource_list = []
    if other_opts is None:
        other_opts = dict()

    # TCGAMAF input gets the re-annotation flag switched on unless it is already truthy.
    if input_format == "TCGAMAF":
        if not other_opts.get(OptionConstants.REANNOTATE_TCGA_MAF_COLS, False):
            other_opts[OptionConstants.REANNOTATE_TCGA_MAF_COLS] = True

    other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

    # Step 0: validate the parameters, logging every message.  The first message
    # at ERROR or CRITICAL level aborts with an exception (after it is logged).
    log = logging.getLogger(__name__)
    fatal_levels = (logging.ERROR, logging.CRITICAL)
    problems = RunSpecificationFactory._validate_run_spec_parameters(
        input_format, output_format, input_filename, output_filename, global_annotations,
        datasource_list, genomeBuild, is_multicore, num_cores, default_annotations, cache_url,
        read_only_cache, tx_mode, is_skip_no_alts, other_opts, annotating_type)
    for problem in problems:
        log.log(problem.level, problem.message)
        if problem.level in fatal_levels:
            raise RunSpecificationException(problem.message)

    # Step 1: initialize input and output.
    is_allow_annotation_overwriting = other_opts.get(OptionConstants.ALLOW_ANNOTATION_OVERWRITING, False)
    mutation_data_factory = MutationDataFactory(is_allow_annotation_overwriting)
    input_creator = OncotatorCLIUtils.create_input_creator(
        input_filename, input_format, mutation_data_factory, genomeBuild, other_opts)
    output_renderer = OncotatorCLIUtils.create_output_renderer(output_filename, output_format, other_opts)

    run_spec = RunSpecification()
    run_spec.initialize(
        input_creator,
        output_renderer,
        manualAnnotations=global_annotations,
        datasources=datasource_list,
        isMulticore=is_multicore,
        numCores=num_cores,
        defaultAnnotations=default_annotations,
        cacheUrl=cache_url,
        read_only_cache=read_only_cache,
        is_skip_no_alts=is_skip_no_alts,
        annotating_type=annotating_type,
        is_allow_annotation_overwriting=is_allow_annotation_overwriting,
    )
    return run_spec
def _annotateTest(self, inputFilename, outputFilename, datasource_dir, inputFormat="MAFLITE",
                  outputFormat="TCGAMAF", default_annotations=TCGA_MAF_DEFAULTS,
                  override_annotations=None, is_skip_no_alts=False):
    """Annotate inputFilename into outputFilename and return the result of Annotator.annotate().

    A run specification is created through OncotatorCLIUtils.create_run_spec using
    datasource_dir as the datasource directory.  override_annotations are passed as
    the global (manual) annotations; None is treated as an empty dict.
    """
    self.logger.info("Initializing Annotator...")
    global_overrides = dict() if override_annotations is None else override_annotations

    annotator = Annotator()
    run_spec = OncotatorCLIUtils.create_run_spec(
        inputFormat,
        outputFormat,
        inputFilename,
        outputFilename,
        defaultAnnotations=default_annotations,
        datasourceDir=datasource_dir,
        globalAnnotations=global_overrides,
        is_skip_no_alts=is_skip_no_alts,
    )
    annotator.initialize(run_spec)

    self.logger.info("Annotation starting...")
    return annotator.annotate()
def create_run_spec(inputFormat, outputFormat, inputFilename, outputFilename, globalAnnotations=None,
                    datasourceDir=None, genomeBuild="hg19", isMulticore=False, numCores=4,
                    defaultAnnotations=None, cacheUrl=None, read_only_cache=True,
                    tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None,
                    annotating_type=None):
    """Create the RunSpecification for an Oncotator session.

    This is a very simple interface to start an Oncotator session.  As a
    warning, this interface may not be supported in future versions.

    The datasources are created by DatasourceFactory.createDatasources from
    datasourceDir, and every TranscriptProvider among them is set to tx_mode.
    globalAnnotations, defaultAnnotations and other_opts may be None (an empty
    dict is used); a supplied other_opts is updated in place.

    Returns the initialized RunSpecification.

    Raises RunSpecificationException when parameter validation reports a
    message at ERROR or CRITICAL level.

    TODO: Define the default datasource location used when datasourceDir is None.
    TODO: Current implementation attempts to annotate using a default set of
          datasources; make sure that is no longer the case.
    TODO: This method may get refactored into a separate class that handles
          run configuration objects.
    """
    # TODO: Use dependency injection for list of name value pairs?  Otherwise, set it up as an
    #       attribute on this class.
    # TODO: Use dependency injection to return instance of the input/output classes
    if globalAnnotations is None:
        globalAnnotations = dict()
    if defaultAnnotations is None:
        defaultAnnotations = dict()
    if other_opts is None:
        other_opts = dict()
    other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

    log = logging.getLogger(__name__)

    # Step 0: validate the parameters, logging every message.  The first message
    # at ERROR or CRITICAL level aborts with an exception (after it is logged).
    problems = RunSpecificationFactory._validate_run_spec_parameters(
        inputFormat, outputFormat, inputFilename, outputFilename, globalAnnotations, datasourceDir,
        genomeBuild, isMulticore, numCores, defaultAnnotations, cacheUrl, read_only_cache, tx_mode,
        is_skip_no_alts, other_opts, annotating_type)
    for problem in problems:
        log.log(problem.level, problem.message)
        if problem.level in (logging.ERROR, logging.CRITICAL):
            raise RunSpecificationException(problem.message)

    # Step 1: initialize input and output.
    input_creator = OncotatorCLIUtils.create_input_creator(inputFilename, inputFormat, genomeBuild, other_opts)
    output_renderer = OncotatorCLIUtils.create_output_renderer(outputFilename, outputFormat, other_opts)

    # Step 2: datasources.
    datasource_list = DatasourceFactory.createDatasources(
        datasourceDir, genomeBuild, isMulticore=isMulticore, numCores=numCores, tx_mode=tx_mode)

    # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a
    #       cleaner way.
    for datasource in datasource_list:
        if not isinstance(datasource, TranscriptProvider):
            continue
        log.info("Setting %s %s to tx-mode of %s..." % (datasource.title, datasource.version, tx_mode))
        datasource.set_tx_mode(tx_mode)

    run_spec = RunSpecification()
    run_spec.initialize(
        input_creator,
        output_renderer,
        manualAnnotations=globalAnnotations,
        datasources=datasource_list,
        isMulticore=isMulticore,
        numCores=numCores,
        defaultAnnotations=defaultAnnotations,
        cacheUrl=cacheUrl,
        read_only_cache=read_only_cache,
        is_skip_no_alts=is_skip_no_alts,
        annotating_type=annotating_type,
    )
    return run_spec
def main(argv=None):  # IGNORE:C0111
    """Command-line entry point: parse options, set up logging and run one annotation.

    argv -- optional list of extra arguments.  When given, the entries are
            appended to sys.argv before the options are parsed.

    Returns 0 once annotation has finished, and also 0 when the run is
    interrupted with KeyboardInterrupt.
    """
    from oncotator.utils.OncotatorCLIUtils import OncotatorCLIUtils
    from oncotator.Annotator import Annotator

    if argv is not None:
        # parseOptions() calls parse_args() without arguments, so extra
        # arguments have to be made visible through sys.argv.
        sys.argv.extend(argv)

    program_version = "%s" % __version__
    program_version_message = '%%(prog)s %s' % program_version

    try:
        args = parseOptions(program_version_message)
        verbose = args.verbose
        if verbose > 0:
            print("Verbose mode on")

        logFilename = args.log_name  # 'oncotator.log'

        # Create a basic logger to a file
        loggingFormat = '%(asctime)s %(levelname)s [%(name)s:%(lineno)d] %(message)s'
        logging.basicConfig(filename=logFilename, level=logging.INFO, format=loggingFormat)

        # Add a console handler to the root logger, so every logger also writes to the console
        # using the same format as the log file.  The console only shows WARN and above unless
        # verbose is set.
        ch = logging.StreamHandler()
        ch.setLevel(logging.WARN)
        ch.setFormatter(logging.Formatter(loggingFormat))
        if verbose:
            ch.setLevel(logging.INFO)
            print("Path:")
            print(sys.path)
            print(" ")
        logging.getLogger('').addHandler(ch)

        logger = logging.getLogger(__name__)
        logger.info("Oncotator %s", program_version)
        logger.info("Args: %s", args)
        logger.info('Log file: %s', os.path.abspath(logFilename))

        if DEBUG:
            logger.setLevel(logging.DEBUG)

        if not NGSLIB_INSTALLED:
            logger.warning("ngslib module not installed. Will be unable to annotate with BigWig datasources.")

        # Initiate an Oncotator session.
        inputFilename = os.path.expanduser(args.input_file)
        outputFilename = os.path.expanduser(args.output_file)
        inputFormat = args.input_format.upper()
        outputFormat = args.output_format.upper()
        datasourceDir = os.path.expanduser(args.dbDir)
        cache_url = args.cache_url
        read_only_cache = args.read_only_cache
        tx_mode = args.tx_mode
        is_skip_no_alts = args.skip_no_alt
        genome_build = args.genome_build

        # Parse annotation overrides.  A config file that does not exist is ignored with a warning.
        commandLineManualOverrides = args.override_cli
        overrideConfigFile = args.override_config
        if overrideConfigFile is not None and not os.path.exists(overrideConfigFile):
            logger.warning("Could not find %s ... proceeding anyway.", overrideConfigFile)
            overrideConfigFile = None
        manualOverrides = OncotatorCLIUtils.determineAllAnnotationValues(commandLineManualOverrides,
                                                                          overrideConfigFile)

        # Parse default overrides.  A missing file is only worth a warning when it is not the
        # built-in default location.
        commandLineDefaultValues = args.default_cli
        defaultConfigFile = args.default_config
        if defaultConfigFile is not None and not os.path.exists(defaultConfigFile):
            if defaultConfigFile != DEFAULT_DEFAULT_ANNOTATIONS:
                logger.warning("Could not find %s ... proceeding anyway.", defaultConfigFile)
            else:
                logger.info("Could not find Broad-specific %s ... proceeding without any default annotations. __UNKNOWN__ may appear in TCGA MAF outputs.", defaultConfigFile)
            defaultConfigFile = None
        defaultValues = OncotatorCLIUtils.determineAllAnnotationValues(commandLineDefaultValues,
                                                                        defaultConfigFile)

        # Create a run configuration to pass to the Annotator class.
        annotating_type = None
        if inputFormat == "SEG_FILE":
            annotating_type = RunSpecification.ANNOTATE_SEGMENTS
        runConfig = RunSpecificationFactory.create_run_spec(
            inputFormat, outputFormat, inputFilename, outputFilename,
            globalAnnotations=manualOverrides, datasourceDir=datasourceDir,
            isMulticore=(not args.noMulticore), defaultAnnotations=defaultValues,
            cacheUrl=cache_url, read_only_cache=read_only_cache, tx_mode=tx_mode,
            is_skip_no_alts=is_skip_no_alts, genomeBuild=genome_build,
            other_opts=determineOtherOptions(args), annotating_type=annotating_type)

        annotator = Annotator()
        annotator.initialize(runConfig)
        annotator.annotate()

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
def parseOptions(program_version_message):
    """Define the Oncotator command-line interface and parse the arguments.

    program_version_message -- text printed by -V/--version; it also starts the
                               help description.

    Returns the namespace produced by parse_args(), which is called without
    arguments.
    """
    # NOTE(review): the description and epilog text is reproduced exactly as it appears in this
    # view (single-space separated).  RawDescriptionHelpFormatter prints it without re-wrapping,
    # so confirm the intended line layout against the upstream file.
    description = program_version_message + (
        ' Oncotator is a tool for annotating human genomic point mutations and indels '
        'with data relevant to cancer researchers. '
    )
    epilog = (
        ' Example usage ------------- '
        'oncotator -v --input_format=MAFLITE --output_format=TCGAMAF myInputFile.maflite myOutputFile.maf.annotated hg19 '
        'IMPORTANT NOTE: hg19 is only supported genome build for now. '
        'Default values specified by -d or --default_annotation_values are used when an annotation does not exist or is populated with an empty string ("") '
        'Both default and override config files and command line specifications stack. '
        'Example of an override_config or default_config file: '
        '# Create center, source, sequencer, and score annotations, with the values broad.mit.edu, WXS, Illumina GAIIx, and <blank> for all mutations. '
        '# This will overwrite all mutations. '
        '[manual_annotations] '
        'override:center=broad.mit.edu,source=WXS,sequencer=Illumina GAIIx,score= '
        'Example of cache urls: '
        '# Use a file (/home/user/myfile.cache) ... note the three forward slashes after "file:" for absolute path. '
        '-u file:///home/user/myfile.cache '
        '-u file://relative_file.cache '
        '# memcache '
        '-u memcache://localhost:11211 '
        'Please note that only VCF input will populate the alt_allele_seen annotation. '
        'All other inputs assume that the alternate is present if it appears at all. '
        'This feature is to allow users to include or exclude GT of 0/0 or ./. variants when converting VCFs to MAF. '
        'If --skip-no-alt is specified, VCF input processing will remove mutations with alt_allele_seen of False entirely (the mutations will not even seen when output format is SIMPLE_TSV). '
        '----- '
        'Copyright 2012 Broad Institute. All rights reserved. '
        'Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied. '
        'Oncotator is free for non-profit use. '
        'See LICENSE for complete licensing information. '
    )

    cli = ArgumentParser(description=description, formatter_class=RawDescriptionHelpFormatter, epilog=epilog)
    add = cli.add_argument  # the registration order below fixes the order of the help output

    add("-v", "--verbose", dest="verbose", action="count", default=5,
        help="set verbosity level [default: 5]")
    add('-V', '--version', action='version', version=program_version_message)
    add('-i', '--input_format', type=str, default="MAFLITE",
        choices=OncotatorCLIUtils.getSupportedInputFormats(),
        help='Input format. Note that MAFLITE will work for any tsv file with appropriate headers, '
             'so long as all of the required headers (or an alias -- see maflite.config) are present. '
             '[default: %s]' % "MAFLITE")
    add('--db-dir', dest='dbDir', default=DEFAULT_DB_DIR,
        help='Main annotation database directory. [default: %s]' % DEFAULT_DB_DIR)
    add('-o', '--output_format', type=str, default="TCGAMAF",
        choices=OncotatorCLIUtils.getSupportedOutputFormats(),
        help='Output format. [default: %s]' % "TCGAMAF")
    add('--override_config', type=str,
        help="File path to manual annotations in a config file format "
             "(section is 'manual_annotations' and annotation:value pairs).")
    add('--default_config', type=str,
        help="File path to default annotation values in a config file format "
             "(section is 'manual_annotations' and annotation:value pairs).")
    add('--no-multicore', dest="noMulticore", action='store_true', default=False,
        help="Disables all multicore functionality.")

    # Positional arguments.
    add('input_file', type=str, help='Input file to be annotated. Type is specified through options.')
    add('output_file', type=str, help='Output file name of annotated file.')
    add('genome_build', metavar='genome_build', type=str, choices=["hg19"],
        help="Genome build. For example: hg19")

    add('-a', '--annotate-manual', dest="override_cli", type=str, action='append', default=[],
        help="Specify annotations to override. Can be specified multiple times. "
             "E.g. -a 'name1:value1' -a 'name2:value2' ")
    add('-d', '--annotate-default', dest="default_cli", type=str, action='append', default=[],
        help="Specify default values for annotations. Can be specified multiple times. "
             "E.g. -d 'name1:value1' -d 'name2:value2' ")
    add('-u', '--cache-url', dest="cache_url", type=str, default=None,
        help=" URL to use for cache. See help for examples.")
    add('-r', '--read_only_cache', action='store_true', dest="read_only_cache", default=False,
        help="Makes the cache read-only")
    add('--tx-mode', dest="tx_mode", default=DEFAULT_TX_MODE, choices=TranscriptProvider.TX_MODE_CHOICES,
        help="Specify transcript mode for transcript providing datasources that support multiple modes. "
             "[default: %s]" % DEFAULT_TX_MODE)
    add('--infer_genotypes', dest='infer_genotypes', default="false",
        choices=["yes", "true", "t", "1", "y", "no", "false", "f", "0", "n"],
        help="Forces the VCF output renderer to populate the output genotypes as heterozygous. "
             "This option should only be used when converting a MAFLITE to a VCF; otherwise, "
             "the option has no effect. [default: %s]" % "false")
    add('--skip-no-alt', dest="skip_no_alt", action='store_true',
        help="If specified, any mutation with annotation alt_allele_seen of 'False' will not be "
             "annotated or rendered. Do not use if output format is a VCF. "
             "If alt_allele_seen annotation is missing, render the mutation.")
    add('--log_name', dest='log_name', default="oncotator.log",
        help="Specify log output location. Default: oncotator.log")
    add('--prepend', dest="prepend", action='store_true',
        help="If specified for TCGAMAF output, will put a 'i_' in front of fields that are not "
             "directly rendered in Oncotator TCGA MAFs")
    add('--infer-onps', dest="infer_onps", action='store_true',
        help="Will merge adjacent SNPs,DNPs,TNPs,etc if they are in the same sample. "
             "This assumes that the input file is position sorted. "
             "This may cause problems with VCF -> VCF conversion, and does not guarantee input order is maintained.")
    add('-c', '--canonical-tx-file', dest="canonical_tx_file", type=str,
        help="Simple text file with list of transcript IDs (one per line) to always select where possible for variants. "
             "Transcript IDs must match the ones used by the transcript provider in your datasource (e.g. gencode ENST00000123456). "
             "If more than one transcript can be selected for a variant, uses the method defined by --tx-mode to break ties. "
             "Using this list means that a transcript will be selected from this list first, possibly superseding a best-effect. "
             "Note that transcript version number is not considered, whether included in the list or not.")

    # Process arguments
    return cli.parse_args()
def main(argv=None):  # IGNORE:C0111
    """Command-line entry point: parse options, set up logging and run one annotation.

    argv -- optional list of extra arguments.  When given, the entries are
            appended to sys.argv before the options are parsed.

    Returns 0 once annotation has finished, and also 0 when the run is
    interrupted with KeyboardInterrupt.
    """
    from oncotator.utils.OncotatorCLIUtils import OncotatorCLIUtils
    from oncotator.Annotator import Annotator

    if argv is not None:
        # parseOptions() calls parse_args() without arguments, so extra
        # arguments have to be made visible through sys.argv.
        sys.argv.extend(argv)

    program_version = "%s" % __version__
    program_version_message = '%%(prog)s %s' % program_version

    try:
        args = parseOptions(program_version_message)
        verbose = args.verbose
        if verbose > 0:
            print("Verbose mode on")

        logFilename = args.log_name  # 'oncotator.log'

        # Create a basic logger to a file
        loggingFormat = '%(asctime)s %(levelname)s [%(name)s:%(lineno)d] %(message)s'
        logging.basicConfig(filename=logFilename, level=logging.INFO, format=loggingFormat)

        # Add a console handler to the root logger, so every logger also writes to the console
        # using the same format as the log file.  The console only shows WARN and above unless
        # verbose is set.
        ch = logging.StreamHandler()
        ch.setLevel(logging.WARN)
        ch.setFormatter(logging.Formatter(loggingFormat))
        if verbose:
            ch.setLevel(logging.INFO)
            print("Path:")
            print(sys.path)
            print(" ")
        logging.getLogger('').addHandler(ch)

        logger = logging.getLogger(__name__)
        logger.info("Oncotator %s", program_version)
        logger.info("Args: %s", args)
        logger.info('Log file: %s', os.path.abspath(logFilename))

        if DEBUG:
            logger.setLevel(logging.DEBUG)

        # Initiate an Oncotator session.
        inputFilename = os.path.expanduser(args.input_file)
        outputFilename = os.path.expanduser(args.output_file)
        inputFormat = args.input_format.upper()
        outputFormat = args.output_format.upper()
        datasourceDir = os.path.expanduser(args.dbDir)
        cache_url = args.cache_url
        read_only_cache = args.read_only_cache
        tx_mode = args.tx_mode
        is_skip_no_alts = args.skip_no_alt
        genome_build = args.genome_build

        # Parse annotation overrides.  A config file that does not exist is ignored with a warning.
        commandLineManualOverrides = args.override_cli
        overrideConfigFile = args.override_config
        if overrideConfigFile is not None and not os.path.exists(overrideConfigFile):
            logger.warning("Could not find %s ... proceeding anyway.", overrideConfigFile)
            overrideConfigFile = None
        manualOverrides = OncotatorCLIUtils.determineAllAnnotationValues(commandLineManualOverrides,
                                                                          overrideConfigFile)

        # Parse default overrides.  A missing file is only worth a warning when it is not the
        # built-in default location.
        commandLineDefaultValues = args.default_cli
        defaultConfigFile = args.default_config
        if defaultConfigFile is not None and not os.path.exists(defaultConfigFile):
            if defaultConfigFile != DEFAULT_DEFAULT_ANNOTATIONS:
                logger.warning("Could not find %s ... proceeding anyway.", defaultConfigFile)
            else:
                logger.info("Could not find Broad-specific %s ... proceeding without any default annotations. __UNKNOWN__ may appear in TCGA MAF outputs.", defaultConfigFile)
            defaultConfigFile = None
        defaultValues = OncotatorCLIUtils.determineAllAnnotationValues(commandLineDefaultValues,
                                                                        defaultConfigFile)

        # Create a run configuration to pass to the Annotator class.
        annotating_type = None
        if inputFormat == "SEG_FILE":
            annotating_type = RunSpecification.ANNOTATE_SEGMENTS
        runConfig = RunSpecificationFactory.create_run_spec(
            inputFormat, outputFormat, inputFilename, outputFilename,
            globalAnnotations=manualOverrides, datasourceDir=datasourceDir,
            isMulticore=(not args.noMulticore), defaultAnnotations=defaultValues,
            cacheUrl=cache_url, read_only_cache=read_only_cache, tx_mode=tx_mode,
            is_skip_no_alts=is_skip_no_alts, genomeBuild=genome_build,
            other_opts=determineOtherOptions(args), annotating_type=annotating_type)

        annotator = Annotator()
        annotator.initialize(runConfig)
        annotator.annotate()

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
def parseOptions(program_version_message):
    """Define the Oncotator command-line interface and parse the arguments.

    program_version_message -- text printed by -V/--version; it also starts the
                               help description.

    Returns the namespace produced by parse_args(), which is called without
    arguments.
    """
    # NOTE(review): the description and epilog text is reproduced exactly as it appears in this
    # view (single-space separated).  RawDescriptionHelpFormatter prints it without re-wrapping,
    # so confirm the intended line layout against the upstream file.
    description = program_version_message + (
        ' Oncotator is a tool for annotating human genomic point mutations and indels '
        'with data relevant to cancer researchers. '
    )
    epilog = (
        ' Example usage ------------- '
        'oncotator -v --input_format=MAFLITE --output_format=TCGAMAF myInputFile.maflite myOutputFile.maf.annotated hg19 '
        'IMPORTANT NOTE: hg19 is only supported genome build for now. '
        'Default values specified by -d or --default_annotation_values are used when an annotation does not exist or is populated with an empty string ("") '
        'Both default and override config files and command line specifications stack. '
        'Example of an override_config or default_config file: '
        '# Create center, source, sequencer, and score annotations, with the values broad.mit.edu, WXS, Illumina GAIIx, and <blank> for all mutations. '
        '# This will overwrite all mutations. '
        '[manual_annotations] '
        'override:center=broad.mit.edu,source=WXS,sequencer=Illumina GAIIx,score= '
        'Example of cache urls: '
        '# Use a file (/home/user/myfile.cache) ... note the three forward slashes after "file:" for absolute path. '
        '-u file:///home/user/myfile.cache '
        '-u file://relative_file.cache '
        '# memcache '
        '-u memcache://localhost:11211 '
        'Please note that only VCF input will populate the alt_allele_seen annotation. '
        'All other inputs assume that the alternate is present if it appears at all. '
        'This feature is to allow users to include or exclude GT of 0/0 or ./. variants when converting VCFs to MAF. '
        'If --skip-no-alt is specified, VCF input processing will remove mutations with alt_allele_seen of False entirely (the mutations will not even seen when output format is SIMPLE_TSV). '
        '----- '
        'Copyright 2012 Broad Institute. All rights reserved. '
        'Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied. '
        'Oncotator is free for non-profit use. '
        'See LICENSE for complete licensing information. '
    )

    cli = ArgumentParser(description=description, formatter_class=RawDescriptionHelpFormatter, epilog=epilog)
    add = cli.add_argument  # the registration order below fixes the order of the help output

    add("-v", "--verbose", dest="verbose", action="count", default=5,
        help="set verbosity level [default: 5]")
    add('-V', '--version', action='version', version=program_version_message)
    add('-i', '--input_format', type=str, default="MAFLITE",
        choices=OncotatorCLIUtils.getSupportedInputFormats(),
        help='Input format. Note that MAFLITE will work for any tsv file with appropriate headers, '
             'so long as all of the required headers (or an alias -- see maflite.config) are present. '
             '[default: %s]' % "MAFLITE")
    add('--db-dir', dest='dbDir', default=DEFAULT_DB_DIR,
        help='Main annotation database directory. [default: %s]' % DEFAULT_DB_DIR)
    add('-o', '--output_format', type=str, default="TCGAMAF",
        choices=OncotatorCLIUtils.getSupportedOutputFormats(),
        help='Output format. [default: %s]' % "TCGAMAF")
    add('--override_config', type=str,
        help="File path to manual annotations in a config file format "
             "(section is 'manual_annotations' and annotation:value pairs).")
    add('--default_config', type=str,
        help="File path to default annotation values in a config file format "
             "(section is 'manual_annotations' and annotation:value pairs).")
    add('--no-multicore', dest="noMulticore", action='store_true', default=False,
        help="Disables all multicore functionality.")

    # Positional arguments.
    add('input_file', type=str, help='Input file to be annotated. Type is specified through options.')
    add('output_file', type=str, help='Output file name of annotated file.')
    add('genome_build', metavar='genome_build', type=str, choices=["hg19"],
        help="Genome build. For example: hg19")

    add('-a', '--annotate-manual', dest="override_cli", type=str, action='append', default=[],
        help="Specify annotations to override. Can be specified multiple times. "
             "E.g. -a 'name1:value1' -a 'name2:value2' ")
    add('-d', '--annotate-default', dest="default_cli", type=str, action='append', default=[],
        help="Specify default values for annotations. Can be specified multiple times. "
             "E.g. -d 'name1:value1' -d 'name2:value2' ")
    add('-u', '--cache-url', dest="cache_url", type=str, default=None,
        help=" URL to use for cache. See help for examples.")
    add('-r', '--read_only_cache', action='store_true', dest="read_only_cache", default=False,
        help="Makes the cache read-only")
    add('--tx-mode', dest="tx_mode", default=DEFAULT_TX_MODE, choices=TranscriptProvider.TX_MODE_CHOICES,
        help="Specify transcript mode for transcript providing datasources that support multiple modes. "
             "[default: %s]" % DEFAULT_TX_MODE)
    add('--infer_genotypes', dest='infer_genotypes', default="false",
        choices=["yes", "true", "t", "1", "y", "no", "false", "f", "0", "n"],
        help="Forces the VCF output renderer to populate the output genotypes as heterozygous. "
             "This option should only be used when converting a MAFLITE to a VCF; otherwise, "
             "the option has no effect. [default: %s]" % "false")
    add('--skip-no-alt', dest="skip_no_alt", action='store_true',
        help="If specified, any mutation with annotation alt_allele_seen of 'False' will not be "
             "annotated or rendered. Do not use if output format is a VCF. "
             "If alt_allele_seen annotation is missing, render the mutation.")
    add('--log_name', dest='log_name', default="oncotator.log",
        help="Specify log output location. Default: oncotator.log")
    add('--prepend', dest="prepend", action='store_true',
        help="If specified for TCGAMAF output, will put a 'i_' in front of fields that are not "
             "directly rendered in Oncotator TCGA MAFs")

    # Process arguments
    return cli.parse_args()
def main(argv=None):  # IGNORE:C0111
    """Command-line entry point: parse options, set up logging and run one annotation.

    argv -- optional list of extra arguments.  When given, the entries are
            appended to sys.argv before the options are parsed.

    Returns 0 once annotation has finished, and also 0 when the run is
    interrupted with KeyboardInterrupt.
    """
    from oncotator.utils.OncotatorCLIUtils import OncotatorCLIUtils
    from oncotator.Annotator import Annotator

    if argv is not None:
        sys.argv.extend(argv)

    program_version = "%s" % __version__
    program_version_message = '%%(prog)s %s' % (program_version)
    program_shortdesc = program_version_message
    program_license = (
        '%s %s Copyright 2012 Broad Institute. All rights reserved. '
        '#TODO: License Here '
        'Distributed on an "AS IS" basis without warranties or conditions of any kind, either express or implied. '
        'USAGE '
    ) % (program_shortdesc, str(__date__))

    try:
        args = parseOptions(program_license, program_version_message)
        verbose = args.verbose
        if verbose > 0:
            print("Verbose mode on")

        logFilename = args.log_name  # 'oncotator.log'

        # Create a basic logger to a file
        loggingFormat = '%(asctime)s %(levelname)s [%(name)s:%(lineno)d] %(message)s'
        logging.basicConfig(filename=logFilename, level=logging.INFO, format=loggingFormat)

        # Add a console handler to the root logger, so every logger also writes to the console
        # using the same format as the log file.  The console only shows WARN and above unless
        # verbose is set.
        ch = logging.StreamHandler()
        ch.setLevel(logging.WARN)
        ch.setFormatter(logging.Formatter(loggingFormat))
        if verbose:
            ch.setLevel(logging.INFO)
            print("Path:")
            print(sys.path)
            print(" ")
        logging.getLogger('').addHandler(ch)

        logger = logging.getLogger(__name__)
        logger.info("Oncotator %s", program_version)
        logger.info("Args: %s", args)
        logger.info('Log file: %s', os.path.abspath(logFilename))

        if DEBUG:
            logger.setLevel(logging.DEBUG)

        # Initiate an Oncotator session.
        inputFilename = os.path.expanduser(args.input_file)
        outputFilename = os.path.expanduser(args.output_file)
        inputFormat = args.input_format.upper()
        outputFormat = args.output_format.upper()
        datasourceDir = os.path.expanduser(args.dbDir)
        cache_url = args.cache_url
        read_only_cache = args.read_only_cache
        tx_mode = args.tx_mode
        is_skip_no_alts = args.skip_no_alt
        genome_build = args.genome_build
        is_no_prepend = not args.prepend

        # Parse annotation overrides.  A config file that does not exist is ignored with a warning.
        commandLineManualOverrides = args.override_cli
        overrideConfigFile = args.override_config
        if overrideConfigFile is not None and not os.path.exists(overrideConfigFile):
            logger.warning("Could not find %s ... proceeding anyway.", overrideConfigFile)
            overrideConfigFile = None
        manualOverrides = OncotatorCLIUtils.determineAllAnnotationValues(commandLineManualOverrides,
                                                                          overrideConfigFile)

        # Parse default overrides.  A missing file is only worth a warning when it is not the
        # built-in default location.
        commandLineDefaultValues = args.default_cli
        defaultConfigFile = args.default_config
        if defaultConfigFile is not None and not os.path.exists(defaultConfigFile):
            if defaultConfigFile != DEFAULT_DEFAULT_ANNOTATIONS:
                logger.warning("Could not find %s ... proceeding anyway.", defaultConfigFile)
            else:
                logger.info("Could not find Broad-specific %s ... proceeding without any default annotations. __UNKNOWN__ may appear in TCGA MAF outputs.", defaultConfigFile)
            defaultConfigFile = None
        defaultValues = OncotatorCLIUtils.determineAllAnnotationValues(commandLineDefaultValues,
                                                                        defaultConfigFile)

        # Sanity messages about option combinations; none of them stops the run.
        if is_skip_no_alts and (outputFormat == "VCF"):
            logger.warning("--skip-no-alt specified when output is a VCF. This is likely to generate errors.")
        if is_skip_no_alts and (inputFormat != "VCF"):
            logger.info("--skip-no-alt specified when input is not VCF. skip-no-alt is not going to do anything.")
        # NOTE(review): is_no_prepend is True whenever --prepend was NOT given, so this message
        # is logged for every non-TCGAMAF output unless --prepend is passed -- confirm intended.
        if is_no_prepend and (outputFormat != "TCGAMAF"):
            logger.info("no prepend specified when output is not TCGAMAF. Ignoring and proceeding.")

        if outputFormat == "TCGAVCF":
            logger.warning("TCGA VCF output is not supported and should be considered experimental when used outside of the Broad Institute. Outside of the Broad Institute, use of -o VCF is more likely to be desired by users.")

        # Create a run configuration to pass to the Annotator class.
        runConfig = OncotatorCLIUtils.create_run_spec(
            inputFormat, outputFormat, inputFilename, outputFilename,
            globalAnnotations=manualOverrides, datasourceDir=datasourceDir,
            isMulticore=(not args.noMulticore), defaultAnnotations=defaultValues,
            cacheUrl=cache_url, read_only_cache=read_only_cache, tx_mode=tx_mode,
            is_skip_no_alts=is_skip_no_alts, genomeBuild=genome_build,
            other_opts=determineOtherOptions(args, logger))

        annotator = Annotator()
        annotator.initialize(runConfig)
        annotator.annotate()

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
def create_run_spec(inputFormat, outputFormat, inputFilename, outputFilename, globalAnnotations=None,
                    datasourceDir=None, genomeBuild="hg19", isMulticore=False, numCores=4,
                    defaultAnnotations=None, cacheUrl=None, read_only_cache=True,
                    tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None,
                    annotating_type=None):
    """Create the RunSpecification for an Oncotator session.

    This is a very simple interface to start an Oncotator session.  As a
    warning, this interface may not be supported in future versions.

    When datasourceDir is empty or None, no datasources are created.  Otherwise
    they come from DatasourceFactory.createDatasources, and every
    TranscriptProvider among them is set to tx_mode.  If other_opts names a
    custom canonical transcript file, each TranscriptProvider also receives the
    transcript IDs listed in it (one per line, version suffix removed).

    globalAnnotations, defaultAnnotations and other_opts may be None (an empty
    dict is used); a supplied other_opts is updated in place.

    Returns the initialized RunSpecification.

    Raises RunSpecificationException when parameter validation reports a
    message at ERROR or CRITICAL level.

    TODO: Define default location for datasourceDir.
    TODO: This method may get refactored into a separate class that handles
          run configuration objects.
    """
    # TODO: Use dependency injection for list of name value pairs?  Otherwise, set it up as an
    #       attribute on this class.
    # TODO: Use dependency injection to return instance of the input/output classes
    globalAnnotations = dict() if globalAnnotations is None else globalAnnotations
    defaultAnnotations = dict() if defaultAnnotations is None else defaultAnnotations
    other_opts = dict() if other_opts is None else other_opts

    other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

    log = logging.getLogger(__name__)

    # Step 0 Validate given parameters and log messages.  If an error or critical is found,
    # throw an exception.
    validation_messages = RunSpecificationFactory._validate_run_spec_parameters(
        inputFormat, outputFormat, inputFilename, outputFilename, globalAnnotations, datasourceDir,
        genomeBuild, isMulticore, numCores, defaultAnnotations, cacheUrl, read_only_cache, tx_mode,
        is_skip_no_alts, other_opts, annotating_type)
    for msg in validation_messages:
        log.log(msg.level, msg.message)
        if (msg.level == logging.ERROR) or (msg.level == logging.CRITICAL):
            raise RunSpecificationException(msg.message)

    # Step 1 Initialize input and output
    inputCreator = OncotatorCLIUtils.create_input_creator(inputFilename, inputFormat, genomeBuild, other_opts)
    outputRenderer = OncotatorCLIUtils.create_output_renderer(outputFilename, outputFormat, other_opts)

    # Step 2 Datasources
    if datasourceDir:
        datasource_list = DatasourceFactory.createDatasources(
            datasourceDir, genomeBuild, isMulticore=isMulticore, numCores=numCores, tx_mode=tx_mode)
    else:
        datasource_list = []

    # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a
    #       cleaner way.
    for ds in datasource_list:
        if not isinstance(ds, TranscriptProvider):
            continue

        log.info("Setting %s %s to tx-mode of %s...", ds.title, ds.version, tx_mode)
        ds.set_tx_mode(tx_mode)

        custom_tx_file = other_opts.get(OptionConstants.CUSTOM_CANONICAL_TX_LIST_FILE, None)
        if custom_tx_file is not None:
            # 'with' closes the file even if reading fails, and open() exists on Python 2 and 3.
            with open(custom_tx_file, 'r') as cc_txs_fp:
                # Strip the line terminator before dropping the version suffix, so an ID listed
                # without a version does not keep its newline.  Blank lines carry no ID and are
                # skipped.
                cc_txs = [line.strip().rsplit(".", 1)[0] for line in cc_txs_fp if line.strip()]
            ds.set_custom_canonical_txs(cc_txs)
            log.info(str(len(cc_txs)) + " custom canonical transcripts specified.")
        else:
            log.info("No custom canonical transcripts specified.")

    result = RunSpecification()
    result.initialize(inputCreator, outputRenderer, manualAnnotations=globalAnnotations,
                      datasources=datasource_list, isMulticore=isMulticore, numCores=numCores,
                      defaultAnnotations=defaultAnnotations, cacheUrl=cacheUrl,
                      read_only_cache=read_only_cache, is_skip_no_alts=is_skip_no_alts,
                      annotating_type=annotating_type)
    return result