def testInitializingDatasources(self):
    """Load the configured datasource directory in multicore and single-core mode.

    For each mode the result must be a non-empty collection whose entries are
    all ``Datasource`` instances, and both modes must yield the same number of
    datasources.

    This test is RAM intensive and requires the default data corpus.
    """
    # --- multicore pass ---
    multi_db_dir = self.config.get("DEFAULT", "dbDir")
    multiDS = DatasourceFactory.createDatasources(multi_db_dir, "hg19", isMulticore=True)
    self.assertTrue(multiDS is not None, "Datasource list was None")
    self.assertTrue(len(multiDS) != 0, "Datasource list was empty")
    for idx in range(len(multiDS)):
        # Index directly instead of binding each datasource to a local, so no
        # extra reference outlives the loop.
        self.assertTrue(multiDS[idx] is not None, "multi core datasource was None: " + str(idx))
        self.assertTrue(isinstance(multiDS[idx], Datasource))

    # This test can be memory intensive: remember only the count and drop the
    # multicore datasources before building the single-core ones.
    numMultiDS = len(multiDS)
    del multiDS

    # --- single-core pass ---
    single_db_dir = self.config.get("DEFAULT", "dbDir")
    singleCoreDS = DatasourceFactory.createDatasources(single_db_dir, "hg19", isMulticore=False)
    self.assertTrue(singleCoreDS is not None, "Datasource list was None")
    self.assertTrue(len(singleCoreDS) != 0, "Datasource list was empty")
    for idx in range(len(singleCoreDS)):
        self.assertTrue(singleCoreDS[idx] is not None, "single core datasource was None: " + str(idx))
        self.assertTrue(isinstance(singleCoreDS[idx], Datasource))

    numSingleDS = len(singleCoreDS)
    self.assertTrue(
        numMultiDS == numSingleDS,
        "Length of single core datasource list was not the same as multicore",
    )
    del singleCoreDS
def testMulticoreNoDatasources(self):
    """If using multicore, creation must not hang when the db dir holds no datasources.

    'testdata/maflite/' is used as a directory without datasources; the
    returned collection is expected to be empty.
    """
    empty_db_dir = 'testdata/maflite/'
    # Third positional argument is True -- presumably isMulticore, as the other
    # callers pass it by keyword in that position's role; verify against the factory.
    multiDS = DatasourceFactory.createDatasources(empty_db_dir, "hg19", True)
    num_created = len(multiDS)
    self.assertTrue(
        num_created == 0,
        "Length of multiDS when there were no datasources was not zero.",
    )
def testAnnotateListOfMutations(self):
    """Test that an Annotator can be driven without an input or output.

    The annotator is initialized from a runspec that has no input creator and
    no output renderer (both ``None``), then fed a one-element list of
    mutations. The first annotated mutation must carry a "gene" annotation.
    """
    # Locate the datasource directory and create a runspec
    dbDir = self.config.get("DEFAULT", "dbDir")
    ds = DatasourceFactory.createDatasources(dbDir)
    runSpec = RunSpecification()
    runSpec.initialize(None, None, datasources=ds)

    # Initialize the annotator with the runspec
    annotator = Annotator()
    annotator.initialize(runSpec)

    m = MutationData()
    m.chr = "1"
    m.start = "12941796"
    m.end = "12941796"
    m.alt_allele = "G"
    m.ref_allele = "T"

    annotated_muts = annotator.annotate_mutations([m])

    # Use the next() builtin rather than the .next() method: the method only
    # exists on Python 2 iterators, the builtin works on Python 2.6+ and 3.
    m2 = next(annotated_muts)
    self.assertTrue(m2.get("gene", None) is not None,
                    "Annotated mutation did not have a gene annotation")
def testAnnotateListOfMutations(self):
    """Test that an Annotator can be driven without an input or output.

    The annotator is initialized from a runspec that has no input creator and
    no output renderer (both ``None``), then fed a one-element list of
    mutations. The first annotated mutation must carry a "gene" annotation.
    """
    # Locate the datasource directory and create a runspec
    dbDir = self.config.get("DEFAULT", "dbDir")
    ds = DatasourceFactory.createDatasources(dbDir)
    runSpec = RunSpecification()
    runSpec.initialize(None, None, datasources=ds)

    # Initialize the annotator with the runspec
    annotator = Annotator()
    annotator.initialize(runSpec)

    m = MutationData()
    m.chr = "1"
    m.start = "12941796"
    m.end = "12941796"
    m.alt_allele = "G"
    m.ref_allele = "T"

    annotated_muts = annotator.annotate_mutations([m])

    # Use the next() builtin rather than the .next() method: the method only
    # exists on Python 2 iterators, the builtin works on Python 2.6+ and 3.
    m2 = next(annotated_muts)
    self.assertTrue(m2.get("gene", None) is not None,
                    "Annotated mutation did not have a gene annotation")
def test_simple_transcript_annotation(self):
    """Exercise the backend of the web API call /transcript/.

    Mirrors http://www.broadinstitute.org/oncotator/transcript/ENST00000215832.6/ :
    the transcript must be found and must report "MAPK1" as its gene.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    transcript = annotator.retrieve_transcript_by_id("ENST00000215832.6")
    self.assertTrue(transcript is not None)

    gene_symbol = transcript.get_gene()
    self.assertTrue(gene_symbol == "MAPK1")
def test_querying_transcripts_by_genes(self):
    """Check that all transcripts for a given set of genes can be retrieved.

    Queries MAPK1 and PIK3CA together and expects more than three transcripts.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    # Step 1 get all of the relevant transcripts
    genes_of_interest = ["MAPK1", "PIK3CA"]
    txs = annotator.retrieve_transcripts_by_genes(genes_of_interest)
    num_txs = len(txs)
    self.assertTrue(num_txs > 3)
def test_simple_transcript_annotation(self):
    """Exercise the backend of the web API call /transcript/.

    Mirrors http://www.broadinstitute.org/oncotator/transcript/ENST00000215832.6/ :
    the transcript must be found and must report "MAPK1" as its gene.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    transcript = annotator.retrieve_transcript_by_id("ENST00000215832.6")
    self.assertTrue(transcript is not None)

    gene_symbol = transcript.get_gene()
    self.assertTrue(gene_symbol == "MAPK1")
def test_querying_transcripts_by_genes(self):
    """Check that all transcripts for a given set of genes can be retrieved.

    Queries MAPK1 and PIK3CA together and expects more than three transcripts.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    # Step 1 get all of the relevant transcripts
    genes_of_interest = ["MAPK1", "PIK3CA"]
    txs = annotator.retrieve_transcripts_by_genes(genes_of_interest)
    num_txs = len(txs)
    self.assertTrue(num_txs > 3)
def test_simple_genes_by_gene_annotation(self):
    """Exercise the backend of the web API call /gene/.

    Mirrors http://www.broadinstitute.org/oncotator/gene/MAPK1/ : the MAPK1
    transcripts are retrieved and gene-annotated, and the resulting mapping
    must contain exactly one key.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    txs = annotator.retrieve_transcripts_by_genes(["MAPK1"])
    self.assertTranscriptsFound(txs)

    mut_dict = annotator.annotate_genes_given_txs(txs)
    annotated_genes = mut_dict.keys()
    self.assertTrue(len(annotated_genes) == 1)
def test_simple_genes_by_gene_annotation(self):
    """Exercise the backend of the web API call /gene/.

    Mirrors http://www.broadinstitute.org/oncotator/gene/MAPK1/ : the MAPK1
    transcripts are retrieved and gene-annotated, and the resulting mapping
    must contain exactly one key.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    txs = annotator.retrieve_transcripts_by_genes(["MAPK1"])
    self.assertTranscriptsFound(txs)

    mut_dict = annotator.annotate_genes_given_txs(txs)
    annotated_genes = mut_dict.keys()
    self.assertTrue(len(annotated_genes) == 1)
def testInitializingDatasources(self):
    """Load the configured datasource directory in multicore and single-core mode.

    For each mode the result must be a non-empty collection whose entries are
    all ``Datasource`` instances, and both modes must yield the same number of
    datasources.

    This test is RAM intensive and requires the default data corpus.
    """
    # --- multicore pass ---
    multi_db_dir = self.config.get("DEFAULT", "dbDir")
    multiDS = DatasourceFactory.createDatasources(multi_db_dir, "hg19", isMulticore=True)
    self.assertTrue(multiDS is not None, "Datasource list was None")
    self.assertTrue(len(multiDS) != 0, "Datasource list was empty")
    for idx in range(len(multiDS)):
        # Index directly instead of binding each datasource to a local, so no
        # extra reference outlives the loop.
        self.assertTrue(multiDS[idx] is not None, "multi core datasource was None: " + str(idx))
        self.assertTrue(isinstance(multiDS[idx], Datasource))

    # This test can be memory intensive: remember only the count and drop the
    # multicore datasources before building the single-core ones.
    numMultiDS = len(multiDS)
    del multiDS

    # --- single-core pass ---
    single_db_dir = self.config.get("DEFAULT", "dbDir")
    singleCoreDS = DatasourceFactory.createDatasources(single_db_dir, "hg19", isMulticore=False)
    self.assertTrue(singleCoreDS is not None, "Datasource list was None")
    self.assertTrue(len(singleCoreDS) != 0, "Datasource list was empty")
    for idx in range(len(singleCoreDS)):
        self.assertTrue(singleCoreDS[idx] is not None, "single core datasource was None: " + str(idx))
        self.assertTrue(isinstance(singleCoreDS[idx], Datasource))

    numSingleDS = len(singleCoreDS)
    self.assertTrue(
        numMultiDS == numSingleDS,
        "Length of single core datasource list was not the same as multicore",
    )
    del singleCoreDS
def test_simple_genes_by_region_annotation(self):
    """Test web api backend call /genes/.

    Mirrors http://www.broadinstitute.org/oncotator/genes/chr22_22112223_22312558/
    (two genes: chr22:22,112,223-22,312,558). The transcripts in the region are
    retrieved and gene-annotated; for every resulting mutation the annotations
    the web API needs are read. Most values are only read, not validated, but
    reading them still fails the test (KeyError) if an annotation is missing.
    """
    datasource_list = DatasourceFactory.createDatasources(self._determine_db_dir(), "hg19", isMulticore=False)
    annotator = Annotator()
    for ds in datasource_list:
        annotator.addDatasource(ds)

    # Here is what the API would call....
    txs = annotator.retrieve_transcripts_by_region("22", 22112223, 22312558)
    self.assertTranscriptsFound(txs)

    mut_dict = annotator.annotate_genes_given_txs(txs)

    # Each mut will be for a separate gene
    for gene in mut_dict.keys():
        mut = mut_dict[gene]
        alt_accessions = mut["UniProt_alt_uniprot_accessions"].split("|")
        tcgascape_amp_peaks = mut["TCGAScape_Amplification_Peaks"].split("|")
        tcgascape_del_peaks = mut["TCGAScape_Deletion_Peaks"].split("|")
        tumorscape_amp_peaks = mut["TUMORScape_Amplification_Peaks"].split("|")
        tumorscape_del_peaks = mut["TUMORScape_Deletion_Peaks"].split("|")
        full_name = mut["HGNC_Approved Name"]
        # NOTE(review): both entries read the same annotation; the second one
        # looks like a copy-paste slip and presumably should read a
        # total-alterations annotation -- confirm the correct key name.
        cosmic = {
            "tissue_types_affected": mut["COSMIC_Tissue_tissue_types_affected"],
            "total_alterations_in_gene": mut["COSMIC_Tissue_tissue_types_affected"],
        }
        # Pass the two lists as separate arguments so chain() flattens them;
        # chain([a, b]) would just yield the two lists themselves.
        alt_aliases = list(
            itertools.chain(mut["HGNC_Previous Symbols"].split(", "), mut["HGNC_Synonyms"].split(", "))
        )
        location = mut["HGNC_Chromosome"]
        uniprot_accession = mut["UniProt_uniprot_accession"]

        transcripts = mut["transcripts"]
        self.assertTrue(transcripts is not None)
        self.assertTrue(len(transcripts) > 0)
        self.assertTrue(transcripts.startswith("ENST"))

        strand = mut["strand"]
        klass = mut["class"]

        uniprot_experimentals = mut["UniProt_AA_experimental_info"].split("|")
        self.assertTrue(uniprot_experimentals is not None)
        uniprot_natural_variations = mut["UniProt_AA_natural_variation"].split("|")
        uniprot_regions = mut["UniProt_AA_region"].split("|")
        uniprot_sites = mut["UniProt_AA_site"].split("|")
        uniprot_go_biological_processes = mut["UniProt_GO_Biological_Process"].split("|")
        uniprot_go_cellular_components = mut["UniProt_GO_Cellular_Component"].split("|")
        self.assertTrue(uniprot_go_cellular_components is not None)
        uniprot_go_molecular_functions = mut["UniProt_GO_Molecular_Function"].split("|")
def test_querying_transcripts_by_region(self):
    """Exercise the backend of the web API call /transcripts/.

    Retrieves the transcripts for the region ("4", 50164411, 60164411) and, for
    each one, gathers the values needed to populate the json described in
    doc/transcript_json_commented.json.txt. Most of the gathered values are
    not validated; only the refseq ids and the exon-coordinate count are
    asserted.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    txs = annotator.retrieve_transcripts_by_region("4", 50164411, 60164411)
    self.assertTranscriptsFound(txs)

    # Example of collecting enough data for the transcript json.
    # None of these values are validated.
    for tx in txs:
        transcript_id = tx.get_transcript_id()
        tx_start = tx.determine_transcript_start()
        tx_end = tx.determine_transcript_stop()
        gene = tx.get_gene()
        contig = tx.get_contig()
        n_exons = len(tx.get_exons())
        strand = tx.get_strand()
        footprint_start, footprint_end = tx.determine_cds_footprint()
        klass = tx.get_gene_type()
        cds_start = tx.determine_cds_start()
        cds_end = tx.determine_cds_stop()
        gene_id = tx.get_gene_id()

        genomic_coords = [[exon[0], exon[1]] for exon in tx.get_exons()]
        transcript_coords = [
            [TranscriptProviderUtils.convert_genomic_space_to_exon_space(exon[0] + 1, exon[1], tx)]
            for exon in tx.get_exons()
        ]
        code_len = int(cds_end) - int(cds_start) + 1

        # If refseq datasources are not available, this will fail.
        # Step 2 annotate the transcript, which produces a dummy mutation
        # with the refseq annotations.
        dummy_mut = annotator.annotate_transcript(tx)
        refseq_mRNA_id = dummy_mut["gencode_xref_refseq_mRNA_id"]
        refseq_prot_id = dummy_mut["gencode_xref_refseq_prot_acc"]

        # Description is unavailable right now
        description = ""

        self.assertTrue(refseq_mRNA_id is not None)
        self.assertTrue(refseq_prot_id is not None)
        num_coords = len(transcript_coords)
        self.assertTrue(num_coords == n_exons)
def test_simple_genes_by_region_annotation(self):
    """Test web api backend call /genes/.

    Mirrors http://www.broadinstitute.org/oncotator/genes/chr22_22112223_22312558/
    (two genes: chr22:22,112,223-22,312,558). The transcripts in the region are
    retrieved and gene-annotated; for every resulting mutation the annotations
    the web API needs are read. Most values are only read, not validated, but
    reading them still fails the test (KeyError) if an annotation is missing.
    """
    datasource_list = DatasourceFactory.createDatasources(self._determine_db_dir(), "hg19", isMulticore=False)
    annotator = Annotator()
    for ds in datasource_list:
        annotator.addDatasource(ds)

    # Here is what the API would call....
    txs = annotator.retrieve_transcripts_by_region("22", 22112223, 22312558)
    self.assertTranscriptsFound(txs)

    mut_dict = annotator.annotate_genes_given_txs(txs)

    # Each mut will be for a separate gene
    for gene in mut_dict.keys():
        mut = mut_dict[gene]
        alt_accessions = mut['UniProt_alt_uniprot_accessions'].split("|")
        tcgascape_amp_peaks = mut['TCGAScape_Amplification_Peaks'].split("|")
        tcgascape_del_peaks = mut['TCGAScape_Deletion_Peaks'].split("|")
        tumorscape_amp_peaks = mut['TUMORScape_Amplification_Peaks'].split("|")
        tumorscape_del_peaks = mut['TUMORScape_Deletion_Peaks'].split("|")
        full_name = mut['HGNC_Approved Name']
        # NOTE(review): both entries read the same annotation; the second one
        # looks like a copy-paste slip and presumably should read a
        # total-alterations annotation -- confirm the correct key name.
        cosmic = {
            "tissue_types_affected": mut['COSMIC_Tissue_tissue_types_affected'],
            "total_alterations_in_gene": mut["COSMIC_Tissue_tissue_types_affected"],
        }
        # Pass the two lists as separate arguments so chain() flattens them;
        # chain([a, b]) would just yield the two lists themselves.
        alt_aliases = list(
            itertools.chain(mut["HGNC_Previous Symbols"].split(", "), mut["HGNC_Synonyms"].split(", "))
        )
        location = mut["HGNC_Chromosome"]
        uniprot_accession = mut["UniProt_uniprot_accession"]

        transcripts = mut['transcripts']
        self.assertTrue(transcripts is not None)
        self.assertTrue(len(transcripts) > 0)
        self.assertTrue(transcripts.startswith('ENST'))

        strand = mut['strand']
        klass = mut['class']

        uniprot_experimentals = mut['UniProt_AA_experimental_info'].split("|")
        self.assertTrue(uniprot_experimentals is not None)
        uniprot_natural_variations = mut['UniProt_AA_natural_variation'].split("|")
        uniprot_regions = mut['UniProt_AA_region'].split("|")
        uniprot_sites = mut['UniProt_AA_site'].split("|")
        uniprot_go_biological_processes = mut["UniProt_GO_Biological_Process"].split("|")
        uniprot_go_cellular_components = mut["UniProt_GO_Cellular_Component"].split("|")
        self.assertTrue(uniprot_go_cellular_components is not None)
        uniprot_go_molecular_functions = mut["UniProt_GO_Molecular_Function"].split("|")
def test_querying_transcripts_by_region(self):
    """Exercise the backend of the web API call /transcripts/.

    Retrieves the transcripts for the region ("4", 50164411, 60164411) and, for
    each one, gathers the values needed to populate the json described in
    doc/transcript_json_commented.json.txt. Most of the gathered values are
    not validated; only the refseq ids and the exon-coordinate count are
    asserted.
    """
    db_dir = self._determine_db_dir()
    datasources = DatasourceFactory.createDatasources(db_dir, "hg19", isMulticore=False)

    annotator = Annotator()
    for datasource in datasources:
        annotator.addDatasource(datasource)

    txs = annotator.retrieve_transcripts_by_region("4", 50164411, 60164411)
    self.assertTranscriptsFound(txs)

    # Example of collecting enough data for the transcript json.
    # None of these values are validated.
    for tx in txs:
        transcript_id = tx.get_transcript_id()
        tx_start = tx.determine_transcript_start()
        tx_end = tx.determine_transcript_stop()
        gene = tx.get_gene()
        contig = tx.get_contig()
        n_exons = len(tx.get_exons())
        strand = tx.get_strand()
        footprint_start, footprint_end = tx.determine_cds_footprint()
        klass = tx.get_gene_type()
        cds_start = tx.determine_cds_start()
        cds_end = tx.determine_cds_stop()
        gene_id = tx.get_gene_id()

        genomic_coords = [[exon[0], exon[1]] for exon in tx.get_exons()]
        transcript_coords = [
            [TranscriptProviderUtils.convert_genomic_space_to_exon_space(exon[0] + 1, exon[1], tx)]
            for exon in tx.get_exons()
        ]
        code_len = int(cds_end) - int(cds_start) + 1

        # If refseq datasources are not available, this will fail.
        # Step 2 annotate the transcript, which produces a dummy mutation
        # with the refseq annotations.
        dummy_mut = annotator.annotate_transcript(tx)
        refseq_mRNA_id = dummy_mut["gencode_xref_refseq_mRNA_id"]
        refseq_prot_id = dummy_mut["gencode_xref_refseq_prot_acc"]

        # Description is unavailable right now
        description = ""

        self.assertTrue(refseq_mRNA_id is not None)
        self.assertTrue(refseq_prot_id is not None)
        num_coords = len(transcript_coords)
        self.assertTrue(num_coords == n_exons)
def create_run_spec(inputFormat, outputFormat, inputFilename, outputFilename, globalAnnotations=None, datasourceDir=None, genomeBuild="hg19", isMulticore=False, numCores=4, defaultAnnotations=None, cacheUrl=None, read_only_cache=True, tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None):
    """Build and return an initialized RunSpecification for an Oncotator session.

    This is a very simple interface to start an Oncotator session. As a
    warning, this interface may not be supported in future versions.

    The input creator and output renderer are created from the given
    formats/filenames, the datasources are created from ``datasourceDir`` and
    every ``TranscriptProvider`` among them is switched to ``tx_mode``.

    ``datasourceDir`` is handed to ``DatasourceFactory.createDatasources`` as
    is. According to the original note, ``None`` is meant to select a default
    location (TODO: define the default location).

    IMPORTANT: the current implementation attempts to annotate using a default
    set of datasources (TODO: make sure this is no longer the case).

    Note that ``is_skip_no_alts`` is also written into ``other_opts`` (a passed
    in dict is modified in place).

    TODO: This method may get refactored into a separate class that handles
    RunConfiguration objects.
    """
    # TODO: Use dependency injection for list of name value pairs? Otherwise, set it up as an attribute on this class.
    # TODO: Use dependency injection to return instance of the input/output classes
    # TODO: Support more than the default configs.
    # TODO: On error, list the supported formats (both input and output)
    # TODO: Make sure that we can pass in both a class and a config file, not just a class.

    # Replace missing optional mappings with fresh dictionaries.
    if globalAnnotations is None:
        globalAnnotations = dict()
    if defaultAnnotations is None:
        defaultAnnotations = dict()
    if other_opts is None:
        other_opts = dict()
    other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

    # Step 1 Initialize input and output
    inputCreator = OncotatorCLIUtils.create_input_creator(inputFilename, inputFormat, genomeBuild, other_opts)
    outputRenderer = OncotatorCLIUtils.create_output_renderer(outputFilename, outputFormat, other_opts)

    # Step 2 Datasources
    datasourceList = DatasourceFactory.createDatasources(
        datasourceDir,
        genomeBuild,
        isMulticore=isMulticore,
        numCores=numCores,
        tx_mode=tx_mode,
    )

    # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a cleaner way.
    for ds in datasourceList:
        if not isinstance(ds, TranscriptProvider):
            continue
        log = logging.getLogger(__name__)
        log.info("Setting %s %s to tx-mode of %s..." % (ds.title, ds.version, tx_mode))
        ds.set_tx_mode(tx_mode)

    run_spec = RunSpecification()
    run_spec.initialize(
        inputCreator,
        outputRenderer,
        manualAnnotations=globalAnnotations,
        datasources=datasourceList,
        isMulticore=isMulticore,
        numCores=numCores,
        defaultAnnotations=defaultAnnotations,
        cacheUrl=cacheUrl,
        read_only_cache=read_only_cache,
        is_skip_no_alts=is_skip_no_alts,
    )
    return run_spec
def create_run_spec(input_format, output_format, input_filename, output_filename, global_annotations=None, datasource_dir=None, genomeBuild="hg19", is_multicore=False, num_cores=4, default_annotations=None, cache_url=None, read_only_cache=True, tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None, annotating_type=None):
    """Create the datasources, configure them, and build the run specification.

    This is a very simple interface to start an Oncotator session. As a
    warning, this interface may not be supported in future versions.

    If ``datasource_dir`` is None (or otherwise falsy), no datasources are
    used. Every ``TranscriptProvider`` datasource is set to ``tx_mode`` and,
    when present in ``other_opts``, given the longer-other-transcripts setting
    and the custom canonical transcript list.

    The custom canonical transcript file is read one ID per line; the part
    after the last "." (the version) is dropped from each ID.

    Returns whatever
    ``RunSpecificationFactory.create_run_spec_given_datasources`` returns for
    the prepared arguments.
    """
    if datasource_dir:
        datasource_list = DatasourceFactory.createDatasources(datasource_dir, genomeBuild,
                                                              isMulticore=is_multicore, numCores=num_cores,
                                                              tx_mode=tx_mode)
    else:
        datasource_list = []

    global_annotations = dict() if global_annotations is None else global_annotations
    default_annotations = dict() if default_annotations is None else default_annotations
    other_opts = dict() if other_opts is None else other_opts

    logger = logging.getLogger(__name__)

    # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a cleaner way.
    for ds in datasource_list:
        if not isinstance(ds, TranscriptProvider):
            continue

        logger.info("Setting %s %s to tx-mode of %s...", ds.title, ds.version, tx_mode)
        ds.set_tx_mode(tx_mode)

        longer_other_tx = other_opts.get(OptionConstants.LONGER_OTHER_TX_FIELD, None)
        if longer_other_tx is not None:
            ds.set_longer_other_transcripts(longer_other_tx)

        cc_txs_filename = other_opts.get(OptionConstants.CUSTOM_CANONICAL_TX_LIST_FILE, None)
        if cc_txs_filename is not None:
            # open() in a with-block replaces the Python-2-only file() call and
            # closes the handle even if reading raises.
            with open(cc_txs_filename, 'r') as cc_txs_fp:
                # Strip each line before dropping the version suffix so that
                # IDs without a version do not keep their trailing newline;
                # blank lines are skipped.
                cc_txs = [line.strip().rsplit(".", 1)[0] for line in cc_txs_fp if line.strip()]
            ds.set_custom_canonical_txs(cc_txs)
            logger.info("%d custom canonical transcripts specified.", len(cc_txs))
        else:
            logger.info("No custom canonical transcripts specified.")

    return RunSpecificationFactory.create_run_spec_given_datasources(input_format, output_format, input_filename,
                                                                     output_filename, global_annotations,
                                                                     datasource_list, genomeBuild, is_multicore,
                                                                     num_cores, default_annotations, cache_url,
                                                                     read_only_cache, tx_mode, is_skip_no_alts,
                                                                     other_opts, annotating_type)
def create_run_spec(input_format, output_format, input_filename, output_filename, global_annotations=None, datasource_dir=None, genomeBuild="hg19", is_multicore=False, num_cores=4, default_annotations=None, cache_url=None, read_only_cache=True, tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None, annotating_type=None):
    """Create the datasources, configure them, and build the run specification.

    This is a very simple interface to start an Oncotator session. As a
    warning, this interface may not be supported in future versions.

    If ``datasource_dir`` is None (or otherwise falsy), no datasources are
    used. Every ``TranscriptProvider`` datasource is set to ``tx_mode`` and,
    when present in ``other_opts``, given the custom canonical transcript list.

    The custom canonical transcript file is read one ID per line; the part
    after the last "." (the version) is dropped from each ID.

    Returns whatever
    ``RunSpecificationFactory.create_run_spec_given_datasources`` returns for
    the prepared arguments.
    """
    if datasource_dir:
        datasource_list = DatasourceFactory.createDatasources(datasource_dir, genomeBuild,
                                                              isMulticore=is_multicore, numCores=num_cores,
                                                              tx_mode=tx_mode)
    else:
        datasource_list = []

    global_annotations = dict() if global_annotations is None else global_annotations
    default_annotations = dict() if default_annotations is None else default_annotations
    other_opts = dict() if other_opts is None else other_opts

    logger = logging.getLogger(__name__)

    # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a cleaner way.
    for ds in datasource_list:
        if not isinstance(ds, TranscriptProvider):
            continue

        logger.info("Setting %s %s to tx-mode of %s...", ds.title, ds.version, tx_mode)
        ds.set_tx_mode(tx_mode)

        cc_txs_filename = other_opts.get(OptionConstants.CUSTOM_CANONICAL_TX_LIST_FILE, None)
        if cc_txs_filename is not None:
            # open() in a with-block replaces the Python-2-only file() call and
            # closes the handle even if reading raises.
            with open(cc_txs_filename, 'r') as cc_txs_fp:
                # Strip each line before dropping the version suffix so that
                # IDs without a version do not keep their trailing newline;
                # blank lines are skipped.
                cc_txs = [line.strip().rsplit(".", 1)[0] for line in cc_txs_fp if line.strip()]
            ds.set_custom_canonical_txs(cc_txs)
            logger.info("%d custom canonical transcripts specified.", len(cc_txs))
        else:
            logger.info("No custom canonical transcripts specified.")

    return RunSpecificationFactory.create_run_spec_given_datasources(input_format, output_format, input_filename,
                                                                     output_filename, global_annotations,
                                                                     datasource_list, genomeBuild, is_multicore,
                                                                     num_cores, default_annotations, cache_url,
                                                                     read_only_cache, tx_mode, is_skip_no_alts,
                                                                     other_opts, annotating_type)
def _createDatasourceCorpus(self):
    """Create and return the hg19 datasources (single-core) from the configured ``dbDir``."""
    corpus_dir = self.config.get('DEFAULT', "dbDir")
    corpus = DatasourceFactory.createDatasources(corpus_dir, "hg19", isMulticore=False)
    return corpus
def create_run_spec(inputFormat, outputFormat, inputFilename, outputFilename, globalAnnotations=None, datasourceDir=None, genomeBuild="hg19", isMulticore=False, numCores=4, defaultAnnotations=None, cacheUrl=None, read_only_cache=True, tx_mode=TranscriptProvider.TX_MODE_CANONICAL, is_skip_no_alts=False, other_opts=None, annotating_type=None):
    """Validate the parameters and build an initialized RunSpecification.

    This is a very simple interface to start an Oncotator session. As a
    warning, this interface may not be supported in future versions.

    Steps:
      0. Validate the parameters; every validation message is logged at its
         own level, and the first ERROR or CRITICAL message aborts the call.
      1. Create the input creator and the output renderer.
      2. Create the datasources. If ``datasourceDir`` is None (or otherwise
         falsy), no datasources are used. Every ``TranscriptProvider`` is set
         to ``tx_mode`` and, when configured in ``other_opts``, given the
         custom canonical transcript list (one ID per line; the part after the
         last "." is dropped from each ID).

    Note that ``is_skip_no_alts`` is also written into ``other_opts`` (a passed
    in dict is modified in place).

    Raises:
        RunSpecificationException: if validation produced an ERROR or CRITICAL
            message.

    TODO: This method may get refactored into a separate class that handles
    RunConfiguration objects.
    """
    # TODO: Use dependency injection for list of name value pairs? Otherwise, set it up as an attribute on this class.
    # TODO: Use dependency injection to return instance of the input/output classes

    globalAnnotations = dict() if globalAnnotations is None else globalAnnotations
    defaultAnnotations = dict() if defaultAnnotations is None else defaultAnnotations
    other_opts = dict() if other_opts is None else other_opts
    other_opts[InputMutationCreatorOptions.IS_SKIP_ALTS] = is_skip_no_alts

    logger = logging.getLogger(__name__)

    # Step 0 Validate given parameters and log messages. If an error or critical is found, throw an exception.
    validation_messages = RunSpecificationFactory._validate_run_spec_parameters(
        inputFormat, outputFormat, inputFilename, outputFilename, globalAnnotations, datasourceDir, genomeBuild,
        isMulticore, numCores, defaultAnnotations, cacheUrl, read_only_cache, tx_mode, is_skip_no_alts,
        other_opts, annotating_type)
    for msg in validation_messages:
        logger.log(msg.level, msg.message)
        if msg.level in (logging.ERROR, logging.CRITICAL):
            raise RunSpecificationException(msg.message)

    # Step 1 Initialize input and output
    inputCreator = OncotatorCLIUtils.create_input_creator(inputFilename, inputFormat, genomeBuild, other_opts)
    outputRenderer = OncotatorCLIUtils.create_output_renderer(outputFilename, outputFormat, other_opts)

    # Step 2 Datasources
    if datasourceDir:
        datasource_list = DatasourceFactory.createDatasources(datasourceDir, genomeBuild, isMulticore=isMulticore,
                                                              numCores=numCores, tx_mode=tx_mode)
    else:
        datasource_list = []

    # TODO: Refactoring needed here to specify tx-mode (or any option not in a config file) in a cleaner way.
    for ds in datasource_list:
        if not isinstance(ds, TranscriptProvider):
            continue

        logger.info("Setting %s %s to tx-mode of %s...", ds.title, ds.version, tx_mode)
        ds.set_tx_mode(tx_mode)

        cc_txs_filename = other_opts.get(OptionConstants.CUSTOM_CANONICAL_TX_LIST_FILE, None)
        if cc_txs_filename is not None:
            # open() in a with-block replaces the Python-2-only file() call and
            # closes the handle even if reading raises.
            with open(cc_txs_filename, 'r') as cc_txs_fp:
                # Strip each line before dropping the version suffix so that
                # IDs without a version do not keep their trailing newline;
                # blank lines are skipped.
                cc_txs = [line.strip().rsplit(".", 1)[0] for line in cc_txs_fp if line.strip()]
            ds.set_custom_canonical_txs(cc_txs)
            logger.info("%d custom canonical transcripts specified.", len(cc_txs))
        else:
            logger.info("No custom canonical transcripts specified.")

    result = RunSpecification()
    result.initialize(inputCreator, outputRenderer, manualAnnotations=globalAnnotations,
                      datasources=datasource_list, isMulticore=isMulticore, numCores=numCores,
                      defaultAnnotations=defaultAnnotations, cacheUrl=cacheUrl,
                      read_only_cache=read_only_cache, is_skip_no_alts=is_skip_no_alts,
                      annotating_type=annotating_type)
    return result
def testMulticoreNoDatasources(self):
    """If using multicore, creation must not hang when the db dir holds no datasources.

    'testdata/maflite/' is used as a directory without datasources; the
    returned collection is expected to be empty.
    """
    empty_db_dir = 'testdata/maflite/'
    # Third positional argument is True -- presumably isMulticore, as the other
    # callers pass it by keyword in that position's role; verify against the factory.
    multiDS = DatasourceFactory.createDatasources(empty_db_dir, "hg19", True)
    num_created = len(multiDS)
    self.assertTrue(
        num_created == 0,
        "Length of multiDS when there were no datasources was not zero.",
    )