def test_reannotate_genome_official(self):
    """
    This test takes about 25 minutes to run. It uploads the rhodobacter gbff,
    runs prokka genome reannotation and then checks to see if a specific
    feature (RSP_1441) has had its function updated correctly.
    :return: None
    """
    gfu = GenomeFileUtil(os.environ["SDK_CALLBACK_URL"])
    genome_test_file = os.path.join("/kb/module/test/data/",
                                    "rhodobacter_genomic.gbff")
    genome_test_file_scratch = os.path.join("/kb/module/work/tmp",
                                            "rhodobacter_genomic.gbff")
    copyfile(genome_test_file, genome_test_file_scratch)
    genome_ref_original = gfu.genbank_to_genome(
        {"file": {"path": genome_test_file_scratch},
         "workspace_name": self.getWsName(),
         "genome_name": "rhodobacter_genomic.gbff",
         "generate_ids_if_needed": 1})["genome_ref"]

    genome_name = "Rhodoannotated_by_prokka"
    print("ABOUT TO ANNOTATE GENOME")
    result = self.getImpl().annotate(
        self.getContext(),
        {"object_ref": genome_ref_original,
         "output_workspace": self.getWsName(),
         "output_genome_name": genome_name,
         "evalue": None,
         "fast": 0,
         "gcode": 0,
         "genus": "genus",
         "kingdom": "Bacteria",
         "metagenome": 0,
         "mincontiglen": 1,
         "norrna": 0,
         "notrna": 0,
         "rawproduct": 0,
         "rfam": 1,
         "scientific_name": "RhodoBacter"
         })[0]

    genome_ref_new = self.getWsName() + "/" + genome_name
    un_annotated_genome = self.getWsClient().get_objects(
        [{"ref": genome_ref_original}])[0]["data"]
    re_annotated_genome = self.getWsClient().get_objects(
        [{"ref": genome_ref_new}])[0]["data"]

    scratch = "/kb/module/work/tmp/"
    with open(scratch + "OUTPUT_GENOME_BEFORE.txt", "w+") as outfile:
        json.dump(un_annotated_genome, outfile)
    # BUG FIX: the "AFTER" snapshot previously dumped un_annotated_genome
    # again; it must record the re-annotated genome.
    with open(scratch + "OUTPUT_GENOME_AFTER.txt", "w+") as outfile:
        json.dump(re_annotated_genome, outfile)

    # before annotation: RSP_1441 carries the original GntR-family function
    for feature in un_annotated_genome["features"]:
        if feature["id"] == "RSP_1441":
            old_function = feature["functions"]
            self.assertEqual(old_function,
                             ["regulatory protein, GntR family"])
            break
    # after annotation: prokka should have replaced the function
    for feature in re_annotated_genome["features"]:
        if feature["id"] == "RSP_1441":
            new_function = feature["functions"]
            self.assertEqual(new_function,
                             ["N-acetylglucosamine repressor"])
            break
def getGenomeInfo(self, genome_basename, item_i=0):
    """
    Upload the genbank file data/genomes/<genome_basename>.gbff.gz as a
    KBase Genome object and return its workspace object info tuple.

    Results are cached on the class in parallel lists keyed by position
    item_i, so repeated calls for the same basename skip the upload.

    :param genome_basename: basename (no extension) of the .gbff.gz file
    :param item_i: slot in the class-level cache lists to use
    :return: workspace object info tuple for the uploaded genome
    """
    # 0) return cached info if this slot already holds the requested genome.
    #    Narrowed from a bare `except:` -- only a missing slot (IndexError)
    #    or a half-initialized cache (AttributeError) should fall through.
    if hasattr(self.__class__, 'genomeInfo_list'):
        try:
            info = self.__class__.genomeInfo_list[item_i]
            name = self.__class__.genomeName_list[item_i]
            if info is not None:
                if name != genome_basename:
                    # slot holds a different genome: invalidate it
                    self.__class__.genomeInfo_list[item_i] = None
                    self.__class__.genomeName_list[item_i] = None
                else:
                    return info
        except (IndexError, AttributeError):
            pass  # cache miss; fall through to upload

    # 1) transform genbank to kbase genome object and upload to ws
    shared_dir = "/kb/module/work/tmp"
    genome_data_file = 'data/genomes/' + genome_basename + '.gbff.gz'
    genome_file = os.path.join(shared_dir, os.path.basename(genome_data_file))
    shutil.copy(genome_data_file, genome_file)

    SERVICE_VER = 'release'
    #SERVICE_VER = 'dev'
    GFU = GenomeFileUtil(os.environ['SDK_CALLBACK_URL'],
                         token=self.getContext()['token'],
                         service_ver=SERVICE_VER)
    print("UPLOADING genome: " + genome_basename + " to WORKSPACE " +
          self.getWsName() + " ...")
    genome_upload_result = GFU.genbank_to_genome(
        {'file': {'path': genome_file},
         'workspace_name': self.getWsName(),
         'genome_name': genome_basename})
    pprint(genome_upload_result)
    genome_ref = genome_upload_result['genome_ref']
    new_obj_info = self.getWsClient().get_object_info_new(
        {'objects': [{'ref': genome_ref}]})[0]

    # 2) store it: grow both cache lists up to item_i, then fill the slot
    if not hasattr(self.__class__, 'genomeInfo_list'):
        self.__class__.genomeInfo_list = []
        self.__class__.genomeName_list = []
    while len(self.__class__.genomeInfo_list) < item_i + 1:
        self.__class__.genomeInfo_list.append(None)
        self.__class__.genomeName_list.append(None)
    self.__class__.genomeInfo_list[item_i] = new_obj_info
    self.__class__.genomeName_list[item_i] = genome_basename

    return new_obj_info
def load_genbank_file(callback_url, ws_name, local_file, target_name):
    """
    Load a Genbank (.gbk/.gbff/etc.) file into a workspace as a Genome
    object. As a side effect, an Assembly holding the genome sequence is
    also created.
    """
    upload_params = {
        "file": {"path": local_file},
        "genome_name": target_name,
        "workspace_name": ws_name,
        "source": "Ensembl",
        "type": "User upload",
        "generate_ids_if_needed": 1,
    }
    result = GenomeFileUtil(callback_url).genbank_to_genome(upload_params)
    return result.get('genome_ref')  # yeah, i know.
def loadGenome(self):
    """
    Upload data/minimal.gbff as a test Genome object, caching the resulting
    reference on the class so the upload happens at most once per test run.
    """
    if hasattr(self.__class__, 'genome_ref'):
        return self.__class__.genome_ref

    # stage the genbank file into scratch where the callback server can see it
    staged_path = os.path.join(self.scratch, 'minimal.gbff')
    shutil.copy(os.path.join('data', 'minimal.gbff'), staged_path)

    upload_params = {
        'file': {'path': staged_path},
        'workspace_name': self.getWsName(),
        'genome_name': 'test_genome',
        'source': 'Ensembl',
        'generate_ids_if_needed': 1,
        'generate_missing_genes': 1,
    }
    uploader = GenomeFileUtil(self.callback_url)
    new_ref = uploader.genbank_to_genome(upload_params)['genome_ref']

    self.__class__.genome_ref = new_ref
    return new_ref
def prepare_data(cls):
    """
    Create every workspace object shared by the tests: paired- and
    single-end reads, two Shewanella genomes, a GenomeSet, row and column
    AttributeMappings, a TraitMatrix and a FunctionalProfile.
    The resulting object references are stored as class attributes
    (cls.pe_reads_ref, cls.se_reads_ref, cls.genome1_ref, cls.genome2_ref,
    cls.genomeset_ref, cls.func_profile_ref).
    """
    wd = os.getcwd()
    print('WORKING DIRECTORY', wd)
    ru = ReadsUtils(cls.callback_url)

    # stage the input files into a dedicated scratch subdirectory
    test_directory_name = 'fama_test_data'
    cls.test_directory_path = os.path.join(cls.scratch, test_directory_name)
    print('TEST DIRECTORY', cls.test_directory_path)
    os.makedirs(cls.test_directory_path)
    shutil.copy(os.path.join('data', 'test_fastq_pe1.fq'), cls.test_directory_path)
    shutil.copy(os.path.join('data', 'test_fastq_pe2.fq'), cls.test_directory_path)
    shutil.copy(os.path.join('data', 'MR-1.gbff'), cls.test_directory_path)
    shutil.copy(os.path.join('data', 'SB2B.gbff'), cls.test_directory_path)

    # upload paired-end reads
    reads_params = {
        'fwd_file': os.path.join(cls.test_directory_path, 'test_fastq_pe1.fq'),
        'rev_file': os.path.join(cls.test_directory_path, 'test_fastq_pe2.fq'),
        'sequencing_tech': 'Illumina',
        'wsname': cls.ws_info[1],
        'single_genome': 0,
        'name': 'Fama_test_pe_input',
        'interleaved': 0
    }
    cls.pe_reads_ref = ru.upload_reads(reads_params)

    # upload single-end reads (re-uses the forward paired-end file)
    se_reads_params = {
        'fwd_file': os.path.join(cls.test_directory_path, 'test_fastq_pe1.fq'),
        'sequencing_tech': 'Illumina',
        'wsname': cls.ws_info[1],
        'single_genome': 0,
        'name': 'Fama_test_se_input'
    }
    cls.se_reads_ref = ru.upload_reads(se_reads_params)

    # upload the two test genomes from their genbank files
    gu = GenomeFileUtil(cls.callback_url)
    genome1_params = {
        'file': {
            'path': os.path.join(cls.test_directory_path, 'MR-1.gbff')
        },
        'genome_name': 'Shewanella_oneidensis_MR1',
        'workspace_name': cls.ws_info[1]
    }
    cls.genome1_ref = gu.genbank_to_genome(genome1_params)['genome_ref']
    genome2_params = {
        'file': {
            'path': os.path.join(cls.test_directory_path, 'SB2B.gbff')
        },
        'genome_name': 'Shewanella_amazonensis_SB2B',
        'workspace_name': cls.ws_info[1]
    }
    cls.genome2_ref = gu.genbank_to_genome(genome2_params)['genome_ref']

    # build and save a GenomeSet containing both genomes
    elements = {}
    elements[cls.genome1_ref] = dict()
    elements[cls.genome1_ref]['ref'] = cls.genome1_ref
    elements[cls.genome2_ref] = dict()
    elements[cls.genome2_ref]['ref'] = cls.genome2_ref
    test_GenomeSet = {
        'description': 'Test GenomeSet',
        'elements': elements
    }
    provenance = [{}]
    provenance[0]['input_ws_objects'] = [cls.genome1_ref, cls.genome2_ref]
    provenance[0]['service'] = 'kb_SetUtilities'
    provenance[0]['method'] = 'KButil_Build_GenomeSet'
    genome_set_info = cls.wsClient.save_objects({
        'workspace': cls.ws_info[1],
        'objects': [{
            'type': 'KBaseSearch.GenomeSet',
            'data': test_GenomeSet,
            'name': 'Test_GenomeSet',
            'meta': {},
            'provenance': provenance
        }]
    })[0]
    # ref assembled from object info tuple fields [6], [0], [4]
    cls.genomeset_ref = "{}/{}/{}".format(genome_set_info[6],
                                          genome_set_info[0],
                                          genome_set_info[4])

    # row AttributeMapping: one instance per functional gene family,
    # each instance carries [name, description, category]
    attribute_mapping_data = {
        "attributes": [{
            "attribute": "name",
            "source": "Fama"
        }, {
            "attribute": "description",
            "source": "Fama"
        }, {
            "attribute": "category",
            "source": "Fama"
        }],
        "instances": {
            "AmoA_PmoA": [
                "AmoA_PmoA",
                "amoA-pmoA; methane/ammonia monooxygenase subunit A [EC:1.14.18.3 1.14.99.39]",
                "Ammonium oxidation"
            ],
            "AmoB_PmoB": [
                "AmoB_PmoB",
                "amoB-pmoB; methane/ammonia monooxygenase subunit B",
                "Ammonium oxidation"
            ],
            "AmoC_PmoC": [
                "AmoC_PmoC",
                "amoC-pmoC; methane/ammonia monooxygenase subunit C",
                "Ammonium oxidation"
            ],
            "AnfG_VnfG": [
                "AnfG_VnfG",
                "Nitrogenase delta subunit [EC:1.18.6.1]",
                "Nitrogen fixation"
            ],
            "HAO": [
                "HAO",
                "hao; hydroxylamine dehydrogenase [EC:1.7.2.6]",
                "Anaerobic ammonium oxidation"
            ],
            "Hzo": [
                "Hzo",
                "Hydrazine dehydrogenase (EC:1.7.2.8)",
                "Anaerobic ammonium oxidation"
            ],
            "HzsA": [
                "HzsA",
                "Hydrazine synthase subunit A (EC:1.7.2.7)",
                "Anaerobic ammonium oxidation"
            ],
            "HzsB": [
                "HzsB",
                "Hydrazine synthase subunit B (EC:1.7.2.7)",
                "Anaerobic ammonium oxidation"
            ],
            "HzsC": [
                "HzsC",
                "Hydrazine synthase subunit C (EC:1.7.2.7)",
                "Anaerobic ammonium oxidation"
            ],
            "NapA": [
                "NapA",
                "Periplasmic nitrate reductase precursor (EC 1.7.99.4)",
                "Nitrate dissimilatory reduction"
            ],
            "NapB": [
                "NapB",
                "Periplasmic nitrate reductase cytochrome c550-type subunit",
                "Nitrate dissimilatory reduction"
            ],
            "NapC": [
                "NapC",
                "Cytochrome c-type protein NapC",
                "Nitrate dissimilatory reduction"
            ],
            "NapD": [
                "NapD",
                "Periplasmic nitrate reductase component NapD",
                "Nitrate dissimilatory reduction"
            ],
            "NapE": [
                "NapE",
                "Periplasmic nitrate reductase component NapE",
                "Nitrate dissimilatory reduction"
            ],
            "NapF": [
                "NapF",
                "Ferredoxin-type protein NapF (periplasmic nitrate reductase)",
                "Nitrate dissimilatory reduction"
            ],
            "NapG": [
                "NapG",
                "Ferredoxin-type protein NapG (periplasmic nitrate reductase)",
                "Nitrate dissimilatory reduction"
            ],
            "NapH": [
                "NapH",
                "Polyferredoxin NapH (periplasmic nitrate reductase)",
                "Nitrate dissimilatory reduction"
            ],
            "NapK": [
                "NapK",
                "Periplasmic nitrate reductase component NapK",
                "Nitrate dissimilatory reduction"
            ],
            "NapL": [
                "NapL",
                "Periplasmic nitrate reductase component NapL",
                "Nitrate dissimilatory reduction"
            ],
            "NarC": [
                "NarC",
                "Respiratory nitrate reductase subunit, conjectural (EC 1.7.99.4)",
                "Nitrate dissimilatory reduction"
            ],
            "NarG_NxrA": [
                "NarG_NxrA",
                "narG, narZ, nxrA; nitrate reductase / nitrite oxidoreductase, alpha subunit [EC:1.7.5.1 1.7.99.-]",
                "Nitrate dissimilatory reduction"
            ],
            "NarH_NxrB": [
                "NarH_NxrB",
                "narH, narY, nxrB; nitrate reductase / nitrite oxidoreductase, beta subunit [EC:1.7.5.1 1.7.99.-]",
                "Nitrate dissimilatory reduction"
            ],
            "NarI": [
                "NarI",
                "Respiratory nitrate reductase gamma chain (EC 1.7.99.4)",
                "Nitrate dissimilatory reduction"
            ],
            "NarJ": [
                "NarJ",
                "narJ, narW; nitrate reductase molybdenum cofactor assembly chaperone NarJ/NarW",
                "Nitrate dissimilatory reduction"
            ],
            "NasA": [
                "NasA",
                "Assimilatory nitrate reductase large subunit (EC:1.7.99.4)",
                "Nitrate assimilatory reduction"
            ],
            "NasB": [
                "NasB",
                "nasB; assimilatory nitrate reductase NADH oxidase subunit [EC:1.7.99.-]",
                "Nitrate assimilatory reduction"
            ],
            "NasI": [
                "NasI",
                "assimilatory nitrate reductase, clostridial, electron transfer subunit [EC:1.7.99.-]",
                "Nitrite assimilation"
            ],
            "NasJ": [
                "NasJ",
                "assimilatory nitrate reductase, clostridial, NADH oxidase subunit [EC:1.7.99.-]",
                "Nitrite assimilation"
            ],
            "NifB": [
                "NifB",
                "nifB; nitrogen fixation protein NifB",
                "Nitrogen fixation"
            ],
            "NifD_AnfD_VnfD": [
                "NifD_AnfD_VnfD",
                "Nitrogenase alpha chain (EC 1.18.6.1) NifD/AnfD/VnfD",
                "Nitrogen fixation"
            ],
            "NifH_AnfH_VnfH": [
                "NifH_AnfH_VnfH",
                "Nitrogenase reductase and maturation protein NifH/AnfH/VnfH",
                "Nitrogen fixation"
            ],
            "NifK_AnfK_VnfK": [
                "NifK_AnfK_VnfK",
                "Nitrogenase beta chain (EC 1.18.6.1) NifK/AnfK/VnfK",
                "Nitrogen fixation"
            ],
            "NirA": [
                "NirA",
                "nirA; ferredoxin-nitrite reductase [EC:1.7.7.1]",
                "Nitrite assimilation"
            ],
            "NirB": [
                "NirB",
                "nirB; nitrite reductase (NADH) large subunit [EC:1.7.1.15]",
                "Nitrite assimilation"
            ],
            "NirB3": [
                "NirB3",
                "Cytochrome c-552 precursor NirB",
                "Denitrification"
            ],
            "NirC": ["NirC", "nirC; cytochrome c55X", "Denitrification"],
            "NirD": [
                "NirD",
                "nirD; nitrite reductase (NADH) small subunit [EC:1.7.1.15]",
                "Nitrite assimilation"
            ],
            "NirK": [
                "NirK",
                "nirK; nitrite reductase (NO-forming) [EC:1.7.2.1]",
                "Denitrification"
            ],
            "NirM": ["NirM", "Cytochrome c551 NirM", "Denitrification"],
            "NirN": [
                "NirN",
                "Nitrite reductase associated c-type cytochorome NirN",
                "Denitrification"
            ],
            "NirS": [
                "NirS",
                "nirS; Cytochrome cd1 nitrite reductase (NO-forming) / hydroxylamine reductase [EC:1.7.2.1 1.7.99.1]",
                "Denitrification"
            ],
            "NirT": ["NirT", "Cytochrome c-type protein NirT", "Denitrification"],
            "NirU": [
                "NirU",
                "assimilatory nitrite reductase, putative NADH oxidase subunit [EC:1.7.99.-]",
                "Nitrite assimilation"
            ],
            "NosZ": [
                "NosZ",
                "nosZ; nitrous-oxide reductase [EC:1.7.2.4]",
                "Denitrification"
            ],
            "NrfA": [
                "NrfA",
                "nrfA; nitrite reductase (cytochrome c-552) [EC:1.7.2.2]",
                "Ammonification"
            ],
            "NrfB": [
                "NrfB",
                "nrfB; cytochrome c-type protein NrfB",
                "Ammonification"
            ],
            "NrfC": ["NrfC", "nrfC; protein NrfC", "Ammonification"],
            "NrfD": ["NrfD", "nrfD; protein NrfD", "Ammonification"],
            "NrfH": [
                "NrfH",
                "nrfH; cytochrome c nitrite reductase small subunit",
                "Ammonification"
            ],
            "UreA": ["UreA", "ureA; urease subunit gamma [EC:3.5.1.5]", "Urease"],
            "UreB": ["UreB", "ureB; urease subunit beta [EC:3.5.1.5]", "Urease"],
            "UreC": ["UreC", "ureC; urease subunit alpha [EC:3.5.1.5]", "Urease"],
            "cNor-C": [
                "cNor-C",
                "Nitric-oxide reductase subunit C (EC 1.7.99.7)",
                "Denitrification"
            ],
            "cNorB_qNor": [
                "cNorB_qNor",
                "Nitric-oxide reductase (EC 1.7.99.7)",
                "Denitrification"
            ]
        },
        "ontology_mapping_method": "User curation"
    }
    am_info = cls.wsClient.save_objects({
        'workspace': cls.ws_info[1],
        'objects': [{
            'type': 'KBaseExperiments.AttributeMapping',
            'data': attribute_mapping_data,
            'name': 'Test_row_AttributeMapping',
            'meta': {},
            'provenance': [{}]
        }]
    })[0]
    row_attribute_mapping_ref = "{}/{}/{}".format(am_info[6], am_info[0],
                                                  am_info[4])

    # column AttributeMapping: two dummy sample ids
    attribute_mapping_data = {
        "attributes": [{
            "attribute": "sample_id",
            "source": "KBase"
        }],
        "instances": {
            "Fama_test_dummy_id1": ["Fama_test_dummy_id1"],
            "Fama_test_dummy_id2": ["Fama_test_dummy_id2"]
        },
        "ontology_mapping_method": "User curation"
    }
    am_info = cls.wsClient.save_objects({
        'workspace': cls.ws_info[1],
        'objects': [{
            'type': 'KBaseExperiments.AttributeMapping',
            'data': attribute_mapping_data,
            'name': 'Test_col_AttributeMapping',
            'meta': {},
            'provenance': [{}]
        }]
    })[0]
    col_attribute_mapping_ref = "{}/{}/{}".format(am_info[6], am_info[0],
                                                  am_info[4])

    # TraitMatrix: raw counts per gene family (rows) per sample (cols)
    trait_matrix_data = {
        "col_attributemapping_ref": col_attribute_mapping_ref,
        "data": {
            "col_ids": ["Fama_test_dummy_id1", "Fama_test_dummy_id2"],
            "row_ids": [
                "AmoA_PmoA", "AmoB_PmoB", "AmoC_PmoC", "HAO", "HzsA", "NapA",
                "NapB", "NapC", "NapD", "NapF", "NapG", "NapH", "NapL",
                "NarC", "NarG_NxrA", "NarH_NxrB", "NarI", "NarJ", "NasA",
                "NasB", "NasI", "NasJ", "NifB", "NifD_AnfD_VnfD",
                "NifH_AnfH_VnfH", "NifK_AnfK_VnfK", "NirA", "NirB", "NirC",
                "NirD", "NirK", "NirM", "NirN", "NirS", "NirT", "NirU",
                "NosZ", "NrfA", "NrfB", "NrfC", "NrfD", "NrfH", "UreA",
                "UreB", "UreC", "cNor-C", "cNorB_qNor"
            ],
            "values": [[29.0, 1862.0], [9.0, 1502.0], [20.0, 1775.0],
                       [1.0, 6.0], [0.0, 1.0], [10.0, 335.0], [0.0, 11.0],
                       [1.0, 16.0], [0.0, 10.0], [1.0, 27.0], [4.0, 47.0],
                       [0.0, 26.0], [0.0, 1.0], [5.0, 424.0], [86.0, 8420.0],
                       [46.0, 5446.0], [7.0, 128.0], [3.0, 45.0],
                       [157.0, 5582.0], [12.0, 168.0], [0.0, 2.0], [0.0, 2.0],
                       [1.0, 0.0], [0.0, 3.0], [1.0, 0.0], [2.0, 3.0],
                       [129.0, 9531.0], [73.0, 994.0], [0.0, 2.0],
                       [19.0, 1622.0], [103.0, 10225.0], [0.0, 2.0],
                       [0.0, 9.0], [0.0, 16.0], [0.0, 3.0], [8.0, 22.0],
                       [4.0, 149.0], [7.0, 76.0], [0.0, 1.0], [0.0, 83.0],
                       [0.0, 18.0], [0.0, 34.0], [80.0, 2687.0],
                       [83.0, 2689.0], [246.0, 10558.0], [4.0, 159.0],
                       [19.0, 523.0]]
        },
        "row_attributemapping_ref": row_attribute_mapping_ref,
        "scale": "raw"
    }
    tm_info = cls.wsClient.save_objects({
        'workspace': cls.ws_info[1],
        'objects': [{
            'type': 'KBaseMatrices.TraitMatrix',
            'data': trait_matrix_data,
            'name': 'Test_TraitMatrix',
            'meta': {},
            'provenance': [{}]
        }]
    })[0]
    trait_matrix_ref = "{}/{}/{}".format(tm_info[6], tm_info[0], tm_info[4])

    # FunctionalProfile built on the TraitMatrix (same rows/cols, float scores)
    func_profile_data = {
        "base_object_ref": trait_matrix_ref,
        "data": {
            "col_ids": ["Fama_test_dummy_id1", "Fama_test_dummy_id2"],
            "row_ids": [
                "AmoA_PmoA", "AmoB_PmoB", "AmoC_PmoC", "HAO", "HzsA", "NapA",
                "NapB", "NapC", "NapD", "NapF", "NapG", "NapH", "NapL",
                "NarC", "NarG_NxrA", "NarH_NxrB", "NarI", "NarJ", "NasA",
                "NasB", "NasI", "NasJ", "NifB", "NifD_AnfD_VnfD",
                "NifH_AnfH_VnfH", "NifK_AnfK_VnfK", "NirA", "NirB", "NirC",
                "NirD", "NirK", "NirM", "NirN", "NirS", "NirT", "NirU",
                "NosZ", "NrfA", "NrfB", "NrfC", "NrfD", "NrfH", "UreA",
                "UreB", "UreC", "cNor-C", "cNorB_qNor"
            ],
            "values": [[0.5877623222166435, 0.6194552807793001],
                       [0.18772393783341546, 0.5158019621326551],
                       [0.42093693109943287, 0.6389877038101813],
                       [0.01052927042506581, 9.44285005365384E-4],
                       [0.0, 1.163272453732729E-4],
                       [0.07533916331087316, 0.03985967408079665],
                       [0.0, 0.004004703811676381],
                       [0.023525707440798897, 0.005726992499611547],
                       [0.0, 0.005001872240847319],
                       [0.03374102192943467, 0.012378733183834723],
                       [0.09317287278448243, 0.01636684276395418],
                       [0.0, 0.007156293503795473],
                       [0.0, 2.558288744954014E-4],
                       [0.09551482894790636, 0.11359136011797127],
                       [0.4838004401128097, 0.7117012180568053],
                       [0.606624219369047, 1.1214863307558443],
                       [0.14386930938552234, 0.039556844643411124],
                       [0.06703428893835078, 0.013033272059964287],
                       [1.2377859668457405, 0.6541740711814014],
                       [0.1554388223417535, 0.025585775284706288],
                       [0.0, 4.157415511565216E-4],
                       [0.0, 8.79384674326921E-4],
                       [0.011357985203073509, 0.0],
                       [0.0, 5.408864203344862E-4],
                       [0.017785876274710746, 0.0],
                       [0.023945607737196006, 5.321452308325661E-4],
                       [1.3691533051493565, 1.5159715196738617],
                       [0.5575869192146357, 0.11053408671640833],
                       [0.0, 0.001003764424584737],
                       [0.6020807778521705, 0.7245394400294567],
                       [1.2760844699978113, 2.028265195299676],
                       [0.0, 8.562698274159332E-4],
                       [0.0, 0.0016013558654137605],
                       [0.0, 0.002541269397417341],
                       [0.0, 0.001148325643963228],
                       [0.08602893305399109, 0.0036767738796032027],
                       [0.035240090231966045, 0.020453010032479176],
                       [0.08339398252207368, 0.013841118202571863],
                       [0.0, 3.631862195001992E-4],
                       [0.0, 0.02686595241534706],
                       [0.0, 0.004760523591057187],
                       [0.0, 0.013633043040963725],
                       [2.4590146922690415, 1.1212330338379586],
                       [2.1843782229213273, 1.0658240082728703],
                       [2.5770680795310428, 1.662618906136703],
                       [0.08799907015919287, 0.050780650063447744],
                       [0.17980007974121962, 0.06499031332293298]]
        },
        "profile_category": "community",
        "profile_type": "sequence reads"
    }
    fp_info = cls.wsClient.save_objects({
        'workspace': cls.ws_info[1],
        'objects': [{
            'type': 'KBaseProfile.FunctionalProfile',
            'data': func_profile_data,
            'name': 'Test_FunctionalProfile',
            'meta': {},
            'provenance': [{}]
        }]
    })[0]
    cls.func_profile_ref = "{}/{}/{}".format(fp_info[6], fp_info[0],
                                             fp_info[4])
class ImportGenbankUtil:
    """
    Import a GenBank file from the KBase staging area as a Genome object
    (via GenomeFileUtil) and generate an HTML summary report for it.
    """

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        # unique per-instance scratch dir so concurrent imports cannot collide
        self.scratch = os.path.join(config['scratch'],
                                    'import_GenBank_' + str(uuid.uuid4()))
        handler_utils._mkdir_p(self.scratch)
        self.dfu = DataFileUtil(self.callback_url)
        self.gfu = GenomeFileUtil(self.callback_url, service_ver='beta')
        self.uploader_utils = UploaderUtil(config)

    def import_genbank_from_staging(self, params):
        '''
        import_genbank_from_staging: wrapper method for
        GenomeFileUtil.genbank_to_genome

        required params:
        staging_file_subdir_path - subdirectory file path
          e.g.
            for file: /data/bulk/user_name/file_name
            staging_file_subdir_path is file_name
            for file: /data/bulk/user_name/subdir_1/subdir_2/file_name
            staging_file_subdir_path is subdir_1/subdir_2/file_name
        genome_name - becomes the name of the object
        workspace_name - the name of the workspace it gets saved to.
        source - Source of the file typically something like RefSeq or Ensembl

        optional params:
        release - Release or version number of the data
            per example Ensembl has numbered releases of all their data:
            Release 31
        generate_ids_if_needed - If field used for feature id is not there,
            generate ids (default behavior is raising an exception)
        genetic_code - Genetic code of organism. Overwrites determined GC
            from taxon object
        type - Reference, Representative or User upload

        return:
        genome_ref: return object reference
        '''
        logging.info(
            '--->\nrunning ImportGenbankUtil.import_genbank_from_staging\n' +
            f'params:\n{json.dumps(params, indent=1)}')

        self.validate_import_genbank_from_staging_params(params)

        download_staging_file_params = {
            'staging_file_subdir_path': params.get('staging_file_subdir_path')
        }
        scratch_file_path = self.dfu.download_staging_file(
            download_staging_file_params).get('copy_file_path')

        # BUG FIX: work on a shallow copy -- the original code aliased
        # `params` and then deleted 'staging_file_subdir_path' from it,
        # silently mutating the caller's dict.
        import_genbank_params = dict(params)
        import_genbank_params['file'] = {'path': scratch_file_path}
        del import_genbank_params['staging_file_subdir_path']

        returnVal = self.gfu.genbank_to_genome(import_genbank_params)

        """
        Update the workspace object related meta-data for staged file
        """
        #self.uploader_utils.update_staging_service(
        #    download_staging_file_params.get('staging_file_subdir_path'),
        #    returnVal['genome_ref'])
        return returnVal

    def validate_import_genbank_from_staging_params(self, params):
        """
        validate_import_genbank_from_staging_params:
            validates params passed to import_genbank_from_staging method

        :raises ValueError: if any required parameter is missing
        """
        # check for required parameters
        for p in ['staging_file_subdir_path', 'genome_name',
                  'workspace_name', 'source']:
            if p not in params:
                raise ValueError('"' + p +
                                 '" parameter is required, but missing')

    def generate_html_report(self, genome_ref, params):
        """
        _generate_html_report: generate html summary report

        Builds a one-page overview (name, source, contig/feature stats,
        warnings) from the genome object's data and metadata, fills the
        report template and uploads the result to shock.

        :param genome_ref: workspace reference of the imported Genome
        :param params: original import params
        :return: single-element list of html_links entries for KBaseReport
        """
        logging.info('start generating html report')
        genome_obj = self.dfu.get_objects({'object_refs': [genome_ref]})
        html_report = list()
        tmp_dir = os.path.join(self.scratch, str(uuid.uuid4()))
        handler_utils._mkdir_p(tmp_dir)
        result_file_path = os.path.join(tmp_dir, 'report.html')

        genome_name = str(genome_obj.get('data')[0].get('info')[1])
        # kept for the (currently disabled) "Uploaded File" overview row
        genome_file = params.get('staging_file_subdir_path')

        genome_data = genome_obj.get('data')[0].get('data')
        genome_info = genome_obj.get('data')[0].get('info')
        # summary stats come from the object metadata field (info[10])
        source = genome_info[10].get('Source')
        num_contigs = genome_info[10].get('Number contigs')
        size = genome_info[10].get('Size')
        gc_content = genome_info[10].get('GC content')
        warnings = genome_data.get('warnings', [])
        feature_counts = sorted(
            list(genome_data.get('feature_counts', {}).items()))

        genome_overview_data = collections.OrderedDict()
        genome_overview_data['Name'] = '{} ({})'.format(genome_name,
                                                        genome_ref)
        #genome_overview_data['Uploaded File'] = genome_file
        genome_overview_data['Date Uploaded'] = time.strftime("%c")
        genome_overview_data['Source'] = source
        genome_overview_data['Number of Contigs'] = num_contigs
        genome_overview_data['Size'] = size
        genome_overview_data['GC Content'] = gc_content
        genome_overview_data['Warnings'] = "\n".join(warnings)
        genome_overview_data.update(feature_counts)

        # render the overview as an HTML table
        overview_content = ''
        overview_content += '<br/><table>\n'
        for key, val in genome_overview_data.items():
            overview_content += '<tr><td><b>{}</b></td>'.format(key)
            overview_content += '<td>{}</td>'.format(val)
            overview_content += '</tr>\n'
        overview_content += '</table>'

        feature_content = str(
            [[str(k), v]
             for k, v in list(genome_data.get('feature_counts', {}).items())
             if k != 'gene'])
        contig_content = str(
            [[str(c), l]
             for c, l in zip(genome_data.get('contig_ids', []),
                             genome_data.get('contig_lengths', []))])

        with open(result_file_path, 'w') as result_file:
            with open(os.path.join(os.path.dirname(__file__),
                                   'report_template',
                                   'report_template_genome.html'),
                      'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace(
                    '<p>Overview_Content</p>', overview_content)
                report_template = report_template.replace(
                    '*FEATURE_DATA*', feature_content)
                report_template = report_template.replace(
                    '*CONTIG_DATA*', contig_content)
                result_file.write(report_template)
        # (removed a redundant result_file.close(); the `with` block
        # already closes the file)

        report_shock_id = self.dfu.file_to_shock({'file_path': tmp_dir,
                                                  'pack': 'zip'})['shock_id']

        html_report.append({
            'shock_id': report_shock_id,
            'name': os.path.basename(result_file_path),
            'label': os.path.basename(result_file_path),
            'description': 'HTML summary report for imported Genome'
        })
        return html_report

    def generate_report(self, genome_ref, params):
        """
        :param genome_ref: Return Val from GenomeFileUtil for Uploaded genome
                           Need to get report warnings and message from it.
        :param params: original import params (workspace_name is used)
        :return: dict with 'report_name' and 'report_ref' of the created
                 KBaseReport
        """
        uuid_string = str(uuid.uuid4())

        objects_created = [{
            'ref': genome_ref,
            'description': 'Imported Genome'
        }]
        output_html_files = self.generate_html_report(genome_ref, params)
        report_params = {
            'message': '',
            'workspace_name': params.get('workspace_name'),
            'objects_created': objects_created,
            'html_links': output_html_files,
            'direct_html_link_index': 0,
            'html_window_height': 300,
            'report_object_name': 'kb_genome_upload_report_' + uuid_string
        }
        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {
            'report_name': output['name'],
            'report_ref': output['ref']
        }
        return report_output