def loadReadsSet(self, fwd_reads, rev_reads, params):
    if hasattr(self.__class__, 'reads_set_ref'):
        return self.__class__.reads_set_ref
    pe_reads_ref = self.loadPairedEndReads(fwd_reads, rev_reads, params)
    reads_set_name = 'TestReadsSet'
    # create the set object
    reads_set_data = {
        'description': 'Reads Set for testing Bwa',
        'items': [{
            'ref': pe_reads_ref,
            'label': 'rs1'
        }]
    }
    # test a save
    set_api = SetAPI(self.srv_wiz_url)
    res = set_api.save_reads_set_v1({
        'data': reads_set_data,
        'output_object_name': reads_set_name,
        'workspace': params["workspace_name"]
    })
    reads_set_ref = res['set_ref']
    # reads_set_ref = '5264/52/1'
    print('Loaded ReadsSet: ' + reads_set_ref)
    # cache on the class so the hasattr() check above actually short-circuits
    # on subsequent calls (the original never set this attribute)
    self.__class__.reads_set_ref = reads_set_ref
    return reads_set_ref
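# Hypothetical companion sketch (not part of the original test class): once a
# ReadsSet is saved as above, it can be read back with SetAPI.get_reads_set_v1.
# 'srv_wiz_url' and 'reads_set_ref' are assumed to be in scope.
def fetch_reads_set_items(srv_wiz_url, reads_set_ref):
    set_api = SetAPI(srv_wiz_url)
    res = set_api.get_reads_set_v1({
        'ref': reads_set_ref,
        'include_item_info': 1  # also return object_info tuples for each item
    })
    # each item carries the saved 'ref' and 'label' fields
    return res['data']['items']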
def __init__(self, scratch_dir, callback_url, workspace_url, srv_wiz_url):
    self.scratch_dir = scratch_dir
    self.rau = ReadsAlignmentUtils(callback_url)
    self.kbr = KBaseReport(callback_url)
    self.dfu = DataFileUtil(callback_url)
    self.gfu = GenomeFileUtil(callback_url)
    self.set_api = SetAPI(srv_wiz_url)
    self.ws = Workspace(workspace_url)
    self.valid_commands = ['bamqc', 'multi-bamqc']
def test_list_object_with_sets(self):
    ws_name1 = self.createWs()
    reads_obj_ref = self.__class__.example_reads_ref
    set_obj_name = "MyReadsSet.1"
    sapi = SetAPI(self.__class__.serviceWizardURL, token=self.getContext()['token'],
                  service_ver=self.__class__.SetAPI_version)
    sapi.save_reads_set_v1({'workspace': ws_name1, 'output_object_name': set_obj_name,
                            'data': {'description': '',
                                     'items': [{'ref': reads_obj_ref}]}})

    # Listing without data palettes should report the set item but no palette refs
    list_ret = self.getImpl().list_objects_with_sets(self.getContext(),
                                                     {"ws_name": ws_name1})[0]
    ret = list_ret['data']
    self.assertTrue(len(ret) > 0)
    set_count = 0
    for item in ret:
        self.assertTrue("object_info" in item)
        if "set_items" in item:
            set_count += 1
            set_items = item["set_items"]["set_items_info"]
            self.assertEqual(1, len(set_items))
    self.assertEqual(1, set_count)
    self.assertNotIn('data_palette_refs', list_ret)

    # Listing with include_data_palettes should add the palette refs
    list_ret = self.getImpl().list_objects_with_sets(self.getContext(),
                                                     {"ws_name": ws_name1,
                                                      "include_data_palettes": 1})[0]
    ret = list_ret['data']
    self.assertTrue(len(ret) > 0)
    set_count = 0
    for item in ret:
        self.assertTrue("object_info" in item)
        if "set_items" in item:
            set_count += 1
            set_items = item["set_items"]["set_items_info"]
            self.assertEqual(1, len(set_items))
    self.assertEqual(1, set_count)
    self.assertIn('data_palette_refs', list_ret)

    # Listing by workspace id should match listing by name
    ws_id = self.getWsClient().get_workspace_info({"workspace": ws_name1})[0]
    ret2 = self.getImpl().list_objects_with_sets(self.getContext(),
                                                 {"ws_id": ws_id})[0]["data"]
    self.assertEqual(len(ret), len(ret2))

    # Type filter should return only matching objects
    type_filter = "KBaseSets.ReadsSet"
    ret3 = self.getImpl().list_objects_with_sets(self.getContext(),
                                                 {"types": [type_filter],
                                                  "workspaces": [str(ws_id)]})[0]["data"]
    self.assertTrue(len(ret3) > 0)
    for item in ret3:
        info = item['object_info']
        obj_type = info[2].split('-')[0]
        self.assertEqual(type_filter, obj_type)

    type_filter = "KBaseGenomes.Genome"
    ret4 = self.getImpl().list_objects_with_sets(self.getContext(),
                                                 {"types": [type_filter],
                                                  "workspaces": [str(ws_id)]})[0]["data"]
    self.assertTrue(len(ret4) == 0)
def load_reads_set(srv_wiz_url, ws_name, reads_set, target_name):
    """
    Combine a list of reads references ({"ref": ..., "label": ...} items)
    into a ReadsSet and save it under target_name.
    """
    set_client = SetAPI(srv_wiz_url)
    set_output = set_client.save_reads_set_v1({
        "workspace": ws_name,
        "output_object_name": target_name,
        "data": {
            "description": "reads set for testing",
            "items": reads_set
        }
    })
    return set_output["set_ref"]
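# Hedged usage sketch for load_reads_set above; 'srv_wiz_url', 'ws_name' and the
# reads refs are hypothetical placeholders, not values from the original module.
reads_set_items = [
    {'ref': '1/2/3', 'label': 'wt'},       # each item: reads object ref + condition label
    {'ref': '1/4/5', 'label': 'mutant'},
]
set_ref = load_reads_set(srv_wiz_url, ws_name, reads_set_items, 'MyTestReadsSet')
print('saved ReadsSet: ' + set_ref)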
def fetch_reads_refs_from_sampleset(ref, ws_url, srv_wiz_url):
    """
    From the given object ref, return a list of all reads objects that are a
    part of that object. E.g., if ref is a ReadsSet, return a list of all
    PairedEndLibrary or SingleEndLibrary refs that are a member of that
    ReadsSet. This is returned as a list of dictionaries as follows:
    {
        "ref": reads object reference,
        "condition": condition string associated with that reads object,
        "name": reads object name (needed for saving an AlignmentSet)
    }
    The only required key is "ref"; all other keys may or may not be present,
    based on the type of the object behind the initial ref. E.g. an
    RNASeqSampleSet might have condition info for each reads object, but a
    single PairedEndLibrary may not have that info.
    If ref is already a Reads library, just returns a list with ref as a
    single element.
    """
    obj_type = get_object_type(ref, ws_url)
    refs = list()
    if ("KBaseSets.ReadsSet" in obj_type or
            "KBaseRNASeq.RNASeqSampleSet" in obj_type):
        print("Looking up reads references in ReadsSet object")
        set_client = SetAPI(srv_wiz_url)
        reads_set = set_client.get_reads_set_v1({
            "ref": ref,
            "include_item_info": 0,
            "include_set_item_ref_paths": 1
        })
        print("Got results from ReadsSet object")
        pprint(reads_set)
        ref_list = [r["ref_path"] for r in reads_set["data"]["items"]]
        reads_names = get_object_names(ref_list, ws_url)
        for reads in reads_set["data"]["items"]:
            ref = reads["ref_path"]
            refs.append({
                "ref": ref,
                "condition": reads["label"],
                "name": reads_names[ref]
            })
    elif ("KBaseAssembly.SingleEndLibrary" in obj_type or
          "KBaseFile.SingleEndLibrary" in obj_type or
          "KBaseAssembly.PairedEndLibrary" in obj_type or
          "KBaseFile.PairedEndLibrary" in obj_type):
        refs.append({"ref": ref, "name": get_object_names([ref], ws_url)[ref]})
    else:
        raise ValueError("Unable to fetch reads reference from object {} "
                         "which is a {}".format(ref, obj_type))
    return refs
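# Illustrative note (a sketch; the refs below are placeholders): for a two-item
# ReadsSet, fetch_reads_refs_from_sampleset returns something shaped like
#   [{"ref": "1/2/3;4/5/6", "condition": "wt", "name": "reads_1"},
#    {"ref": "1/2/3;7/8/9", "condition": "mutant", "name": "reads_2"}]
# where each "ref" is a ref-path rooted at the set, as requested via
# "include_set_item_ref_paths": 1 in the get_reads_set_v1 call.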
def test_two_users_set_inside_dp(self):
    ws_name1_1 = self.createWs()
    # Injecting reads object (real copy) into workspace1
    orig_reads_obj_ref = self.__class__.example_reads_ref
    reads_obj_name = "TestReads"
    self.getWsClient().copy_object({'from': {'ref': orig_reads_obj_ref},
                                    'to': {'workspace': ws_name1_1,
                                           'name': reads_obj_name}})
    copy_reads_obj_ref = ws_name1_1 + '/' + reads_obj_name
    ws_name1_2 = self.createWs()
    set_obj_name = "MyReadsSet.1"
    sapi = SetAPI(self.__class__.serviceWizardURL, token=self.getContext()['token'],
                  service_ver=self.__class__.SetAPI_version)
    sapi.save_reads_set_v1({'workspace': ws_name1_2, 'output_object_name': set_obj_name,
                            'data': {'description': '',
                                     'items': [{'ref': copy_reads_obj_ref}]}})
    orig_set_ref = ws_name1_2 + '/' + set_obj_name
    # Making DP-copy of reads set object by user2
    ws_name2 = self.createWs2()
    # Let's share workspace containing set with user2
    self.getWsClient().set_permissions({'workspace': ws_name1_2, 'new_permission': 'r',
                                        'users': [self.getContext2()['user_id']]})
    # Import reads set ref into DataPalette of third workspace
    dps = DataPaletteService(self.__class__.serviceWizardURL,
                             token=self.getContext2()['token'],
                             service_ver=self.__class__.DataPalette_version)
    dps.add_to_palette({'workspace': ws_name2, 'new_refs': [{'ref': orig_set_ref}]})
    dp_ref_map = dps.list_data({'workspaces': [ws_name2]})['data_palette_refs']
    set_ref_path = next(iter(dp_ref_map.values())) + ';' + orig_set_ref
    reads_ref_path = set_ref_path + ';' + copy_reads_obj_ref
    # Un-share original workspace
    self.getWsClient().set_permissions({'workspace': ws_name1_2, 'new_permission': 'n',
                                        'users': [self.getContext2()['user_id']]})
    # Let's check that we can list the set and see the reads object as a set item
    ret = self.getImpl().list_objects_with_sets(self.getContext2(),
                                                {"ws_name": ws_name2})[0]["data"]
    self.assertEqual(1, len(ret))
    item = ret[0]
    self.assertTrue('set_items' in item)
    self.assertTrue('set_items_info' in item['set_items'])
    self.assertEqual(1, len(item['set_items']['set_items_info']))
    # Check access to reads and to set objects
    info = self.getWsClient2().get_object_info_new({'objects': [{'ref': set_ref_path}]})[0]
    self.assertEqual(set_obj_name, info[1])
    info = self.getWsClient2().get_object_info_new({'objects': [{'ref': reads_ref_path}]})[0]
    self.assertEqual(reads_obj_name, info[1])
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.scratch = config['scratch']
    self.user_id = config['USER_ID']
    self.dfu = DataFileUtil(self.callback_url)
    self.genbank_import = ImportGenbankUtil(config)
    self.gff_fasta_import = ImportGFFFastaUtil(config)
    self.fasta_import = ImportAssemblyUtil(config)
    self.set_client = SetAPI(config['srv-wiz-url'])
def setUpClass(cls):
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('kb_Msuite'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [
                        {'service': 'kb_Msuite',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.serviceImpl = kb_Msuite(cls.cfg)
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.scratch = cls.cfg['scratch']
    cls.suffix = int(time.time() * 1000)
    #cls.scratch = cls.cfg['scratch']+'_'+str(suffix)
    #cls.cfg['scratch'] = cls.scratch
    #if not os.path.exists(cls.scratch):
    #    os.mkdir(cls.scratch)
    cls.checkm_runner = CheckMUtil(cls.cfg, cls.ctx)

    cls.wsName = "test_kb_Msuite_" + str(cls.suffix)
    cls.ws_info = cls.wsClient.create_workspace({'workspace': cls.wsName})

    cls.au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    cls.setAPI = SetAPI(url=cls.cfg['srv-wiz-url'], token=cls.ctx['token'])
    cls.gfu = GenomeFileUtil(os.environ['SDK_CALLBACK_URL'], service_ver='dev')
    cls.mu = MetagenomeUtils(os.environ['SDK_CALLBACK_URL'])

    # stage an input and output directory
    """
    cls.input_dir = os.path.join(cls.scratch, 'input_1')
    cls.output_dir = os.path.join(cls.scratch, 'output_1')
    cls.all_seq_fasta = os.path.join(cls.scratch, 'all_seq.fna')
    shutil.copytree(os.path.join('data', 'example_out', 'input'), cls.input_dir)
    shutil.copytree(os.path.join('data', 'example_out', 'output'), cls.output_dir)
    shutil.copy(os.path.join('data', 'example_out', 'all_seq.fna'), cls.all_seq_fasta)
    """

    # prepare WS data
    cls.prepare_data()
def setUpClass(cls):
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    test_time_stamp = int(time.time() * 1000)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('kb_Msuite'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'kb_Msuite',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL)
    cls.serviceImpl = kb_Msuite(cls.cfg)
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.scratch = cls.cfg['scratch']
    cls.appdir = cls.cfg['appdir']
    cls.test_data_dir = os.path.join(cls.scratch, 'test_data')
    cls.suffix = test_time_stamp
    cls.checkm_runner = CheckMUtil(cls.cfg, cls.ctx)
    cls.wsName = "test_kb_Msuite_" + str(cls.suffix)
    cls.ws_info = cls.wsClient.create_workspace({'workspace': cls.wsName})
    cls.au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    cls.gfu = GenomeFileUtil(os.environ['SDK_CALLBACK_URL'], service_ver='dev')
    cls.mu = MetagenomeUtils(os.environ['SDK_CALLBACK_URL'])
    cls.setAPI = SetAPI(url=cls.cfg['srv-wiz-url'], token=cls.ctx['token'])
    cls.kr = KBaseReport(os.environ['SDK_CALLBACK_URL'])
    cls.data_loaded = False
def upload_alignment_set(self, alignment_items, alignmentset_name, ws_name):
    """
    Compiles and saves a set of alignment references (+ other stuff) into a
    KBaseRNASeq.RNASeqAlignmentSet.
    Returns the reference to the new alignment set.

    alignment_items: [{
        "ref": alignment_ref,
        "label": condition label
    }]
    alignmentset_name: name of the final set object.
    ws_name: name of the target workspace.
    """
    print("Uploading completed alignment set")
    alignment_set = {
        "description": "Alignments using HISAT2, v.{}".format(HISAT_VERSION),
        "items": alignment_items
    }
    set_api = SetAPI(self.srv_wiz_url)
    set_info = set_api.save_reads_alignment_set_v1({
        "workspace": ws_name,
        "output_object_name": alignmentset_name,
        "data": alignment_set
    })
    return set_info["set_ref"]
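# Hedged usage sketch for upload_alignment_set above; 'uploader' stands in for
# an instance of the surrounding class, and the alignment refs are placeholders.
alignment_items = [
    {'ref': '55/1/1', 'label': 'condition_A'},
    {'ref': '55/2/1', 'label': 'condition_B'},
]
alignment_set_ref = uploader.upload_alignment_set(
    alignment_items, 'my_hisat2_alignments', 'my_workspace')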
def test_unique_items(self):
    # Create original workspace with reads object + ReadsSet object
    ws_name1 = self.createWs()
    foft = FakeObjectsForTests(os.environ['SDK_CALLBACK_URL'])
    reads_obj_name = "testreads1"
    foft.create_fake_reads({'ws_name': ws_name1, 'obj_names': [reads_obj_name]})
    reads_obj_ref = ws_name1 + '/' + reads_obj_name
    set_obj_name = "testreads_set1"
    sapi = SetAPI(self.__class__.serviceWizardURL, token=self.getContext()['token'],
                  service_ver=self.__class__.SetAPI_version)
    sapi.save_reads_set_v1({'workspace': ws_name1, 'output_object_name': set_obj_name,
                            'data': {'description': '',
                                     'items': [{'ref': reads_obj_ref}]}})
    set_obj_ref = ws_name1 + '/' + set_obj_name

    # Create workspace with DataPalette copies of the Reads object and the ReadsSet
    ws_name2 = self.createWs()
    dps = DataPaletteService(self.__class__.serviceWizardURL,
                             token=self.getContext()['token'],
                             service_ver=self.__class__.DataPalette_version)
    dps.add_to_palette({'workspace': ws_name2,
                        'new_refs': [{'ref': reads_obj_ref}, {'ref': set_obj_ref}]})

    # Check that listing both workspaces at the same time gives unique items
    ret = self.getImpl().list_objects_with_sets(self.getContext(),
                                                {"workspaces": [ws_name1, ws_name2]})[0]["data"]
    self.assertEqual(2, len(ret))
def __init__(self, config):
    self.ws_url = config["workspace-url"]
    self.callback_url = config["SDK_CALLBACK_URL"]
    self.token = config["KB_AUTH_TOKEN"]
    self.shock_url = config["shock-url"]
    self.srv_wiz_url = config["srv-wiz-url"]
    self.scratch = config["scratch"]
    self.dfu = DataFileUtil(self.callback_url)
    self.gfu = GenomeFileUtil(self.callback_url)
    self.rau = ReadsAlignmentUtils(self.callback_url)
    self.au = AssemblyUtil(self.callback_url)
    self.eu = ExpressionUtils(self.callback_url)
    self.ws = Workspace(self.ws_url, token=self.token)
    self.set_client = SetAPI(self.srv_wiz_url, service_ver="dev")
def fetch_reads_refs_from_sampleset(self, ref, info, validated_params):
    """
    Note: adapted from kbaseapps/kb_hisat2 - file_util.py

    From the given object ref, return a list of all reads objects that are a
    part of that object. E.g., if ref is a ReadsSet, return a list of all
    PairedEndLibrary or SingleEndLibrary refs that are a member of that
    ReadsSet. This is returned as a list of dictionaries as follows:
    {
        "ref": reads object reference,
        "condition": condition string associated with that reads object
    }
    The only required key is "ref"; all other keys may or may not be present,
    based on the type of the object behind the initial ref. E.g. an
    RNASeqSampleSet might have condition info for each reads object, but a
    single PairedEndLibrary may not have that info.
    If ref is already a Reads library, just returns a list with ref as a
    single element.
    """
    obj_type = self.get_type_from_obj_info(info)
    refs = list()
    refs_for_ws_info = list()
    if "KBaseSets.ReadsSet" in obj_type or "KBaseRNASeq.RNASeqSampleSet" in obj_type:
        print("Looking up reads references in ReadsSet object")
        set_api = SetAPI(self.srv_wiz_url)
        reads_set = set_api.get_reads_set_v1({
            'ref': ref,
            'include_item_info': 0,
            'include_set_item_ref_paths': 1
        })

        for reads in reads_set["data"]["items"]:
            refs.append({
                'ref': reads['ref_path'],
                'condition': reads['label']
            })
            refs_for_ws_info.append({'ref': reads['ref_path']})
    else:
        raise ValueError("Unable to fetch reads reference from object {} "
                         "which is a {}".format(ref, obj_type))

    # get object info so we can name things properly
    infos = self.ws.get_object_info3({'objects': refs_for_ws_info})['infos']

    name_ext = '_alignment'
    if ('output_alignment_suffix' in validated_params
            and validated_params['output_alignment_suffix'] is not None):
        ext = validated_params['output_alignment_suffix'].replace(' ', '')
        if ext:
            name_ext = ext

    unique_name_lookup = {}
    for k in range(0, len(refs)):
        refs[k]['info'] = infos[k]
        name = infos[k][1]
        if name not in unique_name_lookup:
            unique_name_lookup[name] = 1
        else:
            unique_name_lookup[name] += 1
            name = name + '_' + str(unique_name_lookup[name])
        name = name + name_ext
        refs[k]['alignment_output_name'] = name

    return refs
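# Illustrative note (a sketch, not part of the original module): the
# unique-name loop above suffixes duplicate object names. E.g., two libraries
# both named 'reads', with the default '_alignment' suffix, come out as
# 'reads_alignment' and 'reads_2_alignment'.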
def prepare_data(cls):
    workspace_id = cls.dfu.ws_name_to_id(cls.wsName)

    # upload genome object
    genbank_file_name = 'minimal.gbff'
    genbank_file_path = os.path.join(cls.scratch, genbank_file_name)
    shutil.copy(os.path.join('data', genbank_file_name), genbank_file_path)
    genome_object_name = 'test_Genome'
    cls.genome_ref = cls.gfu.genbank_to_genome({
        'file': {'path': genbank_file_path},
        'workspace_name': cls.wsName,
        'genome_name': genome_object_name
    })['genome_ref']
    print('TEST genome_ref=' + cls.genome_ref)

    # upload assembly object
    file_name = 'test.fna'
    fasta_path = os.path.join(cls.scratch, file_name)
    shutil.copy(os.path.join('data', file_name), fasta_path)
    assembly_name = 'test_assembly'
    cls.assembly_ref = cls.au.save_assembly_from_fasta({
        'file': {'path': fasta_path},
        'workspace_name': cls.wsName,
        'assembly_name': assembly_name
    })
    print('TEST assembly_ref=' + cls.assembly_ref)

    # upload reads objects
    reads_file_name = 'Sample1.fastq'
    reads_file_path = os.path.join(cls.scratch, reads_file_name)
    shutil.copy(os.path.join('data', reads_file_name), reads_file_path)
    reads_object_name_1 = 'test_Reads_1'
    cls.reads_ref_1 = cls.ru.upload_reads({
        'fwd_file': reads_file_path,
        'wsname': cls.wsName,
        'sequencing_tech': 'Unknown',
        'interleaved': 0,
        'name': reads_object_name_1
    })['obj_ref']
    print('TEST reads_ref_1=' + cls.reads_ref_1)
    reads_object_name_2 = 'test_Reads_2'
    cls.reads_ref_2 = cls.ru.upload_reads({
        'fwd_file': reads_file_path,
        'wsname': cls.wsName,
        'sequencing_tech': 'Unknown',
        'interleaved': 0,
        'name': reads_object_name_2
    })['obj_ref']
    print('TEST reads_ref_2=' + cls.reads_ref_2)

    # upload alignment objects
    alignment_file_name = 'accepted_hits.bam'
    alignment_file_path = os.path.join(cls.scratch, alignment_file_name)
    shutil.copy(os.path.join('data', alignment_file_name), alignment_file_path)
    alignment_object_name_1 = 'test_Alignment_1'
    cls.condition_1 = 'test_condition_1'
    cls.alignment_ref_1 = cls.rau.upload_alignment({
        'file_path': alignment_file_path,
        'destination_ref': cls.wsName + '/' + alignment_object_name_1,
        'read_library_ref': cls.reads_ref_1,
        'condition': cls.condition_1,
        'library_type': 'single_end',
        'assembly_or_genome_ref': cls.genome_ref
    })['obj_ref']
    print('TEST alignment_ref_1=' + cls.alignment_ref_1)
    alignment_object_name_2 = 'test_Alignment_2'
    cls.condition_2 = 'test_condition_2'
    cls.alignment_ref_2 = cls.rau.upload_alignment({
        'file_path': alignment_file_path,
        'destination_ref': cls.wsName + '/' + alignment_object_name_2,
        'read_library_ref': cls.reads_ref_2,
        'condition': cls.condition_2,
        'library_type': 'single_end',
        'assembly_or_genome_ref': cls.genome_ref
    })['obj_ref']
    print('TEST alignment_ref_2=' + cls.alignment_ref_2)
    alignment_object_name_3 = 'test_Alignment_3'
    cls.condition_3 = 'test_condition_3'
    cls.alignment_ref_3 = cls.rau.upload_alignment({
        'file_path': alignment_file_path,
        'destination_ref': cls.wsName + '/' + alignment_object_name_3,
        'read_library_ref': cls.reads_ref_2,
        'condition': cls.condition_3,
        'library_type': 'single_end',
        'assembly_or_genome_ref': cls.assembly_ref
    })['obj_ref']
    print('TEST alignment_ref_3=' + cls.alignment_ref_3)

    # upload sample_set object
    sample_set_object_name = 'test_Sample_Set'
    sample_set_data = {
        'sampleset_id': sample_set_object_name,
        'sample_ids': [cls.reads_ref_1, cls.reads_ref_2],
        'sampleset_desc': 'test sampleset object',
        'Library_type': 'SingleEnd',
        'condition': [cls.condition_1, cls.condition_2],
        'domain': 'Unknown',
        'num_samples': 2,
        'platform': 'Unknown'
    }
    save_object_params = {
        'id': workspace_id,
        'objects': [{
            'type': 'KBaseRNASeq.RNASeqSampleSet',
            'data': sample_set_data,
            'name': sample_set_object_name
        }]
    }
    dfu_oi = cls.dfu.save_objects(save_object_params)[0]
    cls.sample_set_ref = str(dfu_oi[6]) + '/' + str(dfu_oi[0]) + '/' + str(dfu_oi[4])
    print('TEST sample_set_ref=' + cls.sample_set_ref)

    # upload (legacy) alignment_set object
    object_type = 'KBaseRNASeq.RNASeqAlignmentSet'
    alignment_set_object_name = 'test_Alignment_Set'
    alignment_set_data = {
        'genome_id': cls.genome_ref,
        'read_sample_ids': [reads_object_name_1, reads_object_name_2],
        'mapped_rnaseq_alignments': [
            {reads_object_name_1: alignment_object_name_1},
            {reads_object_name_2: alignment_object_name_2}
        ],
        'mapped_alignments_ids': [
            {reads_object_name_1: cls.alignment_ref_1},
            {reads_object_name_2: cls.alignment_ref_2}
        ],
        'sample_alignments': [cls.alignment_ref_1, cls.alignment_ref_2],
        'sampleset_id': cls.sample_set_ref
    }
    save_object_params = {
        'id': workspace_id,
        'objects': [{
            'type': object_type,
            'data': alignment_set_data,
            'name': alignment_set_object_name
        }]
    }
    dfu_oi = cls.dfu.save_objects(save_object_params)[0]
    cls.old_alignment_set_ref = str(dfu_oi[6]) + '/' + str(dfu_oi[0]) + '/' + str(dfu_oi[4])
    print('TEST (legacy) KBaseRNASeq.alignment_set_ref=' + cls.old_alignment_set_ref)

    # Save the new-style alignment set
    items = [
        {'ref': cls.alignment_ref_1, 'label': 'c1'},
        {'ref': cls.alignment_ref_2, 'label': 'c2'}
    ]
    alignment_set_data = {'description': '', 'items': items}
    alignment_set_save_params = {
        'data': alignment_set_data,
        'workspace': cls.wsName,
        'output_object_name': 'MyReadsAlignmentSet'
    }
    set_api = SetAPI(cls.srv_wiz_url)
    save_result = set_api.save_reads_alignment_set_v1(alignment_set_save_params)
    cls.new_alignment_set_ref = save_result['set_ref']
    print('TEST KBaseSet.alignment_set_ref=')
    print(cls.new_alignment_set_ref)
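# Note (a sketch, not in the original tests): refs like cls.sample_set_ref above
# are assembled from the workspace object_info tuple, where index 6 is the
# workspace id, 0 the object id, and 4 the version. A small helper makes that
# explicit:
def info_to_ref(obj_info):
    return '{}/{}/{}'.format(obj_info[6], obj_info[0], obj_info[4])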
def stage_input(self, input_ref, fasta_file_extension):
    '''
    Stage input based on an input data reference for CheckM

    input_ref can be a reference to an Assembly, BinnedContigs, or (not yet
    implemented) a Genome

    This method creates a directory in the scratch area with the set of Fasta
    files; names will have the fasta_file_extension parameter tacked on.

        ex: staged_input = stage_input('124/15/1', 'fna')

        staged_input
        {"input_dir": '...'}
    '''
    # config
    #SERVICE_VER = 'dev'
    SERVICE_VER = 'release'
    # object_info tuple
    [OBJID_I, NAME_I, TYPE_I, SAVE_DATE_I, VERSION_I, SAVED_BY_I,
     WSID_I, WORKSPACE_I, CHSUM_I, SIZE_I, META_I] = range(11)
    ws = Workspace(self.ws_url)

    # 1) generate a folder in scratch to hold the input
    suffix = str(int(time.time() * 1000))
    input_dir = os.path.join(self.scratch, 'bins_' + suffix)
    all_seq_fasta = os.path.join(self.scratch,
                                 'all_sequences_' + suffix + '.' + fasta_file_extension)
    if not os.path.exists(input_dir):
        os.makedirs(input_dir)

    # 2) based on type, download the files
    obj_name = self.get_data_obj_name(input_ref)
    type_name = self.get_data_obj_type(input_ref)

    # auClient
    try:
        auClient = AssemblyUtil(self.callbackURL, token=self.ctx['token'],
                                service_ver=SERVICE_VER)
    except Exception as e:
        raise ValueError('Unable to instantiate auClient with callbackURL: '
                         + self.callbackURL + ' ERROR: ' + str(e))

    # setAPI_Client
    try:
        #setAPI_Client = SetAPI(url=self.callbackURL, token=self.ctx['token'])  # SDK local. local doesn't work for SetAPI
        setAPI_Client = SetAPI(url=self.serviceWizardURL, token=self.ctx['token'])  # dynamic service
    except Exception as e:
        raise ValueError('Unable to instantiate setAPI_Client with serviceWizardURL: '
                         + self.serviceWizardURL + ' ERROR: ' + str(e))

    # mguClient
    try:
        mguClient = MetagenomeUtils(self.callbackURL, token=self.ctx['token'],
                                    service_ver=SERVICE_VER)
    except Exception as e:
        raise ValueError('Unable to instantiate mguClient with callbackURL: '
                         + self.callbackURL + ' ERROR: ' + str(e))

    # Standard Single Assembly
    #
    if type_name in ['KBaseGenomeAnnotations.Assembly', 'KBaseGenomes.ContigSet']:
        # create file data
        filename = os.path.join(input_dir, obj_name + '.' + fasta_file_extension)
        auClient.get_assembly_as_fasta({'ref': input_ref, 'filename': filename})
        if not os.path.isfile(filename):
            raise ValueError('Error generating fasta file from an Assembly or ContigSet with AssemblyUtil')
        # make sure fasta file isn't empty
        min_fasta_len = 1
        if not self.fasta_seq_len_at_least(filename, min_fasta_len):
            raise ValueError('Assembly or ContigSet is empty in filename: ' + str(filename))

    # AssemblySet
    #
    elif type_name == 'KBaseSets.AssemblySet':
        # read assemblySet
        try:
            assemblySet_obj = setAPI_Client.get_assembly_set_v1({'ref': input_ref,
                                                                 'include_item_info': 1})
        except Exception as e:
            raise ValueError('Unable to get object from workspace: (' + input_ref + ')' + str(e))
        assembly_refs = []
        assembly_names = []
        for assembly_item in assemblySet_obj['data']['items']:
            this_assembly_ref = assembly_item['ref']
            # assembly obj info
            try:
                this_assembly_info = ws.get_object_info_new({'objects': [{'ref': this_assembly_ref}]})[0]
                this_assembly_name = this_assembly_info[NAME_I]
            except Exception as e:
                raise ValueError('Unable to get object from workspace: (' + this_assembly_ref + '): ' + str(e))
            assembly_refs.append(this_assembly_ref)
            assembly_names.append(this_assembly_name)

        # create file data (name for file is what's reported in results)
        for ass_i, assembly_ref in enumerate(assembly_refs):
            this_name = assembly_names[ass_i]
            filename = os.path.join(input_dir, this_name + '.' + fasta_file_extension)
            auClient.get_assembly_as_fasta({'ref': assembly_ref, 'filename': filename})
            if not os.path.isfile(filename):
                raise ValueError('Error generating fasta file from an Assembly or ContigSet with AssemblyUtil')
            # make sure fasta file isn't empty
            min_fasta_len = 1
            if not self.fasta_seq_len_at_least(filename, min_fasta_len):
                raise ValueError('Assembly or ContigSet is empty in filename: ' + str(filename))

    # Binned Contigs
    #
    elif type_name == 'KBaseMetagenomes.BinnedContigs':
        # download the bins as fasta and set the input folder name
        bin_file_dir = mguClient.binned_contigs_to_file({'input_ref': input_ref,
                                                         'save_to_shock': 0})['bin_file_directory']
        os.rename(bin_file_dir, input_dir)
        # make sure fasta files aren't empty
        self.set_fasta_file_extensions(input_dir, fasta_file_extension)
        for (dirpath, dirnames, filenames) in os.walk(input_dir):
            for fasta_file in filenames:
                fasta_path = os.path.join(input_dir, fasta_file)
                min_fasta_len = 1
                if not self.fasta_seq_len_at_least(fasta_path, min_fasta_len):
                    raise ValueError('Binned Assembly is empty for fasta_path: ' + str(fasta_path))
            break

    # Genome and GenomeSet
    #
    elif type_name == 'KBaseGenomes.Genome' or type_name == 'KBaseSearch.GenomeSet':
        genome_obj_names = []
        genome_sci_names = []
        genome_assembly_refs = []

        if type_name == 'KBaseGenomes.Genome':
            genomeSet_refs = [input_ref]
        else:  # get genomeSet_refs from GenomeSet object
            genomeSet_refs = []
            try:
                genomeSet_object = ws.get_objects2({'objects': [{'ref': input_ref}]})['data'][0]['data']
            except Exception as e:
                #to get the full stack trace: traceback.format_exc()
                raise ValueError('Unable to fetch ' + str(input_ref) + ' object from workspace: ' + str(e))

            # iterate through genomeSet members
            for genome_id in genomeSet_object['elements'].keys():
                if 'ref' not in genomeSet_object['elements'][genome_id] or \
                        genomeSet_object['elements'][genome_id]['ref'] is None or \
                        genomeSet_object['elements'][genome_id]['ref'] == '':
                    raise ValueError('genome_ref not found for genome_id: ' + str(genome_id)
                                     + ' in genomeSet: ' + str(input_ref))
                else:
                    genomeSet_refs.append(genomeSet_object['elements'][genome_id]['ref'])

        # genome obj data
        for i, this_input_ref in enumerate(genomeSet_refs):
            try:
                objects = ws.get_objects2({'objects': [{'ref': this_input_ref}]})['data']
                genome_obj = objects[0]['data']
                genome_obj_info = objects[0]['info']
                genome_obj_names.append(genome_obj_info[NAME_I])
                genome_sci_names.append(genome_obj['scientific_name'])
            except Exception:
                raise ValueError("unable to fetch genome: " + this_input_ref)

            # Get genome_assembly_ref
            if ('contigset_ref' not in genome_obj or genome_obj['contigset_ref'] is None) \
                    and ('assembly_ref' not in genome_obj or genome_obj['assembly_ref'] is None):
                msg = "Genome " + genome_obj_names[i] + " (ref:" + input_ref + ") " \
                      + genome_sci_names[i] \
                      + " MISSING BOTH contigset_ref AND assembly_ref. Cannot process. Exiting."
                raise ValueError(msg)
            elif 'assembly_ref' in genome_obj and genome_obj['assembly_ref'] is not None:
                msg = "Genome " + genome_obj_names[i] + " (ref:" + input_ref + ") " \
                      + genome_sci_names[i] + " USING assembly_ref: " + str(genome_obj['assembly_ref'])
                print(msg)
                genome_assembly_refs.append(genome_obj['assembly_ref'])
            elif 'contigset_ref' in genome_obj and genome_obj['contigset_ref'] is not None:
                msg = "Genome " + genome_obj_names[i] + " (ref:" + input_ref + ") " \
                      + genome_sci_names[i] + " USING contigset_ref: " + str(genome_obj['contigset_ref'])
                print(msg)
                genome_assembly_refs.append(genome_obj['contigset_ref'])

        # create file data (name for file is what's reported in results)
        for ass_i, assembly_ref in enumerate(genome_assembly_refs):
            this_name = genome_obj_names[ass_i]
            filename = os.path.join(input_dir, this_name + '.' + fasta_file_extension)
            auClient.get_assembly_as_fasta({'ref': assembly_ref, 'filename': filename})
            if not os.path.isfile(filename):
                raise ValueError('Error generating fasta file from an Assembly or ContigSet with AssemblyUtil')
            # make sure fasta file isn't empty
            min_fasta_len = 1
            if not self.fasta_seq_len_at_least(filename, min_fasta_len):
                raise ValueError('Assembly or ContigSet is empty in filename: ' + str(filename))

    # Unknown type slipped through
    #
    else:
        raise ValueError('Cannot stage fasta file input directory from type: ' + type_name)

    # create summary fasta file with all bins
    self.cat_fasta_files(input_dir, fasta_file_extension, all_seq_fasta)

    return {'input_dir': input_dir, 'folder_suffix': suffix, 'all_seq_fasta': all_seq_fasta}
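# Hypothetical invocation sketch for stage_input (the ref is a placeholder):
# staged = self.stage_input('124/15/1', 'fna')
# staged['input_dir']      -> scratch folder holding one fasta file per bin/assembly
# staged['all_seq_fasta']  -> concatenated fasta of every staged sequence
# staged['folder_suffix']  -> timestamp suffix used for the staging folder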
def test_fractiontate_contigs_ASSEMBLY_ASSEMBLYSET_07(self):
    method = 'fractionate_contigs_pos_filter_ASSEMBLY_ASSEMBLYSET_07'
    print("\n\nRUNNING: test_" + method + "()")
    print("==========================================================\n\n")

    # upload test data
    try:
        auClient = AssemblyUtil(self.callback_url, token=self.getContext()['token'])
    except Exception as e:
        raise ValueError('Unable to instantiate auClient with callbackURL: '
                         + self.callback_url + ' ERROR: ' + str(e))
    try:
        setAPI_Client = SetAPI(self.serviceWizardURL, token=self.getContext()['token'])
    except Exception as e:
        raise ValueError('Unable to instantiate setAPI_Client with serviceWizardURL: '
                         + self.serviceWizardURL + ' ERROR: ' + str(e))

    base_1 = 'assembly_1plus2'
    base_2a = 'assembly_2a'
    base_2b = 'assembly_2b'
    type_1 = 'Assembly'
    type_2a = 'Assembly'
    type_2b = 'Assembly'
    ass_file_1_fa = base_1 + '.fa.gz'
    ass_file_2a_fa = base_2a + '.fa.gz'
    ass_file_2b_fa = base_2b + '.fa.gz'
    ass_path_1_fa = os.path.join(self.scratch, ass_file_1_fa)
    ass_path_2a_fa = os.path.join(self.scratch, ass_file_2a_fa)
    ass_path_2b_fa = os.path.join(self.scratch, ass_file_2b_fa)
    shutil.copy(os.path.join("data", ass_file_1_fa), ass_path_1_fa)
    shutil.copy(os.path.join("data", ass_file_2a_fa), ass_path_2a_fa)
    shutil.copy(os.path.join("data", ass_file_2b_fa), ass_path_2b_fa)
    ass_ref_1 = auClient.save_assembly_from_fasta({
        'file': {'path': ass_path_1_fa},
        'workspace_name': self.getWsName(),
        'assembly_name': base_1 + '.' + type_1
    })
    ass_ref_2a = auClient.save_assembly_from_fasta({
        'file': {'path': ass_path_2a_fa},
        'workspace_name': self.getWsName(),
        'assembly_name': base_2a + '.' + type_2a
    })
    ass_ref_2b = auClient.save_assembly_from_fasta({
        'file': {'path': ass_path_2b_fa},
        'workspace_name': self.getWsName(),
        'assembly_name': base_2b + '.' + type_2b
    })

    # AssemblySet
    assemblySet_items = [
        {'ref': ass_ref_2a, 'label': 'assembly_2a'},
        {'ref': ass_ref_2b, 'label': 'assembly_2b'}
    ]
    assemblySet_obj = {
        'description': 'test assemblySet',
        'items': assemblySet_items
    }
    assemblySet_ref = setAPI_Client.save_assembly_set_v1({
        'workspace_name': self.getWsName(),
        'output_object_name': 'assembly_2a2b.AssemblySet',
        'data': assemblySet_obj
    })['set_ref']

    # run method
    base_output_name = method + '_output'
    fractionate_mode = 'neg'
    params = {
        'workspace_name': self.getWsName(),
        'input_assembly_ref': ass_ref_1,
        'input_pos_filter_obj_refs': [assemblySet_ref],
        'fractionate_mode': fractionate_mode,
        'output_name': ('test_fractionated' + '-' + base_1 + '.' + type_1
                        + '-' + 'assemblyset_2a2b' + '-' + fractionate_mode)
    }
    result = self.getImpl().run_fractionate_contigs(self.getContext(), params)
    print('RESULT:')
    pprint(result)
class QualiMapRunner:

    QUALIMAP_PATH = '/kb/module/qualimap-bin/qualimap'
    JAVA_MEM_DEFAULT_SIZE = '16G'
    LARGE_BAM_FILE_SIZE = 20 * 1024 * 1024 * 1024  # 20 GB
    TIMEOUT = 72 * 60 * 60  # 72 hours

    def _get_file_size(self, file_path):
        file_size = os.path.getsize(file_path)
        print('File size: {} -- {}'.format(file_size, file_path))
        return file_size

    def _large_file(self, file_path):
        filename, file_extension = os.path.splitext(file_path)
        multiplier = 0
        if file_extension == '.txt':
            total_file_size = 0
            with open(file_path, 'r') as f:
                for line in f:
                    bam_file_path = line.split('\t')[1]
                    total_file_size += self._get_file_size(bam_file_path)
            print('Total file size: {}'.format(total_file_size))
            multiplier = int(total_file_size) // int(self.LARGE_BAM_FILE_SIZE)
        else:
            multiplier = int(self._get_file_size(file_path)) // int(self.LARGE_BAM_FILE_SIZE)
        print('setting number of windows multiplier to: {}'.format(multiplier))
        return multiplier

    def _timeout_handler(self, signum, frame):
        print('Signal handler called with signal', signum)
        raise ValueError('QualiMap takes too long')

    def __init__(self, scratch_dir, callback_url, workspace_url, srv_wiz_url):
        self.scratch_dir = scratch_dir
        self.rau = ReadsAlignmentUtils(callback_url)
        self.kbr = KBaseReport(callback_url)
        self.dfu = DataFileUtil(callback_url)
        self.gfu = GenomeFileUtil(callback_url)
        self.set_api = SetAPI(srv_wiz_url)
        self.ws = Workspace(workspace_url)
        self.valid_commands = ['bamqc', 'multi-bamqc']

    def run_app(self, params):
        self.validate_params(params)
        print('Validated Params = ')
        pprint(params)
        run_info = self.get_run_info(params)

        if run_info.get('mode') not in ['single', 'multi']:
            raise ValueError('Error in fetching the type to determine run settings.')

        run_error = False
        try:
            signal.signal(signal.SIGALRM, self._timeout_handler)
            signal.alarm(self.TIMEOUT)
            if run_info['mode'] == 'single':
                result = self.run_bamqc(params['input_ref'], run_info['input_info'])
            elif run_info['mode'] == 'multi':
                result = self.run_multi_sample_qc(params['input_ref'], run_info['input_info'])
            signal.alarm(0)
        except Exception:
            run_error = True

            workdir = os.path.join(self.scratch_dir,
                                   'qualimap_' + str(int(time.time() * 10000)))
            os.makedirs(workdir)

            with open(os.path.join(workdir, 'qualimapReport.html'), 'w') as report:
                report.write('<html><body><p></p></body></html>')

            package_info = self.package_output_folder(
                workdir, 'QualiMap_report',
                'EMPTY HTML report directory for QualiMap BAM QC',
                'qualimapReport.html')

            result = {
                'qc_result_folder_path': workdir,
                'qc_result_zip_info': package_info,
                'shock_id': None
            }
            error_msg = 'Running QualiMap returned an error:\n{}\n'.format(
                traceback.format_exc())
            error_msg += 'Generating simple report instead\n'
            print(error_msg)

        if params['create_report']:
            result = self.create_report(result, params['output_workspace'],
                                        run_error, params['input_ref'])

        return result

    def create_report(self, result, output_workspace, run_error=None, input_ref=None):
        if run_error:
            objects_created = []
            info = self.get_obj_info(input_ref)
            obj_type = self.get_type_from_obj_info(info)
            if obj_type in ['KBaseRNASeq.RNASeqAlignment']:
                objects_created.append({'ref': input_ref, 'description': 'Alignment'})

            if obj_type in ['KBaseRNASeq.RNASeqAlignmentSet', 'KBaseSets.ReadsAlignmentSet']:
                objects_created.append({'ref': input_ref, 'description': 'AlignmentSet'})
                reads_alignment_info = self.get_alignments_from_set(input_ref)
                for alignment in reads_alignment_info:
                    alignment_ref = alignment.get('ref')
                    objects_created.append({'ref': alignment_ref,
                                            'description': 'Alignment'})

            report_info = self.kbr.create_extended_report({
                'message': ' ',
                'objects_created': objects_created,
                'report_object_name': 'qualimap_report' + str(uuid.uuid4()),
                'workspace_name': output_workspace
            })
            result['report_name'] = report_info['name']
            result['report_ref'] = report_info['ref']
            return result

        qc_result_zip_info = result['qc_result_zip_info']
        report_info = self.kbr.create_extended_report({
            'message': '',
            'objects_created': [],
            'direct_html_link_index': 0,
            'html_links': [{
                'shock_id': qc_result_zip_info['shock_id'],
                'name': qc_result_zip_info['index_html_file_name'],
                'label': qc_result_zip_info['name']
            }],
            'report_object_name': 'qualimap_report' + str(uuid.uuid4()),
            'workspace_name': output_workspace
        })
        result['report_name'] = report_info['name']
        result['report_ref'] = report_info['ref']
        return result

    def get_gtf_file(self, input_ref, set_op=False):
        print('Start fetching GFF file from genome')

        if set_op:
            set_data = self.set_api.get_reads_alignment_set_v1({
                'ref': input_ref,
                'include_item_info': 1
            })
            input_ref = set_data['data']['items'][0]['ref']

        obj_data = self.dfu.get_objects({"object_refs": [input_ref]})['data'][0]['data']

        genome_ref = obj_data.get('genome_id')
        if not genome_ref:
            raise ValueError('Alignment is not associated with a Genome object')

        result_directory = os.path.join(self.scratch_dir, str(uuid.uuid4()))
        os.makedirs(result_directory)

        genome_gtf_file = self.gfu.genome_to_gff({
            'genome_ref': genome_ref,
            'is_gtf': True,
            'target_dir': result_directory
        })['file_path']

        return genome_gtf_file

    def run_bamqc(self, input_ref, input_info):
        # download the input and set up a working dir
        alignment_info = self.rau.download_alignment({'source_ref': input_ref})
        bam_file_path = self.find_my_bam_file(alignment_info['destination_dir'])
        try:
            gtf_file = self.get_gtf_file(input_ref)
        except Exception:
            gtf_file = ''

        workdir = os.path.join(self.scratch_dir,
                               'qualimap_' + str(int(time.time() * 10000)))
        options = ['-bam', bam_file_path, '-c', '-outdir', workdir, '-outformat', 'html']
        if gtf_file:
            options += ['-gff', gtf_file]
        options.append('--java-mem-size={}'.format(self.JAVA_MEM_DEFAULT_SIZE))  # always use large mem
        multiplier = self._large_file(bam_file_path)
        if multiplier:
            window_size = multiplier * 400
            print(f'using larger window size: {window_size} and Java memory: '
                  f'{self.JAVA_MEM_DEFAULT_SIZE}')
            # flag and value must be separate argv tokens under shell=False
            options.extend(['-nw', str(window_size)])  # increase size of windows

        self.run_cli_command('bamqc', options)

        package_info = self.package_output_folder(
            workdir, 'QualiMap_report',
            'HTML report directory for QualiMap BAM QC', 'qualimapReport.html')

        return {'qc_result_folder_path': workdir, 'qc_result_zip_info': package_info}

    def run_multi_sample_qc(self, input_ref, input_info):
        # download the input and set up a working dir
        reads_alignment_info = self.get_alignments_from_set(input_ref)
        try:
            gtf_file = self.get_gtf_file(input_ref, set_op=True)
        except Exception:
            gtf_file = ''
        suffix = 'qualimap_' + str(int(time.time() * 10000))
        workdir = os.path.join(self.scratch_dir, suffix)
        os.makedirs(workdir)

        input_file_path = self.create_multi_qualimap_cfg(reads_alignment_info, workdir)

        options = ['-d', input_file_path, '-r', '-c', '-outdir', workdir, '-outformat', 'html']
        if gtf_file:
            options += ['-gff', gtf_file]
        multiplier = self._large_file(input_file_path)
        if multiplier:
            window_size = multiplier * 400
            print(f'using larger window size: {window_size} and Java memory: '
                  f'{self.JAVA_MEM_DEFAULT_SIZE}')
            # flag and value must be separate argv tokens under shell=False
            options.extend(['-nw', str(window_size)])  # increase size of windows
        options.append(f'--java-mem-size={self.JAVA_MEM_DEFAULT_SIZE}')

        self.run_cli_command('multi-bamqc', options)

        package_info = self.package_output_folder(
            workdir, 'QualiMap_report',
            'HTML report directory for QualiMap Multi-sample BAM QC',
            'multisampleBamQcReport.html')

        return {'qc_result_folder_path': workdir, 'qc_result_zip_info': package_info}

    def get_alignments_from_set(self, alignment_set_ref):
        set_data = self.set_api.get_reads_alignment_set_v1({
            'ref': alignment_set_ref,
            'include_item_info': 1
        })
        items = set_data['data']['items']

        reads_alignment_data = []
        for alignment in items:
            alignment_info = self.rau.download_alignment({'source_ref': alignment['ref']})
            bam_file_path = self.find_my_bam_file(alignment_info['destination_dir'])
            label = None
            if 'label' in alignment:
                label = alignment['label']
            reads_alignment_data.append({
                'bam_file_path': bam_file_path,
                'ref': alignment['ref'],
                'label': label,
                'info': alignment['info']
            })
        return reads_alignment_data

    def create_multi_qualimap_cfg(self, reads_alignment_info, workdir):
        # Group by labels if there is at least one defined
        use_labels = False
        for alignment in reads_alignment_info:
            if alignment['label']:
                use_labels = True
                break

        # write the file
        input_file_path = os.path.join(workdir, 'multi_input.txt')
        input_file = open(input_file_path, 'w')
        name_lookup = {}
        for alignment in reads_alignment_info:
            name = alignment['info'][1]
            if name in name_lookup:
                name_lookup[name] += 1
                name = name + '_' + str(name_lookup[name])
            else:
                name_lookup[name] = 1

            input_file.write(name + '\t' + alignment['bam_file_path'])
            if use_labels:
                if alignment['label']:
                    input_file.write('\t' + alignment['label'])
                else:
                    input_file.write('\tunlabeled')
            input_file.write('\n')
        input_file.close()
        return input_file_path

    def get_run_info(self, params):
        info = self.get_obj_info(params['input_ref'])
        obj_type = self.get_type_from_obj_info(info)
        if obj_type in ['KBaseRNASeq.RNASeqAlignment']:
            return {'mode': 'single', 'input_info': info}
        if obj_type in ['KBaseRNASeq.RNASeqAlignmentSet', 'KBaseSets.ReadsAlignmentSet']:
            return {'mode': 'multi', 'input_info': info}
        raise ValueError('Object type of input_ref is not valid, was: ' + str(obj_type))

    def validate_params(self, params):
        if 'input_ref' not in params:
            raise ValueError('required parameter field "input_ref" was not set')

        create_report = False
        if 'create_report' in params:
            if int(params['create_report']) == 1:
                if 'output_workspace' not in params:
                    raise ValueError('If "create_report" was set, then "output_workspace" is required')
                if not params['output_workspace']:
                    raise ValueError('If "create_report" was set, then "output_workspace" is required')
                create_report = True
        params['create_report'] = create_report

    def run_cli_command(self, command, options, cwd=None):
        if command not in self.valid_commands:
            raise ValueError('Invalid QualiMap command: ' + str(command))
        command = [self.QUALIMAP_PATH, command] + options
        print('Running: ' + ' '.join(command))

        if not cwd:
            cwd = self.scratch_dir

        p = subprocess.Popen(command, cwd=cwd, shell=False)
        exitCode = p.wait()

        if exitCode == 0:
            print('Success, exit code was: ' + str(exitCode))
        else:
            raise ValueError('Error running command: ' + ' '.join(command) + '\n'
                             + 'Exit Code: ' + str(exitCode))

    def find_my_bam_file(self, dirpath):
        bam_path = None
        for f in os.listdir(dirpath):
            fullpath = os.path.join(dirpath, f)
            if os.path.isfile(fullpath) and f.lower().endswith('.bam'):
                if bam_path is not None:
                    raise ValueError('Error! Too many BAM files were downloaded for this alignment!')
                bam_path = fullpath
        if bam_path is None:
            raise ValueError('Error! No BAM files were downloaded for this alignment!')
        return bam_path

    def package_output_folder(self, folder_path, zip_file_name,
                              zip_file_description, index_html_file):
        ''' Simple utility for packaging a folder and saving to shock '''
        output = self.dfu.file_to_shock({
            'file_path': folder_path,
            'make_handle': 0,
            'pack': 'zip'
        })
        return {
            'shock_id': output['shock_id'],
            'name': zip_file_name,
            'description': zip_file_description,
            'index_html_file_name': index_html_file
        }

    def get_type_from_obj_info(self, info):
        return info[2].split('-')[0]

    def get_obj_info(self, ref):
        return self.ws.get_object_info3({'objects': [{'ref': ref}]})['infos'][0]
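# Hedged usage sketch for QualiMapRunner; the URLs and ref below are
# placeholders and would normally come from the module's deploy config.
runner = QualiMapRunner(
    '/kb/module/work/tmp',                                # scratch_dir
    'http://localhost:9999',                              # SDK callback URL (assumed)
    'https://appdev.kbase.us/services/ws',                # workspace URL (assumed)
    'https://appdev.kbase.us/services/service_wizard')    # service wizard URL (assumed)
result = runner.run_app({
    'input_ref': '55/1/1',            # alignment or alignment-set ref (placeholder)
    'create_report': 1,
    'output_workspace': 'my_workspace'
})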
def process_batch_result(self, batch_result, validated_params, reads, input_set_info):

    n_jobs = len(batch_result['results'])
    n_success = 0
    n_error = 0
    ran_locally = 0
    ran_njsw = 0

    # reads alignment set items
    items = []
    objects_created = []

    for k in range(0, len(batch_result['results'])):
        job = batch_result['results'][k]
        result_package = job['result_package']
        if job['is_error']:
            n_error += 1
        else:
            n_success += 1
            print(result_package['result'])
            print(result_package['result'][0])
            print(result_package['result'][0]['output_info'])
            output_info = result_package['result'][0]['output_info']
            ra_ref = output_info['upload_results']['obj_ref']
            # Note: could add a label to the alignment here?
            items.append({'ref': ra_ref, 'label': reads[k]['condition']})
            objects_created.append({'ref': ra_ref})

        if result_package['run_context']['location'] == 'local':
            ran_locally += 1
        if result_package['run_context']['location'] == 'njsw':
            ran_njsw += 1

    # Save the alignment set
    alignment_set_data = {'description': '', 'items': items}
    alignment_set_save_params = {
        'data': alignment_set_data,
        'workspace': validated_params['output_workspace'],
        'output_object_name': (str(input_set_info[1])
                               + validated_params['output_obj_name_suffix'])
    }
    set_api = SetAPI(self.srv_wiz_url)
    save_result = set_api.save_reads_alignment_set_v1(alignment_set_save_params)
    print('Saved ReadsAlignment=')
    pprint(save_result)
    objects_created.append({
        'ref': save_result['set_ref'],
        'description': 'Set of all reads alignments generated'
    })
    set_name = save_result['set_info'][1]

    # run qualimap
    qualimap_report = self.qualimap.run_bamqc({'input_ref': save_result['set_ref']})
    qc_result_zip_info = qualimap_report['qc_result_zip_info']

    # create the report
    report_text = 'Ran on SampleSet or ReadsSet.\n\n'
    # append (the original overwrote the first line here with '=')
    report_text += 'Created ReadsAlignmentSet: ' + str(set_name) + '\n\n'
    report_text += 'Total ReadsLibraries = ' + str(n_jobs) + '\n'
    report_text += '        Successful runs = ' + str(n_success) + '\n'
    report_text += '            Failed runs = ' + str(n_error) + '\n'
    report_text += '       Ran on main node = ' + str(ran_locally) + '\n'
    report_text += '   Ran on remote worker = ' + str(ran_njsw) + '\n\n'

    print('Report text=')
    print(report_text)

    kbr = KBaseReport(self.callback_url)
    report_info = kbr.create_extended_report({
        'message': report_text,
        'objects_created': objects_created,
        'report_object_name': 'kb_Bwa_' + str(uuid.uuid4()),
        'direct_html_link_index': 0,
        'html_links': [{
            'shock_id': qc_result_zip_info['shock_id'],
            'name': qc_result_zip_info['index_html_file_name'],
            'label': qc_result_zip_info['name']
        }],
        'workspace_name': validated_params['output_workspace']
    })

    result = {'report_info': {'report_name': report_info['name'],
                              'report_ref': report_info['ref']}}
    result['batch_output_info'] = batch_result

    return result
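# Sketch of the shape process_batch_result expects from each batch job, inferred
# from the accesses above (not an authoritative schema):
# batch_result['results'][k] = {
#     'is_error': 0,
#     'result_package': {
#         'run_context': {'location': 'local' | 'njsw'},
#         'result': [{'output_info': {'upload_results': {'obj_ref': '1/2/3'}}}]
#     }
# }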