def get_ws_admins(ws_id, ws_url, admin_token):
    ws = Workspace(url=ws_url, token=admin_token)
    perms = ws.administer({
        "command": "getPermissionsMass",
        "params": {"workspaces": [{"id": ws_id}]}
    })['perms'][0]
    admins = list()
    for u in perms:
        if perms[u] == "a":
            admins.append(u)
    return admins
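# Usage sketch for get_ws_admins above. The workspace id, URL, and token
# variable are hypothetical placeholders; the call requires a token with
# Workspace admin rights.
admins = get_ws_admins(908, "https://kbase.us/services/ws", admin_token)
# -> list of every user whose permission on workspace 908 is "a" (admin)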
def __init__(self, config):
    #BEGIN_CONSTRUCTOR
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    self.shared_folder = config['scratch']
    self.du = DownloadUtils(self.callback_url)
    self.su = SimUtils()
    self.ru = ReadsUtils(self.callback_url)
    self.vu = VariationUtil(self.callback_url)
    self.eu = VcfEvalUtils()
    self.hu = htmlreportutils()
    self.ws_url = config['workspace-url']
    self.wsc = Workspace(self.ws_url)
    logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                        level=logging.INFO)
    #END_CONSTRUCTOR
    pass
def __init__(self, config, varfiles):
    self.dfu = DataFileUtil(config['SDK_CALLBACK_URL'])
    # TODO: input variable for workspace url
    self.wsc = Workspace("https://appdev.kbase.us/services/ws")
    self.scratch = config["scratch"]
    self._process_varfiles(varfiles)
def setUpClass(cls):
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('MotifFindermfmd'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'MotifFindermfmd',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL)
    cls.serviceImpl = MotifFindermfmd(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
def get_annotated_metagenome_assembly(self, ctx, params):
    """
    :param params: instance of type "getAnnotatedMetagenomeAssemblyParams"
       (ref - workspace reference to AnnotatedMetagenomeAssembly Object
       included_fields - The fields to include from the Object
       included_feature_fields -) -> structure:
       parameter "ref" of String,
       parameter "included_fields" of list of String,
       parameter "included_feature_fields" of list of String
    :returns: instance of type "getAnnotatedMetagenomeAssemblyOutput" ->
       structure:
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN get_annotated_metagenome_assembly
    ws = Workspace(self.config['workspace-url'], token=ctx['token'])
    ama_utils = AMAUtils(ws)
    output = ama_utils.get_annotated_metagenome_assembly(params)
    #END get_annotated_metagenome_assembly

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method get_annotated_metagenome_assembly return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def setUpClass(cls):
    token = os.environ.get('KB_AUTH_TOKEN', None)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'provenance': [
                        {'service': 'GenomeFileUtil',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)  # type: ignore
    for nameval in config.items('GenomeFileUtil'):
        cls.cfg[nameval[0]] = nameval[1]
    cls.wsURL = cls.cfg['workspace-url']
    cls.ws = Workspace(cls.wsURL, token=token)
    cls.gfu = GenomeFileUtil(cls.cfg)
    # create one WS for all tests
    suffix = int(time.time() * 1000)
    cls.ws_name = "test_GenomeAnnotationAPI_" + str(suffix)
    ws_info = cls.ws.create_workspace({'workspace': cls.ws_name})
    cls.ws_id = ws_info[0]
def setUpClass(cls):
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('kb_model_analysis'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [
                        {'service': 'kb_model_analysis',
                         'method': 'please_never_use_it_in_production',
                         'method_params': []
                         }],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL)
    cls.serviceImpl = kb_model_analysis(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    suffix = int(time.time() * 1000)
    cls.wsName = "test_ContigFilter_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': cls.wsName})  # noqa
    cls.wsId = ret[0]
def get_object_names(ref_list, ws_url):
    """
    From a list of workspace references, returns a mapping from
    ref -> name of the object.
    """
    ws = Workspace(ws_url)
    obj_ids = list()
    for ref in ref_list:
        obj_ids.append({"ref": ref})
    info = ws.get_object_info3({"objects": obj_ids})
    name_map = dict()
    # might be in a data palette, so we can't just use the ref.
    # we already have the refs as passed previously, so use those for mapping,
    # as they're in the same order as what's returned.
    for i in range(len(info["infos"])):
        name_map[ref_list[i]] = info["infos"][i][1]
    return name_map
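# Usage sketch for get_object_names above (the refs and URL are hypothetical;
# the objects must exist and be readable by the caller):
name_map = get_object_names(["1/2/3", "1/4/1"], "https://kbase.us/services/ws")
# -> {"1/2/3": <name of object 1/2/3>, "1/4/1": <name of object 1/4/1>}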
def __init__(self, prj_dir, config):
    self.workspace_url = config['workspace-url']
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    if 'shock-url' in config:
        self.shock_url = config['shock-url']
    if 'handle-service-url' in config:
        self.handle_url = config['handle-service-url']
    self.ws_client = Workspace(self.workspace_url, token=self.token)
    self.ru = ReadsUtils(self.callback_url, token=self.token)
    self.au = AssemblyUtil(self.callback_url, token=self.token)
    self.kbr = KBaseReport(self.callback_url)
    self.kbq = kb_quast(self.callback_url)
    self.proj_dir = prj_dir
    self.prog_runner = Program_Runner(self.MaSuRCA_BIN, self.proj_dir)
def read_narrative(ref: NarrativeRef, ws_client: Workspace) -> Dict:
    """
    Fetches a Narrative from the Workspace and returns its document data.

    This is mainly a wrapper around Workspace.get_objects2() that validates
    that the fetched object really is a Narrative before returning it.

    Can raise the following errors:
    ValueError - if ref isn't a Narrative object
    WorkspaceError - if there's a Workspace issue (ref isn't valid, or the
        token isn't valid)

    :param ref: a NarrativeRef pointing at the Narrative to fetch
    :param ws_client: an authenticated Workspace client
    """
    try:
        narr_data = ws_client.get_objects2({'objects': [{'ref': str(ref)}]})
        nar = narr_data['data'][0]
        _validate_narr_type(nar['info'][2], ref)
        # nar['data'] = update_narrative(nar['data'])
        return nar['data']
    except ServerError as err:
        raise WorkspaceError(err, ref.wsid)
def extract_dna_sequences(self, token, params):
    """Takes an assembly/contig set ref and one or more locations and returns
    the DNA sequence from the assembly at that location while caching the
    assembly for efficiency"""
    if not params.get('ref'):
        raise ValueError("'ref', a reference to an assembly must be provided")
    ref = params['ref']
    locs = params.get('locations', [])
    ws = Workspace(self.ws_url, token=token)
    # This is also a cheap way to ensure that the object exists and that the user has access
    obj_type = ws.get_object_info3({'objects': [{'ref': ref}]})['infos'][0][2]
    if obj_type.split('-')[0] not in self.valid_types:
        raise ValueError(f'{obj_type} is not a valid input type for this function')
    assembly_dir = os.path.join(self.cache_dir, ref.replace('/', ':'))
    if not os.path.exists(assembly_dir):
        self._cache_assembly(ws, token, ref, assembly_dir)
    return [_extract_sequence(assembly_dir, loc) for loc in locs]
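# Usage sketch for extract_dna_sequences above. The 'extractor' instance, ref,
# and token are hypothetical, and the exact shape of each 'locations' entry is
# an assumption (it is passed straight through to _extract_sequence, which is
# not shown here).
seqs = extractor.extract_dna_sequences(token, {
    'ref': '1/2/3',
    'locations': [('contig_1', 100, '+', 50)],  # assumed (contig, start, strand, length)
})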
def search_orthologs_from_pangenome(self, token, ref, query, sort_by,
                                    start, limit, num_found):
    search_object = 'orthologs'
    info_included = ['id', 'type', 'function', 'md5',
                     'protein_translation', 'orthologs']
    table_indexer = TableIndexer(token, self.ws_url)
    ret = table_indexer.run_search(ref, self.pangenome_index_dir,
                                   self.ORTHOLOGS_SUFFIX, search_object,
                                   info_included, query, sort_by, start,
                                   limit, num_found, self.debug)
    for orthologs in ret['orthologs']:
        orthologs_string = orthologs['orthologs']
        if orthologs_string:
            # the index stores the orthologs list as its string repr;
            # eval() restores the original list structure
            orthologs['orthologs'] = list(eval(orthologs_string))
            if not isinstance(orthologs['orthologs'][0], list):
                orthologs['orthologs'] = [orthologs['orthologs']]
    ws = Workspace(self.ws_url, token=token)
    genome_feature_function_map = {}
    for orthologs in ret['orthologs']:
        for orthologs_obj in orthologs['orthologs']:
            gene_id = orthologs_obj[0]
            if gene_id in genome_feature_function_map:
                orthologs_obj.append(genome_feature_function_map.get(gene_id))
            else:
                included = ["/features/[*]/function", "/features/[*]/id"]
                object_info = ws.get_objects2({
                    'objects': [{'ref': orthologs_obj[2],
                                 'included': included}]
                })['data'][0]['data']
                for feature in object_info['features']:
                    genome_feature_function_map.update(
                        {feature.get('id'): feature.get('function')})
                orthologs_obj.append(genome_feature_function_map.get(gene_id))
    return ret
def __init__(self, prj_dir, config):
    self.workspace_url = config['workspace-url']
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    if 'shock-url' in config:
        self.shock_url = config['shock-url']
    if 'handle-service-url' in config:
        self.handle_url = config['handle-service-url']
    self.ws_client = Workspace(self.workspace_url, token=self.token)
    self.ru = ReadsUtils(self.callback_url, token=self.token, service_ver='release')
    self.au = AssemblyUtil(self.callback_url, token=self.token, service_ver='release')
    self.kbr = KBaseReport(self.callback_url)
    self.kbq = kb_quast(self.callback_url)
    self.proj_dir = prj_dir
    self.spades_version = 'SPAdes-' + os.environ['SPADES_VERSION']
def build_bin_summary_file_from_binnedcontigs_obj(self, input_ref, bin_dir,
                                                  bin_basename, fasta_extension):
    # read bin info from obj
    ws = Workspace(self.ws_url)
    try:
        binned_contig_obj = ws.get_objects2(
            {'objects': [{'ref': input_ref}]})['data'][0]['data']
    except Exception as e:
        raise ValueError('Unable to fetch ' + str(input_ref) +
                         ' object from workspace: ' + str(e))
        # to get the full stack trace: traceback.format_exc()
    bin_summary_info = dict()

    # bid in object is full name of contig fasta file. want just the number
    for bin_item in binned_contig_obj['bins']:
        #print("BIN_ITEM[bid]: "+bin_item['bid'])  # DEBUG
        bin_ID = re.sub(r'^[^\.]+\.', '',
                        bin_item['bid'].replace('.' + fasta_extension, ''))
        #print("BIN_ID: "+bin_ID)  # DEBUG
        bin_summary_info[bin_ID] = {
            'n_contigs': bin_item['n_contigs'],
            'gc': round(100.0 * float(bin_item['gc']), 1),
            'sum_contig_len': bin_item['sum_contig_len'],
            'cov': round(100.0 * float(bin_item['cov']), 1)
        }

    # write summary file for just those bins present in bin_dir
    header_line = ['Bin name', 'Completeness', 'Genome size', 'GC content']
    bin_fasta_files_by_bin_ID = self.get_bin_fasta_files(bin_dir, fasta_extension)
    bin_IDs = []
    for bin_ID in sorted(bin_fasta_files_by_bin_ID.keys()):
        bin_ID = re.sub(r'^[^\.]+\.', '', bin_ID.replace('.' + fasta_extension, ''))
        bin_IDs.append(bin_ID)
    summary_file_path = os.path.join(bin_dir, bin_basename + '.' + 'summary')

    print("writing filtered binned contigs summary file " + summary_file_path)
    with open(summary_file_path, 'w') as summary_file_handle:
        print("\t".join(header_line))
        summary_file_handle.write("\t".join(header_line) + "\n")
        for bin_ID in bin_IDs:
            #print("EXAMINING BIN SUMMARY INFO FOR BIN_ID: "+bin_ID)  # DEBUG
            bin_summary_info_line = [
                bin_basename + '.' + str(bin_ID) + '.' + fasta_extension,
                str(bin_summary_info[bin_ID]['cov']) + '%',
                str(bin_summary_info[bin_ID]['sum_contig_len']),
                str(bin_summary_info[bin_ID]['gc'])
            ]
            print("\t".join(bin_summary_info_line))
            summary_file_handle.write("\t".join(bin_summary_info_line) + "\n")
    return summary_file_path
def __init__(self, config):
    self.ws_url = config["workspace-url"]
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.shock_url = config['shock-url']
    self.ws = Workspace(self.ws_url, token=self.token)
    self.dfu = DataFileUtil(self.callback_url)
    self.scratch = config['scratch']
def set_up_test_env(self):
    self.logger.info('setting up test environment...')
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    suffix = int(time.time() * 1000)
    self.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('Templatomatic'):
        self.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = self.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    self.ctx = MethodContext(None)
    self.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'kb_Msuite',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    self.scratch = self.cfg['scratch']
    self.appdir = self.cfg['appdir']
    self.wsURL = self.cfg['workspace-url']
    self.wsClient = Workspace(self.wsURL)
    self.wsName = "test_Templatomatic_" + str(suffix)
    self.ws_info = self.wsClient.create_workspace({'workspace': self.wsName})
    # the workspace id is element 0 of the workspace info tuple
    # (element 6 is the global-read flag, not the id)
    self.ws_id = self.ws_info[0]
    self.dfu = DataFileUtil(self.callback_url)
    self.logger.info('set up new workspace: ' + self.wsName)
    self.env_set_up = True
    self.logger.info('Finished test environment set up')
def __init__(self, config):
    self.cfg = config
    self.gi = GenomeInterface(config)
    self.dfu = DataFileUtil(config.callbackURL)
    self.aUtil = AssemblyUtil(config.callbackURL)
    self.ws = Workspace(config.workspaceURL)
    self._messages = []
    self.time_string = str(datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y_%m_%d_%H_%M_%S'))
    yml_text = open('/kb/module/kbase.yml').read()
    self.version = re.search(r"module-version:\n\W+(.+)\n", yml_text).group(1)
    self.generate_parents = False
    self.generate_ids = False
    self.genes = OrderedDict()
    self.mrnas = OrderedDict()
    self.cdss = OrderedDict()
    self.noncoding = []
    self.ontologies_present = defaultdict(dict)
    self.ontology_events = list()
    self.skiped_features = Counter()
    self.feature_counts = Counter()
    self.orphan_types = Counter()
    self.contig_seq = {}
    self.circ_contigs = set()
    self.features_spaning_zero = set()
    self.genome_warnings = []
    self.genome_suspect = False
    self.defects = Counter()
    self.spoofed_genes = 0
    self.excluded_features = ('source', 'exon', 'fasta_record')
    self.ont_mappings = load_ontology_mappings('/kb/module/data')
    self.code_table = 11
    self.re_api_url = config.re_api_url
    # dict with feature 'id's that have been used more than once.
    self.used_twice_identifiers = {}
    self.default_params = {
        'source': 'Genbank',
        'taxon_wsname': self.cfg.raw['taxon-workspace-name'],
        'taxon_lookup_obj_name': self.cfg.raw['taxon-lookup-object-name'],
        'ontology_wsname': self.cfg.raw['ontology-workspace-name'],
        'ontology_GO_obj_name': self.cfg.raw['ontology-gene-ontology-obj-name'],
        'ontology_PO_obj_name': self.cfg.raw['ontology-plant-ontology-obj-name'],
        'release': None,
        'genetic_code': 11,
        'generate_ids_if_needed': 0,
        'metadata': {}
    }
def check_assembly_cache(self, ref, token):
    ws = Workspace(self.ws_url, token=token)
    info = ws.get_object_info3({"objects": [{"ref": ref}]})['infos'][0]
    inner_chsum = info[8]
    index_file = os.path.join(self.assembly_index_dir,
                              inner_chsum + self.ASSEMBLY_SUFFIX + ".tsv.gz")
    if not os.path.isfile(index_file):
        if self.debug:
            print(" Loading WS object...")
            t1 = time.time()
        if 'KBaseGenomeAnnotations.Assembly' in info[2]:
            included = ["/contigs"]
            assembly_data = ws.get_objects2(
                {'objects': [{'ref': ref, 'included': included}]})['data'][0]['data']
            contigs = list(assembly_data['contigs'].values())
            self.save_assembly_tsv(contigs, inner_chsum)
        elif 'KBaseGenomes.ContigSet' in info[2]:
            included = ["/contigs/[*]/id", "/contigs/[*]/length",
                        "/contigs/[*]/md5", "/contigs/[*]/description"]
            cs_data = ws.get_objects2(
                {'objects': [{'ref': ref, 'included': included}]})['data'][0]['data']
            contigs = []
            for c in cs_data['contigs']:
                this_contig_data = {'contig_id': ''}
                if 'id' in c:
                    this_contig_data['contig_id'] = c['id']
                if 'md5' in c:
                    this_contig_data['md5'] = c['md5']
                if 'length' in c:
                    this_contig_data['length'] = c['length']
                if 'description' in c:
                    this_contig_data['description'] = c['description']
                contigs.append(this_contig_data)
            self.save_assembly_tsv(contigs, inner_chsum)
        else:
            raise ValueError('The "ref" is not an Assembly or ContigSet data object. '
                             'It was a ' + info[2])
        if self.debug:
            print(f" (time={time.time() - t1})")
    return inner_chsum
def create_variation_report(self, params):
    '''
    Create a small HTML table report listing the number of strains/genotypes
    and the number of variants in a variation object.
    :param params: dict with key 'variation_ref'
    '''
    ws = Workspace(self.ws_url)
    subset = ws.get_object_subset([{
        'included': ['/numgenotypes', '/numvariants'],
        'ref': params['variation_ref']
    }])
    numgenotypes = subset[0]['data']['numgenotypes']
    numvariants = subset[0]['data']['numvariants']
    variation_table = """
        <table>
            <thead>
                <tr>
                    <td>Number of strains/genotypes</td>
                    <td>##numgenotypes##</td>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Number of variants</td>
                    <td>##numvariants##</td>
                </tr>
            </tbody>
        </table>
    """
    variation_table = variation_table.replace("##numgenotypes##", str(numgenotypes))
    variation_table = variation_table.replace("##numvariants##", str(numvariants))
    session = str(uuid.uuid4())
    htmlreport_dir = os.path.join(self.scratch, session)
    os.mkdir(htmlreport_dir)
    index_html_path = os.path.join(htmlreport_dir, "index.html")
    with open(index_html_path, "w") as f:
        f.write(variation_table)
    return htmlreport_dir
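# Usage sketch for create_variation_report above ('builder' and the ref are
# hypothetical). The returned directory holds a single index.html containing
# the two-row genotype/variant count table.
report_dir = builder.create_variation_report({'variation_ref': '1/2/3'})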
def __init__(self, config):
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = config['scratch']
    self.token = config['KB_AUTH_TOKEN']
    self.user_id = config['USER_ID']
    self.dfu = DataFileUtil(self.callback_url)
    self.hs = AbstractHandle(config['handle-service-url'])
    self.ws_client = Workspace(config['workspace-url'])
    self.shock_url = config['shock-url']
def test_read_narrative_bad_client(self, rqm):
    ws_id = 908
    mock_ws_bad(rqm, "Can't fetch object")
    with self.assertRaises(WorkspaceError) as e:
        read_narrative(
            NarrativeRef.parse("908/1/1"),
            Workspace(url=self.cfg["workspace-url"], token=self.token))
    self.assertIn(str(ws_id), str(e.exception))
    self.assertIn("Can't fetch object", str(e.exception))
def __init__(self, config):
    self.ws_url = config['workspace-url']
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.shock_url = config['shock-url']
    self.scratch = config['scratch']
    self.dfu = DataFileUtil(self.callback_url)
    self.gsu = GenomeSearchUtil(self.callback_url)
    self.ws = Workspace(self.ws_url, token=self.token)
def export_genome_features_protein_to_fasta(self, ctx, params):
    """
    :param params: instance of type "ExportParams" (input and output
       structure functions for standard downloaders) -> structure:
       parameter "input_ref" of String
    :returns: instance of type "ExportOutput" -> structure:
       parameter "shock_id" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN export_genome_features_protein_to_fasta
    print('export_genome_features_protein_to_fasta -- parameters = ')

    # validate parameters
    if 'input_ref' not in params:
        raise ValueError('Cannot run export_genome_features_protein_to_fasta '
                         '- no "input_ref" field defined.')

    # get WS metadata to get ws_name and obj_name
    ws = Workspace(url=self.cfg.workspaceURL)
    info = ws.get_object_info_new({'objects': [{'ref': params['input_ref']}],
                                   'includeMetadata': 0,
                                   'ignoreErrors': 0})[0]

    genome_to_protein_fasta_params = {'genome_ref': params['input_ref']}

    # export to file (building from KBase Genome Object)
    output = self.genome_to_genbank(
        ctx, genome_to_protein_fasta_params)[0]['genbank_file']
    #END export_genome_features_protein_to_fasta

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method export_genome_features_protein_to_fasta return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def setUpClass(cls):
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('kb_functional_enrichment_1'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'kb_functional_enrichment_1',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL)
    cls.serviceImpl = kb_functional_enrichment_1(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.fe1_runner = FunctionalEnrichmentUtil(cls.cfg)
    cls.dfu = DataFileUtil(cls.callback_url)
    cls.gaa = GenomeAnnotationAPI(cls.callback_url)
    cls.ws = Workspace(cls.wsURL, token=token)
    suffix = int(time.time() * 1000)
    cls.wsName = "test_kb_functional_enrichment_1_" + str(suffix)
    cls.wsClient.create_workspace({'workspace': cls.wsName})
    cls.prepare_data()
def __init__(self, config):
    self.endpoint = config['kbase-endpoint']
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = config['scratch']
    self.token = config['KB_AUTH_TOKEN']
    self.dfu = DataFileUtil(self.callback_url)
    self.ws_client = Workspace(config['workspace-url'])
    self.auth_client = KBaseAuth(config['auth-service-url'])
    logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                        level=logging.INFO)
def setUpClass(cls):
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('KBaseReport'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': token,
        'user_id': user_id,
        'provenance': [{
            'service': 'KBaseReport',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL)
    cls.serviceImpl = KBaseReport(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    # Custom stuff below
    dirname = os.path.dirname(__file__)
    cls.dfu = DataFileUtil(cls.callback_url)
    cls.a_html_path = os.path.join(cls.scratch, 'a_html')
    cls.b_html_path = os.path.join(cls.scratch, 'b_html')
    shutil.copytree(os.path.join(dirname, 'data', 'a_html'), cls.a_html_path)
    shutil.copytree(os.path.join(dirname, 'data', 'b_html'), cls.b_html_path)
    cls.a_file_path = os.path.join(cls.scratch, 'a.txt')
    cls.b_file_path = os.path.join(cls.scratch, 'b.txt')
    shutil.copy2(os.path.join(dirname, 'data/a.txt'), cls.a_file_path)
    shutil.copy2(os.path.join(dirname, 'data/b.txt'), cls.b_file_path)
    # Upload files to shock
    cls.a_file_shock = cls.dfu.file_to_shock({
        'file_path': cls.a_file_path,
        'make_handle': 0
    })
    cls.b_file_shock = cls.dfu.file_to_shock({
        'file_path': cls.b_file_path,
        'make_handle': 0
    })
def test_read_narrative_not_narrative(self, rqm):
    ref = "43666/3/1"
    ref_to_file = {ref: "data/43666/report-43666.3.1.json"}
    set_up_ok_mocks(rqm, ref_to_file=ref_to_file)
    with self.assertRaises(ValueError) as e:
        read_narrative(
            NarrativeRef.parse(ref),
            Workspace(url=self.cfg["workspace-url"], token=self.token))
    self.assertIn(
        f"Expected a Narrative object with reference {ref}, "
        "got a KBaseReport.Report-3.0",
        str(e.exception))
def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url, provenance):
    self.scratch_dir = scratch_dir
    self.workspace_url = workspace_url
    self.callback_url = callback_url
    self.srv_wiz_url = srv_wiz_url
    self.provenance = provenance

    # from the provenance, extract out the version to run by exact hash if possible
    self.my_version = 'release'
    if len(provenance) > 0:
        if 'subactions' in provenance[0]:
            self.my_version = self.get_version_from_subactions(
                'kb_Bwa', provenance[0]['subactions'])
    print('Running kb_Bwa version = ' + self.my_version)

    self.ws = Workspace(self.workspace_url)
    self.bwa = BwaRunner(self.scratch_dir)
    self.parallel_runner = KBParallel(self.callback_url)
    self.qualimap = kb_QualiMap(self.callback_url)
def test_read_narrative_ok(self, rqm):
    ref = "43666/1/18"
    ref_to_file = {ref: "data/43666/narrative-43666.1.18.json"}
    set_up_ok_mocks(rqm, ref_to_file=ref_to_file)
    nar = read_narrative(
        NarrativeRef.parse(ref),
        Workspace(url=self.cfg["workspace-url"], token=self.token))
    # spot check that it's loaded and formatted
    self.assertIsNotNone(nar)
    self.assertIn("cells", nar)
    self.assertEqual(len(nar["cells"]), 9)
def setUpClass(cls):
    cls.token = environ.get('KB_AUTH_TOKEN')
    cls.callbackURL = environ.get('SDK_CALLBACK_URL')
    print('CB URL: ' + cls.callbackURL)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({
        'token': cls.token,
        'provenance': [{
            'service': 'kb_unicycler',
            'method': 'please_never_use_it_in_production',
            'method_params': []
        }],
        'authenticated': 1
    })
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('kb_unicycler'):
        cls.cfg[nameval[0]] = nameval[1]
    cls.cfg["SDK_CALLBACK_URL"] = cls.callbackURL
    cls.cfg["KB_AUTH_TOKEN"] = cls.token
    cls.wsURL = cls.cfg['workspace-url']
    cls.shockURL = cls.cfg['shock-url']
    cls.hs = HandleService(url=cls.cfg['handle-service-url'], token=cls.token)
    # cls.wsClient = workspaceService(cls.wsURL, token=cls.token)
    cls.wsClient = Workspace(cls.wsURL, token=cls.token)
    wssuffix = int(time.time() * 1000)
    wsName = "test_kb_unicycler_" + str(wssuffix)
    cls.wsinfo = cls.wsClient.create_workspace({'workspace': wsName})
    print('created workspace ' + cls.getWsName())

    cls.PROJECT_DIR = 'unicycler_outputs'
    cls.scratch = cls.cfg['scratch']
    if not os.path.exists(cls.scratch):
        os.makedirs(cls.scratch)
    cls.prjdir = os.path.join(cls.scratch, cls.PROJECT_DIR)
    if not os.path.exists(cls.prjdir):
        os.makedirs(cls.prjdir)
    cls.serviceImpl = kb_unicycler(cls.cfg)

    cls.readUtilsImpl = ReadsUtils(cls.callbackURL, token=cls.token)
    cls.dfuClient = DataFileUtil(url=cls.callbackURL, token=cls.token)
    cls.staged = {}
    cls.nodes_to_delete = []
    cls.handles_to_delete = []
    cls.setupTestData()
    print('\n\n=============== Starting Unicycler tests ==================')
def check_object_cache(self, ref, search_object, info_included, index_dir,
                       object_suffix, debug):
    ws = Workspace(self.ws_url, token=self.token)
    info = ws.get_object_info3({"objects": [{"ref": ref}]})['infos'][0]
    inner_chsum = info[8]
    index_file = os.path.join(index_dir, inner_chsum + object_suffix + ".tsv.gz")
    if not os.path.isfile(index_file):
        if debug:
            print(" Loading WS object...")
            t1 = time.time()
        included = self.build_info_included(search_object, info_included)
        obj = ws.get_objects2({'objects': [{'ref': ref,
                                            'included': included}]})['data'][0]['data']
        self.save_object_tsv(obj[search_object], inner_chsum, info_included,
                             index_dir, object_suffix)
        if debug:
            print(" (time=" + str(time.time() - t1) + ")")
    return inner_chsum
def verify_public_narrative(workspace_url: str, ws_id: int) -> None:
    """
    Raises a PermissionError if the workspace is not public (i.e. user '*' has 'r' access).
    Creating a Static Narrative is only permitted on public Narratives.
    If the Narrative is public, this returns None.
    Raises a WorkspaceError if anything goes wrong with the lookup.

    :param workspace_url: str - the workspace endpoint url
    :param ws_id: int - the workspace to check
    """
    ws_client = Workspace(url=workspace_url)
    try:
        perms = ws_client.get_permissions({"id": ws_id})
    except ServerError as err:
        raise WorkspaceError(err, ws_id)
    if perms.get("*", "n") not in ["r", "w", "a"]:
        err = f"Workspace {ws_id} must be publicly readable to make a Static Narrative"
        logging.getLogger("StaticNarrative").error(err)
        raise PermissionError(err)
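# Usage sketch for verify_public_narrative above (hypothetical URL and id):
try:
    verify_public_narrative("https://kbase.us/services/ws", 908)
except PermissionError:
    pass  # workspace 908 is not public, so a Static Narrative can't be made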
def __init__(self, config):
    # self.workdir is assumed to be defined as a class attribute elsewhere
    os.makedirs(self.workdir, exist_ok=True)
    self.config = config
    self.timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    self.callback_url = config['SDK_CALLBACK_URL']
    self.scratch = config['scratch']
    self.genome_api = GenomeAnnotationAPI(self.callback_url)
    self.dfu = DataFileUtil(self.callback_url)
    self.gfu = GenomeFileUtil(self.callback_url)
    self.kbr = KBaseReport(self.callback_url)
    self.ws_client = Workspace(config["workspace-url"])
def __init__(self, config):
    self.cfg = config
    self.scratch = config['scratch']
    self.gsu = GenomeSearchUtil(os.environ['SDK_CALLBACK_URL'])
    self.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
    self.ws = Workspace(config["workspace-url"])
class AveExpressionMatrixBuilder:

    def _validate_calculate_average_expression_matrix_params(self, params):
        """
        _validate_calculate_average_expression_matrix_params:
            validates params passed to calculate_average_expression_matrix method
        """
        log('start validating calculate_average_expression_matrix params')
        # check for required parameters
        for p in ['expression_matrix_ref', 'output_suffix', 'workspace_name']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

    def _generate_report(self, expression_matrix_ref, workspace_name):
        """
        _generate_report: generate report
        """
        objects_created = [{'ref': expression_matrix_ref,
                            'description': 'Average ExpressionMatrix'}]
        report_params = {'message': '',
                         'workspace_name': workspace_name,
                         'objects_created': objects_created,
                         # 'html_links': output_html_files,
                         # 'direct_html_link_index': 0,
                         'html_window_height': 366,
                         'report_object_name': 'kb_ave_expr_matrix_report_' + str(uuid.uuid4())}
        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)
        report_output = {'report_name': output['name'], 'report_ref': output['ref']}
        return report_output

    def _save_expression_matrix(self, em_data, em_obj_name, workspace_name):
        """
        _save_expression_matrix: saving ExpressionMatrix
        """
        try:
            log('saving ExpressionMatrix [{}]'.format(em_obj_name))
            data_type = 'KBaseFeatureValues.ExpressionMatrix'
            obj_info = self.dfu.save_objects({'id': self.dfu.ws_name_to_id(workspace_name),
                                              'objects': [{'type': data_type,
                                                           'data': em_data,
                                                           'name': em_obj_name}]})[0]
        except Exception as e:
            log(e)
            raise Exception('Failed Saving ExpressionMatrix to Workspace')
        expression_matrix_ref = str(obj_info[6]) + '/' + str(obj_info[0]) + '/' + str(obj_info[4])
        return expression_matrix_ref

    def __init__(self, config):
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.shock_url = config['shock-url']
        self.ws = Workspace(self.ws_url, token=self.token)
        self.dfu = DataFileUtil(self.callback_url)
        self.scratch = config['scratch']

    def calculate_average_expression_matrix(self, params):
        """
        calculate_average_expression_matrix: create an average ExpressionMatrix object
                                             from an ExpressionMatrix object

        required params:
        expression_matrix_ref: ExpressionMatrix object reference
        output_suffix: output average ExpressionMatrix name suffix
        workspace_name: the name of the workspace it gets saved to

        return:
        average_expression_matrix_ref: generated average ExpressionMatrix object reference
        report_name: report name generated by KBaseReport
        report_ref: report reference generated by KBaseReport
        """
        log('--->\nrunning AveExpressionMatrixBuilder.calculate_average_expression_matrix\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self._validate_calculate_average_expression_matrix_params(params)

        expression_matrix_ref = params.get('expression_matrix_ref')
        expression_matrix = self.ws.get_objects2(
            {'objects': [{'ref': expression_matrix_ref}]})['data'][0]

        expression_matrix_data = expression_matrix['data']
        expression_matrix_info = expression_matrix['info']

        condition_map = expression_matrix_data['condition_mapping']

        ori_data = expression_matrix_data['data']
        ori_col_ids = ori_data['col_ids']
        ori_row_ids = ori_data['row_ids']
        ori_values = ori_data['values']

        labels = list(condition_map.keys())

        if set(labels) != set(ori_col_ids):
            error_msg = 'available labels: {}\n'.format(ori_col_ids)
            error_msg += 'labels in condition_mapping: {}'.format(labels)
            raise ValueError(error_msg)

        # group column positions by condition
        condition_pos = {}
        for label, condition in condition_map.items():
            if condition not in condition_pos:
                condition_pos.update({condition: [ori_col_ids.index(label)]})
            else:
                condition_list = condition_pos[condition]
                condition_list.append(ori_col_ids.index(label))
                condition_pos.update({condition: condition_list})

        conditions = list(condition_pos.keys())

        # average each row's values across the columns of each condition
        ave_values = []
        for ori_value in ori_values:
            ave_value = [None] * len(conditions)
            for condition, poss in condition_pos.items():
                ave_pos = conditions.index(condition)
                sum_value = 0.0
                for pos in poss:
                    sum_value += round(float(ori_value[pos]), 3)
                average = sum_value / len(poss)
                ave_value[ave_pos] = round(average, 2)
            ave_values.append(ave_value)

        average_data = {}
        average_data.update({'row_ids': ori_row_ids})
        average_data.update({'col_ids': conditions})
        average_data.update({'values': ave_values})

        em_data = {}
        genome_ref = expression_matrix_data.get('genome_ref')
        if genome_ref:
            em_data.update({'genome_ref': genome_ref})
        em_data.update({'scale': expression_matrix_data.get('scale')})
        em_data.update({'type': expression_matrix_data.get('type')})
        em_data.update({'feature_mapping': expression_matrix_data.get('feature_mapping')})
        em_data.update({'condition_mapping': expression_matrix_data.get('condition_mapping')})
        em_data.update({'data': average_data})

        expression_matrix_name = expression_matrix_info[1]
        ave_expression_matrix_name = expression_matrix_name + params.get('output_suffix')

        workspace_name = params.get('workspace_name')

        ave_expression_matrix_ref = self._save_expression_matrix(
            em_data, ave_expression_matrix_name, workspace_name)

        returnVal = {'average_expression_matrix_ref': ave_expression_matrix_ref}

        report_output = self._generate_report(ave_expression_matrix_ref, workspace_name)
        returnVal.update(report_output)

        return returnVal
class FeatureSetBuilder:

    def _mkdir_p(self, path):
        """
        _mkdir_p: make directory for given path
        """
        if not path:
            return
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def _validate_upload_featureset_from_diff_expr_params(self, params):
        """
        _validate_upload_featureset_from_diff_expr_params:
            validates params passed to upload_featureset_from_diff_expr method
        """
        log('start validating upload_featureset_from_diff_expr params')

        # check for required parameters
        for p in ['diff_expression_ref', 'workspace_name',
                  'p_cutoff', 'q_cutoff', 'fold_change_cutoff']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

        p = params.get('fold_scale_type')
        if p and p != 'logarithm':
            raise ValueError('"fold_scale_type" parameter must be set to "logarithm", if used')

    @staticmethod
    def validate_params(params, expected, opt_param=set()):
        """Validates that required parameters are present.
        Warns if unexpected parameters appear"""
        expected = set(expected)
        opt_param = set(opt_param)
        pkeys = set(params)
        if expected - pkeys:
            raise ValueError("Required keys {} not in supplied parameters"
                             .format(", ".join(expected - pkeys)))
        defined_param = expected | opt_param
        for param in params:
            if param not in defined_param:
                logging.warning("Unexpected parameter {} supplied".format(param))

    def _generate_report(self, up_feature_set_ref_list, down_feature_set_ref_list,
                         filtered_expression_matrix_ref_list, workspace_name):
        """
        _generate_report: generate summary report
        """
        log('start creating report')

        output_html_files = self._generate_html_report(up_feature_set_ref_list,
                                                       down_feature_set_ref_list)

        objects_created = list()
        for up_feature_set_ref in up_feature_set_ref_list:
            objects_created += [{'ref': up_feature_set_ref,
                                 'description': 'Upper FeatureSet Object'}]
        for down_feature_set_ref in down_feature_set_ref_list:
            objects_created += [{'ref': down_feature_set_ref,
                                 'description': 'Lower FeatureSet Object'}]
        for filtered_expression_matrix_ref in filtered_expression_matrix_ref_list:
            objects_created += [{'ref': filtered_expression_matrix_ref,
                                 'description': 'Filtered ExpressionMatrix Object'}]

        report_params = {'message': '',
                         'workspace_name': workspace_name,
                         'objects_created': objects_created,
                         'html_links': output_html_files,
                         'direct_html_link_index': 0,
                         'html_window_height': 333,
                         'report_object_name': 'kb_FeatureSetUtils_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'], 'report_ref': output['ref']}

        return report_output

    def _generate_html_report(self, up_feature_set_ref_list, down_feature_set_ref_list):
        """
        _generate_html_report: generate html summary report
        """
        log('start generating html report')
        html_report = list()

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(output_directory)
        result_file_path = os.path.join(output_directory, 'report.html')

        upper_feature_content = ''
        for up_feature_set_ref in up_feature_set_ref_list:
            feature_set_obj = self.ws.get_objects2(
                {'objects': [{'ref': up_feature_set_ref}]})['data'][0]
            feature_set_data = feature_set_obj['data']
            feature_set_info = feature_set_obj['info']

            feature_set_name = feature_set_info[1]

            elements = feature_set_data.get('elements')
            feature_ids = list(elements.keys())

            upper_feature_content += '<tr><td>{}</td><td>{}</td></tr>'.format(
                feature_set_name, len(feature_ids))

        lower_feature_content = ''
        for down_feature_set_ref in down_feature_set_ref_list:
            feature_set_obj = self.ws.get_objects2(
                {'objects': [{'ref': down_feature_set_ref}]})['data'][0]
            feature_set_data = feature_set_obj['data']
            feature_set_info = feature_set_obj['info']

            feature_set_name = feature_set_info[1]

            elements = feature_set_data.get('elements')
            feature_ids = list(elements.keys())

            lower_feature_content += '<tr><td>{}</td><td>{}</td></tr>'.format(
                feature_set_name, len(feature_ids))

        with open(result_file_path, 'w') as result_file:
            with open(os.path.join(os.path.dirname(__file__), 'report_template.html'),
                      'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace(
                    '<tr><td>Upper_FeatureSet</td></tr>', upper_feature_content)
                report_template = report_template.replace(
                    '<tr><td>Lower_FeatureSet</td></tr>', lower_feature_content)
                result_file.write(report_template)

        html_report.append({'path': result_file_path,
                            'name': os.path.basename(result_file_path),
                            'label': os.path.basename(result_file_path),
                            'description': 'HTML summary report'})

        return html_report

    def _process_diff_expression(self, diff_expression_set_ref, result_directory,
                                 condition_label_pair):
        """
        _process_diff_expression: process differential expression object info
        """
        log('start processing differential expression object')

        diff_expr_set_data = self.ws.get_objects2(
            {'objects': [{'ref': diff_expression_set_ref}]})['data'][0]['data']

        set_items = diff_expr_set_data['items']

        diff_expr_matrix_file_name = 'gene_results.csv'
        diff_expr_matrix_file = os.path.join(result_directory, diff_expr_matrix_file_name)

        with open(diff_expr_matrix_file, 'w') as csvfile:
            fieldnames = ['gene_id', 'log2_fold_change', 'p_value', 'q_value']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

        for set_item in set_items:
            diff_expression_ref = set_item['ref']

            diff_expression_data = self.ws.get_objects2(
                {'objects': [{'ref': diff_expression_ref}]})['data'][0]['data']

            label_string = set_item['label']
            label_list = [x.strip() for x in label_string.split(',')]
            condition_1 = label_list[0]
            condition_2 = label_list[1]

            if condition_1 in condition_label_pair and condition_2 in condition_label_pair:
                genome_id = diff_expression_data['genome_ref']
                matrix_data = diff_expression_data['data']
                selected_diff_expression_ref = diff_expression_ref

                with open(diff_expr_matrix_file, 'a') as csvfile:
                    row_ids = matrix_data.get('row_ids')
                    row_values = matrix_data.get('values')
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                    for pos, row_id in enumerate(row_ids):
                        row_value = row_values[pos]
                        writer.writerow({'gene_id': row_id,
                                         'log2_fold_change': row_value[0],
                                         'p_value': row_value[1],
                                         'q_value': row_value[2]})

        return diff_expr_matrix_file, genome_id, selected_diff_expression_ref

    def _generate_feature_set(self, feature_ids, genome_id, workspace_name, feature_set_name):
        """
        _generate_feature_set: generate FeatureSet object

        KBaseCollections.FeatureSet type:
        typedef structure {
            string description;
            list<feature_id> element_ordering;
            mapping<feature_id, list<genome_ref>> elements;
        } FeatureSet;
        """
        log('start saving KBaseCollections.FeatureSet object')

        if isinstance(workspace_name, int) or workspace_name.isdigit():
            workspace_id = workspace_name
        else:
            workspace_id = self.dfu.ws_name_to_id(workspace_name)

        elements = {feature_id: [genome_id] for feature_id in feature_ids}
        feature_set_data = {'description': 'Generated FeatureSet from DifferentialExpression',
                            'element_ordering': feature_ids,
                            'elements': elements}

        object_type = 'KBaseCollections.FeatureSet'
        save_object_params = {
            'id': workspace_id,
            'objects': [{'type': object_type,
                         'data': feature_set_data,
                         'name': feature_set_name}]}

        dfu_oi = self.dfu.save_objects(save_object_params)[0]
        feature_set_obj_ref = "{}/{}/{}".format(dfu_oi[6], dfu_oi[0], dfu_oi[4])

        return feature_set_obj_ref

    def _process_matrix_file(self, diff_expr_matrix_file, comp_p_value, comp_q_value,
                             comp_fold_change_cutoff):
        """
        _process_matrix_file: filter matrix file by given cutoffs
        """
        log('start processing matrix file')

        up_feature_ids = []
        down_feature_ids = []

        if comp_fold_change_cutoff < 0:
            comp_fold_change_cutoff = -comp_fold_change_cutoff

        with open(diff_expr_matrix_file, 'r') as file:
            reader = csv.DictReader(file)

            for row in reader:
                feature_id = row['gene_id']
                row_p_value = row['p_value']
                row_q_value = row['q_value']
                row_fold_change_cutoff = row['log2_fold_change']

                null_value = {'NA', 'null', ''}
                col_value = {row_p_value, row_q_value, row_fold_change_cutoff}

                if not col_value.intersection(null_value):
                    p_value_condition = float(row_p_value) <= comp_p_value
                    q_value_condition = float(row_q_value) <= comp_q_value

                    up_matches_condition = (p_value_condition and q_value_condition and
                                            (float(row_fold_change_cutoff) >=
                                             comp_fold_change_cutoff))
                    down_matches_condition = (p_value_condition and q_value_condition and
                                              (float(row_fold_change_cutoff) <=
                                               -comp_fold_change_cutoff))

                    if up_matches_condition:
                        up_feature_ids.append(feature_id)
                    elif down_matches_condition:
                        down_feature_ids.append(feature_id)

        return list(set(up_feature_ids)), list(set(down_feature_ids))

    def _filter_expression_matrix(self, expression_matrix_ref, feature_ids,
                                  workspace_name, filtered_expression_matrix_suffix="",
                                  diff_expression_matrix_ref=None,
                                  filtered_expression_matrix_name=None):
        """
        _filter_expression_matrix: generated filtered expression matrix
        """
        log('start saving ExpressionMatrix object')

        if isinstance(workspace_name, int) or workspace_name.isdigit():
            workspace_id = workspace_name
        else:
            workspace_id = self.dfu.ws_name_to_id(workspace_name)

        expression_matrix_obj = self.dfu.get_objects(
            {'object_refs': [expression_matrix_ref]})['data'][0]

        expression_matrix_info = expression_matrix_obj['info']
        expression_matrix_data = expression_matrix_obj['data']

        expression_matrix_name = expression_matrix_info[1]

        if not filtered_expression_matrix_name:
            if re.match('.*_*[Ee]xpression_*[Mm]atrix', expression_matrix_name):
                filtered_expression_matrix_name = re.sub('_*[Ee]xpression_*[Mm]atrix',
                                                         filtered_expression_matrix_suffix,
                                                         expression_matrix_name)
            else:
                filtered_expression_matrix_name = (expression_matrix_name +
                                                   filtered_expression_matrix_suffix)

        filtered_expression_matrix_data = expression_matrix_data.copy()

        data = filtered_expression_matrix_data['data']

        row_ids = data['row_ids']
        values = data['values']
        filtered_data = data.copy()

        filtered_row_ids = list()
        filtered_values = list()
        for pos, row_id in enumerate(row_ids):
            if row_id in feature_ids:
                filtered_row_ids.append(row_id)
                filtered_values.append(values[pos])

        filtered_data['row_ids'] = filtered_row_ids
        filtered_data['values'] = filtered_values
        filtered_expression_matrix_data['data'] = filtered_data

        expression_obj = {'type': expression_matrix_info[2],
                          'data': filtered_expression_matrix_data,
                          'name': filtered_expression_matrix_name}
        # we now save the filtering DEM in a EM field added for this purpose
        if diff_expression_matrix_ref:
            expression_obj['data']['diff_expr_matrix_ref'] = diff_expression_matrix_ref
            expression_obj['extra_provenance_input_refs'] = [diff_expression_matrix_ref]

        save_object_params = {
            'id': workspace_id,
            'objects': [expression_obj]}

        dfu_oi = self.dfu.save_objects(save_object_params)[0]
        filtered_expression_matrix_ref = "{}/{}/{}".format(dfu_oi[6], dfu_oi[0], dfu_oi[4])

        return filtered_expression_matrix_ref

    def _xor(self, a, b):
        return bool(a) != bool(b)

    def _check_input_labels(self, condition_pairs, available_condition_labels):
        """
        _check_input_labels: check input condition pairs
        """
        checked = True
        for condition_pair in condition_pairs:
            label_string = condition_pair['label_string'][0].strip()
            label_list = [x.strip() for x in label_string.split(',')]
            first_label = label_list[0]
            second_label = label_list[1]

            if first_label not in available_condition_labels:
                error_msg = 'Condition: {} is not available. '.format(first_label)
                error_msg += 'Available conditions: {}'.format(available_condition_labels)
                raise ValueError(error_msg)

            if second_label not in available_condition_labels:
                error_msg = 'Condition: {} is not available. '.format(second_label)
                error_msg += 'Available conditions: {}'.format(available_condition_labels)
                raise ValueError(error_msg)

            if first_label == second_label:
                raise ValueError('Input conditions are the same')

        return checked

    def _get_condition_labels(self, diff_expression_set_ref):
        """
        _get_condition_labels: get all possible condition label pairs
        """
        log('getting all possible condition pairs')

        condition_label_pairs = list()
        available_condition_labels = set()
        diff_expression_set_obj = self.ws.get_objects2(
            {'objects': [{'ref': diff_expression_set_ref}]})['data'][0]
        diff_expression_set_data = diff_expression_set_obj['data']
        items = diff_expression_set_data.get('items')
        for item in items:
            label_string = item['label']
            label_list = [x.strip() for x in label_string.split(',')]
            condition_label_pairs.append(label_list)
            available_condition_labels |= set(label_list)

        log('all possible condition pairs:\n{}'.format(condition_label_pairs))

        return condition_label_pairs, available_condition_labels

    def _get_feature_ids(self, genome_ref, ids):
        """
        _get_feature_ids: get feature ids from genome
        """
        genome_features = self.gsu.search({
            'ref': genome_ref,
            'limit': len(ids),
            'structured_query': {"$or": [{"feature_id": x} for x in ids]},
            'sort_by': [['feature_id', True]]})['features']

        features_ids = set((feature.get('feature_id') for feature in genome_features))

        return features_ids

    def _build_fs_obj(self, params):
        new_feature_set = {
            'description': '',
            'element_ordering': [],
            'elements': {}
        }
        genome_ref = params['genome']
        if params.get('base_feature_sets', []) and None not in params['base_feature_sets']:
            base_feature_sets = self.dfu.get_objects(
                {'object_refs': params['base_feature_sets']})['data']
            for ret in base_feature_sets:
                base_set = ret['data']
                base_set_name = ret['info'][1]

                new_feature_set['element_ordering'] += [
                    x for x in base_set['element_ordering']
                    if x not in new_feature_set['elements']]
                for element, genome_refs in base_set['elements'].items():
                    if element in new_feature_set['elements']:
                        new_feature_set['elements'][element] += [
                            x for x in genome_refs
                            if x not in new_feature_set['elements'][element]]
                    else:
                        new_feature_set['elements'][element] = genome_refs
                new_feature_set['description'] += 'From FeatureSet {}: {}\n'.format(
                    base_set_name, base_set.get('description'))
        new_feature_ids = []
        if params.get('feature_ids'):
            if isinstance(params['feature_ids'], str):
                new_feature_ids += params['feature_ids'].split(',')
            else:
                new_feature_ids += params['feature_ids']
        if params.get('feature_ids_custom'):
            new_feature_ids += params['feature_ids_custom'].split(',')
        if new_feature_ids:
            genome_feature_ids = self._get_feature_ids(genome_ref, new_feature_ids)
        for new_feature in new_feature_ids:
            if new_feature not in genome_feature_ids:
                raise ValueError('Feature ID {} does not exist in the supplied genome {}'.format(
                    new_feature, genome_ref))
            if new_feature in new_feature_set['elements']:
                if genome_ref not in new_feature_set['elements'][new_feature]:
                    new_feature_set['elements'][new_feature].append(genome_ref)
            else:
                new_feature_set['elements'][new_feature] = [genome_ref]
                new_feature_set['element_ordering'].append(new_feature)
        if params.get('description'):
            new_feature_set['description'] = params['description']

        return new_feature_set

    def __init__(self, config):
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.shock_url = config['shock-url']
        self.ws = Workspace(self.ws_url, token=self.token)
        self.dfu = DataFileUtil(self.callback_url)
        self.gsu = GenomeSearchUtil(self.callback_url)
        self.scratch = config['scratch']

    def upload_featureset_from_diff_expr(self, params):
        """
        upload_featureset_from_diff_expr: create FeatureSet from RNASeqDifferentialExpression
                                          based on given threshold cutoffs

        required params:
        diff_expression_ref: DifferentialExpressionMatrixSet object reference
        expression_matrix_ref: ExpressionMatrix object reference
        p_cutoff: p value cutoff
        q_cutoff: q value cutoff
        fold_scale_type: one of ["linear", "log2+1", "log10+1"]
        fold_change_cutoff: fold change cutoff
        feature_set_suffix: Result FeatureSet object name suffix
        filtered_expression_matrix_suffix: Result ExpressionMatrix object name suffix
        workspace_name: the name of the workspace it gets saved to

        return:
        result_directory: folder path that holds all files generated
        up_feature_set_ref_list: list of generated upper FeatureSet object reference
        down_feature_set_ref_list: list of generated down FeatureSet object reference
        filtered_expression_matrix_ref_list: list of generated filtered ExpressionMatrix object ref
        report_name: report name generated by KBaseReport
        report_ref: report reference generated by KBaseReport
        """
        self._validate_upload_featureset_from_diff_expr_params(params)

        diff_expression_set_ref = params.get('diff_expression_ref')
        diff_expression_set_info = self.ws.get_object_info3(
            {"objects": [{"ref": diff_expression_set_ref}]})['infos'][0]
        diff_expression_set_name = diff_expression_set_info[1]

        result_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(result_directory)

        (available_condition_label_pairs,
         available_condition_labels) = self._get_condition_labels(diff_expression_set_ref)

        run_all_combinations = params.get('run_all_combinations')
        condition_pairs = params.get('condition_pairs')
        if not self._xor(run_all_combinations, condition_pairs):
            error_msg = "Invalid input:\nselect 'Run All Paired Condition Combinations' "
            error_msg += "or provide partial condition pairs. Don't do both or neither"
            raise ValueError(error_msg)

        if run_all_combinations:
            condition_label_pairs = available_condition_label_pairs
        else:
            if self._check_input_labels(condition_pairs, available_condition_labels):
                condition_label_pairs = list()
                for condition_pair in condition_pairs:
                    label_string = condition_pair['label_string'][0].strip()
                    condition_labels = [x.strip() for x in label_string.split(',')]
                    condition_label_pairs.append(condition_labels)

        up_feature_set_ref_list = list()
        down_feature_set_ref_list = list()
        filtered_expression_matrix_ref_list = list()

        for condition_label_pair in condition_label_pairs:
            condition_string = '-'.join(reversed(condition_label_pair))
            diff_expr_matrix_file, genome_id, diff_expr_matrix_ref = self._process_diff_expression(
                diff_expression_set_ref, result_directory, condition_label_pair)

            up_feature_ids, down_feature_ids = self._process_matrix_file(
                diff_expr_matrix_file, params.get('p_cutoff'), params.get('q_cutoff'),
                params.get('fold_change_cutoff'))
            filtered_em_name = (_sanitize_name(condition_string) +
                                params.get('filtered_expression_matrix_suffix'))
            if params.get('expression_matrix_ref'):
                filtered_expression_matrix_ref = self._filter_expression_matrix(
                    params.get('expression_matrix_ref'),
                    up_feature_ids + down_feature_ids,
                    params.get('workspace_name'), "",
                    diff_expr_matrix_ref, filtered_em_name)
                filtered_expression_matrix_ref_list.append(filtered_expression_matrix_ref)

            feature_set_suffix = params.get('feature_set_suffix', "")
            up_feature_set_name = "{}_{}_up{}".format(
                diff_expression_set_name, _sanitize_name(condition_string), feature_set_suffix)
            up_feature_set_ref = self._generate_feature_set(
                up_feature_ids, genome_id, params.get('workspace_name'), up_feature_set_name)
            up_feature_set_ref_list.append(up_feature_set_ref)

            down_feature_set_name = "{}_{}_down{}".format(
                diff_expression_set_name, _sanitize_name(condition_string), feature_set_suffix)
            down_feature_set_ref = self._generate_feature_set(
                down_feature_ids, genome_id, params.get('workspace_name'), down_feature_set_name)
            down_feature_set_ref_list.append(down_feature_set_ref)

        returnVal = {'result_directory': result_directory,
                     'up_feature_set_ref_list': up_feature_set_ref_list,
                     'down_feature_set_ref_list': down_feature_set_ref_list,
                     'filtered_expression_matrix_ref_list': filtered_expression_matrix_ref_list}

        report_output = self._generate_report(up_feature_set_ref_list,
                                              down_feature_set_ref_list,
                                              filtered_expression_matrix_ref_list,
                                              params.get('workspace_name'))
        returnVal.update(report_output)

        return returnVal

    def filter_matrix_with_fs(self, params):
        self.validate_params(params, ('feature_set_ref', 'workspace_name',
                                      'expression_matrix_ref',
                                      'filtered_expression_matrix_suffix'))
        ret = self.dfu.get_objects(
            {'object_refs': [params['feature_set_ref']]})['data'][0]
        feature_set = ret['data']
        feature_set_name = ret['info'][1]
        feature_ids = set(feature_set['elements'].keys())
        filtered_matrix_ref = self._filter_expression_matrix(
            params['expression_matrix_ref'], feature_ids,
            params['workspace_name'], params['filtered_expression_matrix_suffix'])

        objects_created = [{'ref': filtered_matrix_ref,
                            'description': 'Filtered ExpressionMatrix Object'}]
        message = "Filtered Expression Matrix based on the {} feature ids present in {}".format(
            len(feature_ids), feature_set_name)

        report_params = {'message': message,
                         'workspace_name': params['workspace_name'],
                         'objects_created': objects_created,
                         'report_object_name': 'kb_FeatureSetUtils_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url)
        output = kbase_report_client.create_extended_report(report_params)

        return {'filtered_expression_matrix_ref': filtered_matrix_ref,
                'report_name': output['name'],
                'report_ref': output['ref']}

    def build_feature_set(self, params):
        self.validate_params(params, {'output_feature_set', 'workspace_name'},
                             {'genome', 'feature_ids', 'feature_ids_custom',
                              'base_feature_sets', 'description'})
        feature_sources = ('feature_ids', 'feature_ids_custom', 'base_feature_sets')
        if not any([params.get(x) for x in feature_sources]):
            raise ValueError("You must supply at least one feature source: {}".format(
                ", ".join(feature_sources)))
        workspace_id = self.dfu.ws_name_to_id(params['workspace_name'])

        new_feature_set = self._build_fs_obj(params)
        save_object_params = {
            'id': workspace_id,
            'objects': [{'type': 'KBaseCollections.FeatureSet',
                         'data': new_feature_set,
                         'name': params['output_feature_set']}]}

        dfu_oi = self.dfu.save_objects(save_object_params)[0]
        feature_set_obj_ref = '{}/{}/{}'.format(dfu_oi[6], dfu_oi[0], dfu_oi[4])

        objects_created = [{'ref': feature_set_obj_ref,
                            'description': 'Feature Set'}]
        message = 'A new feature set containing {} features was created.'.format(
            len(new_feature_set['elements']))

        report_params = {'message': message,
                         'workspace_name': params['workspace_name'],
                         'objects_created': objects_created,
                         'report_object_name': 'kb_FeatureSetUtils_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url)
        output = kbase_report_client.create_extended_report(report_params)

        return {'feature_set_ref': feature_set_obj_ref,
                'report_name': output['name'],
                'report_ref': output['ref']}
class FeatureSetDownload:
    def __init__(self, config):
        self.cfg = config
        self.scratch = config['scratch']
        self.gsu = GenomeSearchUtil(os.environ['SDK_CALLBACK_URL'])
        self.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
        self.ws = Workspace(config["workspace-url"])

    @staticmethod
    def validate_params(params, expected={"workspace_name", "featureset_name"}):
        expected = set(expected)
        pkeys = set(params)
        if expected - pkeys:
            raise ValueError("Required keys {} not in supplied parameters"
                             .format(", ".join(expected - pkeys)))

    def to_tsv(self, params):
        working_dir = os.path.join(self.scratch,
                                   'featureset-download-' + str(uuid.uuid4()))
        os.makedirs(working_dir)
        header = ['Feature Id', 'Aliases', 'Genome', 'Type', 'Function']
        fs_name, fs_dicts = self.make_featureset_dict(params['featureset_ref'])
        files = {'file_path': "{}/{}.tsv".format(working_dir, fs_name)}
        writer = csv.DictWriter(open(files['file_path'], 'w'), header,
                                delimiter='\t', lineterminator='\n')
        writer.writeheader()
        for feat in fs_dicts:
            writer.writerow(feat)
        return fs_name, files

    def make_featureset_dict(self, fs_ref):
        features = []
        ret = self.dfu.get_objects({'object_refs': [fs_ref]})['data'][0]
        feat_set = ret['data']
        fs_name = ret['info'][1]
        feat_by_genome = defaultdict(list)
        for k, v in feat_set['elements'].items():
            feat_by_genome[v[0]].append(k)

        for genome, fids in feat_by_genome.items():
            genome_name = self.ws.get_object_info3(
                {'objects': [{'ref': genome}]})['infos'][0][1]
            res = self.gsu.search({'ref': genome,
                                   'structured_query': {'feature_id': fids},
                                   'sort_by': [['contig_id', 1]],
                                   'start': 0,
                                   'limit': len(fids)})
            for feat in res['features']:
                features.append({'Feature Id': feat['feature_id'],
                                 'Aliases': ", ".join(sorted(feat['aliases'].keys())),
                                 'Genome': "{} ({})".format(genome_name, genome),
                                 'Type': feat['feature_type'],
                                 'Function': feat['function']})
        return fs_name, features

    def export(self, files, name, params):
        export_package_dir = os.path.join(self.scratch, name + str(uuid.uuid4()))
        os.makedirs(export_package_dir)
        for file in files:
            shutil.move(file, os.path.join(export_package_dir,
                                           os.path.basename(file)))

        # package it up and be done
        package_details = self.dfu.package_for_download({
            'file_path': export_package_dir,
            'ws_refs': [params['featureset_ref']]
        })
        return {'shock_id': package_details['shock_id']}