def __init__(self, config):
    """Capture service endpoints from the SDK config and build shared clients."""
    self.ws_url = config["workspace-url"]
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.shock_url = config['shock-url']
    self.scratch = config['scratch']
    # Authenticated workspace client plus the file-transfer utility.
    self.ws = Workspace(self.ws_url, token=self.token)
    self.dfu = DataFileUtil(self.callback_url)
def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url, provenance):
    """Set up the kb_Bowtie2 facade: record endpoints, resolve the running
    code version from provenance, and construct the runner clients."""
    self.scratch_dir = scratch_dir
    self.workspace_url = workspace_url
    self.callback_url = callback_url
    self.srv_wiz_url = srv_wiz_url
    self.provenance = provenance

    # Prefer pinning to the exact version recorded in provenance subactions;
    # fall back to the 'release' tag otherwise.
    self.my_version = 'release'
    if len(provenance) > 0 and 'subactions' in provenance[0]:
        self.my_version = self.get_version_from_subactions(
            'kb_Bowtie2', provenance[0]['subactions'])
    print('Running kb_Bowtie2 version = ' + self.my_version)

    self.ws = Workspace(self.workspace_url)
    self.bowtie2 = Bowtie2Runner(self.scratch_dir)
    self.parallel_runner = KBParallel(self.callback_url)
    self.qualimap = kb_QualiMap(self.callback_url)
def get_contig_info(self, ctx, params):
    """
    :param params: instance of type "GetContigInfoParams" -> structure:
       parameter "ref" of String, parameter "contig_id" of String
    :returns: instance of type "GetContigInfoResult" -> structure:
       parameter "contig" of type "contig" (contig_id - identifier of
       contig feature_count - number of features associated with contig
       length - the dna sequence length of the contig) -> structure:
       parameter "contig_id" of String, parameter "feature_count" of
       Long, parameter "length" of Long
    """
    # ctx is the context object
    # return variables are: result
    #BEGIN get_contig_info
    # Note: the originals were f-strings with no placeholders; plain strings suffice.
    if 'ref' not in params:
        raise RuntimeError("'ref' argument required for get_contig_info")
    if 'contig_id' not in params:
        raise RuntimeError("'contig_id' argument required for get_contig_info")
    contig_id = params['contig_id']
    ws = Workspace(self.config['workspace-url'], token=ctx['token'])
    ama_utils = AMAUtils(ws)
    params['included_fields'] = ['contig_ids', 'contig_lengths']
    data = ama_utils.get_annotated_metagenome_assembly(
        params)['genomes'][0]['data']
    contig_ids = data['contig_ids']
    contig_lengths = data['contig_lengths']
    # Bug fix: if contig_id was absent, 'length' stayed unbound and the code
    # below raised UnboundLocalError. Fail with a clear error instead.
    length = None
    for i, c in enumerate(contig_ids):
        if c == contig_id:
            length = contig_lengths[i]
            break
    if length is None:
        raise ValueError(
            "contig_id '{}' not found in object {}".format(contig_id, params['ref']))
    if self.msu.status_good:
        feature_count = self.msu.search_contig_feature_count(
            ctx["token"], params.get("ref"), contig_id)
    else:
        # Search service unavailable: report zero features rather than failing.
        feature_count = 0
    result = {
        'contig': {
            "contig_id": contig_id,
            "length": length,
            "feature_count": feature_count
        }
    }
    #END get_contig_info

    # At some point might do deeper type checking...
    if not isinstance(result, dict):
        raise ValueError('Method get_contig_info return value ' +
                         'result is not type dict as required.')
    # return the results
    return [result]
def __init__(self, config):
    """Record endpoints from the config dict and construct the clients used here."""
    self.ws_url = config["workspace-url"]
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.shock_url = config['shock-url']
    self.scratch = config['scratch']
    self.df = DataFileUtil(self.callback_url)
    self.gsu = GenomeSearchUtil(self.callback_url)
    self.ws = Workspace(self.ws_url, token=self.token)
def setUpClass(cls):
    """One-time fixture: load the deployment config, build all service clients,
    and create a uniquely named throwaway workspace for the test run."""
    cls.token = environ.get('KB_AUTH_TOKEN', None)
    cls.callbackURL = environ.get('SDK_CALLBACK_URL')
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    config = ConfigParser()
    config.read(config_file)
    cls.cfg = {key: value for key, value in config.items('ExpressionUtils')}
    # Getting username from Auth profile for token
    auth_client = _KBaseAuth(cls.cfg['auth-service-url'])
    user_id = auth_client.get_user(cls.token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': cls.token,
                    'user_id': user_id,
                    'provenance': [{'service': 'ExpressionUtils',
                                    'method': 'please_never_use_it_in_production',
                                    'method_params': []}],
                    'authenticated': 1})
    cls.shockURL = cls.cfg['shock-url']
    cls.wsURL = cls.cfg['workspace-url']
    cls.service_wizard_url = cls.cfg['srv-wiz-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.ws = Workspace(cls.wsURL, token=cls.token)
    cls.hs = HandleService(url=cls.cfg['handle-service-url'], token=cls.token)
    # create workspace (millisecond suffix keeps names unique between runs)
    wsname = "test_expression_%d" % int(time.time() * 1000)
    cls.wsinfo = cls.wsClient.create_workspace({'workspace': wsname})
    print('created workspace ' + cls.getWsName())
    cls.serviceImpl = ExpressionUtils(cls.cfg)
    cls.readUtils = ReadsUtils(cls.callbackURL)
    cls.dfu = DataFileUtil(cls.callbackURL, service_ver='dev')
    cls.dfu.ws_name_to_id(wsname)
    cls.assemblyUtil = AssemblyUtil(cls.callbackURL)
    cls.gfu = GenomeFileUtil(cls.callbackURL)
    cls.gaAPI = GenomeAnnotationAPI(cls.service_wizard_url)
    cls.rau = ReadsAlignmentUtils(cls.callbackURL)
    cls.scratch = cls.cfg['scratch']
    cls.staged = {}
    cls.nodes_to_delete = []
    cls.handles_to_delete = []
    cls.setupTestData()
def list_data(self, ctx, params):
    """List data palette entries across one or more workspaces.

    params requires a 'workspaces' list (names or numeric ids) and accepts
    an optional 'include_metadata' flag. Returns the deduplicated data list
    plus a map of workspace id -> root data palette ref.
    """
    token = self._extract_token(ctx)
    if 'workspaces' not in params:
        raise ValueError(
            'missing required field "workspaces" in parameters to list_data'
        )
    if not isinstance(params['workspaces'], list):
        raise ValueError('"workspaces" field must be a list')
    workspaces = params['workspaces']
    include_metadata = params.get('include_metadata', 0)
    ws = Workspace(self.ws_url, token=token)

    ws_info_list = []
    if len(workspaces) == 1:
        # Single workspace: resolve it directly by numeric id or by name.
        target = workspaces[0]
        lookup = ({'id': int(target)} if str(target).isdigit()
                  else {'workspace': str(target)})
        ws_info_list.append(ws.get_workspace_info(lookup))
    else:
        # Several workspaces: scan everything readable, keep the requested ones.
        wanted = {key: True for key in workspaces}
        for ws_info in ws.list_workspace_info({'perm': 'r'}):
            if ws_info[1] in wanted or str(ws_info[0]) in wanted:
                ws_info_list.append(ws_info)

    data = []
    dp_list_filter = {'include_metadata': include_metadata}
    data_palette_refs = {}
    for ws_info in ws_info_list:
        dp = DataPalette(None, ws_info=ws_info, ws=ws)
        data = data + dp.list(dp_list_filter)
        dp_ref = dp._get_root_data_palette_ref()
        if dp_ref:
            data_palette_refs[str(ws_info[0])] = dp_ref
    data = self._remove_duplicate_data(data)
    return {'data': data, 'data_palette_refs': data_palette_refs}
def check_object_cache(self, ref, search_object, info_included, index_dir, object_suffix, debug):
    """Ensure an on-disk TSV index exists for the object; build it on a cache miss.

    Returns the object's inner checksum (info[8]), which keys the cache file.
    """
    ws = Workspace(self.ws_url, token=self.token)
    info = ws.get_object_info3({"objects": [{"ref": ref}]})['infos'][0]
    inner_chsum = info[8]
    index_file = os.path.join(index_dir, inner_chsum + object_suffix + ".tsv.gz")
    if not os.path.isfile(index_file):
        if debug:
            print(" Loading WS object...")
        t1 = time.time()
        included = self.build_info_included(search_object, info_included)
        # Renamed from 'object' to avoid shadowing the builtin.
        obj_data = ws.get_objects2(
            {'objects': [{'ref': ref, 'included': included}]})['data'][0]['data']
        self.save_object_tsv(obj_data[search_object], inner_chsum, info_included,
                             index_dir, object_suffix)
        if debug:
            print(" (time=" + str(time.time() - t1) + ")")
    return inner_chsum
def __init__(self, config):
    #BEGIN_CONSTRUCTOR
    # Cache config values and construct the service clients this module uses.
    self.config = config
    self.scratch = config['scratch']
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    self.ws_url = config['workspace-url']
    self.ws_client = Workspace(self.ws_url)
    self.dfu = DataFileUtil(self.callback_url)
    self.demu = GenDiffExprMatrix(config)
    #END_CONSTRUCTOR
    pass
def __init__(self, config):
    #BEGIN_CONSTRUCTOR
    self.workspaceURL = config['workspace-url']
    self.ws = Workspace(self.workspaceURL)
    self.shockURL = config['shock-url']
    # Attach a stream handler with a timestamped format to the root logger.
    self.logger = logging.getLogger()
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
    self.logger.addHandler(handler)
    #END_CONSTRUCTOR
    pass
def __init__(self, config):
    """Copy connection settings off the config object and build shared clients."""
    self.ws_url = config.workspaceURL
    self.handle_url = config.handleURL
    self.shock_url = config.shockURL
    self.sw_url = config.srvWizURL
    self.token = config.token
    self.auth_service_url = config.authServiceUrl
    self.callback_url = config.callbackURL
    # Clients derived from the settings above.
    self.ws = Workspace(self.ws_url, token=self.token)
    self.auth_client = _KBaseAuth(self.auth_service_url)
    self.dfu = DataFileUtil(self.callback_url)
def _put_cached_index(self, assembly_info, index_files_basename, output_dir, ws_for_cache):
    """Best-effort save of a built bowtie2 index as a hidden Bowtie2IndexV2 object.

    Returns True when the cache object was saved, False otherwise. Failures
    are reported and swallowed because caching is an optimization only.
    """
    if not ws_for_cache:
        print('WARNING: bowtie2 index cannot be cached because "ws_for_cache" field not set')
        return False

    try:
        dfu = DataFileUtil(self.callback_url)
        shock_result = dfu.file_to_shock({'file_path': output_dir,
                                          'make_handle': 1,
                                          'pack': 'targz'})
        bowtie2_index = {'handle': shock_result['handle'],
                         'size': shock_result['size'],
                         'assembly_ref': assembly_info['ref'],
                         'index_files_basename': index_files_basename}
        save_params = {'objects': [{'hidden': 1,
                                    'provenance': self.provenance,
                                    'name': os.path.basename(output_dir),
                                    'data': bowtie2_index,
                                    'type': 'KBaseRNASeq.Bowtie2IndexV2'}]}
        # Target may be a numeric workspace id or a workspace name.
        cache_target = ws_for_cache.strip()
        if cache_target.isdigit():
            save_params['id'] = int(cache_target)
        else:
            save_params['workspace'] = cache_target
        ws = Workspace(self.ws_url)
        save_result = ws.save_objects(save_params)
        print('Bowtie2IndexV2 cached to: ')
        pprint(save_result[0])
        return True
    except Exception:
        # if we fail in saving the cached object, don't worry
        print('WARNING: exception encountered when trying to cache the index files:')
        print(traceback.format_exc())
        print('END WARNING: exception encountered when trying to cache the index files')
        return False
def __init__(self, config, logger=None):
    """Store config, create a unique per-run scratch directory, and build clients."""
    self.config = config
    self.logger = logger
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    # Unique scratch subdirectory so concurrent runs do not collide.
    self.scratch = os.path.join(config['scratch'], 'DEM_' + str(uuid.uuid4()))
    self.ws_url = config['workspace-url']
    self.ws_client = Workspace(self.ws_url)
    self.fv = KBaseFeatureValues(self.callback_url)
    self.dfu = DataFileUtil(self.callback_url)
    self.setAPI = SetAPI(self.callback_url)
    self.gsu = GenomeSearchUtil(self.callback_url)
    self._mkdir_p(self.scratch)
def __init__(self, ws_name_or_id, ws_url=None, token=None, ws_info=None, ws=None):
    """Bind this palette to a workspace via a client/ws_info pair or by lookup.

    Exactly one of ws_name_or_id or ws_info should identify the workspace;
    a pre-built Workspace client may be passed in via ws.
    """
    if ws:
        self.ws = ws
    else:
        if ws_url is None:
            raise ValueError('ws_url was not defined')
        if token is None:
            print('DataPalette warning: token was not set')
        self.ws = Workspace(ws_url, token=token)

    if ws_info:
        if ws_name_or_id:
            raise ValueError("Either ws_name_or_id or ws_info should be set")
        self.ws_info = WorkspaceInfo(ws_info)
    elif str(ws_name_or_id).isdigit():
        self.ws_info = WorkspaceInfo(
            self.ws.get_workspace_info({'id': int(ws_name_or_id)}))
    else:
        self.ws_info = WorkspaceInfo(
            self.ws.get_workspace_info({'workspace': str(ws_name_or_id)}))
    self.palette_ref = None
def setUpClass(cls):
    """One-time fixture: load config, build clients, and create a test workspace."""
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    config = ConfigParser()
    config.read(config_file)
    cls.cfg = {key: value for key, value in config.items('kb_deseq')}
    # Getting username from Auth profile for token
    auth_client = _KBaseAuth(cls.cfg['auth-service-url'])
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [{'service': 'kb_deseq',
                                    'method': 'please_never_use_it_in_production',
                                    'method_params': []}],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.ws = Workspace(cls.wsURL, token=token)
    cls.serviceImpl = kb_deseq(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.gfu = GenomeFileUtil(cls.callback_url)
    cls.dfu = DataFileUtil(cls.callback_url, service_ver='dev')
    cls.ru = ReadsUtils(cls.callback_url)
    cls.rau = ReadsAlignmentUtils(cls.callback_url, service_ver='dev')
    cls.stringtie = kb_stringtie(cls.callback_url, service_ver='dev')
    cls.eu = ExpressionUtils(cls.callback_url, service_ver='dev')
    cls.deseq_runner = DESeqUtil(cls.cfg)
    # Millisecond suffix keeps workspace names unique across runs.
    cls.wsName = "test_kb_stringtie_%d" % int(time.time() * 1000)
    cls.wsClient.create_workspace({'workspace': cls.wsName})
    cls.dfu.ws_name_to_id(cls.wsName)
    cls.prepare_data()
def __init__(self, config):
    """Capture endpoints from config and build the clients this utility needs."""
    self.ws_url = config["workspace-url"]
    self.callback_url = config['SDK_CALLBACK_URL']
    self.token = config['KB_AUTH_TOKEN']
    self.shock_url = config['shock-url']
    self.scratch = config['scratch']
    self.srv_wiz_url = config['srv-wiz-url']
    # Clients: workspace plus the SDK apps used by this module.
    self.ws = Workspace(self.ws_url, token=self.token)
    self.bt = kb_Bowtie2(self.callback_url)
    self.rau = ReadsAlignmentUtils(self.callback_url)
    self.qualimap = kb_QualiMap(self.callback_url)
    self.ru = ReadsUtils(self.callback_url)
    self.dfu = DataFileUtil(self.callback_url)
    self.set_client = SetAPI(self.srv_wiz_url)
def test_build_hisat2_index_from_assembly_ok(self):
    """Build a HISAT2 index from the assembly referenced by a genome object."""
    manager = Hisat2IndexManager(self.wsURL, self.callback_url, self.scratch)
    ws = Workspace(self.wsURL)
    genome_obj_info = ws.get_objects2({
        'objects': [{'ref': self.genome_ref}],
        'no_data': 1
    })
    # get the list of genome refs from the returned info.
    # if there are no refs (or something funky with the return), this will be an empty list.
    # this WILL fail if data is an empty list. But it shouldn't be, and we know because
    # we have a real genome reference, or get_objects2 would fail.
    genome_obj_refs = genome_obj_info.get('data', [{}])[0].get('refs', [])
    # Keep only refs that point at an Assembly or (legacy) ContigSet.
    ref_info = ws.get_object_info3(
        {'objects': [{'ref': r} for r in genome_obj_refs]})
    assembly_refs = []
    for idx, info in enumerate(ref_info.get('infos')):
        if ("KBaseGenomeAnnotations.Assembly" in info[2]
                or "KBaseGenomes.ContigSet" in info[2]):
            assembly_refs.append(";".join(ref_info.get('paths')[idx]))
    idx_prefix = manager.get_hisat2_index(assembly_refs[0])
    self.assertIn("kb_hisat2_idx", idx_prefix)
def fetch_fasta_from_genome(genome_ref, ws_url, callback_url):
    """
    Returns an assembly or contigset as FASTA.

    Looks up the Assembly (or legacy ContigSet) referenced by the given
    Genome object and delegates to fetch_fasta_from_assembly.

    :param genome_ref: workspace reference to a KBaseGenomes.Genome object
    :param ws_url: Workspace service URL
    :param callback_url: SDK callback URL
    :raises ValueError: if genome_ref is not a Genome, or if the genome
        references zero or multiple assemblies.
    """
    if not check_ref_type(genome_ref, ['KBaseGenomes.Genome'], ws_url):
        # Bug fix: the ref was never interpolated, printing a literal '{}'.
        raise ValueError(
            "The given genome_ref {} is not a KBaseGenomes.Genome type!".format(genome_ref))
    # test if genome references an assembly type
    # do get_objects2 without data. get list of refs
    ws = Workspace(ws_url)
    genome_obj_info = ws.get_objects2({
        'objects': [{
            'ref': genome_ref
        }],
        'no_data': 1
    })
    # get the list of genome refs from the returned info.
    # if there are no refs (or something funky with the return), this will be an empty list.
    # this WILL fail if data is an empty list. But it shouldn't be, and we know because
    # we have a real genome reference, or get_objects2 would fail.
    genome_obj_refs = genome_obj_info.get('data', [{}])[0].get('refs', [])
    # see which of those are of an appropriate type (ContigSet or Assembly), if any.
    assembly_ref = list()
    ref_params = [{'ref': x} for x in genome_obj_refs]
    ref_info = ws.get_object_info3({'objects': ref_params})
    for idx, info in enumerate(ref_info.get('infos')):
        if "KBaseGenomeAnnotations.Assembly" in info[
                2] or "KBaseGenomes.ContigSet" in info[2]:
            assembly_ref.append(";".join(ref_info.get('paths')[idx]))
    if len(assembly_ref) == 1:
        return fetch_fasta_from_assembly(assembly_ref[0], ws_url, callback_url)
    if not assembly_ref:
        # Bug fix: the zero-assembly case previously reported "Multiple assemblies".
        raise ValueError(
            "No assembly found associated with the given genome ref {}! "
            "Unable to continue.".format(genome_ref))
    # Bug fix: interpolate the ref into the message.
    raise ValueError(
        "Multiple assemblies found associated with the given genome ref {}! "
        "Unable to continue.".format(genome_ref))
def setUpClass(cls):
    """One-time fixture: configure clients, make a workspace, and stage a Tree object."""
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    config = ConfigParser()
    config.read(config_file)
    cls.cfg = {key: value for key, value in config.items('TreeUtils')}
    # Getting username from Auth profile for token
    auth_client = _KBaseAuth(cls.cfg['auth-service-url'])
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [{'service': 'TreeUtils',
                                    'method': 'please_never_use_it_in_production',
                                    'method_params': []}],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL)
    cls.serviceImpl = TreeUtils(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.dfu = DataFileUtil(cls.callback_url)
    cls.wsName = "test_CompoundSetUtils_%d" % int(time.time() * 1000)
    ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
    cls.wsId = ret[0]
    # Stage a KBaseTrees.Tree object for the tests to reference.
    cls.tree_obj = json.load(open('data/tree.json'))
    info = cls.dfu.save_objects({"id": cls.wsId,
                                 "objects": [{"type": "KBaseTrees.Tree",
                                              "data": cls.tree_obj,
                                              "name": "test_tree"}]})[0]
    cls.tree_ref = "%s/%s/%s" % (info[6], info[0], info[4])
def setUpClass(cls):
    """One-time fixture: load config, build clients, reset scratch, and make a workspace."""
    cls.token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    config = ConfigParser()
    config.read(config_file)
    cls.cfg = {key: value for key, value in config.items('kb_quast')}
    auth_url = cls.cfg.get(
        'auth-service-url',
        "https://kbase.us/services/authorization/Sessions/Login")
    auth_client = _KBaseAuth(auth_url)
    user_id = auth_client.get_user(cls.token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': cls.token,
                    'user_id': user_id,
                    'provenance': [{'service': 'kb_quast',
                                    'method': 'please_never_use_it_in_production',
                                    'method_params': []}],
                    'authenticated': 1})
    cls.shockURL = cls.cfg['shock-url']
    cls.ws = Workspace(cls.cfg['workspace-url'], token=cls.token)
    cls.hs = HandleService(url=cls.cfg['handle-service-url'], token=cls.token)
    cls.au = AssemblyUtil(os.environ['SDK_CALLBACK_URL'])
    cls.impl = kb_quast(cls.cfg)
    # Start each run from an empty scratch directory.
    cls.scratch = cls.cfg['scratch']
    shutil.rmtree(cls.scratch)
    os.mkdir(cls.scratch)
    wsName = "test_ReadsUtils_%d" % int(time.time() * 1000)
    cls.ws_info = cls.ws.create_workspace({'workspace': wsName})
    cls.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
    cls.staged = {}
    cls.nodes_to_delete = []
    cls.handles_to_delete = []
    # cls.setupTestData()
    print('\n\n=============== Starting tests ==================')
def __init__(self, config, services, logger=None):
    """Set up scratch space, service clients, and the cuffmerge runner."""
    self.config = config
    self.logger = logger
    self.services = services
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    # Unique scratch subdirectory for this merge job.
    self.scratch = os.path.join(config['scratch'],
                                'cuffdiff_merge_' + str(uuid.uuid4()))
    self.ws_url = config['workspace-url']
    self.ws_client = Workspace(self.services['workspace_service_url'])
    self.dfu = DataFileUtil(self.callback_url)
    self.gfu = GenomeFileUtil(self.callback_url)
    self.rau = ReadsAlignmentUtils(self.callback_url)
    self.eu = ExpressionUtils(self.callback_url)
    self.deu = DifferentialExpressionUtils(self.callback_url)
    self.cuffmerge_runner = CuffMerge(config, logger)
    # Use every available core for downstream processing.
    self.num_threads = mp.cpu_count()
    handler_utils._mkdir_p(self.scratch)
def setUpClass(cls):
    """One-time fixture: config + auth context, a test workspace, and fake test objects."""
    token = os.environ.get('KB_AUTH_TOKEN', None)
    config_file = os.environ.get('KB_DEPLOYMENT_CONFIG', None)
    config = ConfigParser()
    config.read(config_file)
    cls.cfg = {key: value for key, value in config.items('NarrativeService')}
    auth_url = cls.cfg.get(
        'auth-service-url',
        "https://kbase.us/services/authorization/Sessions/Login")
    auth_client = _KBaseAuth(auth_url)
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [{'service': 'NarrativeService',
                                    'method': 'please_never_use_it_in_production',
                                    'method_params': []}],
                    'authenticated': 1})
    # Set up test Workspace
    cls.ws_url = cls.cfg['workspace-url']
    cls.ws_client = Workspace(cls.ws_url, token=token)
    cls.test_ws_info = cls._make_workspace()
    cls.test_ws_name = cls.test_ws_info[1]
    # Build test data stuff.
    # 1. Make a fake reads object - test for report (should be null)
    cls.fake_reads_upa = cls._make_fake_reads(cls.test_ws_name, "FakeReads")
    # 2. Make a report, give it that reads object - test for report, should find it
    cls.fake_report_upa = cls._make_fake_report(cls.fake_reads_upa, cls.test_ws_name)
    cls.service_impl = NarrativeService(cls.cfg)
def __init__(self, config, provenance):
    """Record endpoints/tokens from config and prepare per-run scratch directories."""
    self.workspace_url = config['workspace-url']
    self.callback_url = os.environ['SDK_CALLBACK_URL']
    self.token = os.environ['KB_AUTH_TOKEN']
    self.provenance = provenance
    # Unique scratch subdirectory for this run.
    self.scratch = os.path.join(config['scratch'], str(uuid.uuid4()))
    _mkdir_p(self.scratch)
    # Optional endpoints — only set when present in the config.
    if 'shock-url' in config:
        self.shock_url = config['shock-url']
    if 'handle-service-url' in config:
        self.handle_url = config['handle-service-url']
    self.ws_client = Workspace(self.workspace_url, token=self.token)
    self.kbr = KBaseReport(self.callback_url)
    self.genome_count_dir = os.path.join(self.scratch, str(uuid.uuid4()))
    _mkdir_p(self.genome_count_dir)
def setUpClass(cls):
    """One-time fixture: config, workspace client, a random Kafka topic, and the indexer."""
    cls.test_dir = os.path.dirname(os.path.abspath(__file__))
    cls.token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    cls.cfg['token'] = cls.token
    for key, value in config.items('NarrativeIndexer'):
        cls.cfg[key] = value
    # Getting username from Auth profile for token
    # authServiceUrl = cls.cfg['auth-service-url']
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    # cls.ctx = MethodContext(None)
    # cls.ctx.update({'token': cls.token,
    #                 'user_id': user_id,
    #                 'provenance': [
    #                     {'service': 'NarrativeIndexer',
    #                      'method': 'please_never_use_it_in_production',
    #                      'method_params': []
    #                      }],
    #                 'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = Workspace(cls.wsURL)
    # Kafka: use a randomly named topic so concurrent runs don't collide.
    cls.kserver = cls.cfg.get('kafka-server', 'kafka')
    cls.admin = AdminClient({'bootstrap.servers': cls.kserver})
    cls.topic = 'testevents-%d' % (randint(1, 10000))
    cls.cfg['kafka-topic'] = cls.topic
    cls.admin.delete_topics([cls.topic])
    cls.admin.create_topics(
        [NewTopic(cls.topic, num_partitions=1, replication_factor=1)])
    # Create an instance
    cls.serviceImpl = NarrativeIndexer(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.producer = Producer({'bootstrap.servers': cls.kserver})
def find_object_report(self, ctx, params):
    """
    find_object_report searches for a referencing report. All reports
    (if made properly) reference the objects that were created at the
    same time. To find that report, we search back up the reference
    chain. If the object in question was a copy, then there is no
    referencing report. We might still want to see it, though! If the
    original object is accessible, we'll continue the search from that
    object, and mark the associated object UPA in the return value.
    :param params: instance of type "FindObjectReportParams" (This first
       version only takes a single UPA as input and attempts to find the
       report that made it.) -> structure: parameter "upa" of String
    :returns: instance of type "FindObjectReportOutput" (report_upas:
       the UPAs for the report object. If empty list, then no report is
       available. But there might be more than one... object_upa: the
       UPA for the object that this report references. If the originally
       passed object was copied, then this will be the source of that
       copy that has a referencing report. copy_inaccessible: 1 if this
       object was copied, and the user can't see the source, so no
       report's available. error: if an error occurred while looking up
       (found an unavailable copy, or the report is not accessible),
       this will have a sensible string, more or less. Optional.) ->
       structure: parameter "report_upas" of list of String, parameter
       "object_upa" of String, parameter "copy_inaccessible" of type
       "boolean" (@range [0,1]), parameter "error" of String
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN find_object_report
    ws_client = Workspace(self.workspaceURL, token=ctx["token"])
    returnVal = ReportFetcher(ws_client).find_report_from_object(params['upa'])
    #END find_object_report

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method find_object_report return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]
def remove_narratorial(self, ctx, params):
    """
    :param params: instance of type "RemoveNarratorialParams" ->
       structure: parameter "ws" of String
    :returns: instance of type "RemoveNarratorialResult" -> structure:
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN remove_narratorial
    if 'ws' not in params:
        raise ValueError('"ws" field indicating WS name or id is required.')
    ws_client = Workspace(self.workspaceURL, token=ctx["token"])
    NarratorialUtils().remove_narratorial(params['ws'], ws_client)
    returnVal = {}
    #END remove_narratorial

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method remove_narratorial return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]
def setUpClass(cls):
    """One-time fixture for STAR tests: config, auth context, and shared clients."""
    token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    config = ConfigParser()
    config.read(config_file)
    cls.cfg = {key: value for key, value in config.items('STAR')}
    # Getting username from Auth profile for token
    auth_client = _KBaseAuth(cls.cfg['auth-service-url'])
    user_id = auth_client.get_user(token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': token,
                    'user_id': user_id,
                    'provenance': [{'service': 'STAR',
                                    'method': 'please_never_use_it_in_production',
                                    'method_params': []}],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.ws = Workspace(cls.wsURL, token=token)
    cls.serviceImpl = STAR(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
def __init__(self, scratch_dir, workspace_url, callback_url, srv_wiz_url, context):
    """Store endpoints, pull the job id and provenance off the call context,
    and construct the workspace and parallel-runner clients."""
    self.scratch_dir = scratch_dir
    self.workspace_url = workspace_url
    self.callback_url = callback_url
    self.srv_wiz_url = srv_wiz_url
    self.provenance = context.provenance()

    # Extract the current job id from the RPC call stack, when available.
    self.job_id = None
    rpc_context = context.get('rpc_context')
    if rpc_context is not None and hasattr(rpc_context, 'get'):
        call_stack = rpc_context.get('call_stack')
        if len(call_stack):
            self.job_id = call_stack[0].get('job_id')

    # From the provenance, pin the exact version (git hash) to run when possible;
    # otherwise fall back to the 'release' tag.
    self.my_version = 'release'
    if len(self.provenance) > 0 and 'subactions' in self.provenance[0]:
        self.my_version = self.get_version_from_subactions(
            'kb_BatchApp', self.provenance[0]['subactions'])
    print('Running kb_BatchApp version = ' + self.my_version)

    self.ws = Workspace(self.workspace_url)
    self.parallel_runner = KBParallel(self.callback_url, service_ver='dev')
class WorkspaceAdminUtil:
    """Thin wrapper over the Workspace client that transparently uses admin
    commands when a 'ws-admin-token' is configured, and plain calls otherwise."""

    def __init__(self, config):
        ws_url = config.get('workspace-url')
        self.atoken = config.get('ws-admin-token')
        # No admin token (missing or empty) -> fall back to the user token
        # and the regular, non-admin API calls.
        self.noadmin = not self.atoken
        if self.noadmin:
            self.atoken = config['token']
        self.ws = Workspace(ws_url, token=self.atoken)

    def list_objects(self, params):
        """
        Provide something that acts like a standard listObjects
        """
        if self.noadmin:
            return self.ws.list_objects(params)
        return self.ws.administer({'command': 'listObjects', 'params': params})

    def get_objects2(self, params):
        """
        Provide something that acts like a standard getObjects
        """
        if self.noadmin:
            return self.ws.get_objects2(params)
        return self.ws.administer({'command': 'getObjects', 'params': params})

    def get_workspace_info(self, params):
        """
        Provide something that acts like a standard getWorkspaceInfo
        """
        if self.noadmin:
            return self.ws.get_workspace_info(params)
        return self.ws.administer({'command': 'getWorkspaceInfo',
                                   'params': params})
class DiffExprMatrixUtils:
    """
    Contains a set of functions for expression levels calculations.
    """

    PARAM_IN_WS_NAME = 'workspace_name'
    PARAM_IN_OBJ_NAME = 'output_obj_name'
    PARAM_IN_DIFFEXPMATSET_REF = 'diffExprMatrixSet_ref'

    def __init__(self, config, logger=None):
        self.config = config
        self.logger = logger
        # Unique per-run scratch subdirectory.
        self.scratch = os.path.join(config['scratch'], 'DEM_' + str(uuid.uuid4()))
        self.ws_url = config['workspace-url']
        self._mkdir_p(self.scratch)

    def _mkdir_p(self, path):
        """
        _mkdir_p: make directory for given path, tolerating pre-existing dirs
        """
        if not path:
            return
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def process_params(self, params):
        """
        validates params passed to gen expression matrix method
        """
        for p in [self.PARAM_IN_DIFFEXPMATSET_REF]:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

    def get_expressionset_data(self, expressionset_ref):
        """Fetch an expression set object and normalize it to a dict with
        ws_name, obj_name, genome_ref, and the member expression object refs."""
        expr_set_obj = self.ws_client.get_objects2(
            {'objects': [{'ref': expressionset_ref}]})['data'][0]
        expr_set_obj_type = expr_set_obj.get('info')[2]
        expr_set_data = dict()
        expr_set_data['ws_name'] = expr_set_obj.get('info')[7]
        expr_set_data['obj_name'] = expr_set_obj.get('info')[1]

        if re.match(r'KBaseRNASeq.RNASeqExpressionSet-\d.\d', expr_set_obj_type):
            expr_set_data['genome_ref'] = expr_set_obj['data']['genome_id']
            expr_obj_refs = list()
            for expr_obj in expr_set_obj['data']['mapped_expression_ids']:
                # Bug fix: dict.values() is not indexable in Python 3;
                # materialize the view before taking the first element.
                expr_obj_refs.append(list(expr_obj.values())[0])
            expr_set_data['expr_obj_refs'] = expr_obj_refs
        elif re.match(r'KBaseSets.ExpressionSet-\d.\d', expr_set_obj_type):
            items = expr_set_obj.get('data').get('items')
            expr_obj_refs = [item['ref'] for item in items]
            # Genome ref lives on the member expression objects; read the first.
            expr_obj = self.ws_client.get_objects2(
                {'objects': [{'ref': expr_obj_refs[0]}]})['data'][0]
            expr_set_data['genome_ref'] = expr_obj['data']['genome_id']
            expr_set_data['expr_obj_refs'] = expr_obj_refs
        else:
            # Bug fix: referenced the undefined attribute PARAM_IN_EXPSET_REF,
            # which raised AttributeError instead of the intended TypeError.
            raise TypeError(self.PARAM_IN_DIFFEXPMATSET_REF + ' should be of type ' +
                            'KBaseRNASeq.RNASeqExpressionSet ' +
                            'or KBaseSets.ExpressionSet')
        return expr_set_data

    def get_diffexpr_matrixset(self, params, token):
        """Fetch every matrix in a differential-expression matrix set and
        reshape each into a dict of conditions plus per-gene volcano data."""
        self.ws_client = Workspace(self.ws_url, token=token)
        col_names = {'gene_id': 'gene',
                     'log2_fold_change': 'log2fc_f',
                     'p_value': 'p_value_f',
                     'q_value': 'q_value'}
        json_fields = ['log2fc_f', 'p_value_f', 'q_value']
        self.process_params(params)
        diffexprmatset_list = list()
        diffexprmatset_ref = params.get(self.PARAM_IN_DIFFEXPMATSET_REF)
        diffexprmatset_obj = self.ws_client.get_objects2(
            {'objects': [{'ref': diffexprmatset_ref}]})['data'][0]
        items = diffexprmatset_obj.get('data').get('items')
        diffexprmat_refs = list()
        for item in items:
            diffexprmat_refs.append(item['ref'])
            self.logger.info('DiffExprMatrix ref: ' + item['ref'])
        for diffexprmat_ref in diffexprmat_refs:
            diffexprmat_dict = dict()
            diffexprmat_obj = self.ws_client.get_objects2(
                {'objects': [{'ref': diffexprmat_ref}]})['data'][0]
            diffexprmat = diffexprmat_obj.get('data')
            # Bug fix: dict views are not indexable in Python 3.
            diffexprmat_dict['condition_1'] = list(
                diffexprmat.get('condition_mapping').keys())[0]
            diffexprmat_dict['condition_2'] = list(
                diffexprmat.get('condition_mapping').values())[0]
            voldata = list()
            data = diffexprmat.get('data')
            for row_index, row_id in enumerate(data.get('row_ids')):
                row_data = dict()
                row_data['gene'] = row_id
                values = data.get('values')[row_index]
                for col_index in range(len(values)):
                    row_data[json_fields[col_index]] = values[col_index]
                voldata.append(row_data)
            diffexprmat_dict['voldata'] = voldata
            diffexprmatset_list.append(diffexprmat_dict)
        return diffexprmatset_list
class NarrativeManager:
    """Manages KBase Narrative objects: listing workspace data (including
    sets and DataPalette-referenced objects), creating and copying
    Narratives, and copying individual objects between workspaces.
    """

    # Legacy narrative cell metadata keys / cell-type tags.
    KB_CELL = 'kb-cell'
    KB_TYPE = 'type'
    KB_APP_CELL = 'kb_app'
    KB_FUNCTION_CELL = 'function_input'
    KB_OUTPUT_CELL = 'function_output'
    KB_ERROR_CELL = 'kb_error'
    KB_CODE_CELL = 'kb_code'
    KB_STATE = 'widget_state'

    DEBUG = False

    # Types handled via the DataPalette service; disabled by default and
    # switched on per-instance for the CI environment in __init__.
    DATA_PALETTES_TYPES = DataPaletteTypes(False)

    def __init__(self, config, ctx, set_api_cache, dps_cache):
        """
        :param config: service config providing 'narrative-method-store',
            'workspace-url', 'intro-markdown-file' and 'kbase-endpoint'.
        :param ctx: call context carrying 'token' and 'user_id'.
        :param set_api_cache: DynamicServiceCache for the Set API.
        :param dps_cache: DynamicServiceCache for the DataPalette service.
        """
        self.narrativeMethodStoreURL = config['narrative-method-store']
        self.set_api_cache = set_api_cache  # DynamicServiceCache type
        self.dps_cache = dps_cache  # DynamicServiceCache type
        self.token = ctx["token"]
        self.user_id = ctx["user_id"]
        self.ws = Workspace(config['workspace-url'], token=self.token)
        self.intro_md_file = config['intro-markdown-file']
        # We switch DPs on only for internal Continuous Integration environment for now:
        if config['kbase-endpoint'].startswith("https://ci.kbase.us/"):
            self.DATA_PALETTES_TYPES = DataPaletteTypes(True)

    def list_objects_with_sets(self, ws_id=None, ws_name=None, workspaces=None,
                               types=None, include_metadata=0):
        """List objects (including set members and DataPalette data) from one
        or more workspaces.  Exactly one of ws_id / ws_name / workspaces must
        identify the target workspace(s).

        :raises ValueError: if no workspace is identified.
        """
        if not workspaces:
            if (not ws_id) and (not ws_name):
                raise ValueError("One and only one of 'ws_id', 'ws_name', 'workspaces' " +
                                 "parameters should be set")
            workspaces = [self._get_workspace_name_or_id(ws_id, ws_name)]
        return self._list_objects_with_sets(workspaces, types, include_metadata)

    def _list_objects_with_sets(self, workspaces, types, include_metadata):
        """Gather sets, plain workspace objects and DataPalette entries for
        the given workspaces; de-duplicates by object ref via processed_refs.
        """
        type_map = None
        if types is not None:
            type_map = {key: True for key in types}
        processed_refs = {}
        data = []
        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: processing sets")
        t1 = time.time()
        # Sets first; the Set API can also return raw data palettes in the
        # same call, sparing a second service round-trip below.
        set_ret = self.set_api_cache.call_method(
            "list_sets",
            [{'workspaces': workspaces,
              'include_set_item_info': 1,
              'include_raw_data_palettes': 1,
              'include_metadata': include_metadata}],
            self.token)
        sets = set_ret['sets']
        dp_data = set_ret.get('raw_data_palettes')
        dp_refs = set_ret.get('raw_data_palette_refs')
        for set_info in sets:
            # Process
            target_set_items = []
            for set_item in set_info['items']:
                target_set_items.append(set_item['info'])
            if self._check_info_type(set_info['info'], type_map):
                data_item = {'object_info': set_info['info'],
                             'set_items': {'set_items_info': target_set_items}}
                data.append(data_item)
                processed_refs[set_info['ref']] = data_item
        if self.DEBUG:
            print(" (time=" + str(time.time() - t1) + ")")

        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: loading ws_info")
        t2 = time.time()
        ws_info_list = []
        #for ws in workspaces:
        if len(workspaces) == 1:
            ws = workspaces[0]
            ws_id = None
            ws_name = None
            if str(ws).isdigit():
                ws_id = int(ws)
            else:
                ws_name = str(ws)
            ws_info_list.append(self.ws.get_workspace_info({"id": ws_id,
                                                            "workspace": ws_name}))
        else:
            # multiple workspaces: filter the user's readable workspaces by
            # name or numeric id
            ws_map = {key: True for key in workspaces}
            for ws_info in self.ws.list_workspace_info({'perm': 'r'}):
                if ws_info[1] in ws_map or str(ws_info[0]) in ws_map:
                    ws_info_list.append(ws_info)
        if self.DEBUG:
            print(" (time=" + str(time.time() - t2) + ")")

        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: loading workspace objects")
        t3 = time.time()
        for info in WorkspaceListObjectsIterator(
                self.ws,
                ws_info_list=ws_info_list,
                list_objects_params={'includeMetadata': include_metadata}):
            # ref = wsid/objid/version
            item_ref = str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
            if item_ref not in processed_refs and self._check_info_type(info, type_map):
                data_item = {'object_info': info}
                data.append(data_item)
                processed_refs[item_ref] = data_item
        if self.DEBUG:
            print(" (time=" + str(time.time() - t3) + ")")

        if self.DEBUG:
            print("NarrativeManager._list_objects_with_sets: processing DataPalettes")
        t5 = time.time()
        if dp_data is None or dp_refs is None:
            # palettes weren't piggybacked on the list_sets call; fetch them
            dps = self.dps_cache
            dp_ret = dps.call_method("list_data",
                                     [{'workspaces': workspaces,
                                       'include_metadata': include_metadata}],
                                     self.token)
            dp_data = dp_ret['data']
            dp_refs = dp_ret['data_palette_refs']
        for item in dp_data:
            ref = item['ref']
            if self._check_info_type(item['info'], type_map):
                data_item = None
                if ref in processed_refs:
                    data_item = processed_refs[ref]
                else:
                    data_item = {'object_info': item['info']}
                    processed_refs[ref] = data_item
                    data.append(data_item)
                dp_info = {}
                if 'dp_ref' in item:
                    dp_info['ref'] = item['dp_ref']
                if 'dp_refs' in item:
                    dp_info['refs'] = item['dp_refs']
                data_item['dp_info'] = dp_info
        if self.DEBUG:
            print(" (time=" + str(time.time() - t5) + ")")
        return {"data": data, 'data_palette_refs': dp_refs}

    def _check_info_type(self, info, type_map):
        """Return True if the object info tuple passes the type filter
        (type_map of bare type names, or None for no filtering)."""
        if type_map is None:
            return True
        obj_type = info[2].split('-')[0]
        return type_map.get(obj_type, False)

    def copy_narrative(self, newName, workspaceRef, workspaceId):
        """Clone a narrative's workspace under a new name.

        The clone excludes the narrative object itself and any
        DataPalette-handled objects; palette contents are re-copied through
        the DataPalette service, then an updated narrative object is saved
        into the new workspace.  On failure the half-built workspace is
        deleted and the original exception re-raised.

        :returns: {'newWsId': ..., 'newNarId': ...}
        """
        time_ms = int(round(time.time() * 1000))
        newWsName = self.user_id + ':narrative_' + str(time_ms)
        # add the 'narrative' field to newWsMeta later.
        newWsMeta = {"is_temporary": "false", "narrative_nice_name": newName}

        # start with getting the existing narrative object.
        currentNarrative = self.ws.get_objects([{'ref': workspaceRef}])[0]
        if not workspaceId:
            workspaceId = currentNarrative['info'][6]
        # Let's prepare exceptions for clone the workspace.
        # 1) currentNarrative object:
        excluded_list = [{'objid': currentNarrative['info'][0]}]
        # 2) let's exclude objects of types under DataPalette handling:
        data_palette_type = "DataPalette.DataPalette"
        excluded_types = [data_palette_type]
        excluded_types.extend(self.DATA_PALETTES_TYPES.keys())
        add_to_palette_list = []
        dp_detected = False
        for obj_type in excluded_types:
            list_objects_params = {'type': obj_type}
            if obj_type == data_palette_type:
                list_objects_params['showHidden'] = 1
            for info in WorkspaceListObjectsIterator(
                    self.ws,
                    ws_id=workspaceId,
                    list_objects_params=list_objects_params):
                if obj_type == data_palette_type:
                    dp_detected = True
                else:
                    add_to_palette_list.append({
                        'ref': str(info[6]) + '/' + str(info[0]) + '/' + str(info[4])
                    })
                excluded_list.append({'objid': info[0]})
        # clone the workspace EXCEPT for currentNarrative object + obejcts of DataPalette types:
        newWsId = self.ws.clone_workspace({
            'wsi': {'id': workspaceId},
            'workspace': newWsName,
            'meta': newWsMeta,
            'exclude': excluded_list
        })[0]
        try:
            if dp_detected:
                self.dps_cache.call_method(
                    "copy_palette",
                    [{'from_workspace': str(workspaceId),
                      'to_workspace': str(newWsId)}],
                    self.token)
            if len(add_to_palette_list) > 0:
                # There are objects in source workspace that have type under DataPalette handling
                # but these objects are physically stored in source workspace rather that saved
                # in DataPalette object. So they weren't copied by "dps.copy_palette".
                self.dps_cache.call_method(
                    "add_to_palette",
                    [{'workspace': str(newWsId),
                      'new_refs': add_to_palette_list}],
                    self.token)
            # update the ref inside the narrative object and the new workspace metadata.
            newNarMetadata = currentNarrative['info'][10]
            newNarMetadata['name'] = newName
            newNarMetadata['ws_name'] = newWsName
            newNarMetadata['job_info'] = json.dumps({
                'queue_time': 0,
                'running': 0,
                'completed': 0,
                'run_time': 0,
                'error': 0
            })

            currentNarrative['data']['metadata']['name'] = newName
            currentNarrative['data']['metadata']['ws_name'] = newWsName
            currentNarrative['data']['metadata']['job_ids'] = {
                'apps': [],
                'methods': [],
                'job_usage': {'queue_time': 0, 'run_time': 0}
            }
            # save the shiny new Narrative so it's at version 1
            newNarInfo = self.ws.save_objects({
                'id': newWsId,
                'objects': [{
                    'type': currentNarrative['info'][2],
                    'data': currentNarrative['data'],
                    'provenance': currentNarrative['provenance'],
                    'name': currentNarrative['info'][1],
                    'meta': newNarMetadata
                }]
            })
            # now, just update the workspace metadata to point
            # to the new narrative object
            newNarId = newNarInfo[0][0]
            self.ws.alter_workspace_metadata({
                'wsi': {'id': newWsId},
                'new': {'narrative': str(newNarId)}
            })
            return {'newWsId': newWsId, 'newNarId': newNarId}
        except BaseException:
            # let's delete copy of workspace so it's out of the way - it's broken
            self.ws.delete_workspace({'id': newWsId})
            raise  # continue raising previous exception

    def create_new_narrative(self, app, method, appparam, appData, markdown,
                             copydata, importData, includeIntroCell):
        """Create a fresh narrative, optionally seeded with one app, method
        or markdown cell, and optionally importing/copying objects into it.

        :raises ValueError: if both app and method are provided.
        """
        if app and method:
            raise ValueError("Must provide no more than one of the app or method params")

        if (not importData) and copydata:
            importData = copydata.split(';')

        if (not appData) and appparam:
            # appparam is 'step,param,value;step,param,value;...'
            appData = []
            for tmp_item in appparam.split(';'):
                tmp_tuple = tmp_item.split(',')
                step_pos = None
                if tmp_tuple[0]:
                    try:
                        step_pos = int(tmp_tuple[0])
                    except ValueError:
                        pass
                appData.append([step_pos, tmp_tuple[1], tmp_tuple[2]])
        cells = None
        if app:
            cells = [{"app": app}]
        elif method:
            cells = [{"method": method}]
        elif markdown:
            cells = [{"markdown": markdown}]
        return self._create_temp_narrative(cells, appData, importData, includeIntroCell)

    def _get_intro_markdown(self):
        """
        Creates and returns a cell with the introductory text included.
        """
        # Load introductory markdown text
        with open(self.intro_md_file) as intro_file:
            intro_md = intro_file.read()
        return intro_md

    def _create_temp_narrative(self, cells, parameters, importData, includeIntroCell):
        """Create the workspace, build and save the narrative object, then
        finish setup (workspace metadata + optional data import)."""
        # Migration to python of JavaScript class from https://github.com/kbase/kbase-ui/blob/4d31151d13de0278765a69b2b09f3bcf0e832409/src/client/modules/plugins/narrativemanager/modules/narrativeManager.js#L414
        narr_id = int(round(time.time() * 1000))
        workspaceName = self.user_id + ':narrative_' + str(narr_id)
        narrativeName = "Narrative." + str(narr_id)

        ws = self.ws
        ws_info = ws.create_workspace({'workspace': workspaceName,
                                       'description': ''})
        newWorkspaceInfo = ServiceUtils.workspaceInfoToObject(ws_info)
        [narrativeObject, metadataExternal] = self._fetchNarrativeObjects(
            workspaceName, cells, parameters, includeIntroCell)
        objectInfo = ws.save_objects({
            'workspace': workspaceName,
            'objects': [{
                'type': 'KBaseNarrative.Narrative',
                'data': narrativeObject,
                'name': narrativeName,
                'meta': metadataExternal,
                'provenance': [{
                    'script': 'NarrativeManager.py',
                    'description': 'Created new ' + 'Workspace/Narrative bundle.'
                }],
                'hidden': 0
            }]
        })[0]
        objectInfo = ServiceUtils.objectInfoToObject(objectInfo)
        self._completeNewNarrative(newWorkspaceInfo['id'], objectInfo['id'],
                                   importData)
        return {'workspaceInfo': newWorkspaceInfo, 'narrativeInfo': objectInfo}

    def _fetchNarrativeObjects(self, workspaceName, cells, parameters, includeIntroCell):
        """Build the narrative object (ipynb-style dict) and the flattened
        string-valued metadata used for the workspace object save."""
        if not cells:
            cells = []
        # fetchSpecs: resolve app/method cell ids to their NMS specs
        appSpecIds = []
        methodSpecIds = []
        specMapping = {'apps': {}, 'methods': {}}
        for cell in cells:
            if 'app' in cell:
                appSpecIds.append(cell['app'])
            elif 'method' in cell:
                methodSpecIds.append(cell['method'])
        nms = NarrativeMethodStore(self.narrativeMethodStoreURL, token=self.token)
        if len(appSpecIds) > 0:
            appSpecs = nms.get_app_spec({'ids': appSpecIds})
            for spec in appSpecs:
                spec_id = spec['info']['id']
                specMapping['apps'][spec_id] = spec
        if len(methodSpecIds) > 0:
            methodSpecs = nms.get_method_spec({'ids': methodSpecIds})
            for spec in methodSpecs:
                spec_id = spec['info']['id']
                specMapping['methods'][spec_id] = spec
        # end of fetchSpecs
        metadata = {
            'job_ids': {
                'methods': [],
                'apps': [],
                'job_usage': {'queue_time': 0, 'run_time': 0}
            },
            'format': 'ipynb',
            'creator': self.user_id,
            'ws_name': workspaceName,
            'name': 'Untitled',
            'type': 'KBaseNarrative.Narrative',
            'description': '',
            'data_dependencies': []
        }
        cellData = self._gatherCellData(cells, specMapping, parameters, includeIntroCell)
        narrativeObject = {
            'nbformat_minor': 0,
            'cells': cellData,
            'metadata': metadata,
            'nbformat': 4
        }
        # Workspace metadata values must be strings; JSON-encode the rest.
        metadataExternal = {}
        for key in metadata:
            value = metadata[key]
            if isinstance(value, str):
                metadataExternal[key] = value
            else:
                metadataExternal[key] = json.dumps(value)
        return [narrativeObject, metadataExternal]

    def _gatherCellData(self, cells, specMapping, parameters, includeIntroCell):
        """Turn the requested cell descriptors into notebook cell dicts,
        optionally prefixed with the intro markdown cell."""
        cell_data = []
        if includeIntroCell == 1:
            cell_data.append({
                'cell_type': 'markdown',
                'source': self._get_intro_markdown(),
                'metadata': {}
            })
        for cell_pos, cell in enumerate(cells):
            if 'app' in cell:
                cell_data.append(self._buildAppCell(len(cell_data),
                                                    specMapping['apps'][cell['app']],
                                                    parameters))
            elif 'method' in cell:
                cell_data.append(self._buildMethodCell(len(cell_data),
                                                       specMapping['methods'][cell['method']],
                                                       parameters))
            elif 'markdown' in cell:
                cell_data.append({
                    'cell_type': 'markdown',
                    'source': cell['markdown'],
                    'metadata': {}
                })
            else:
                raise ValueError("cannot add cell #" + str(cell_pos) +
                                 ", unrecognized cell content")
        return cell_data

    def _buildAppCell(self, pos, spec, params):
        """Build a legacy markdown cell that boots a kbaseNarrativeAppCell
        widget for the given app spec, seeding per-step input state."""
        cellId = 'kb-cell-' + str(pos) + '-' + str(uuid.uuid4())
        cell = {
            'cell_type': 'markdown',
            'source': "<div id='" + cellId + "'></div>" +
                      "\n<script>" +
                      "$('#" + cellId + "').kbaseNarrativeAppCell({'appSpec' : '" +
                      self._safeJSONStringify(spec) + "', 'cellId' : '" +
                      cellId + "'});" +
                      "</script>",
            'metadata': {}
        }
        cellInfo = {}
        widgetState = []
        cellInfo[self.KB_TYPE] = self.KB_APP_CELL
        cellInfo['app'] = spec
        if params:
            steps = {}
            for param in params:
                stepid = 'step_' + str(param[0])
                if stepid not in steps:
                    steps[stepid] = {}
                    steps[stepid]['inputState'] = {}
                steps[stepid]['inputState'][param[1]] = param[2]
            state = {'state': {'step': steps}}
            widgetState.append(state)
        cellInfo[self.KB_STATE] = widgetState
        cell['metadata'][self.KB_CELL] = cellInfo
        return cell

    def _buildMethodCell(self, pos, spec, params):
        """Build a legacy markdown cell that boots a kbaseNarrativeMethodCell
        widget for the given method spec."""
        cellId = 'kb-cell-' + str(pos) + '-' + str(uuid.uuid4())
        cell = {
            'cell_type': 'markdown',
            'source': "<div id='" + cellId + "'></div>" +
                      "\n<script>" +
                      "$('#" + cellId + "').kbaseNarrativeMethodCell({'method' : '" +
                      self._safeJSONStringify(spec) + "'});" +
                      "</script>",
            'metadata': {}
        }
        cellInfo = {'method': spec, 'widget': spec['widgets']['input']}
        cellInfo[self.KB_TYPE] = self.KB_FUNCTION_CELL
        widgetState = []
        if params:
            wparams = {}
            for param in params:
                wparams[param[1]] = param[2]
            widgetState.append({'state': wparams})
        cellInfo[self.KB_STATE] = widgetState
        cell['metadata'][self.KB_CELL] = cellInfo
        return cell

    def _completeNewNarrative(self, workspaceId, objectId, importData):
        """Point the workspace metadata at the new narrative (marked
        temporary) and copy any requested objects into it."""
        self.ws.alter_workspace_metadata({
            'wsi': {'id': workspaceId},
            'new': {'narrative': str(objectId), 'is_temporary': 'true'}
        })
        # copy_to_narrative:
        if not importData:
            return
        objectsToCopy = [{'ref': x} for x in importData]
        infoList = self.ws.get_object_info_new({
            'objects': objectsToCopy,
            'includeMetadata': 0
        })
        for item in infoList:
            objectInfo = ServiceUtils.objectInfoToObject(item)
            self.copy_object(objectInfo['ref'], workspaceId, None, None, objectInfo)

    def _safeJSONStringify(self, obj):
        """JSON-encode obj with its strings HTML-escaped so the result can be
        embedded inside single-quoted HTML/JS attributes."""
        return json.dumps(self._safeJSONStringifyPrepare(obj))

    def _safeJSONStringifyPrepare(self, obj):
        """Recursively replace quote characters in all strings of obj with
        HTML entities (mutates lists/dicts in place)."""
        if isinstance(obj, str):
            return obj.replace("'", "&apos;").replace('"', "&quot;")
        elif isinstance(obj, list):
            for pos in range(len(obj)):
                obj[pos] = self._safeJSONStringifyPrepare(obj[pos])
        elif isinstance(obj, dict):
            obj_keys = list(obj.keys())
            for key in obj_keys:
                obj[key] = self._safeJSONStringifyPrepare(obj[key])
        else:
            pass  # it's boolean/int/float/None
        return obj

    def _get_workspace_name_or_id(self, ws_id, ws_name):
        """Prefer the workspace name; fall back to the stringified id."""
        ret = ws_name
        if not ret:
            ret = str(ws_id)
        return ret

    def copy_object(self, ref, target_ws_id, target_ws_name, target_name, src_info):
        """Copy a single object into a target workspace, routing through the
        DataPalette service for palette-handled types.

        :raises ValueError: if no target workspace is given, or target_name
            is set for a DataPalette copy.
        """
        # There should be some logic related to DataPalettes
        if (not target_ws_id) and (not target_ws_name):
            raise ValueError("Neither target workspace ID nor name is defined")
        if not src_info:
            src_info_tuple = self.ws.get_object_info_new({
                'objects': [{'ref': ref}],
                'includeMetadata': 0
            })[0]
            src_info = ServiceUtils.objectInfoToObject(src_info_tuple)
        type_name = src_info['typeModule'] + '.' + src_info['typeName']
        type_config = self.DATA_PALETTES_TYPES.get(type_name)
        if type_config is not None:
            # Copy with DataPaletteService
            if target_name:
                raise ValueError("'target_name' cannot be defined for DataPalette copy")
            target_ws_name_or_id = self._get_workspace_name_or_id(target_ws_id,
                                                                  target_ws_name)
            self.dps_cache.call_method("add_to_palette",
                                       [{'workspace': target_ws_name_or_id,
                                         'new_refs': [{'ref': ref}]}],
                                       self.token)
            return {'info': src_info}
        else:
            if not target_name:
                target_name = src_info['name']
            obj_info_tuple = self.ws.copy_object({
                'from': {'ref': ref},
                'to': {
                    'wsid': target_ws_id,
                    'workspace': target_ws_name,
                    'name': target_name
                }
            })
            obj_info = ServiceUtils.objectInfoToObject(obj_info_tuple)
            return {'info': obj_info}

    def list_available_types(self, workspaces):
        """Count objects per bare type name across the given workspaces.

        :returns: {'type_stat': {type_name: count}}
        """
        data = self.list_objects_with_sets(workspaces=workspaces)['data']
        type_stat = {}
        for item in data:
            info = item['object_info']
            obj_type = info[2].split('-')[0]
            if obj_type in type_stat:
                type_stat[obj_type] += 1
            else:
                type_stat[obj_type] = 1
        return {'type_stat': type_stat}
class MutualInfoUtil: def __init__(self, config): self.ws_url = config["workspace-url"] self.callback_url = config['SDK_CALLBACK_URL'] self.token = config['KB_AUTH_TOKEN'] self.shock_url = config['shock-url'] self.dfu = DataFileUtil(self.callback_url) self.ws = Workspace(self.ws_url, token=self.token) self.scratch = config['scratch'] def _mkdir_p(self, path): """ _mkdir_p: make directory for given path """ if not path: return try: os.makedirs(path) except OSError as exc: if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise def test_dfu(self): output_directory = self.scratch #output_directory = "/kb/module/test1/" #os.mkdir(output_directory) #self._mkdir_p(output_directory) test_file = os.path.join(output_directory, 'index.html') with open(test_file, 'w') as file: file.write("test!") print("OUTPUT DIR") print(output_directory) print(os.listdir(output_directory)) print("file_to_shock") report_shock_id = self.dfu.file_to_shock({ 'file_path': output_directory, 'pack': 'targz' #'pack': 'zip' }) print(report_shock_id) return def _validate_run_flux_mutual_information_analysis_params(self, params): """ _validate_run_flux_mutual_information_analysis_params: validates params passed to run_flux_mutual_information_analysis method """ log('start validating validate_run_flux_mutual_information_analysis params' ) # check for required parameters for p in ['fbamodel_id', 'compounds', 'media_id', 'workspace_name']: if p not in params: raise ValueError( '"{}" parameter is required, but missing'.format(p)) def _get_file_from_ws(self, ref): try: file_path = self.ws.get_objects2({'objects': [{'ref': ref}]}) file_path = file_path['data'][0] except Exception as e: raise ValueError('Unable to get object from workspace: (' + ref + ')' + str(e)) return file_path def _make_media_files(self, ws_name, base, compounds): """ Build and store media objects for each combination of compound added to the base media. 
:param base: The base media file :param compounds: the set of compound to test :return: A list of media ids and a matrix with each media combination defined """ ref = ws_name + "/" + base if base.find("/") != -1: ref = base output = self._get_file_from_ws(ref) base_media = output['data'] base = output['info'][1] myuuid = str(uuid.uuid4()) media_ids = [base] new_media_list = [] media_matrix = [[""] + compounds] media_matrix.append([[base] + [0] * len(compounds)]) for n_comp in range(1, len(compounds) + 1): for combo in combinations(compounds, n_comp): new_media_id = base + '_v%s' % len(media_matrix) media_ids.append(new_media_id) media_matrix.append( [new_media_id] + [1 if comp in combo else 0 for comp in compounds]) new_media = deepcopy(base_media) new_media['id'] = new_media_id new_media['name'] = new_media_id for new_comp in combo: new_media['mediacompounds'].append({ 'compound_ref': '48/1/1/compounds/id/%s' % new_comp.split('_')[0], 'concentration': 1.0, 'maxFlux': 1000, 'minFlux': -1000 }) new_media_list.append(new_media) print("Made %s Media Files" % (len(media_ids) - 1)) info = self.ws.save_objects({ 'workspace': ws_name, "objects": [{ "hidden": 1, "type": "KBaseBiochem.Media", "data": media, "name": myuuid + "-" + media['name'] } for media in new_media_list] }) #print(info) return media_ids, media_matrix, myuuid def _run_fba(self, workspace_name, media_id_list, fbamodel_id, myuuid, base_media): print('running fba') fba_tool_obj = fba_tools(self.callback_url, service_ver='dev') new_media_list = [] for media in media_id_list: if media == base_media: new_media_list.append(workspace_name + "/" + media) else: new_media_list.append(workspace_name + "/" + myuuid + "-" + media) fba_tool_obj.run_flux_balance_analysis({ "max_c_uptake": 60, #"max_c_uptake": 6, // previously default is 6 later set to 60 "workspace": workspace_name, "fbamodel_id": fbamodel_id, "fba_output_id": fbamodel_id + ".mifba", "fbamodel_workspace": workspace_name, "media_id_list": new_media_list, 
"target_reaction": "bio1", "minimize_flux": 1 }) output = self.ws.get_objects2({ 'objects': [{ 'ref': workspace_name + "/" + fbamodel_id + '.mifba' }] }) #json.dump(output, open(self.scratch+'/fba.json', 'w')) fba = output['data'][0]['data'] biomass_data = "FBAs,Biomass\n" secretion_file = "," + ','.join(media_id_list) + "\n" full_secretion_file = "," + ','.join(media_id_list) + "\n" full_flux_file = "," + ','.join(media_id_list) + "\n" flux_file = "," + ','.join(media_id_list) + "\n" objectives = fba['other_objectives'] for i in range(0, len(objectives)): biomass_data = biomass_data + media_id_list[i] + "," + str( objectives[i]) + "\n" flux_vars = fba['FBAReactionVariables'] for var in flux_vars: id = var['modelreaction_ref'].split("/").pop() flux_file = flux_file + id full_flux_file = full_flux_file + id fluxes = var['other_values'] for i in range(0, len(objectives)): if objectives[i] == 0: full_flux_file = full_flux_file + ",0" flux_file = flux_file + ",0" else: full_flux_file = full_flux_file + "," + str(fluxes[i]) if abs(fluxes[i]) < 1e-7: flux_file = flux_file + ",0" else: flux_file = flux_file + ",1" flux_file = flux_file + "\n" full_flux_file = full_flux_file + "\n" secretion_vars = fba['FBACompoundVariables'] for var in secretion_vars: id = var['modelcompound_ref'].split("/").pop() secretion_file = secretion_file + id full_secretion_file = full_secretion_file + id fluxes = var['other_values'] for i in range(0, len(objectives)): if objectives[i] == 0: full_secretion_file = full_secretion_file + ",0" secretion_file = secretion_file + ",0" else: full_secretion_file = full_secretion_file + "," + str( fluxes[i]) if abs(fluxes[i]) < 1e-7: secretion_file = secretion_file + ",0" elif fluxes[i] < 0: secretion_file = secretion_file + ",-1" else: secretion_file = secretion_file + ",1" secretion_file = secretion_file + "\n" full_secretion_file = full_secretion_file + "\n" output_directory = os.path.join(self.scratch, str(uuid.uuid4())) self._mkdir_p(output_directory) 
biomass_path = os.path.join(output_directory, 'biomass.csv') secretion_path = os.path.join(output_directory, 'secretion.csv') flux_path = os.path.join(output_directory, 'flux.csv') full_secretion_path = os.path.join(output_directory, 'full_secretion.csv') full_flux_path = os.path.join(output_directory, 'full_flux.csv') with open(biomass_path, 'w') as biomass_f: biomass_f.write(biomass_data) with open(secretion_path, 'w') as secretion_f: secretion_f.write(secretion_file) with open(flux_path, 'w') as flux_f: flux_f.write(flux_file) with open(full_secretion_path, 'w') as full_secretion_f: full_secretion_f.write(full_secretion_file) with open(full_flux_path, 'w') as full_flux_f: full_flux_f.write(full_flux_file) return [ biomass_path, secretion_path, flux_path, full_secretion_path, full_flux_path ] def _make_index_html(self, result_file_path, mutual_info_dict): overview_content = '' overview_content += '<table><tr><th>Mutual Information for various chemical compound combinations' overview_content += ' Object</th></td>' overview_content += '<tr><th>Input Chemical Compound Combination</th>' overview_content += '<th>Mutual Information (in Bits)</th>' overview_content += '</tr>' for k, v in mutual_info_dict.items(): overview_content += '<tr><td>{}</td><td>{}</td></tr>'.format(k, v) overview_content += '</table>' with open(result_file_path, 'w') as result_file: with open( os.path.join(os.path.dirname(__file__), 'report_template.html'), 'r') as report_template_file: report_template = report_template_file.read() report_template = report_template.replace( '<p>Overview_Content</p>', overview_content) result_file.write(report_template) return def _generate_html_report(self, result_directory, mutual_info_dict): """ _generate_html_report: generate html summary report """ #scratch, uui, datafileutil, file_to_shock, shockId, extended report log('start generating html report') html_report = list() output_directory = os.path.join(self.scratch, str(uuid.uuid4())) 
self._mkdir_p(output_directory) result_file_path = os.path.join(output_directory, 'mutual_information_report.html') shutil.copy(os.path.join(result_directory, 'MI_plot.png'), os.path.join(output_directory, 'MI_plot.png')) overview_content = '' overview_content += '<table><tr><th>Mutual Information for various chemical compound combinations' overview_content += ' Object</th></td>' overview_content += '<tr><th>Input Chemical Compound Combination</th>' overview_content += '<th>Mutual Information (in Bits)</th>' overview_content += '</tr>' for k, v in mutual_info_dict.items(): overview_content += '<tr><td>{}</td><td>{}</td></tr>'.format(k, v) overview_content += '</table>' with open(result_file_path, 'w') as result_file: with open( os.path.join(os.path.dirname(__file__), 'report_template.html'), 'r') as report_template_file: report_template = report_template_file.read() report_template = report_template.replace( '<p>Overview_Content</p>', overview_content) result_file.write(report_template) report_shock_id = self.dfu.file_to_shock({ 'file_path': output_directory, 'pack': 'targz' })['shock_id'] #report_shock_id = self.dfu.file_to_shock({'file_path': output_directory, # 'pack': 'zip'})['shock_id'] html_report.append({ 'shock_id': report_shock_id, 'name': os.path.basename(result_file_path), 'label': os.path.basename(result_file_path), 'description': 'HTML summary report for Mutual Information App' }) return html_report def _generate_report(self, result_directory, mutual_info_dict, workspace_name): """ _generate_report: generate summary report """ print('-->I am here *************') uuidStr = str(uuid.uuid4()) output_directory = os.path.join(self.scratch, str(uuid.uuid4())) self._mkdir_p(output_directory) test_file = os.path.join(output_directory, "index.html") self._make_index_html(test_file, mutual_info_dict[1]) #shutil.copy2(os.path.join(os.path.dirname(__file__), 'data', 'index.html'), output_directory) # shutil.copy('/kb/module/data/index.html', result_directory + '/' 
+ uuidStr + '/index.html') json.dump(mutual_info_dict[0], open(os.path.join(output_directory, 'pdata.json'), 'w')) #shutil.copy('pdata.json', result_directory + '/' + uuidStr + '/pdata.json') # DataFileUtils to shock print(output_directory) print(os.listdir(output_directory)) report_shock_result = self.dfu.file_to_shock({ 'file_path': output_directory, 'pack': 'targz' }) #report_shock_result = self.dfu.file_to_shock({'file_path': output_directory, # 'pack': 'zip'}) report_shock_id = report_shock_result['shock_id'] print(report_shock_result) report_file = { 'name': 'index.html', 'description': 'the report', 'shock_id': report_shock_id } log('creating report') #output_html_files = self._generate_html_report(result_directory, # mutual_info_dict) report_params = { 'message': '', 'workspace_name': workspace_name, 'html_links': [report_file], 'file_links': [], 'direct_html_link_index': 0, 'html_window_height': 333, 'report_object_name': 'MutualInfomation_report_' + uuidStr } kbase_report_client = KBaseReport(self.callback_url) output = kbase_report_client.create_extended_report(report_params) report_output = { 'report_name': output['name'], 'report_ref': output['ref'] } return report_output ######### @@@@@@@ALL THREE MUTUAL INFORMATION CALCULATION START FROM HERE@@@@@@@############# def _generate_mutual_info(self, media_matrix, fba_file, mi_options): #print('this is fba_file') #print(fba_file) df1 = pd.read_csv(fba_file[0]) df1.values #df1.as_matrix() #print('-->printing df1')# **** rm #print(df1.to_string())# **** rm #print(type(df1)) # **** rm #print('-->printing media_matrix') #print(media_matrix) df3 = pd.DataFrame(columns=media_matrix[0][1:]) for i in range(1, len(media_matrix)): if i == 1: df3.loc[media_matrix[i][0][0]] = media_matrix[i][0][1:] else: df3.loc[media_matrix[i][0]] = media_matrix[i][1:] #print('-->*************OK') #print(df3) #----Input validation of Media/FBAs with Binary Matrix FBAs------ # 1.0 Number of rows in Media.csv file = (Number of columns 
-1) # 1.0. If they are different: Through an ERROR saying missed match number of FBAs in media and binary matrix. # 1.1 Check whether the elements in Media.csv file contains only binary values (i.e. 0 and 1) # 1.1. If the elements are different: Through an ERROR saying not approapriate input values # 1.2 Check whether the compounds in Media.csv file match with number of FBAs # 1.2. If the compounds are different from number of FBAs: Through an ERROR saying not appropriate input values media_matrix = df3 s_df1 = df1.shape s_df2 = media_matrix.shape #print(media_matrix,type(media_matrix)) Temp_df2 = np.array(media_matrix.values) #print('-->******') #print(Temp_df2) # Create matrix with only the elements remove first column and all the rows Temp_df2 = Temp_df2[0:, 1:] Bin_val_check = np.array_equal(Temp_df2, Temp_df2.astype(bool)) #num_compounds = (s_df2[1])-1 num_compounds = s_df2[1] if ((s_df1[1] - 1) != s_df2[0]) or (Bin_val_check != True) or (int( math.log(s_df2[0], 2)) != num_compounds): print('invalid input values') #-----All possible combination of the chemical compounds---------------------- # 2.0 Sperating m0 from rest of the lables Temp1_df2 = media_matrix #print('-->*************OK') #print(Temp1_df2) cols = Temp1_df2.columns for i in range(0, len(cols)): Temp1_df2.loc[Temp1_df2[cols[i]] == 1, cols[i]] = cols[i] #print('-->*************OK') #print (Temp1_df2) # 2.1 Creating a disctionary for all FBAs except m0 #print(len(Temp1_df2)) #print('--->*********') #print(Temp1_df2) mydict = {} for x in range(0, len(Temp1_df2)): for i in range(0, s_df2[1]): currentvalue = Temp1_df2.iloc[x, i] currentid = Temp1_df2.index[x] mydict.setdefault(currentid, []) if currentvalue != 0: mydict[currentid].append(currentvalue) # Add the first key as m0 media_0_name = Temp1_df2.index[0] mydict[media_0_name] = ["0"] # Sort the keys mydict = collections.OrderedDict(natsort.natsorted(mydict.items())) #print ('--> ********') compoundslist = Temp1_df2.columns.get_values() 
compoundslist.tolist() #print(compoundslist) #print('all possible combination') #print(len(compoundslist)) # List of Compounds combination in the list my_combi_list = [] for L in range(0, len(compoundslist) + 1): for subset in itertools.combinations(compoundslist, L): my_combi_list.append(list(subset)) my_combi_list[0] = [0] # print(my_combi_list) ''' for k, v in mydict.iteritems(): #print('--> ********') print(k, v) ''' # Created a dictionary where the keys: # list of compounds combination # values are corresponding FBAs list in df2 result_dict = {} for element in my_combi_list[1:]: for k, v in mydict.iteritems(): if set(v).issubset(set(map(lambda x: str(x), element))): key = ','.join(map(lambda x: str(x), element)) if result_dict.get(key): media_list = result_dict[key] media_list.append(k) media_list = list(set(media_list)) result_dict.update({key: media_list}) else: result_dict.update({key: [media_0_name, k]}) # Sort the keys result_dict['0'] = [media_0_name] result_dict = collections.OrderedDict( natsort.natsorted(result_dict.items())) # print(result_dict) #print('-->I am here **** OK') #print(result_dict) #print (df1) # Created a dictionary where the keys are: # list of compounds combination # values are compounds combination FBAs with df1 vaules All_Comp_Combi_dic = {} for column, value in result_dict.items(): All_Comp_Combi_dic.update({column: df1.get(value)}) # print('-->All_Comp_Combi_dic******') # print (All_Comp_Combi_dic) # print(result_dict) # To print an item from the All_Comp_Combi_dic df = (pd.DataFrame(All_Comp_Combi_dic.items())) #print('--> printing df') #print(df[0].to_string()) #print(df[1][7]) ######### INTRACELLULAR FLUX MUTUAL INFORMATION CALCULATION ############# if mi_options == "flux": print('Intracellular flux') MI_dict = {} for k in range(0, len(df[0])): drop_rows_df = df[1][k].drop_duplicates(keep="first") drop_columns_df = drop_rows_df.T.drop_duplicates( keep="first").T remove = [] removed = {} count_values = {} cols = 
df[1][k].columns for i in range(len(cols) - 1): duplicated = [] v = df[1][k][cols[i]].values for j in range(i + 1, len(cols)): if np.array_equal(v, df[1][k][cols[j]].values): remove.append(cols[j]) duplicated.append(cols[j]) if duplicated and cols[i] not in remove: removed.update({cols[i]: duplicated}) count = {} for key, value in removed.items(): count.update({key: len(value)}) #print v # print drop_columns_df count_values = count.values() # print count_values count_values = map(lambda x: x + 1, count_values) # print count_values d = {x: count_values.count(x) for x in count_values} #print('-->count_values') #print(count_values) #-------Mutual Inforamtion (MI) calculation------------- FBAs = len(df[1][k].columns) pure_entropy = math.log(FBAs, 2) #print (pure_entropy) (-->ok rm) # If No duplicates exist and list "value" is empty if not count_values: #print("List is empty") No_duplicate_FBAs = len(drop_columns_df.columns) conditional_entropy = -1 * (No_duplicate_FBAs * ( (1 / No_duplicate_FBAs) * ((1 / 1) * math.log(1.0 / 1.0, 2)))) Mutual_Info = pure_entropy - conditional_entropy #print('Mutaul Info:', Mutual_Info) if count_values: # If duplicates exist and list "value" is not empty conditional_entropy = 0 for key in d: #print key, d[key] Temp = -1 * d[key] * (key / float(FBAs)) * key * ( 1.0 / key) * math.log(1.0 / key, 2) conditional_entropy = Temp + conditional_entropy #print "%3f" %Temp Mutual_Info = pure_entropy - conditional_entropy MI_dict[df[0][k]] = Mutual_Info MI_dict['0'] = 0.0 #Sorted MI_dict MI_dict = sorted(MI_dict.items(), key=lambda x: (-len(x[0]), x[0])) MI_dict = OrderedDict(MI_dict) #print(MI_dict) #print('-->rest') #print(compoundslist) #print(num_compounds) x_groups = [[] for x in range(num_compounds)] y_groups = [[] for x in range(num_compounds)] names = [[] for x in range(num_compounds)] Comp_Mapping = [[] for x in range(num_compounds)] for key, val in MI_dict.iteritems(): del_count = key.count(',') x_groups[del_count].append(key) 
y_groups[del_count].append(val) # for x, y in zip(x_groups, y_groups): # data.append(go.Bar(x=x, y=y, name='test')) pdata = [] for i in range(0, len(x_groups)): names[i] = str(i + 1) + ' Compound Combination' Comp_Mapping = str(i + 1) + '-' + compoundslist[i] record = {} record["x"] = [] for e in x_groups[i]: record["x"].append("c" + e) record["y"] = y_groups[i] record["names"] = names[i] record["Comp_Mapping"] = Comp_Mapping pdata.append(record) #print (pdata) #json.dump(pdata, open(self.scratch+'/pdata.json', 'w')) return [pdata, MI_dict] #return MI_dict ######### INPUT COMPONENTS AND BIOMASS FLUX MUTUAL INFORMATION CALCULATION ############# if mi_options == "biomass": # Load the file contain the information of FBAs(media) along with corresponding Biomass (growth) print('biomass flux') df2 = pd.read_csv(fba_file[1]) df2.values #print(df) MI_dict_biomass = {} for r in range(0, len(df[0])): reaction_states = df[1][r].head(1000) def get_groups(flux_df): groups = collections.defaultdict(list) unique = flux_df.aggregate(lambda x: hash(str(x.values))) for k, v in unique[0:].iteritems(): groups[v].append(k) return dict([(i, g) for i, g in enumerate(groups.values())]) n_group = collections.defaultdict(int) groups = get_groups(reaction_states) for group in groups.values(): n_group[len(group)] += 1 groups_count = {} for key, values in groups.items(): groups_count[key] = len(values) # print groups_count # Take first FBA label of every group group_id = {} for k, v in groups.items(): group_id.update({k: groups.values()[k][0]}) # Obtain the Biomass of each Group cols_df = group_id.values() cols_df2 = df2.columns #print (cols_df) # Dictionary of first FBA label of every group and its corresponding number of members groups_label_count = {} for k, v in groups_count.items(): groups_label_count.update({cols_df[k]: v}) #print('groups_label_count') #print(groups_label_count) def get_cond_count(re_group): media_cond = 0 for media in re_group['FBAs']: media_cond += 
groups_label_count[media] return media_cond # Extract FBA Groups biomass inside df2 Groups_Biomass = df2[df2['FBAs'].isin(cols_df)] #print('-->I am here') #print(Groups_Biomass) # Regroup based on the biomass values re_group = Groups_Biomass.groupby('Biomass') biomass_FBAs_groups = re_group.aggregate(get_cond_count) biomass_FBAs_label_groups = Groups_Biomass.groupby( "Biomass", sort=True).sum() #print(biomass_FBAs_label_groups) #print (biomass_FBAs_label_groups) Summery = pd.merge(left=biomass_FBAs_label_groups, left_index=True, right=biomass_FBAs_groups, right_index=True, how='inner') Data_4_CondMI = Summery.groupby('FBAs_y').count() Data_4_CondMI = Data_4_CondMI.to_dict(orient='dict') for k, v in Data_4_CondMI.items(): Data_4_CondMI = v Num_of_FBAs = Data_4_CondMI.keys() Count_Num_of_FBAs = Data_4_CondMI.values() # -------Mutual Inforamtion (MI) calculation Stage II (input compounds respect to BIOMASS------------- # Pure Entropy FBAs = len(df[1][r].columns) pure_entropy = math.log(FBAs, 2) conditional_entropy = 0.0 for l in range(0, len(Count_Num_of_FBAs)): temp = -1 * Count_Num_of_FBAs[l] * ( Num_of_FBAs[l] / float(FBAs)) * Num_of_FBAs[l] * ( 1.0 / float(Num_of_FBAs[l]) * (math.log(1.0 / float(Num_of_FBAs[l]), 2))) conditional_entropy += temp Mutual_Info_Biomass = pure_entropy - conditional_entropy # print('Mutaul Info:', Mutual_Info_Biomass) #print(Mutual_Info_Biomass) MI_dict_biomass.update({df[0][r]: Mutual_Info_Biomass}) #print(MI_dict_biomass) # Sorted MI_dict_biomass MI_dict_biomass = sorted(MI_dict_biomass.items(), key=lambda x: (-len(x[0]), x[0])) MI_dict_biomass = OrderedDict(MI_dict_biomass) #print(MI_dict_biomass) x_groups = [[] for x in range(num_compounds)] y_groups = [[] for x in range(num_compounds)] names = [[] for x in range(num_compounds)] Comp_Mapping = [[] for x in range(num_compounds)] for key, val in MI_dict_biomass.iteritems(): del_count = key.count(',') x_groups[del_count].append(key) y_groups[del_count].append(val) pdata = [] for i in 
range(0, len(x_groups)): names[i] = str(i + 1) + ' Compound Combination' Comp_Mapping = str(i + 1) + '-' + compoundslist[i] record = {} record["x"] = [] for e in x_groups[i]: record["x"].append("c" + e) record["y"] = y_groups[i] record["names"] = names[i] record["Comp_Mapping"] = Comp_Mapping pdata.append(record) return [pdata, MI_dict_biomass] ######### INPUT COMPONENTS AND BIOMASS, SECRETION FLUX MUTUAL INFORMATION CALCULATION ############# if mi_options == "secretion": #Load the file contain the information of FBAs(media) along with corresponding Biomass (growth) print('secretion flux') df4 = pd.read_csv(fba_file[2], header=0, index_col=0) df4.index.name = 'FBAs' df4 = df4.T dfbiomass = pd.read_csv(fba_file[1]) aa = dfbiomass['Biomass'].values.tolist() # print(len(aa)) df4['Biomass'] = aa # print(df4.shape) compoundslist_b_u_s = list(df4.columns.values) #print(compoundslist_b_u_s) MI_dict_b_u_s = {} for r in range(0, len(df[0])): reaction_states = df[1][r].head(1000) def get_groups(flux_df): groups = collections.defaultdict(list) unique = flux_df.aggregate(lambda x: hash(str(x.values))) for k, v in unique[0:].iteritems(): groups[v].append(k) return dict([(i, g) for i, g in enumerate(groups.values())]) n_group = collections.defaultdict(int) groups = get_groups(reaction_states) for group in groups.values(): n_group[len(group)] += 1 #print(n_group) #print(groups) groups_count = {} for key, values in groups.items(): groups_count[key] = len(values) # print(groups_count) # Take first FBA label of every group group_id = {} for k, v in groups.items(): group_id.update({k: groups.values()[k][0]}) # Obtain the Biomass of each Group cols_df = group_id.values() cols_df4 = df4.columns # Dictionary of first FBA label of every group and its corresponding number of members groups_label_count = {} for k, v in groups_count.items(): groups_label_count.update({cols_df[k]: v}) #print(groups_label_count) # Extract FBA Groups biomass inside df4 df5 = df4.reset_index() Groups_Biomass = 
df5[df5['index'].isin(cols_df)] #print(Groups_Biomass) # Regroup based on the biomass values re_group = Groups_Biomass.groupby(compoundslist_b_u_s) #print(re_group) my_list = [] for index, values in re_group: my_list.append(values['index'].values) #print(my_list) B_U_S_dict = {} for media in my_list: if len(media) > 1: media_cond = 0 for i in (0, len(media) - 1): media_cond += groups_label_count[media[i]] B_U_S_dict.update({str(media)[1:-1]: media_cond}) #final_my_dict.update({tuple(media.tolist()):media_cond}) else: B_U_S_dict.update({ str(media)[1:-1]: groups_label_count[str(tuple( media.tolist()))[1:-1][:-1][1:-1]] }) B_U_S_dict = {k: v for k, v in B_U_S_dict.iteritems()} #print(B_U_S_dict) Summery = pd.DataFrame(B_U_S_dict.items(), columns=['index_x', 'index_y']) Data_4_CondMI = Summery.groupby('index_y').count() Data_4_CondMI = Data_4_CondMI.to_dict(orient='dict') #print(Data_4_CondMI) for k, v in Data_4_CondMI.items(): Data_4_CondMI = v Num_of_FBAs = Data_4_CondMI.keys() Count_Num_of_FBAs = Data_4_CondMI.values() #print(Num_of_FBAs) #print(Count_Num_of_FBAs) #print('-->***<---') # -------Mutual Inforamtion (MI) calculation Stage II (input compounds respect to Biomass, Uptake and Secretion------------- # Pure Entropy FBAs = len(df[1][r].columns) pure_entropy = math.log(FBAs, 2) conditional_entropy = 0.0 for l in range(0, len(Count_Num_of_FBAs)): temp = -1 * Count_Num_of_FBAs[l] * ( Num_of_FBAs[l] / float(FBAs)) * Num_of_FBAs[l] * ( 1.0 / float(Num_of_FBAs[l]) * (math.log(1.0 / float(Num_of_FBAs[l]), 2))) conditional_entropy += temp Mutual_Info_B_U_S = pure_entropy - conditional_entropy # print('Mutaul Info:', Mutual_Info_B_U_S) MI_dict_b_u_s.update({df[0][r]: Mutual_Info_B_U_S}) # Sorted MI_dict_biomass MI_dict_b_u_s = sorted(MI_dict_b_u_s.items(), key=lambda x: (-len(x[0]), x[0])) MI_dict_b_u_s = OrderedDict(MI_dict_b_u_s) #print(MI_dict_b_u_s) x_groups = [[] for x in range(num_compounds)] y_groups = [[] for x in range(num_compounds)] names = [[] for x in 
range(num_compounds)] Comp_Mapping = [[] for x in range(num_compounds)] for key, val in MI_dict_b_u_s.iteritems(): del_count = key.count(',') x_groups[del_count].append(key) y_groups[del_count].append(val) # for x, y in zip(x_groups, y_groups): # data.append(go.Bar(x=x, y=y, name='test')) pdata = [] for i in range(0, len(x_groups)): names[i] = str(i + 1) + ' Compound Combination' Comp_Mapping = str(i + 1) + '-' + compoundslist[i] record = {} record["x"] = [] for e in x_groups[i]: record["x"].append("c" + e) record["y"] = y_groups[i] record["names"] = names[i] record["Comp_Mapping"] = Comp_Mapping pdata.append(record) return [pdata, MI_dict_b_u_s]
class TaxonAPI:
    '''
    Module Name:
    TaxonAPI

    Module Description:
    A KBase module: TaxonAPI
    '''

    ######## WARNING FOR GEVENT USERS ####### noqa
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    ######################################### noqa
    VERSION = "1.0.2"
    GIT_URL = ""
    GIT_COMMIT_HASH = "5b8cdf63a676a609ea4f180891cf75006640f148"

    #BEGIN_CLASS_HEADER
    # Workspace type prefixes used to filter referencing objects.
    _GENOME_TYPES = ['KBaseGenomes.Genome',
                     'KBaseGenomeAnnotations.GenomeAnnotation']
    _TAXON_TYPES = ['KBaseGenomeAnnotations.Taxon']

    # NOTE(review): lru_cache on an instance method keys on `self` and keeps
    # instances alive for the cache lifetime; kept as-is to preserve the
    # existing caching behavior of this service.
    @functools32.lru_cache(maxsize=1000)
    def get_object(self, ref):
        """Fetch a full workspace object (data + info) for `ref`, cached."""
        res = self.ws.get_objects2({'objects': [{'ref': ref}]})['data'][0]
        return res

    def get_data(self, ref):
        """Fetch only the 'data' portion of the object at `ref` (cached)."""
        obj = self.get_object(ref)
        return obj['data']

    @functools32.lru_cache(maxsize=1000)
    def translate_to_MD5_types(self, ktype):
        """Translate a single type string to its MD5 type string, cached."""
        return self.ws.translate_to_MD5_types([ktype]).values()[0]

    def get_referrers(self, ref):
        """Map MD5 type string -> list of versioned UPAs of objects that
        reference `ref`."""
        referrers = self.ws.list_referencing_objects([{"ref": ref}])[0]
        object_refs_by_type = dict()
        # Translate all referrer types in one workspace call.
        tlist = []
        for x in referrers:
            tlist.append(x[2])
        typemap = self.ws.translate_to_MD5_types(tlist)
        for x in referrers:
            typestring = typemap[x[2]]
            if typestring not in object_refs_by_type:
                object_refs_by_type[typestring] = list()
            # object_info tuple: [6]=wsid, [0]=objid, [4]=version
            upa = '%d/%d/%d' % (x[6], x[0], x[4])
            object_refs_by_type[typestring].append(upa)
        return object_refs_by_type

    def get_reffers_type(self, ref, types):
        """Return UPAs of referrers whose base type (before '-version') is
        in `types`."""
        referrers = self.get_referrers(ref)
        children = list()
        for object_type in referrers:
            if object_type.split('-')[0] in types:
                children.extend(referrers[object_type])
        return children

    def make_hash(self, i):
        """Convert a workspace object_info tuple into the ObjectInfo dict
        shape this API returns."""
        omd = i[10]
        # Normalize empty metadata to None.
        if i[10] == {}:
            omd = None
        return {
            'type_string': i[2],
            'workspace_id': i[6],
            'object_checksum': i[8],
            'object_reference': '%d/%d' % (i[6], i[0]),
            'object_size': i[9],
            'saved_by': i[5],
            'object_id': i[0],
            'save_date': i[3],
            'object_metadata': omd,
            'object_name': i[1],
            'version': i[4],
            'workspace_name': i[7],
            'object_reference_versioned': '%d/%d/%d' % (i[6], i[0], i[4])
        }
    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.workspaceURL = config['workspace-url']
        self.ws = Workspace(self.workspaceURL)
        self.shockURL = config['shock-url']
        # NOTE(review): this attaches a new StreamHandler to the root logger
        # on every instantiation, which duplicates log lines if the service
        # constructs more than one TaxonAPI; kept to preserve behavior.
        self.logger = logging.getLogger()
        log_handler = logging.StreamHandler()
        log_handler.setFormatter(
            logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
        self.logger.addHandler(log_handler)
        #END_CONSTRUCTOR
        pass

    def get_parent(self, ctx, ref):
        """
        Retrieve parent Taxon.
        @return Reference to parent Taxon.
        :param ref: instance of type "ObjectReference"
        :returns: instance of type "ObjectReference"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_parent
        data = self.get_data(ref)
        try:
            returnVal = data['parent_taxon_ref']
        except KeyError:
            # Fixed: was a bare `except:` that swallowed *every* exception.
            # Only a missing 'parent_taxon_ref' key (e.g. a root taxon) is
            # the expected no-parent case; report it as an empty reference.
            returnVal = ''
        #END get_parent

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_parent return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_children(self, ctx, ref):
        """
        Retrieve children Taxon.
        @return List of references to child Taxons.
        :param ref: instance of type "ObjectReference"
        :returns: instance of list of type "ObjectReference"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_children
        returnVal = self.get_reffers_type(ref, self._TAXON_TYPES)
        #END get_children

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_children return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_genome_annotations(self, ctx, ref):
        """
        funcdef GenomeAnnotation(s) that refer to this Taxon.
        If this is accessing a KBaseGenomes.Genome object, it will
        return an empty list (this information is not available).
        @return List of references to GenomeAnnotation objects.
        :param ref: instance of type "ObjectReference"
        :returns: instance of list of type "ObjectReference"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_genome_annotations
        returnVal = self.get_reffers_type(ref, self._GENOME_TYPES)
        #END get_genome_annotations

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_genome_annotations return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_scientific_lineage(self, ctx, ref):
        """
        Retrieve the scientific lineage.
        @return Strings for each 'unit' of the lineage, ordered in the usual
        way from Domain to Kingdom to Phylum, etc.
        :param ref: instance of type "ObjectReference"
        :returns: instance of list of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_scientific_lineage
        # Consistency fix: use the cached get_data() helper (same data as a
        # direct get_objects2 call, but avoids a workspace round trip on
        # repeated lookups of the same ref).
        o = self.get_data(ref)
        returnVal = [x.strip() for x in o['scientific_lineage'].split(";")]
        #END get_scientific_lineage

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_scientific_lineage return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_scientific_name(self, ctx, ref):
        """
        Retrieve the scientific name.
        @return The scientific name, e.g., "Escherichia Coli K12 str. MG1655"
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_scientific_name
        # Consistency fix: route through the cached get_data() helper.
        obj = self.get_data(ref)
        returnVal = obj['scientific_name']
        #END get_scientific_name

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_scientific_name return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_taxonomic_id(self, ctx, ref):
        """
        Retrieve the NCBI taxonomic ID of this Taxon.
        For type KBaseGenomes.Genome, the ``source_id`` will be returned.
        @return Integer taxonomic ID.
        :param ref: instance of type "ObjectReference"
        :returns: instance of Long
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_taxonomic_id
        # Consistency fix: route through the cached get_data() helper.
        obj = self.get_data(ref)
        returnVal = obj['taxonomy_id']
        #END get_taxonomic_id

        # At some point might do deeper type checking...
        if not isinstance(returnVal, int):
            raise ValueError('Method get_taxonomic_id return value ' +
                             'returnVal is not type int as required.')
        # return the results
        return [returnVal]

    def get_kingdom(self, ctx, ref):
        """
        Retrieve the kingdom.
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_kingdom
        # Consistency fix: route through the cached get_data() helper.
        obj = self.get_data(ref)
        returnVal = obj['kingdom']
        #END get_kingdom

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_kingdom return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_domain(self, ctx, ref):
        """
        Retrieve the domain.
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_domain
        # Consistency fix: route through the cached get_data() helper.
        obj = self.get_data(ref)
        returnVal = obj['domain']
        #END get_domain

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_domain return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_genetic_code(self, ctx, ref):
        """
        Retrieve the genetic code.
        :param ref: instance of type "ObjectReference"
        :returns: instance of Long
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_genetic_code
        # Consistency fix: route through the cached get_data() helper.
        obj = self.get_data(ref)
        returnVal = obj['genetic_code']
        #END get_genetic_code

        # At some point might do deeper type checking...
        if not isinstance(returnVal, int):
            raise ValueError('Method get_genetic_code return value ' +
                             'returnVal is not type int as required.')
        # return the results
        return [returnVal]

    def get_aliases(self, ctx, ref):
        """
        Retrieve the aliases.
        :param ref: instance of type "ObjectReference"
        :returns: instance of list of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_aliases
        # Consistency fix: route through the cached get_data() helper.
        obj = self.get_data(ref)
        if 'aliases' in obj:
            returnVal = obj['aliases']
        else:
            # Aliases are optional on the Taxon type.
            returnVal = list()
        #END get_aliases

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_aliases return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_info(self, ctx, ref):
        """
        Retrieve object info.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of type "ObjectInfo" (* @skip documentation) ->
           structure: parameter "object_id" of Long, parameter "object_name"
           of String, parameter "object_reference" of String, parameter
           "object_reference_versioned" of String, parameter "type_string"
           of String, parameter "save_date" of String, parameter "version"
           of Long, parameter "saved_by" of String, parameter "workspace_id"
           of Long, parameter "workspace_name" of String, parameter
           "object_checksum" of String, parameter "object_size" of Long,
           parameter "object_metadata" of mapping from String to String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_info
        i = self.get_object(ref)['info']
        returnVal = self.make_hash(i)
        #END get_info

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method get_info return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def get_history(self, ctx, ref):
        """
        Retrieve object history.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of type "ObjectHistory" (* @skip documentation) ->
           list of type "ObjectInfo" (see get_info for the structure)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_history
        # One ObjectInfo hash per saved version of the object.
        returnVal = []
        for i in self.ws.get_object_history({'ref': ref}):
            returnVal.append(self.make_hash(i))
        #END get_history

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_history return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_provenance(self, ctx, ref):
        """
        Retrieve object provenance.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of type "ObjectProvenance" (* @skip documentation)
           -> list of type "ObjectProvenanceAction" (* @skip documentation)
           -> structure with parameters "time", "service_name",
           "service_version", "service_method", "method_parameters",
           "script_name", "script_version", "script_command_line",
           "input_object_references", "validated_object_references",
           "intermediate_input_ids", "intermediate_output_ids",
           "external_data" (list of "ExternalDataUnit"), "description"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_provenance
        prov = self.ws.get_object_provenance([{"ref": ref}])[0]['provenance']
        returnVal = []
        # Map workspace provenance keys -> this API's field names.
        copy_keys = {"time": "time",
                     "service": "service_name",
                     "service_ver": "service_version",
                     "method": "service_method",
                     "method_params": "method_parameters",
                     "script": "script_name",
                     "script_ver": "script_version",
                     "script_command_line": "script_command_line",
                     "input_ws_objects": "input_object_references",
                     "resolved_ws_objects": "validated_object_references",
                     "intermediate_incoming": "intermediate_input_ids",
                     "intermediate_outgoing": "intermediate_output_ids",
                     "external_data": "external_data",
                     "description": "description"
                     }
        for object_provenance in prov:
            action = dict()
            for k in copy_keys:
                if k in object_provenance:
                    # Skip empty lists so the output only carries
                    # populated fields.
                    if isinstance(object_provenance[k], list) and \
                            len(object_provenance[k]) == 0:
                        continue
                    action[copy_keys[k]] = object_provenance[k]
            returnVal.append(action)
        #END get_provenance

        # At some point might do deeper type checking...
        if not isinstance(returnVal, list):
            raise ValueError('Method get_provenance return value ' +
                             'returnVal is not type list as required.')
        # return the results
        return [returnVal]

    def get_id(self, ctx, ref):
        """
        Retrieve object identifier.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of Long
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_id
        # object_info[0] is the numeric object id.
        returnVal = self.get_object(ref)['info'][0]
        #END get_id

        # At some point might do deeper type checking...
        if not isinstance(returnVal, int):
            raise ValueError('Method get_id return value ' +
                             'returnVal is not type int as required.')
        # return the results
        return [returnVal]

    def get_name(self, ctx, ref):
        """
        Retrieve object name.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_name
        # object_info[1] is the object name.
        returnVal = self.get_object(ref)['info'][1]
        #END get_name

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_name return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_version(self, ctx, ref):
        """
        Retrieve object version.
        @skip documentation
        :param ref: instance of type "ObjectReference"
        :returns: instance of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_version
        # object_info[4] is the integer version; API contract is a string.
        returnVal = str(self.get_object(ref)['info'][4])
        #END get_version

        # At some point might do deeper type checking...
        if not isinstance(returnVal, basestring):
            raise ValueError('Method get_version return value ' +
                             'returnVal is not type basestring as required.')
        # return the results
        return [returnVal]

    def get_all_data(self, ctx, params):
        """
        :param params: instance of type "GetAllDataParams" -> structure:
           parameter "ref" of type "ObjectReference", parameter
           "include_decorated_scientific_lineage" of type "boolean" (A
           boolean. 0 = false, other = true.), parameter
           "include_decorated_children" of type "boolean", parameter
           "exclude_children" of type "boolean"
        :returns: instance of type "TaxonData" -> structure with parameters
           "parent", "children", "decorated_children",
           "scientific_lineage", "decorated_scientific_lineage",
           "scientific_name", "taxonomic_id", "kingdom", "domain",
           "genetic_code", "aliases", "obj_info"
        """
        # ctx is the context object
        # return variables are: d
        #BEGIN get_all_data
        d = {}
        ref = params['ref']
        obj = self.get_object(ref)
        data = obj['data']
        try:
            d['parent'] = data['parent_taxon_ref']
        except KeyError:
            # Root taxa legitimately have no parent; record None.
            print('Error getting parent for ' + ref)
            d['parent'] = None
        if 'exclude_children' in params and params['exclude_children'] == 1:
            pass
        else:
            d['children'] = self.get_reffers_type(ref, self._TAXON_TYPES)
        d['scientific_lineage'] = data['scientific_lineage']
        d['scientific_name'] = data['scientific_name']
        d['taxonomic_id'] = data['taxonomy_id']
        try:
            # throws error if not found, so catch and log it
            d['kingdom'] = data['kingdom']
        except KeyError:
            print('Error getting kingdom for ' + ref)
            d['kingdom'] = None
        d['domain'] = data['domain']
        d['genetic_code'] = data['genetic_code']
        d['aliases'] = None
        if 'aliases' in data:
            d['aliases'] = data['aliases']
        d['info'] = self.make_hash(obj['info'])
        key = 'include_decorated_scientific_lineage'
        if key in params and params[key] == 1:
            l = self.get_decorated_scientific_lineage(ctx, {'ref': ref})[0]
            d['decorated_scientific_lineage'] = \
                l['decorated_scientific_lineage']
        key = 'include_decorated_children'
        if key in params and params[key] == 1:
            l = self.get_decorated_children(ctx, {'ref': ref})[0]
            d['decorated_children'] = l['decorated_children']
        #END get_all_data

        # At some point might do deeper type checking...
        if not isinstance(d, dict):
            raise ValueError('Method get_all_data return value ' +
                             'd is not type dict as required.')
        # return the results
        return [d]

    def get_decorated_scientific_lineage(self, ctx, params):
        """
        :param params: instance of type
           "GetDecoratedScientificLineageParams" -> structure: parameter
           "ref" of type "ObjectReference"
        :returns: instance of type "DecoratedScientificLineage" (list starts
           at the root, and goes on down to this) -> structure: parameter
           "decorated_scientific_lineage" of list of type "TaxonInfo" ->
           structure: parameter "ref" of type "ObjectReference", parameter
           "scientific_name" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_decorated_scientific_lineage
        lineageList = []
        ref = params['ref']
        while True:
            parent_data = None
            try:
                # note: doesn't look like there is a way to get a reference
                # of a Taxon directly (without constructing it from
                # object_info), so first get reference, then instantiate
                # another API object
                parent_ref = self.get_data(ref)['parent_taxon_ref']
                if parent_ref is not None:
                    data = self.get_data(ref)
                    scientific_name = data['scientific_name']
                    # NOTE(review): each entry pairs the *parent's* ref with
                    # the *current* node's name; combined with the final
                    # [:-1] trim this appears deliberate — confirm before
                    # changing.
                    if scientific_name != 'root':
                        parent_data = {
                            'ref': parent_ref,
                            'scientific_name': scientific_name
                        }
                        ref = parent_ref
            except KeyError:
                # case where parent is not found
                pass
            if parent_data is not None:
                lineageList.append(parent_data)
            else:
                break
        # reverse list to match scientific_lineage style
        lineageList.reverse()
        returnVal = {'decorated_scientific_lineage': lineageList[:-1]}
        #END get_decorated_scientific_lineage

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method get_decorated_scientific_lineage '
                             'return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def get_decorated_children(self, ctx, params):
        """
        :param params: instance of type "GetDecoratedChildrenParams" ->
           structure: parameter "ref" of type "ObjectReference"
        :returns: instance of type "DecoratedChildren" -> structure:
           parameter "decorated_children" of list of type "TaxonInfo" ->
           structure: parameter "ref" of type "ObjectReference", parameter
           "scientific_name" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN get_decorated_children
        ref = params['ref']
        children_refs = self.get_reffers_type(ref, self._TAXON_TYPES)
        decorated_children = []
        for child_ref in children_refs:
            decorated_children.append({
                'ref': child_ref,
                'scientific_name':
                    self.get_data(child_ref)['scientific_name']
            })
        returnVal = {'decorated_children': decorated_children}
        #END get_decorated_children

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method get_decorated_children return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def status(self, ctx):
        #BEGIN_STATUS
        returnVal = {'state': "OK",
                     'message': "",
                     'version': self.VERSION,
                     'git_url': self.GIT_URL,
                     'git_commit_hash': self.GIT_COMMIT_HASH}
        #END_STATUS
        return [returnVal]
class FeatureSetBuilder:
    """
    Builds FeatureSet objects (and optionally filtered ExpressionMatrix
    objects) from a DifferentialExpressionMatrixSet, applying p-value,
    q-value and fold-change cutoffs, and generates a summary report.
    """

    def __init__(self, config):
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.shock_url = config['shock-url']
        self.ws = Workspace(self.ws_url, token=self.token)
        self.dfu = DataFileUtil(self.callback_url)
        self.scratch = config['scratch']

    def _mkdir_p(self, path):
        """_mkdir_p: make directory for given path (no error if it exists)"""
        if not path:
            return
        # exist_ok replaces the old errno.EEXIST check needed on Python 2
        os.makedirs(path, exist_ok=True)

    def _validate_upload_featureset_from_diff_expr_params(self, params):
        """
        _validate_upload_featureset_from_diff_expr_params:
                validates params passed to upload_featureset_from_diff_expr method
        """
        log('start validating upload_featureset_from_diff_expr params')

        # check for required parameters
        for p in ['diff_expression_ref', 'workspace_name',
                  'p_cutoff', 'q_cutoff', 'fold_change_cutoff']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

        p = params.get('fold_scale_type')
        if p and p != 'logarithm':
            raise ValueError('"fold_scale_type" parameter must be set to "logarithm", if used')

    def _generate_report(self, up_feature_set_ref_list, down_feature_set_ref_list,
                         filtered_expression_matrix_ref_list, workspace_name):
        """_generate_report: generate the summary report for created objects"""
        log('start creating report')

        output_html_files = self._generate_html_report(up_feature_set_ref_list,
                                                       down_feature_set_ref_list)

        objects_created = []
        objects_created += [{'ref': ref, 'description': 'Upper FeatureSet Object'}
                            for ref in up_feature_set_ref_list]
        objects_created += [{'ref': ref, 'description': 'Lower FeatureSet Object'}
                            for ref in down_feature_set_ref_list]
        objects_created += [{'ref': ref, 'description': 'Filtered ExpressionMatrix Object'}
                            for ref in filtered_expression_matrix_ref_list]

        report_params = {'message': '',
                         'workspace_name': workspace_name,
                         'objects_created': objects_created,
                         'html_links': output_html_files,
                         'direct_html_link_index': 0,
                         'html_window_height': 333,
                         'report_object_name': 'kb_FeatureSetUtils_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url)
        output = kbase_report_client.create_extended_report(report_params)

        return {'report_name': output['name'], 'report_ref': output['ref']}

    def _feature_set_summary_rows(self, feature_set_ref_list):
        """Build '<tr>' HTML rows (FeatureSet name, feature count) for refs."""
        content = ''
        for feature_set_ref in feature_set_ref_list:
            feature_set_obj = self.ws.get_objects2(
                {'objects': [{'ref': feature_set_ref}]})['data'][0]
            feature_set_name = feature_set_obj['info'][1]
            elements = feature_set_obj['data'].get('elements')
            content += '<tr><td>{}</td><td>{}</td></tr>'.format(feature_set_name,
                                                                len(elements))
        return content

    def _generate_html_report(self, up_feature_set_ref_list, down_feature_set_ref_list):
        """_generate_html_report: generate html summary report"""
        log('start generating html report')
        html_report = []

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(output_directory)
        result_file_path = os.path.join(output_directory, 'report.html')

        # the two loops in the original were identical; factored into a helper
        upper_feature_content = self._feature_set_summary_rows(up_feature_set_ref_list)
        lower_feature_content = self._feature_set_summary_rows(down_feature_set_ref_list)

        with open(result_file_path, 'w') as result_file:
            with open(os.path.join(os.path.dirname(__file__), 'report_template.html'),
                      'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace(
                    '<tr><td>Upper_FeatureSet</td></tr>', upper_feature_content)
                report_template = report_template.replace(
                    '<tr><td>Lower_FeatureSet</td></tr>', lower_feature_content)
                result_file.write(report_template)

        html_report.append({'path': result_file_path,
                            'name': os.path.basename(result_file_path),
                            'label': os.path.basename(result_file_path),
                            'description': 'HTML summary report'})
        return html_report

    def _process_diff_expression(self, diff_expression_set_ref, result_directory,
                                 condition_label_pair):
        """_process_diff_expression: extract the DifferentialExpression member
        matching condition_label_pair into a CSV file.

        Returns (csv_path, genome_ref, matching_member_ref).
        """
        log('start processing differential expression object')

        diff_expr_set_data = self.ws.get_objects2(
            {'objects': [{'ref': diff_expression_set_ref}]})['data'][0]['data']
        set_items = diff_expr_set_data['items']

        diff_expr_matrix_file_name = 'gene_results.csv'
        diff_expr_matrix_file = os.path.join(result_directory, diff_expr_matrix_file_name)

        fieldnames = ['gene_id', 'log2_fold_change', 'p_value', 'q_value']
        # BUG FIX: the original re-opened the file in 'ab' (binary) mode to
        # append rows, which raises TypeError with csv on Python 3; write
        # header and rows through a single text-mode handle (newline='' is
        # required by the csv module).
        with open(diff_expr_matrix_file, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

            for set_item in set_items:
                diff_expression_ref = set_item['ref']
                # BUG FIX: map() is a lazy iterator on Python 3 and cannot be
                # indexed; materialize the split labels into a list.
                label_list = [x.strip() for x in set_item['label'].split(',')]
                condition_1 = label_list[0]
                condition_2 = label_list[1]

                if condition_1 in condition_label_pair and condition_2 in condition_label_pair:
                    # fetch member data only for the matching pair (the
                    # original fetched every member unconditionally)
                    diff_expression_data = self.ws.get_objects2(
                        {'objects': [{'ref': diff_expression_ref}]})['data'][0]['data']
                    genome_id = diff_expression_data['genome_ref']
                    matrix_data = diff_expression_data['data']
                    selected_diff_expression_ref = diff_expression_ref

                    row_ids = matrix_data.get('row_ids')
                    row_values = matrix_data.get('values')
                    for pos, row_id in enumerate(row_ids):
                        row_value = row_values[pos]
                        writer.writerow({'gene_id': row_id,
                                         'log2_fold_change': row_value[0],
                                         'p_value': row_value[1],
                                         'q_value': row_value[2]})

        return diff_expr_matrix_file, genome_id, selected_diff_expression_ref

    def _generate_feature_set(self, feature_ids, genome_id, workspace_name, feature_set_name):
        """
        _generate_feature_set: generate and save a KBaseCollections.FeatureSet object

        KBaseCollections.FeatureSet type:
        typedef structure {
            string description;
            list<feature_id> element_ordering;
            mapping<feature_id, list<genome_ref>> elements;
        } FeatureSet;
        """
        log('start saving KBaseCollections.FeatureSet object')

        if isinstance(workspace_name, int) or workspace_name.isdigit():
            workspace_id = workspace_name
        else:
            workspace_id = self.dfu.ws_name_to_id(workspace_name)

        # BUG FIX: the original used a bare map() for its side effect, which
        # never executes on Python 3 and left 'elements' empty; build eagerly.
        elements = {feature_id: [genome_id] for feature_id in feature_ids}
        feature_set_data = {'description': 'Generated FeatureSet from DifferentialExpression',
                            'element_ordering': feature_ids,
                            'elements': elements}

        object_type = 'KBaseCollections.FeatureSet'
        save_object_params = {
            'id': workspace_id,
            'objects': [{'type': object_type,
                         'data': feature_set_data,
                         'name': feature_set_name}]}

        dfu_oi = self.dfu.save_objects(save_object_params)[0]
        feature_set_obj_ref = str(dfu_oi[6]) + '/' + str(dfu_oi[0]) + '/' + str(dfu_oi[4])
        return feature_set_obj_ref

    def _process_matrix_file(self, diff_expr_matrix_file, comp_p_value, comp_q_value,
                             comp_fold_change_cutoff):
        """_process_matrix_file: filter the gene-results CSV by given cutoffs.

        Returns (up_feature_ids, down_feature_ids), each de-duplicated.
        """
        log('start processing matrix file')

        up_feature_ids = []
        down_feature_ids = []

        # the cutoff is applied symmetrically, so work with its magnitude
        if comp_fold_change_cutoff < 0:
            comp_fold_change_cutoff = -comp_fold_change_cutoff

        null_value = {'NA', 'null', ''}  # hoisted out of the per-row loop
        with open(diff_expr_matrix_file, 'r') as file:
            reader = csv.DictReader(file)
            for row in reader:
                feature_id = row['gene_id']
                row_p_value = row['p_value']
                row_q_value = row['q_value']
                row_fold_change_cutoff = row['log2_fold_change']

                col_value = {row_p_value, row_q_value, row_fold_change_cutoff}
                # skip rows with any missing statistic
                if not col_value.intersection(null_value):
                    p_value_condition = float(row_p_value) <= comp_p_value
                    q_value_condition = float(row_q_value) <= comp_q_value
                    up_matches_condition = (p_value_condition and q_value_condition and
                                            (float(row_fold_change_cutoff) >=
                                             comp_fold_change_cutoff))
                    down_matches_condition = (p_value_condition and q_value_condition and
                                              (float(row_fold_change_cutoff) <=
                                               -comp_fold_change_cutoff))
                    if up_matches_condition:
                        up_feature_ids.append(feature_id)
                    elif down_matches_condition:
                        down_feature_ids.append(feature_id)

        return list(set(up_feature_ids)), list(set(down_feature_ids))

    def _filter_expression_matrix(self, expression_matrix_ref, feature_ids,
                                  workspace_name, filtered_expression_matrix_suffix,
                                  diff_expression_matrix_ref):
        """_filter_expression_matrix: save a copy of the ExpressionMatrix
        restricted to feature_ids; returns the new object reference."""
        log('start saving KBaseFeatureValues.ExpressionMatrix object')

        if isinstance(workspace_name, int) or workspace_name.isdigit():
            workspace_id = workspace_name
        else:
            workspace_id = self.dfu.ws_name_to_id(workspace_name)

        expression_matrix_obj = self.dfu.get_objects(
            {'object_refs': [expression_matrix_ref]})['data'][0]
        expression_matrix_info = expression_matrix_obj['info']
        expression_matrix_data = expression_matrix_obj['data']
        expression_matrix_name = expression_matrix_info[1]

        if re.match('.*_*[Ee]xpression_*[Mm]atrix', expression_matrix_name):
            filtered_expression_matrix_name = re.sub('_*[Ee]xpression_*[Mm]atrix',
                                                     filtered_expression_matrix_suffix,
                                                     expression_matrix_name)
        else:
            filtered_expression_matrix_name = expression_matrix_name + \
                filtered_expression_matrix_suffix

        # shallow copies are fine: 'data' is replaced wholesale below, so the
        # fetched object's nested dicts are never mutated
        filtered_expression_matrix_data = expression_matrix_data.copy()
        data = filtered_expression_matrix_data['data']
        row_ids = data['row_ids']
        values = data['values']
        filtered_data = data.copy()

        feature_id_set = set(feature_ids)  # O(1) membership in the loop below
        filtered_row_ids = []
        filtered_values = []
        for pos, row_id in enumerate(row_ids):
            if row_id in feature_id_set:
                filtered_row_ids.append(row_id)
                filtered_values.append(values[pos])

        filtered_data['row_ids'] = filtered_row_ids
        filtered_data['values'] = filtered_values
        filtered_expression_matrix_data['data'] = filtered_data

        object_type = 'KBaseFeatureValues.ExpressionMatrix'
        save_object_params = {
            'id': workspace_id,
            'objects': [{'type': object_type,
                         'data': filtered_expression_matrix_data,
                         'name': filtered_expression_matrix_name,
                         'extra_provenance_input_refs': [diff_expression_matrix_ref]}]}

        dfu_oi = self.dfu.save_objects(save_object_params)[0]
        filtered_expression_matrix_ref = str(dfu_oi[6]) + '/' + str(dfu_oi[0]) + \
            '/' + str(dfu_oi[4])
        return filtered_expression_matrix_ref

    def _xor(self, a, b):
        """Logical exclusive-or over truthiness of the two arguments."""
        return bool(a) != bool(b)

    def _check_input_labels(self, condition_pairs, available_condition_labels):
        """_check_input_labels: validate user-supplied condition pairs against
        the labels available in the DifferentialExpressionMatrixSet."""
        checked = True
        for condition_pair in condition_pairs:
            label_string = condition_pair['label_string'][0].strip()
            # BUG FIX: map() cannot be indexed on Python 3; use a list
            label_list = [x.strip() for x in label_string.split(',')]
            first_label = label_list[0]
            second_label = label_list[1]

            if first_label not in available_condition_labels:
                error_msg = 'Condition: {} is not available. '.format(first_label)
                error_msg += 'Available conditions: {}'.format(available_condition_labels)
                raise ValueError(error_msg)

            if second_label not in available_condition_labels:
                error_msg = 'Condition: {} is not available. '.format(second_label)
                error_msg += 'Available conditions: {}'.format(available_condition_labels)
                raise ValueError(error_msg)

            if first_label == second_label:
                raise ValueError('Input conditions are the same')

        return checked

    def _get_condition_labels(self, diff_expression_set_ref):
        """_get_condition_labels: collect all condition label pairs and the
        set of individual labels present in the set object."""
        log('getting all possible condition pairs')

        condition_label_pairs = []
        available_condition_labels = set()
        diff_expression_set_obj = self.ws.get_objects2(
            {'objects': [{'ref': diff_expression_set_ref}]})['data'][0]
        diff_expression_set_data = diff_expression_set_obj['data']
        items = diff_expression_set_data.get('items')
        for item in items:
            # BUG FIX: materialize map() (lazy on Python 3) into a list; the
            # original also consumed the iterator when building the label set,
            # leaving an exhausted iterator in condition_label_pairs.
            label_list = [x.strip() for x in item['label'].split(',')]
            condition_label_pairs.append(label_list)
            available_condition_labels.update(label_list)

        log('all possible condition pairs:\n{}'.format(condition_label_pairs))

        return condition_label_pairs, available_condition_labels

    def upload_featureset_from_diff_expr(self, params):
        """
        upload_featureset_from_diff_expr: create FeatureSet from
        RNASeqDifferentialExpression based on given threshold cutoffs

        required params:
        diff_expression_ref: DifferetialExpressionMatrixSet object reference
        expression_matrix_ref: ExpressionMatrix object reference
        p_cutoff: p value cutoff
        q_cutoff: q value cutoff
        fold_scale_type: one of ["linear", "log2+1", "log10+1"]
        fold_change_cutoff: fold change cutoff
        feature_set_suffix: Result FeatureSet object name suffix
        filtered_expression_matrix_suffix: Result ExpressionMatrix object name suffix
        workspace_name: the name of the workspace it gets saved to

        return:
        result_directory: folder path that holds all files generated
        up_feature_set_ref_list: list of generated upper FeatureSet object reference
        down_feature_set_ref_list: list of generated down FeatureSet object reference
        filtered_expression_matrix_ref_list: list of generated filtered
                                             ExpressionMatrix object ref
        report_name: report name generated by KBaseReport
        report_ref: report reference generated by KBaseReport
        """
        self._validate_upload_featureset_from_diff_expr_params(params)

        diff_expression_set_ref = params.get('diff_expression_ref')
        diff_expression_set_info = self.ws.get_object_info3(
            {"objects": [{"ref": diff_expression_set_ref}]})['infos'][0]
        diff_expression_set_name = diff_expression_set_info[1]

        result_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(result_directory)

        (available_condition_label_pairs,
         available_condition_labels) = self._get_condition_labels(diff_expression_set_ref)

        run_all_combinations = params.get('run_all_combinations')
        condition_pairs = params.get('condition_pairs')
        # exactly one of the two selection modes must be supplied
        if not self._xor(run_all_combinations, condition_pairs):
            error_msg = "Invalid input:\nselect 'Run All Paired Condition Combinations' "
            error_msg += "or provide partial condition pairs. Don't do both or neither"
            raise ValueError(error_msg)

        if run_all_combinations:
            condition_label_pairs = available_condition_label_pairs
        else:
            if self._check_input_labels(condition_pairs, available_condition_labels):
                condition_label_pairs = []
                for condition_pair in condition_pairs:
                    label_string = condition_pair['label_string'][0].strip()
                    # BUG FIX: map() cannot be joined/indexed reliably on Py3
                    condition_labels = [x.strip() for x in label_string.split(',')]
                    condition_label_pairs.append(condition_labels)

        up_feature_set_ref_list = []
        down_feature_set_ref_list = []
        filtered_expression_matrix_ref_list = []

        for condition_label_pair in condition_label_pairs:
            condition_string = '_' + '_'.join(condition_label_pair)
            diff_expr_matrix_file, genome_id, diff_expr_matrix_ref = \
                self._process_diff_expression(diff_expression_set_ref,
                                              result_directory,
                                              condition_label_pair)
            up_feature_ids, down_feature_ids = self._process_matrix_file(
                diff_expr_matrix_file, params.get('p_cutoff'), params.get('q_cutoff'),
                params.get('fold_change_cutoff'))

            filtered_expression_matrix_suffix = condition_string + \
                params.get('filtered_expression_matrix_suffix')
            if params.get('expression_matrix_ref'):
                filtered_expression_matrix_ref = self._filter_expression_matrix(
                    params.get('expression_matrix_ref'),
                    up_feature_ids + down_feature_ids,
                    params.get('workspace_name'),
                    filtered_expression_matrix_suffix,
                    diff_expr_matrix_ref)
                filtered_expression_matrix_ref_list.append(filtered_expression_matrix_ref)

            feature_set_suffix = params.get('feature_set_suffix')
            up_feature_set_name = diff_expression_set_name + \
                condition_string + '_up' + feature_set_suffix
            up_feature_set_ref = self._generate_feature_set(up_feature_ids,
                                                            genome_id,
                                                            params.get('workspace_name'),
                                                            up_feature_set_name)
            up_feature_set_ref_list.append(up_feature_set_ref)

            down_feature_set_name = diff_expression_set_name + \
                condition_string + '_down' + feature_set_suffix
            down_feature_set_ref = self._generate_feature_set(down_feature_ids,
                                                              genome_id,
                                                              params.get('workspace_name'),
                                                              down_feature_set_name)
            down_feature_set_ref_list.append(down_feature_set_ref)

        returnVal = {'result_directory': result_directory,
                     'up_feature_set_ref_list': up_feature_set_ref_list,
                     'down_feature_set_ref_list': down_feature_set_ref_list,
                     'filtered_expression_matrix_ref_list': filtered_expression_matrix_ref_list}

        report_output = self._generate_report(up_feature_set_ref_list,
                                              down_feature_set_ref_list,
                                              filtered_expression_matrix_ref_list,
                                              params.get('workspace_name'))
        returnVal.update(report_output)

        return returnVal
    def setUpClass(cls):
        """One-time test fixture: read deployment config, build authenticated
        contexts for two test users, and create example objects in a test
        workspace."""
        token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('NarrativeService'):
            cls.cfg[nameval[0]] = nameval[1]
        authServiceUrl = cls.cfg.get(
            'auth-service-url',
            "https://kbase.us/services/authorization/Sessions/Login")
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token': token,
            'user_id': user_id,
            'provenance': [{
                'service': 'NarrativeService',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated': 1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.serviceWizardURL = cls.cfg['service-wizard']
        cls.wsClient1 = Workspace(cls.wsURL, token=token)
        cls.serviceImpl = NarrativeService(cls.cfg)
        cls.SetAPI_version = cls.cfg['setapi-version']
        cls.DataPalette_version = cls.cfg['datapaletteservice-version']
        cls.intro_text_file = cls.cfg['intro-markdown-file']
        # Second user
        test_cfg_file = '/kb/module/work/test.cfg'
        test_cfg_text = "[test]\n"
        with open(test_cfg_file, "r") as f:
            test_cfg_text += f.read()
        config = ConfigParser()
        # NOTE(review): StringIO.StringIO and ConfigParser.readfp are
        # Python-2-era idioms (Python 3 uses io.StringIO / read_file) —
        # confirm which interpreter this suite actually runs under.
        config.readfp(StringIO.StringIO(test_cfg_text))
        test_cfg_dict = dict(config.items("test"))
        if 'test_token2' not in test_cfg_dict:
            raise ValueError(
                "Configuration in <module>/test_local/test.cfg file should " +
                "include second user credentials ('test_token2' key)")
        token2 = test_cfg_dict['test_token2']
        user2 = auth_client.get_user(token2)
        print("Test user2: " + user2)
        cls.ctx2 = MethodContext(None)
        cls.ctx2.update({
            'token': token2,
            'user_id': user2,
            'provenance': [{
                'service': 'NarrativeService',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated': 1
        })
        cls.wsClient2 = Workspace(cls.wsURL, token=token2)
        cls.wsClients = [cls.wsClient1, cls.wsClient2]
        # one created-workspace list per client, parallel to cls.wsClients
        cls.createdWorkspaces = [[], []]
        # Example objects:
        cls.example_ws_name = cls.createWsStatic(0)
        # Reads
        cls.example_reads_name = "example_reads.1"
        foft = FakeObjectsForTests(os.environ['SDK_CALLBACK_URL'])
        info1 = foft.create_fake_reads({
            'ws_name': cls.example_ws_name,
            'obj_names': [cls.example_reads_name]
        })[0]
        # object reference assembled as wsid/objid/version from object_info
        cls.example_reads_ref = str(info1[6]) + '/' + str(
            info1[0]) + '/' + str(info1[4])
        # Genome
        cls.example_genome_name = "example_genome.1"
        foft = FakeObjectsForTests(os.environ['SDK_CALLBACK_URL'])
        info2 = foft.create_fake_genomes({
            'ws_name': cls.example_ws_name,
            'obj_names': [cls.example_genome_name]
        })[0]
        cls.example_genome_ref = str(info2[6]) + '/' + str(
            info2[0]) + '/' + str(info2[4])
        # Other objects
        foft.create_any_objects({
            'ws_name': cls.example_ws_name,
            'obj_names': ['any_obj_' + str(i) for i in range(0, 30)]
        })
    def __init__(self, config):
        """Cache the SDK config and construct the service clients this
        utility uses.

        :param config: dict-like SDK config; must contain 'scratch' and
            'workspace-url'. The callback URL is read from the environment.
        """
        self.cfg = config
        self.scratch = config['scratch']  # local working directory
        # clients reached through the SDK callback server
        self.gsu = GenomeSearchUtil(os.environ['SDK_CALLBACK_URL'])
        self.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
        self.ws = Workspace(config["workspace-url"])
class FeatureSetDownload:
    """Exports a KBase FeatureSet object as a TSV file and packages it for
    download."""

    def __init__(self, config):
        self.cfg = config
        self.scratch = config['scratch']
        self.gsu = GenomeSearchUtil(os.environ['SDK_CALLBACK_URL'])
        self.dfu = DataFileUtil(os.environ['SDK_CALLBACK_URL'])
        self.ws = Workspace(config["workspace-url"])

    @staticmethod
    def validate_params(params, expected=frozenset(("workspace_name", "featureset_name"))):
        """Raise ValueError if any expected key is missing from params.

        The default is a frozenset (the original used a mutable set literal,
        a shared-mutable-default hazard); it is normalized with set() below,
        so passing any iterable of keys still works.
        """
        expected = set(expected)
        pkeys = set(params)
        if expected - pkeys:
            raise ValueError("Required keys {} not in supplied parameters"
                             .format(", ".join(expected - pkeys)))

    def to_tsv(self, params):
        """Write the FeatureSet referenced by params['featureset_ref'] to a
        TSV file in a fresh scratch subdirectory.

        Returns (feature_set_name, {'file_path': path}).
        """
        working_dir = os.path.join(self.scratch,
                                   'featureset-download-' + str(uuid.uuid4()))
        os.makedirs(working_dir)
        header = ['Feature Id', 'Aliases', 'Genome', 'Type', 'Function']
        fs_name, fs_dicts = self.make_featureset_dict(params['featureset_ref'])
        files = {'file_path': "{}/{}.tsv".format(working_dir, fs_name)}
        # BUG FIX: the original passed a bare open() to DictWriter and never
        # closed it; the context manager guarantees the handle is flushed and
        # closed before the path is handed to the packaging step.
        with open(files['file_path'], 'w') as tsv_file:
            writer = csv.DictWriter(tsv_file, header, delimiter='\t',
                                    lineterminator='\n')
            writer.writeheader()
            for feat in fs_dicts:
                writer.writerow(feat)
        return fs_name, files

    def make_featureset_dict(self, fs_ref):
        """Resolve a FeatureSet into one row dict per feature (keyed by the
        TSV header names); returns (feature_set_name, rows)."""
        features = []
        ret = self.dfu.get_objects({'object_refs': [fs_ref]})['data'][0]
        feat_set = ret['data']
        fs_name = ret['info'][1]

        # group feature ids by their (first) genome ref so each genome is
        # searched once
        feat_by_genome = defaultdict(list)
        for k, v in feat_set['elements'].items():
            feat_by_genome[v[0]].append(k)

        for genome, fids in feat_by_genome.items():
            genome_name = self.ws.get_object_info3(
                {'objects': [{'ref': genome}]})['infos'][0][1]
            res = self.gsu.search({'ref': genome,
                                   'structured_query': {'feature_id': fids},
                                   'sort_by': [['contig_id', 1]],
                                   'start': 0,
                                   'limit': len(fids)})
            for feat in res['features']:
                features.append({'Feature Id': feat['feature_id'],
                                 'Aliases': ", ".join(feat['aliases'].keys()),
                                 'Genome': "{} ({})".format(genome_name, genome),
                                 'Type': feat['feature_type'],
                                 'Function': feat['function']})
        return fs_name, features

    def export(self, files, name, params):
        """Move files into an export directory and package it for download;
        returns {'shock_id': ...}."""
        export_package_dir = os.path.join(self.scratch, name + str(uuid.uuid4()))
        os.makedirs(export_package_dir)
        for file in files:
            shutil.move(file, os.path.join(export_package_dir,
                                           os.path.basename(file)))

        # package it up and be done
        package_details = self.dfu.package_for_download({
            'file_path': export_package_dir,
            'ws_refs': [params['featureset_ref']]
        })

        return {'shock_id': package_details['shock_id']}
class AveExpressionMatrixBuilder:
    """Builds an averaged ExpressionMatrix: columns sharing the same
    condition label (per condition_mapping) are collapsed into a single
    column holding their mean value."""

    def __init__(self, config):
        self.ws_url = config["workspace-url"]
        self.callback_url = config['SDK_CALLBACK_URL']
        self.token = config['KB_AUTH_TOKEN']
        self.shock_url = config['shock-url']
        self.ws = Workspace(self.ws_url, token=self.token)
        self.dfu = DataFileUtil(self.callback_url)
        self.scratch = config['scratch']

    def _validate_calculate_average_expression_matrix_params(self, params):
        """
        _validate_calculate_average_expression_matrix_params:
                validates params passed to calculate_average_expression_matrix method
        """
        log('start validating calculate_average_expression_matrix params')

        # check for required parameters
        for p in ['expression_matrix_ref', 'output_suffix', 'workspace_name']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

    def _generate_report(self, expression_matrix_ref, workspace_name):
        """_generate_report: generate the KBase report for the saved matrix"""
        objects_created = [{'ref': expression_matrix_ref,
                            'description': 'Average ExpressionMatrix'}]

        report_params = {'message': '',
                         'workspace_name': workspace_name,
                         'objects_created': objects_created,
                         'html_window_height': 366,
                         'report_object_name': 'kb_ave_expr_matrix_report_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        return {'report_name': output['name'], 'report_ref': output['ref']}

    def _save_expression_matrix(self, em_data, em_obj_name, workspace_name):
        """_save_expression_matrix: save an ExpressionMatrix to the workspace;
        returns its wsid/objid/version reference string."""
        try:
            log('saving ExpressionMatrix [{}]'.format(em_obj_name))
            data_type = 'KBaseFeatureValues.ExpressionMatrix'
            obj_info = self.dfu.save_objects({'id': self.dfu.ws_name_to_id(workspace_name),
                                              'objects': [{'type': data_type,
                                                           'data': em_data,
                                                           'name': em_obj_name}]})[0]
        except Exception as e:
            log(e)
            # chain the original cause so the workspace error is not lost
            raise Exception('Failed Saving ExpressionMatrix to Workspace') from e

        return str(obj_info[6]) + '/' + str(obj_info[0]) + '/' + str(obj_info[4])

    def calculate_average_expression_matrix(self, params):
        """
        calculate_average_expression_matrix: create an average ExpressionMatrix
        object from a ExpressionMatrix object

        required params:
        expression_matrix_ref: ExpressionMatrix object reference
        output_suffix: output average ExpressionMatrix name suffix
        workspace_name: the name of the workspace it gets saved to

        return:
        average_expression_matrix_ref: generated average ExpressionMatrix
                                       object reference
        report_name: report name generated by KBaseReport
        report_ref: report reference generated by KBaseReport
        """
        log('--->\nrunning AveExpressionMatrixBuilder.calculate_average_expression_matrix\n' +
            'params:\n{}'.format(json.dumps(params, indent=1)))

        self._validate_calculate_average_expression_matrix_params(params)

        expression_matrix_ref = params.get('expression_matrix_ref')
        expression_matrix = self.ws.get_objects2(
            {'objects': [{'ref': expression_matrix_ref}]})['data'][0]
        expression_matrix_data = expression_matrix['data']
        expression_matrix_info = expression_matrix['info']

        condition_map = expression_matrix_data['condition_mapping']
        ori_data = expression_matrix_data['data']
        ori_col_ids = ori_data['col_ids']
        ori_row_ids = ori_data['row_ids']
        ori_values = ori_data['values']

        # every column must have a condition label, and vice versa
        labels = condition_map.keys()
        if set(labels) != set(ori_col_ids):
            error_msg = 'available labels: {}\n'.format(ori_col_ids)
            error_msg += 'labels in condition_mapping: {}'.format(labels)
            raise ValueError(error_msg)

        # BUG FIX: dict.iteritems() does not exist on Python 3; use items()
        condition_pos = {}
        for label, condition in condition_map.items():
            condition_pos.setdefault(condition, []).append(ori_col_ids.index(label))

        # BUG FIX: a dict_keys view has no .index() on Python 3; materialize
        # the conditions into a list (also keeps the saved col_ids a list)
        conditions = list(condition_pos.keys())

        ave_values = []
        for ori_value in ori_values:
            ave_value = [None] * len(conditions)
            for condition, poss in condition_pos.items():
                ave_pos = conditions.index(condition)
                sum_value = 0.0
                for pos in poss:
                    sum_value += round(float(ori_value[pos]), 3)
                average = sum_value / len(poss)
                ave_value[ave_pos] = round(average, 2)
            ave_values.append(ave_value)

        average_data = {'row_ids': ori_row_ids,
                        'col_ids': conditions,
                        'values': ave_values}

        em_data = {}
        genome_ref = expression_matrix_data.get('genome_ref')
        if genome_ref:
            em_data['genome_ref'] = genome_ref
        em_data['scale'] = expression_matrix_data.get('scale')
        em_data['type'] = expression_matrix_data.get('type')
        em_data['feature_mapping'] = expression_matrix_data.get('feature_mapping')
        em_data['condition_mapping'] = expression_matrix_data.get('condition_mapping')
        em_data['data'] = average_data

        expression_matrix_name = expression_matrix_info[1]
        ave_expression_matrix_name = expression_matrix_name + params.get('output_suffix')

        workspace_name = params.get('workspace_name')
        ave_expression_matrix_ref = self._save_expression_matrix(em_data,
                                                                 ave_expression_matrix_name,
                                                                 workspace_name)
        returnVal = {'average_expression_matrix_ref': ave_expression_matrix_ref}

        report_output = self._generate_report(ave_expression_matrix_ref, workspace_name)
        returnVal.update(report_output)

        return returnVal