def go_enrch_net(meth, net_obj_id=None, p_value=0.05, ec=None, domain=None):
    """Identify Gene Ontology terms enriched in individual network clusters

    :param net_obj_id: Cluster object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA, ...)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process, ...)
    :type domain: kbtypes.Unicode
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # :default p_value: 0.05
    #
    # NOTE: the structured docstring fields above (:param/:type/
    # :output_widget) look like framework directives; they are kept verbatim.
    #
    # Flow: load the network object, run a GO enrichment test for every
    # cluster node, record the significant terms in the node's
    # 'user_annotations', save the network as "<net_obj_id>.cenr" and return
    # a JSON table with one row per cluster (largest cluster first).
    meth.stages = 3
    meth.advance("Prepare enrichment test")

    # Parse the cutoff once, before any remote call, so a malformed value
    # fails early instead of in the middle of the per-cluster loop.
    cutoff = float(p_value)

    # The declared defaults for ec/domain are None; map that to an empty list
    # instead of crashing on None.replace().
    # NOTE(review): presumably an empty list means "no filter" for the
    # Ontology service (go_anno_net passes [] to get_goidlist) -- confirm.
    ec_list = ec.replace(" ", "").split(',') if ec is not None else []
    domain_list = (domain.replace(" ", "").split(',')
                   if domain is not None else [])

    wsd = Workspace2(token=meth.token, wsid=meth.workspace_id)
    oc = Ontology(url=URLS.ontology)

    net_object = wsd.get_objects([{
        'workspace': meth.workspace_id,
        'name': net_obj_id
    }])
    net_data = net_object[0]['data']
    nc = Node(net_data['nodes'], net_data['edges'])

    # Gene-like node ids: anything tagged CDS/locus, or anything that is not
    # a cluster node.
    gids = [
        i for i in sorted(nc.ugids.keys())
        if 'CDS' in i or 'locus' in i
        or ('clst' not in i and not i.startswith('cluster'))
    ]
    gids2cds = ids2cds(gids)

    meth.advance("Run enrichment test for each clusters")
    rows = []
    for hr_nd in net_data['nodes']:
        gid = hr_nd['entity_id']
        # Only cluster nodes are tested.
        if not (gid.startswith('cluster.') or 'clst' in gid):
            continue
        glist = nc.get_gene_list(gid)
        # The enrichment service is queried with CDS ids.
        cds_gl = [gids2cds[i] for i in glist]

        enr_list = oc.get_go_enrichment(cds_gl, domain_list, ec_list,
                                        'hypergeometric', 'GO')
        enr_list = sorted(enr_list, key=itemgetter('pvalue'), reverse=False)

        annotations = hr_nd.setdefault('user_annotations', {})
        go_enr_smry = ""
        go_enr_anns = ["", "", ""]
        for rank, goen in enumerate(enr_list):
            pvalue = goen['pvalue']
            if pvalue > cutoff:
                continue
            go_id = goen['goID']
            desc = goen['goDesc'][0]
            annotations['gce.' + go_id + ".desc"] = desc
            annotations['gce.' + go_id + ".domain"] = goen['goDesc'][1]
            # repr() replaces the Python-2-only backtick syntax; the stored
            # string is the same.
            annotations['gce.' + go_id + ".p_value"] = repr(pvalue)
            # enr_list is sorted ascending, so ranks 0-2 are the three most
            # significant terms; they feed the summary and the table columns.
            if rank < 3:
                label = go_id + "(" + "{:6.4f}".format(pvalue) + ")" + desc
                go_enr_smry += label + "\n"
                go_enr_anns[rank] = label
        # NOTE(review): 'go_enrichnment_annotation' is misspelled but kept
        # as-is because consumers of the saved object may read this exact key.
        annotations['go_enrichnment_annotation'] = go_enr_smry
        rows.append([gid, len(glist),
                     go_enr_anns[0], go_enr_anns[1], go_enr_anns[2]])

    meth.advance("Saving output to Workspace")
    wsd.save_objects({
        'workspace': meth.workspace_id,
        'objects': [{
            'type': 'KBaseNetworks.Network',
            'data': net_data,
            'name': net_obj_id + ".cenr",
            # NOTE(review): 'orginal' is misspelled (sibling methods write
            # 'original'); kept so existing readers of this metadata still
            # work.
            'meta': {'orginal': net_obj_id}
        }]
    })

    # Largest clusters first.
    rows = sorted(rows, key=lambda x: x[1], reverse=True)
    header = [
        "Cluster ID", "# of Genes", "Annotation1", "Annotation2", "Annotation3"
    ]
    data = {'table': [header] + rows}
    return json.dumps(data)
def featureset_net_enr(meth, feature_set_id=None, p_value=None, ref_wsid="KBasePublicNetwork", ref_network=None, out_id=None):
    """Run an association (enrichment) test of a FeatureSet against a reference network

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ref_wsid: Reference Network workspace id (optional, default to current workspace)
    :type ref_wsid: kbtypes.Unicode
    :param ref_network: Reference Network object name
    :type ref_network:kbtypes.KBaseNetworks.Network
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE: the structured docstring fields above (:param/:type/
    # :output_widget) look like framework directives; they are kept verbatim.
    #
    # Flow: map the FeatureSet members to CDS ids, run the Ontology service's
    # association test against the reference network, and for every network
    # node at or below the cutoff save a FeatureSet holding the query genes
    # found in that node. The input FeatureSet is saved again under `out_id`
    # with a summary of the top hits. Returns a JSON table, or "{}" when the
    # inputs do not look like a network / FeatureSet.

    # Four progress messages are emitted below.
    meth.stages = 4
    meth.advance("Prepare Enrichment Test")

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    if not ref_wsid:
        ref_wsid = meth.workspace_id
    ws2 = Workspace2(token=meth.token, wsid=ref_wsid)
    net = ws2.get(ref_network)

    # checking user input
    if 'edges' not in net or 'nodes' not in net or 'elements' not in fs:
        return "{}"

    qid2cds = ids2cds(list(fs['elements'].keys()))

    # parse networks object
    nc = Node(net['nodes'], net['edges'])

    meth.advance("Execute Enrichment Test")
    qcdss = set(qid2cds.values())
    enr_dict = oc.association_test(list(qcdss), ref_wsid, ref_network, '',
                                   'hypergeometric', 'none', p_value)
    # Sort numerically: the p-values are converted here because sorting the
    # raw values would order them lexicographically if the service returns
    # them as strings (e.g. "1e-05" would sort after "0.01"). Ties fall back
    # to the node id, as before.
    enr_list = sorted(
        (float(raw_p), node_id) for node_id, raw_p in enr_dict.items())

    nid2name = {}
    for ne in net['nodes']:
        nid2name[ne['entity_id']] = ne['name']

    pwy_enr_smry = ""
    header = ["Pathway ID", "Name", "p-value", "FeatureSet ID (# genes)"]
    fields = []
    objects = []
    for rank, (pvalue, node_id) in enumerate(enr_list):
        # The cutoff is converted here (not hoisted) so an empty result still
        # returns normally when p_value was left at its None default.
        if pvalue > float(p_value):
            continue
        # Query genes that are members of this network node.
        cgenes = list(set(nc.get_gene_list(node_id)).intersection(qcdss))
        cfs = genelist2fs(cgenes)
        out_name = out_id + "_to_" + node_id
        fields.append([
            node_id,
            nid2name[node_id],
            "{:12.10f}".format(pvalue),
            out_name + "({})".format(len(cgenes))
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_name,
            'meta': {
                'original': feature_set_id,
                'ref_wsid': ref_wsid,
                'ref_net': ref_network,
                'pwy_id': node_id
            }
        })
        # The three most significant hits go into the summary text.
        if rank < 3:
            pwy_enr_smry += (node_id + "(" + "{:6.4f}".format(pvalue) + ")"
                             + nid2name[node_id] + "\n")

    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'ref_wsid': ref_wsid,
            'ref_net': ref_network,
            'pwy_enr_summary': pwy_enr_smry
        }
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})

    meth.advance("Returning object")
    return json.dumps(data)
def go_anno_net(meth, net_obj_id=None):
    """Add Gene Ontology annotation to network gene nodes

    :param net_obj_id: Network object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE: the structured docstring fields above (:param/:type/
    # :output_widget) look like framework directives; they are kept verbatim.
    #
    # Flow: load the network, collect its gene-like node ids, resolve them to
    # external ids, CDS ids and locus ids, fetch GO terms and functions for
    # them, let annotate_nodes() update the network object in place, and save
    # the result as "<net_obj_id>.ano".
    meth.stages = 5
    meth.advance("Prepare annotation service")

    # Both loading and saving use the current workspace.
    wsid = meth.workspace_id
    ws_save_id = meth.workspace_id

    meth.advance("Load network object")
    wsd = Workspace2(token=meth.token, wsid=wsid)
    oc = Ontology(url=URLS.ontology)

    net_object = wsd.get(net_obj_id)
    nc = Node(net_object['nodes'], net_object['edges'])

    idc = IDServerAPI(URLS.ids)
    cdmic = CDMI_API(URLS.cdmi)

    # Gene-like node ids: anything tagged CDS/locus, or anything that is
    # neither a cluster node nor a 'ps.' id.
    gids = [
        i for i in sorted(nc.ugids.keys())
        if 'CDS' in i or 'locus' in i
        or ('clst' not in i and not i.startswith('cluster')
            and 'ps.' not in i)
    ]

    meth.advance("Get relationships from central data model")
    # The external-id lookup goes through kb_id2ext_id with 100 as its third
    # argument (presumably a batch size -- confirm against the helper).
    eids = kb_id2ext_id(idc, gids, 100)
    gids2cds = ids2cds(gids)
    # Materialise as lists: these are reused and handed to service clients,
    # which a dict view (Python 3) would not survive.
    cgids = list(gids2cds.values())
    cds2l = cds2locus(cgids)
    lgids = list(cds2l.values())

    meth.advance("Annotate ({:d} nodes, {:d} edges)".format(
        len(net_object['nodes']), len(net_object['edges'])))
    # Empty domain / evidence-code lists are passed to the GO lookup.
    ots = oc.get_goidlist(cgids, [], [])
    # No GO annotation lookup is performed; an empty tuple is passed instead.
    oan = ()
    funcs = cdmic.fids_to_functions(lgids)
    funcs_org = cdmic.fids_to_functions(cgids)
    annotate_nodes(net_object,
                   ots=ots,
                   oan=oan,
                   funcs=funcs,
                   funcs_org=funcs_org,
                   eids=eids,
                   gids2cds=gids2cds,
                   cds2l=cds2l)

    meth.advance("Save annotated object to workspace {}".format(ws_save_id))
    obj = {
        'type': 'KBaseNetworks.Network',
        'data': net_object,
        'name': net_obj_id + ".ano",
        'meta': {
            'original': net_obj_id
        }
    }
    wsd.save_objects({'workspace': ws_save_id, 'objects': [obj]})

    return _workspace_output(net_obj_id + ".ano")
def featureset_go_anal(meth, feature_set_id=None, p_value=0.05, ec='IEA', domain='biological_process', out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA,ISS,IDA,IEP,IPI,RCA ..)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process,molecular_function,cellular_component)
    :type domain: kbtypes.Unicode
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE: the docstring above (including the :param/:type/:output_widget
    # fields) looks like it is read by the framework; it is kept verbatim.
    #
    # Flow:
    #   1. map FeatureSet members to CDS and locus ids and fetch functions;
    #   2. copy GO terms into each element's 'metadata' (filling in an empty
    #      'function' on the way) and remember which CDS carry which term;
    #   3. run the GO enrichment test and, for every term at or below the
    #      cutoff, save a FeatureSet with the CDS carrying that term;
    #   4. save the annotated input FeatureSet under `out_id` and return a
    #      JSON table of the enriched terms.
    meth.stages = 4
    meth.advance("Prepare Enrichment Test")

    # Parse the cutoff once, before any remote call.
    cutoff = float(p_value)

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    elements = fs['elements']
    # Lists (not dict views) are handed to the helpers / service clients.
    qid2cds = ids2cds(list(elements.keys()))
    cds2l = cds2locus(list(qid2cds.values()))
    cdmic = CDMI_API(URLS.cdmi)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))

    meth.advance("Annotate GO Term")
    # The whitespace-stripped strings are also what gets written to the
    # metadata of the per-term FeatureSets below.
    ec = ec.replace(" ", "")
    domain = domain.replace(" ", "")
    ec_list = ec.split(',')
    domain_list = domain.split(',')
    # De-duplicated query list, shared by the annotation and enrichment calls.
    query_cds = list(set(qid2cds.values()))
    ots = oc.get_goidlist(query_cds, domain_list, ec_list)

    go2cds = {}  # GO id -> set of CDS ids annotated with it
    for gid, element in elements.items():
        lid = qid2cds[gid]
        # Fill in a missing/empty function from the locus-level lookup.
        if 'data' in element and not element['data'].get('function'):
            element['data']['function'] = lfunc[cds2l[lid]]
        metadata = element.setdefault('metadata', {})
        if lid not in ots:
            continue
        for go, go_entries in ots[lid].items():
            go2cds.setdefault(go, set()).add(lid)
            for i, goen in enumerate(go_entries):
                for ext in ("domain", "ec", "desc"):
                    metadata["go.{}.{:d}.{}".format(go, i, ext)] = goen[ext]

    meth.advance("Execute Enrichment Test")
    enr_list = oc.get_go_enrichment(query_cds, domain_list, ec_list,
                                    'hypergeometric', 'GO')
    enr_list = sorted(enr_list, key=itemgetter('pvalue'), reverse=False)

    header = [
        "GO ID", "Description", "Domain", "p-value", "FeatureSet ID (# genes)"
    ]
    fields = []
    objects = []
    go_enr_smry = ""
    for rank, goen in enumerate(enr_list):
        pvalue = goen['pvalue']
        if pvalue > cutoff:
            continue
        go_id = goen['goID']
        # NOTE(review): this raises KeyError if the enrichment service reports
        # a GO id that get_goidlist did not return for any query CDS --
        # confirm whether the two calls can disagree.
        cds_with_term = go2cds[go_id]
        cfs = genelist2fs(list(cds_with_term))
        # The colon is dropped for workspace object names.
        goid = go_id.replace(":", "")
        fields.append([
            go_id,
            goen['goDesc'][0],
            goen['goDesc'][1],
            "{:12.10f}".format(pvalue),
            "{}_to_{} ({})".format(out_id, goid, len(cds_with_term))
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_id + "_to_" + goid,
            'meta': {
                'original': feature_set_id,
                'domain': domain,
                'ec': ec,
                'GO_ID': go_id
            }
        })
        # enr_list is sorted ascending, so ranks 0-2 are the top three terms.
        if rank < 3:
            go_enr_smry += (go_id + "(" + "{:6.4f}".format(pvalue) + ")"
                            + goen['goDesc'][0] + "\n")

    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'enr_summary': go_enr_smry
        }
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})
    return json.dumps(data)
def go_enrch_net(meth, net_obj_id=None, p_value=0.05, ec=None, domain=None):
    """Identify Gene Ontology terms enriched in individual network clusters

    :param net_obj_id: Cluster object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA, ...)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process, ...)
    :type domain: kbtypes.Unicode
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # :default p_value: 0.05
    #
    # NOTE: the structured docstring fields above (:param/:type/
    # :output_widget) look like framework directives; they are kept verbatim.
    #
    # Flow: load the network object, run a GO enrichment test for every
    # cluster node, record the significant terms in the node's
    # 'user_annotations', save the network as "<net_obj_id>.cenr" and return
    # a JSON table with one row per cluster (largest cluster first).
    meth.stages = 3
    meth.advance("Prepare enrichment test")

    # Parse the cutoff once, before any remote call, so a malformed value
    # fails early instead of in the middle of the per-cluster loop.
    cutoff = float(p_value)

    # The declared defaults for ec/domain are None; map that to an empty list
    # instead of crashing on None.replace().
    # NOTE(review): presumably an empty list means "no filter" for the
    # Ontology service (go_anno_net passes [] to get_goidlist) -- confirm.
    ec_list = ec.replace(" ", "").split(',') if ec is not None else []
    domain_list = (domain.replace(" ", "").split(',')
                   if domain is not None else [])

    wsd = Workspace2(token=meth.token, wsid=meth.workspace_id)
    oc = Ontology(url=URLS.ontology)

    net_object = wsd.get_objects([{
        'workspace': meth.workspace_id,
        'name': net_obj_id
    }])
    net_data = net_object[0]['data']
    nc = Node(net_data['nodes'], net_data['edges'])

    # Gene-like node ids: anything tagged CDS/locus, or anything that is not
    # a cluster node.
    gids = [
        i for i in sorted(nc.ugids.keys())
        if 'CDS' in i or 'locus' in i
        or ('clst' not in i and not i.startswith('cluster'))
    ]
    gids2cds = ids2cds(gids)

    meth.advance("Run enrichment test for each clusters")
    rows = []
    for hr_nd in net_data['nodes']:
        gid = hr_nd['entity_id']
        # Only cluster nodes are tested.
        if not (gid.startswith('cluster.') or 'clst' in gid):
            continue
        glist = nc.get_gene_list(gid)
        # The enrichment service is queried with CDS ids.
        cds_gl = [gids2cds[i] for i in glist]

        enr_list = oc.get_go_enrichment(cds_gl, domain_list, ec_list,
                                        'hypergeometric', 'GO')
        enr_list = sorted(enr_list, key=itemgetter('pvalue'), reverse=False)

        annotations = hr_nd.setdefault('user_annotations', {})
        go_enr_smry = ""
        go_enr_anns = ["", "", ""]
        for rank, goen in enumerate(enr_list):
            pvalue = goen['pvalue']
            if pvalue > cutoff:
                continue
            go_id = goen['goID']
            desc = goen['goDesc'][0]
            annotations['gce.' + go_id + ".desc"] = desc
            annotations['gce.' + go_id + ".domain"] = goen['goDesc'][1]
            # repr() replaces the Python-2-only backtick syntax; the stored
            # string is the same.
            annotations['gce.' + go_id + ".p_value"] = repr(pvalue)
            # enr_list is sorted ascending, so ranks 0-2 are the three most
            # significant terms; they feed the summary and the table columns.
            if rank < 3:
                label = go_id + "(" + "{:6.4f}".format(pvalue) + ")" + desc
                go_enr_smry += label + "\n"
                go_enr_anns[rank] = label
        # NOTE(review): 'go_enrichnment_annotation' is misspelled but kept
        # as-is because consumers of the saved object may read this exact key.
        annotations['go_enrichnment_annotation'] = go_enr_smry
        rows.append([gid, len(glist),
                     go_enr_anns[0], go_enr_anns[1], go_enr_anns[2]])

    meth.advance("Saving output to Workspace")
    wsd.save_objects({
        'workspace': meth.workspace_id,
        'objects': [{
            'type': 'KBaseNetworks.Network',
            'data': net_data,
            'name': net_obj_id + ".cenr",
            # NOTE(review): 'orginal' is misspelled (sibling methods write
            # 'original'); kept so existing readers of this metadata still
            # work.
            'meta': {'orginal': net_obj_id}
        }]
    })

    # Largest clusters first.
    rows = sorted(rows, key=lambda x: x[1], reverse=True)
    header = [
        "Cluster ID", "# of Genes", "Annotation1", "Annotation2", "Annotation3"
    ]
    data = {'table': [header] + rows}
    return json.dumps(data)
def go_anno_net(meth, net_obj_id=None):
    """Add Gene Ontology annotation to network gene nodes

    :param net_obj_id: Network object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE: the structured docstring fields above (:param/:type/
    # :output_widget) look like framework directives; they are kept verbatim.
    #
    # Flow: load the network, collect its gene-like node ids, resolve them to
    # external ids, CDS ids and locus ids, fetch GO terms and functions for
    # them, let annotate_nodes() update the network object in place, and save
    # the result as "<net_obj_id>.ano".
    meth.stages = 5
    meth.advance("Prepare annotation service")

    # Both loading and saving use the current workspace.
    wsid = meth.workspace_id
    ws_save_id = meth.workspace_id

    meth.advance("Load network object")
    wsd = Workspace2(token=meth.token, wsid=wsid)
    oc = Ontology(url=URLS.ontology)

    net_object = wsd.get(net_obj_id)
    nc = Node(net_object['nodes'], net_object['edges'])

    idc = IDServerAPI(URLS.ids)
    cdmic = CDMI_API(URLS.cdmi)

    # Gene-like node ids: anything tagged CDS/locus, or anything that is
    # neither a cluster node nor a 'ps.' id.
    gids = [
        i for i in sorted(nc.ugids.keys())
        if 'CDS' in i or 'locus' in i
        or ('clst' not in i and not i.startswith('cluster')
            and 'ps.' not in i)
    ]

    meth.advance("Get relationships from central data model")
    # The external-id lookup goes through kb_id2ext_id with 100 as its third
    # argument (presumably a batch size -- confirm against the helper).
    eids = kb_id2ext_id(idc, gids, 100)
    gids2cds = ids2cds(gids)
    # Materialise as lists: these are reused and handed to service clients,
    # which a dict view (Python 3) would not survive.
    cgids = list(gids2cds.values())
    cds2l = cds2locus(cgids)
    lgids = list(cds2l.values())

    meth.advance("Annotate ({:d} nodes, {:d} edges)".format(
        len(net_object['nodes']), len(net_object['edges'])))
    # Empty domain / evidence-code lists are passed to the GO lookup.
    ots = oc.get_goidlist(cgids, [], [])
    # No GO annotation lookup is performed; an empty tuple is passed instead.
    oan = ()
    funcs = cdmic.fids_to_functions(lgids)
    funcs_org = cdmic.fids_to_functions(cgids)
    annotate_nodes(net_object,
                   ots=ots,
                   oan=oan,
                   funcs=funcs,
                   funcs_org=funcs_org,
                   eids=eids,
                   gids2cds=gids2cds,
                   cds2l=cds2l)

    meth.advance("Save annotated object to workspace {}".format(ws_save_id))
    obj = {
        'type': 'KBaseNetworks.Network',
        'data': net_object,
        'name': net_obj_id + ".ano",
        'meta': {
            'original': net_obj_id
        }
    }
    wsd.save_objects({'workspace': ws_save_id, 'objects': [obj]})

    return _workspace_output(net_obj_id + ".ano")
def featureset_net_enr(meth, feature_set_id=None, p_value=None, ref_wsid="KBasePublicNetwork", ref_network=None, out_id=None):
    """Run an association (enrichment) test of a FeatureSet against a reference network

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ref_wsid: Reference Network workspace id (optional, default to current workspace)
    :type ref_wsid: kbtypes.Unicode
    :param ref_network: Reference Network object name
    :type ref_network:kbtypes.KBaseNetworks.Network
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE: the structured docstring fields above (:param/:type/
    # :output_widget) look like framework directives; they are kept verbatim.
    #
    # Flow: map the FeatureSet members to CDS ids, run the Ontology service's
    # association test against the reference network, and for every network
    # node at or below the cutoff save a FeatureSet holding the query genes
    # found in that node. The input FeatureSet is saved again under `out_id`
    # with a summary of the top hits. Returns a JSON table, or "{}" when the
    # inputs do not look like a network / FeatureSet.

    # Four progress messages are emitted below.
    meth.stages = 4
    meth.advance("Prepare Enrichment Test")

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    if not ref_wsid:
        ref_wsid = meth.workspace_id
    ws2 = Workspace2(token=meth.token, wsid=ref_wsid)
    net = ws2.get(ref_network)

    # checking user input
    if 'edges' not in net or 'nodes' not in net or 'elements' not in fs:
        return "{}"

    qid2cds = ids2cds(list(fs['elements'].keys()))

    # parse networks object
    nc = Node(net['nodes'], net['edges'])

    meth.advance("Execute Enrichment Test")
    qcdss = set(qid2cds.values())
    enr_dict = oc.association_test(list(qcdss), ref_wsid, ref_network, '',
                                   'hypergeometric', 'none', p_value)
    # Sort numerically: the p-values are converted here because sorting the
    # raw values would order them lexicographically if the service returns
    # them as strings (e.g. "1e-05" would sort after "0.01"). Ties fall back
    # to the node id, as before.
    enr_list = sorted(
        (float(raw_p), node_id) for node_id, raw_p in enr_dict.items())

    nid2name = {}
    for ne in net['nodes']:
        nid2name[ne['entity_id']] = ne['name']

    pwy_enr_smry = ""
    header = ["Pathway ID", "Name", "p-value", "FeatureSet ID (# genes)"]
    fields = []
    objects = []
    for rank, (pvalue, node_id) in enumerate(enr_list):
        # The cutoff is converted here (not hoisted) so an empty result still
        # returns normally when p_value was left at its None default.
        if pvalue > float(p_value):
            continue
        # Query genes that are members of this network node.
        cgenes = list(set(nc.get_gene_list(node_id)).intersection(qcdss))
        cfs = genelist2fs(cgenes)
        out_name = out_id + "_to_" + node_id
        fields.append([
            node_id,
            nid2name[node_id],
            "{:12.10f}".format(pvalue),
            out_name + "({})".format(len(cgenes))
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_name,
            'meta': {
                'original': feature_set_id,
                'ref_wsid': ref_wsid,
                'ref_net': ref_network,
                'pwy_id': node_id
            }
        })
        # The three most significant hits go into the summary text.
        if rank < 3:
            pwy_enr_smry += (node_id + "(" + "{:6.4f}".format(pvalue) + ")"
                             + nid2name[node_id] + "\n")

    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'ref_wsid': ref_wsid,
            'ref_net': ref_network,
            'pwy_enr_summary': pwy_enr_smry
        }
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})

    meth.advance("Returning object")
    return json.dumps(data)
def featureset_go_anal(meth, feature_set_id=None, p_value=0.05, ec='IEA', domain='biological_process', out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA,ISS,IDA,IEP,IPI,RCA ..)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process,molecular_function,cellular_component)
    :type domain: kbtypes.Unicode
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE: the docstring above (including the :param/:type/:output_widget
    # fields) looks like it is read by the framework; it is kept verbatim.
    #
    # Flow:
    #   1. map FeatureSet members to CDS and locus ids and fetch functions;
    #   2. copy GO terms into each element's 'metadata' (filling in an empty
    #      'function' on the way) and remember which CDS carry which term;
    #   3. run the GO enrichment test and, for every term at or below the
    #      cutoff, save a FeatureSet with the CDS carrying that term;
    #   4. save the annotated input FeatureSet under `out_id` and return a
    #      JSON table of the enriched terms.
    meth.stages = 4
    meth.advance("Prepare Enrichment Test")

    # Parse the cutoff once, before any remote call.
    cutoff = float(p_value)

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    elements = fs['elements']
    # Lists (not dict views) are handed to the helpers / service clients.
    qid2cds = ids2cds(list(elements.keys()))
    cds2l = cds2locus(list(qid2cds.values()))
    cdmic = CDMI_API(URLS.cdmi)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))

    meth.advance("Annotate GO Term")
    # The whitespace-stripped strings are also what gets written to the
    # metadata of the per-term FeatureSets below.
    ec = ec.replace(" ", "")
    domain = domain.replace(" ", "")
    ec_list = ec.split(',')
    domain_list = domain.split(',')
    # De-duplicated query list, shared by the annotation and enrichment calls.
    query_cds = list(set(qid2cds.values()))
    ots = oc.get_goidlist(query_cds, domain_list, ec_list)

    go2cds = {}  # GO id -> set of CDS ids annotated with it
    for gid, element in elements.items():
        lid = qid2cds[gid]
        # Fill in a missing/empty function from the locus-level lookup.
        if 'data' in element and not element['data'].get('function'):
            element['data']['function'] = lfunc[cds2l[lid]]
        metadata = element.setdefault('metadata', {})
        if lid not in ots:
            continue
        for go, go_entries in ots[lid].items():
            go2cds.setdefault(go, set()).add(lid)
            for i, goen in enumerate(go_entries):
                for ext in ("domain", "ec", "desc"):
                    metadata["go.{}.{:d}.{}".format(go, i, ext)] = goen[ext]

    meth.advance("Execute Enrichment Test")
    enr_list = oc.get_go_enrichment(query_cds, domain_list, ec_list,
                                    'hypergeometric', 'GO')
    enr_list = sorted(enr_list, key=itemgetter('pvalue'), reverse=False)

    header = [
        "GO ID", "Description", "Domain", "p-value", "FeatureSet ID (# genes)"
    ]
    fields = []
    objects = []
    go_enr_smry = ""
    for rank, goen in enumerate(enr_list):
        pvalue = goen['pvalue']
        if pvalue > cutoff:
            continue
        go_id = goen['goID']
        # NOTE(review): this raises KeyError if the enrichment service reports
        # a GO id that get_goidlist did not return for any query CDS --
        # confirm whether the two calls can disagree.
        cds_with_term = go2cds[go_id]
        cfs = genelist2fs(list(cds_with_term))
        # The colon is dropped for workspace object names.
        goid = go_id.replace(":", "")
        fields.append([
            go_id,
            goen['goDesc'][0],
            goen['goDesc'][1],
            "{:12.10f}".format(pvalue),
            "{}_to_{} ({})".format(out_id, goid, len(cds_with_term))
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_id + "_to_" + goid,
            'meta': {
                'original': feature_set_id,
                'domain': domain,
                'ec': ec,
                'GO_ID': go_id
            }
        })
        # enr_list is sorted ascending, so ranks 0-2 are the top three terms.
        if rank < 3:
            go_enr_smry += (go_id + "(" + "{:6.4f}".format(pvalue) + ")"
                            + goen['goDesc'][0] + "\n")

    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'enr_summary': go_enr_smry
        }
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})
    return json.dumps(data)