def genelist2fs(gl):
    """Build a FeatureSet-style dict describing the CDS features of a gene list.

    The ids in ``gl`` are mapped to CDS ids with ``ids2cds``; feature
    attributes are then fetched through ``CDMI_EntityAPI.get_entity_Feature``.
    When a feature has an empty ``function`` the function of its locus (looked
    up through ``cds2locus`` / ``fids_to_functions``) is used instead, if one
    is available.

    :param gl: sequence of id strings (they are joined with "," for the
        description, so every item must be a string)
    :return: dict with a ``description`` string and an ``elements`` dict keyed
        by CDS id; each element holds a ``data`` dict with ``type``, ``id``,
        ``dna_sequence_length``, ``function`` and ``aliases``
    """
    qid2cds = ids2cds(gl)
    fs = {
        "description": "Feature set generated by " + ",".join(gl),
        "elements": {},
    }
    cdmie = CDMI_EntityAPI(URLS.cdmi)
    cdmic = CDMI_API(URLS.cdmi)

    # Materialise the values so the same concrete list is iterated below and
    # handed to the service clients (a no-op copy where values() is a list).
    cds_ids = list(qid2cds.values())
    cds2l = cds2locus(cds_ids)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))
    fm = cdmie.get_entity_Feature(
        cds_ids,
        ['feature_type', 'source_id', 'sequence_length', 'function', 'alias'])

    for cds_id in cds_ids:
        if cds_id not in fm:
            # The entity service returned nothing for this id; skip it.
            continue
        feature = fm[cds_id]
        # A CDS without a locus mapping must not abort the whole set, so the
        # locus is looked up with .get() instead of indexing.
        locus = cds2l.get(cds_id)
        if not feature['function'] and locus in lfunc:
            feature['function'] = lfunc[locus]
        fs['elements'][cds_id] = {
            "data": {
                'type': feature['feature_type'],
                'id': cds_id,
                'dna_sequence_length': int(feature['sequence_length']),
                'function': feature['function'],
                'aliases': feature['alias'],
            }
        }
    return fs
def genelist2fs(gl):
    """Build a FeatureSet-style dict describing the CDS features of a gene list.

    The ids in ``gl`` are mapped to CDS ids with ``ids2cds``; feature
    attributes are then fetched through ``CDMI_EntityAPI.get_entity_Feature``.
    When a feature has an empty ``function`` the function of its locus (looked
    up through ``cds2locus`` / ``fids_to_functions``) is used instead, if one
    is available.

    :param gl: sequence of id strings (they are joined with "," for the
        description, so every item must be a string)
    :return: dict with a ``description`` string and an ``elements`` dict keyed
        by CDS id; each element holds a ``data`` dict with ``type``, ``id``,
        ``dna_sequence_length``, ``function`` and ``aliases``
    """
    qid2cds = ids2cds(gl)
    fs = {
        "description": "Feature set generated by " + ",".join(gl),
        "elements": {},
    }
    cdmie = CDMI_EntityAPI(URLS.cdmi)
    cdmic = CDMI_API(URLS.cdmi)

    # Materialise the values so the same concrete list is iterated below and
    # handed to the service clients (a no-op copy where values() is a list).
    cds_ids = list(qid2cds.values())
    cds2l = cds2locus(cds_ids)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))
    fm = cdmie.get_entity_Feature(
        cds_ids,
        ['feature_type', 'source_id', 'sequence_length', 'function', 'alias'])

    for cds_id in cds_ids:
        if cds_id not in fm:
            # The entity service returned nothing for this id; skip it.
            continue
        feature = fm[cds_id]
        # A CDS without a locus mapping must not abort the whole set, so the
        # locus is looked up with .get() instead of indexing.
        locus = cds2l.get(cds_id)
        if not feature['function'] and locus in lfunc:
            feature['function'] = lfunc[locus]
        fs['elements'][cds_id] = {
            "data": {
                'type': feature['feature_type'],
                'id': cds_id,
                'dna_sequence_length': int(feature['sequence_length']),
                'function': feature['function'],
                'aliases': feature['alias'],
            }
        }
    return fs
def go_anno_net(meth, net_obj_id=None):
    """Add Gene Ontology annotation to network gene nodes

    :param net_obj_id: Network object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the reST fields above (kbtypes.*, :output_widget:) look
    # machine-readable, so the docstring text is kept as-is -- confirm.

    def _is_gene_node(node_id):
        # CDS / locus ids always qualify; anything else qualifies only when
        # it carries none of the cluster / "ps." markers.
        if 'CDS' in node_id or 'locus' in node_id:
            return True
        return not ('clst' in node_id
                    or node_id.startswith('cluster')
                    or 'ps.' in node_id)

    meth.stages = 5

    meth.advance("Prepare annotation service")
    # The network is loaded from, and saved back to, the current workspace.
    source_ws = meth.workspace_id
    target_ws = meth.workspace_id

    meth.advance("Load network object")
    workspace = Workspace2(token=meth.token, wsid=source_ws)
    ontology = Ontology(url=URLS.ontology)
    network = workspace.get(net_obj_id)
    graph = Node(network['nodes'], network['edges'])
    id_server = IDServerAPI(URLS.ids)
    cdmi = CDMI_API(URLS.cdmi)
    # Only the disabled Encompasses-relationship lookup used this client; it
    # is still constructed so the sequence of client set-up is unchanged.
    cdmi_entities = CDMI_EntityAPI(URLS.cdmi)
    gene_ids = [
        node_id
        for node_id in sorted(graph.ugids.keys())
        if _is_gene_node(node_id)
    ]

    meth.advance("Get relationships from central data model")
    external_ids = kb_id2ext_id(id_server, gene_ids, 100)
    node2cds = ids2cds(gene_ids)
    cds_ids = node2cds.values()
    cds_to_locus = cds2locus(cds_ids)
    # Locus ids come from the CDS -> locus mapping; locus ids that were
    # already present among the node ids are not used directly.
    locus_ids = cds_to_locus.values()

    node_count = len(network['nodes'])
    edge_count = len(network['edges'])
    meth.advance("Annotate ({:d} nodes, {:d} edges)".format(
        node_count, edge_count))
    # GO terms are requested for the CDS ids with no domain or evidence-code
    # restriction (both filter lists are empty).
    go_terms = ontology.get_goidlist(cds_ids, [], [])
    go_annotations = ()  # the GO annotation lookup is currently disabled
    locus_functions = cdmi.fids_to_functions(locus_ids)
    cds_functions = cdmi.fids_to_functions(cds_ids)
    annotate_nodes(
        network,
        ots=go_terms,
        oan=go_annotations,
        funcs=locus_functions,
        funcs_org=cds_functions,
        eids=external_ids,
        gids2cds=node2cds,
        cds2l=cds_to_locus)

    meth.advance("Save annotated object to workspace {}".format(target_ws))
    annotated_name = net_obj_id + ".ano"
    annotated = {
        'type': 'KBaseNetworks.Network',
        'data': network,
        'name': annotated_name,
        'meta': {'original': net_obj_id},
    }
    workspace.save_objects({'workspace': target_ws, 'objects': [annotated]})
    return _workspace_output(annotated_name)
def featureset_go_anal(meth, feature_set_id=None, p_value=0.05, ec='IEA',
                       domain='biological_process', out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA,ISS,IDA,IEP,IPI,RCA ..)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process,molecular_function,cellular_component)
    :type domain: kbtypes.Unicode
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE(review): the reST fields above (kbtypes.*, :output_widget:) look
    # machine-readable, so the docstring text is kept as-is -- confirm.
    #
    # Flow: load the FeatureSet, attach GO terms to each element's metadata,
    # run the enrichment test, then save one FeatureSet per GO term at or
    # below the p-value cutoff plus the annotated input set (as ``out_id``).
    # The return value is a JSON string {"table": [header, row, ...]}.
    meth.stages = 4

    meth.advance("Prepare Enrichment Test")
    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    elements = fs['elements']
    qid2cds = ids2cds(list(elements.keys()))
    cds2l = cds2locus(list(qid2cds.values()))
    cdmic = CDMI_API(URLS.cdmi)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))

    meth.advance("Annotate GO Term")
    ec = ec.replace(" ", "")
    domain = domain.replace(" ", "")
    ec_list = ec.split(',')
    domain_list = domain.split(',')
    # De-duplicated CDS ids; reused for the enrichment call further down.
    query_cds = list(set(qid2cds.values()))
    ots = oc.get_goidlist(query_cds, domain_list, ec_list)

    go2cds = {}  # GO id -> set of CDS ids annotated with that term
    for gid, element in elements.items():
        lid = qid2cds[gid]
        if 'data' in element and not element['data'].get('function'):
            # Fall back to the locus function, but only when one is known --
            # the same guard genelist2fs applies -- instead of raising
            # KeyError on a missing locus or function.
            locus = cds2l.get(lid)
            if locus in lfunc:
                element['data']['function'] = lfunc[locus]
        metadata = element.setdefault('metadata', {})
        if lid not in ots:
            continue
        for go, annotations in ots[lid].items():
            go2cds.setdefault(go, set()).add(lid)
            for i, goen in enumerate(annotations):
                for ext in ("domain", "ec", "desc"):
                    key = "go.{}.{:d}.{}".format(go, i, ext)
                    metadata[key] = goen[ext]

    meth.advance("Execute Enrichment Test")
    enr_list = oc.get_go_enrichment(query_cds, domain_list, ec_list,
                                    'hypergeometric', 'GO')
    enr_list = sorted(enr_list, key=itemgetter('pvalue'))
    header = [
        "GO ID", "Description", "Domain", "p-value", "FeatureSet ID (# genes)"
    ]
    cutoff = float(p_value)  # p_value may arrive as a string
    fields = []
    objects = []
    go_enr_smry = ""
    for i, goen in enumerate(enr_list):
        if goen['pvalue'] > cutoff:
            continue
        members = go2cds[goen['goID']]
        cfs = genelist2fs(list(members))
        goid = goen['goID'].replace(":", "")
        fields.append([
            goen['goID'],
            goen['goDesc'][0],
            goen['goDesc'][1],
            "{:12.10f}".format(goen['pvalue']),
            "{}_to_{} ({})".format(out_id, goid, len(members)),
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_id + "_to_" + goid,
            'meta': {
                'original': feature_set_id,
                'domain': domain,
                'ec': ec,
                'GO_ID': goen['goID'],
            },
        })
        # The list is sorted by ascending p-value, so the first three
        # positions are the three most significant terms.
        if i < 3:
            go_enr_smry += (goen['goID'] + "("
                            + "{:6.4f}".format(goen['pvalue']) + ")"
                            + goen['goDesc'][0] + "\n")
    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'enr_summary': go_enr_smry,
        },
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})
    return json.dumps(data)
def go_anno_net(meth, net_obj_id=None):
    """Add Gene Ontology annotation to network gene nodes

    :param net_obj_id: Network object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the reST fields above (kbtypes.*, :output_widget:) look
    # machine-readable, so the docstring text is kept as-is -- confirm.

    def _is_gene_node(node_id):
        # CDS / locus ids always qualify; anything else qualifies only when
        # it carries none of the cluster / "ps." markers.
        if 'CDS' in node_id or 'locus' in node_id:
            return True
        return not ('clst' in node_id
                    or node_id.startswith('cluster')
                    or 'ps.' in node_id)

    meth.stages = 5

    meth.advance("Prepare annotation service")
    # The network is loaded from, and saved back to, the current workspace.
    source_ws = meth.workspace_id
    target_ws = meth.workspace_id

    meth.advance("Load network object")
    workspace = Workspace2(token=meth.token, wsid=source_ws)
    ontology = Ontology(url=URLS.ontology)
    network = workspace.get(net_obj_id)
    graph = Node(network['nodes'], network['edges'])
    id_server = IDServerAPI(URLS.ids)
    cdmi = CDMI_API(URLS.cdmi)
    # Only the disabled Encompasses-relationship lookup used this client; it
    # is still constructed so the sequence of client set-up is unchanged.
    cdmi_entities = CDMI_EntityAPI(URLS.cdmi)
    gene_ids = [
        node_id
        for node_id in sorted(graph.ugids.keys())
        if _is_gene_node(node_id)
    ]

    meth.advance("Get relationships from central data model")
    external_ids = kb_id2ext_id(id_server, gene_ids, 100)
    node2cds = ids2cds(gene_ids)
    cds_ids = node2cds.values()
    cds_to_locus = cds2locus(cds_ids)
    # Locus ids come from the CDS -> locus mapping; locus ids that were
    # already present among the node ids are not used directly.
    locus_ids = cds_to_locus.values()

    node_count = len(network['nodes'])
    edge_count = len(network['edges'])
    meth.advance("Annotate ({:d} nodes, {:d} edges)".format(
        node_count, edge_count))
    # GO terms are requested for the CDS ids with no domain or evidence-code
    # restriction (both filter lists are empty).
    go_terms = ontology.get_goidlist(cds_ids, [], [])
    go_annotations = ()  # the GO annotation lookup is currently disabled
    locus_functions = cdmi.fids_to_functions(locus_ids)
    cds_functions = cdmi.fids_to_functions(cds_ids)
    annotate_nodes(
        network,
        ots=go_terms,
        oan=go_annotations,
        funcs=locus_functions,
        funcs_org=cds_functions,
        eids=external_ids,
        gids2cds=node2cds,
        cds2l=cds_to_locus)

    meth.advance("Save annotated object to workspace {}".format(target_ws))
    annotated_name = net_obj_id + ".ano"
    annotated = {
        'type': 'KBaseNetworks.Network',
        'data': network,
        'name': annotated_name,
        'meta': {'original': net_obj_id},
    }
    workspace.save_objects({'workspace': target_ws, 'objects': [annotated]})
    return _workspace_output(annotated_name)
def featureset_go_anal(meth, feature_set_id=None, p_value=0.05, ec='IEA',
                       domain='biological_process', out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA,ISS,IDA,IEP,IPI,RCA ..)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process,molecular_function,cellular_component)
    :type domain: kbtypes.Unicode
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE(review): the reST fields above (kbtypes.*, :output_widget:) look
    # machine-readable, so the docstring text is kept as-is -- confirm.
    #
    # Flow: load the FeatureSet, attach GO terms to each element's metadata,
    # run the enrichment test, then save one FeatureSet per GO term at or
    # below the p-value cutoff plus the annotated input set (as ``out_id``).
    # The return value is a JSON string {"table": [header, row, ...]}.
    meth.stages = 4

    meth.advance("Prepare Enrichment Test")
    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    elements = fs['elements']
    qid2cds = ids2cds(list(elements.keys()))
    cds2l = cds2locus(list(qid2cds.values()))
    cdmic = CDMI_API(URLS.cdmi)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))

    meth.advance("Annotate GO Term")
    ec = ec.replace(" ", "")
    domain = domain.replace(" ", "")
    ec_list = ec.split(',')
    domain_list = domain.split(',')
    # De-duplicated CDS ids; reused for the enrichment call further down.
    query_cds = list(set(qid2cds.values()))
    ots = oc.get_goidlist(query_cds, domain_list, ec_list)

    go2cds = {}  # GO id -> set of CDS ids annotated with that term
    for gid, element in elements.items():
        lid = qid2cds[gid]
        if 'data' in element and not element['data'].get('function'):
            # Fall back to the locus function, but only when one is known --
            # the same guard genelist2fs applies -- instead of raising
            # KeyError on a missing locus or function.
            locus = cds2l.get(lid)
            if locus in lfunc:
                element['data']['function'] = lfunc[locus]
        metadata = element.setdefault('metadata', {})
        if lid not in ots:
            continue
        for go, annotations in ots[lid].items():
            go2cds.setdefault(go, set()).add(lid)
            for i, goen in enumerate(annotations):
                for ext in ("domain", "ec", "desc"):
                    key = "go.{}.{:d}.{}".format(go, i, ext)
                    metadata[key] = goen[ext]

    meth.advance("Execute Enrichment Test")
    enr_list = oc.get_go_enrichment(query_cds, domain_list, ec_list,
                                    'hypergeometric', 'GO')
    enr_list = sorted(enr_list, key=itemgetter('pvalue'))
    header = [
        "GO ID", "Description", "Domain", "p-value", "FeatureSet ID (# genes)"
    ]
    cutoff = float(p_value)  # p_value may arrive as a string
    fields = []
    objects = []
    go_enr_smry = ""
    for i, goen in enumerate(enr_list):
        if goen['pvalue'] > cutoff:
            continue
        members = go2cds[goen['goID']]
        cfs = genelist2fs(list(members))
        goid = goen['goID'].replace(":", "")
        fields.append([
            goen['goID'],
            goen['goDesc'][0],
            goen['goDesc'][1],
            "{:12.10f}".format(goen['pvalue']),
            "{}_to_{} ({})".format(out_id, goid, len(members)),
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_id + "_to_" + goid,
            'meta': {
                'original': feature_set_id,
                'domain': domain,
                'ec': ec,
                'GO_ID': goen['goID'],
            },
        })
        # The list is sorted by ascending p-value, so the first three
        # positions are the three most significant terms.
        if i < 3:
            go_enr_smry += (goen['goID'] + "("
                            + "{:6.4f}".format(goen['pvalue']) + ")"
                            + goen['goDesc'][0] + "\n")
    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'enr_summary': go_enr_smry,
        },
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})
    return json.dumps(data)