Ejemplo n.º 1
0
def go_enrch_net(meth, net_obj_id=None, p_value=0.05, ec=None, domain=None):
    """Identify Gene Ontology terms enriched in individual network clusters

    :param net_obj_id: Cluster object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA, ...)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process, ...)
    :type domain: kbtypes.Unicode
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    #    :default p_value: 0.05
    # NOTE(review): the field-style docstring above looks machine-readable
    # (``:type`` / ``:output_widget:``), so its text is left exactly as it was.
    #
    # Flow: load the network object from the current workspace, run a GO
    # enrichment test on the gene list of every cluster node, record the
    # significant terms in that node's ``user_annotations``, save the network
    # as ``<net_obj_id>.cenr`` and return a JSON table with one row per
    # cluster. ``ec`` and ``domain`` must be strings: ``.replace`` is called
    # on them unconditionally, so the ``None`` defaults raise AttributeError.
    meth.stages = 3

    meth.advance("Prepare enrichment test")

    # Comma separated text -> lists; blanks are removed before splitting.
    ec_list = ec.replace(" ", "").split(',')
    domain_list = domain.replace(" ", "").split(',')

    wsd = Workspace2(token=meth.token, wsid=meth.workspace_id)
    oc = Ontology(url=URLS.ontology)

    net_object = wsd.get_objects([{
        'workspace': meth.workspace_id,
        'name': net_obj_id
    }])
    net_data = net_object[0]['data']
    nc = Node(net_data['nodes'], net_data['edges'])

    # Keep ids that look like genes (CDS / locus) plus anything that is not
    # named like a cluster node.
    gids = [
        i for i in sorted(nc.ugids.keys())
        if 'CDS' in i or 'locus' in i or
        ('clst' not in i and not i.startswith('cluster'))
    ]

    gids2cds = ids2cds(gids)

    meth.advance("Run enrichment test for each clusters")
    rows = []
    for hr_nd in net_data['nodes']:
        gid = hr_nd['entity_id']
        # Only cluster nodes are tested.
        if not (gid.startswith('cluster.') or 'clst' in gid):
            continue
        glist = nc.get_gene_list(gid)
        # The enrichment call is fed the ids produced by ids2cds.
        cds_gl = [gids2cds[i] for i in glist]

        enr_list = oc.get_go_enrichment(cds_gl, domain_list, ec_list,
                                        'hypergeometric', 'GO')
        # Ascending p-value, so index < 3 means "one of the three best".
        enr_list = sorted(enr_list, key=itemgetter('pvalue'))

        # Create the annotation mapping when the node does not carry one yet
        # instead of failing with KeyError.
        annotations = hr_nd.setdefault('user_annotations', {})

        go_enr_smry = ""
        go_enr_anns = ["", "", ""]
        for rank, goen in enumerate(enr_list):
            if goen['pvalue'] > float(p_value):
                continue
            key_prefix = 'gce.' + goen['goID']
            annotations[key_prefix + ".desc"] = goen['goDesc'][0]
            annotations[key_prefix + ".domain"] = goen['goDesc'][1]
            # repr() replaces the backtick syntax, which only Python 2 accepts.
            annotations[key_prefix + ".p_value"] = repr(goen['pvalue'])
            if rank < 3:
                label = (goen['goID'] + "(" +
                         "{:6.4f}".format(goen['pvalue']) + ")" +
                         goen['goDesc'][0])
                go_enr_smry += label + "\n"
                go_enr_anns[rank] = label
        # NOTE(review): the key spelling ('go_enrichnment_annotation') is kept
        # unchanged because it is part of the data that gets saved.
        annotations['go_enrichnment_annotation'] = go_enr_smry
        rows.append(
            [gid,
             len(glist), go_enr_anns[0], go_enr_anns[1], go_enr_anns[2]])

    wsd.save_objects({
        'workspace':
        meth.workspace_id,
        'objects': [{
            'type': 'KBaseNetworks.Network',
            'data': net_data,
            'name': net_obj_id + ".cenr",
            # NOTE(review): 'orginal' is probably a typo for 'original';
            # kept as is because renaming changes the stored metadata.
            'meta': {
                'orginal': net_obj_id
            }
        }]
    })

    # Largest clusters first.
    rows = sorted(rows, key=lambda x: x[1], reverse=True)

    header = [
        "Cluster ID", "# of Genes", "Annotation1", "Annotation2", "Annotation3"
    ]
    data = {'table': [header] + rows}
    return json.dumps(data)
Ejemplo n.º 2
0
def featureset_net_enr(meth,
                       feature_set_id=None,
                       p_value=None,
                       ref_wsid="KBasePublicNetwork",
                       ref_network=None,
                       out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ref_wsid: Reference Network workspace id (optional, default to current workspace)
    :type ref_wsid: kbtypes.Unicode
    :param ref_network: Reference Network object name
    :type ref_network:kbtypes.KBaseNetworks.Network
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE(review): the field-style docstring above looks machine-readable, so
    # its text is left exactly as it was.
    #
    # Flow: load the FeatureSet and the reference network, run an association
    # test of the FeatureSet genes against the network, and for every result
    # at or below the cutoff save a FeatureSet holding the overlapping genes.
    # The input FeatureSet is saved again under ``out_id`` with a short summary
    # of the best hits. Returns a JSON table, or "{}" when either input object
    # lacks the expected top-level keys.
    # NOTE(review): with the default ``p_value=None`` the cutoff comparison
    # raises TypeError as soon as the test returns at least one result.
    meth.stages = 3
    meth.advance("Prepare Enrichment Test")

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    if not ref_wsid:
        ref_wsid = meth.workspace_id
    ws2 = Workspace2(token=meth.token, wsid=ref_wsid)
    net = ws2.get(ref_network)

    # checking user input
    if 'edges' not in net or 'nodes' not in net or 'elements' not in fs:
        return "{}"

    # list() so a real list is handed over on Python 3 as well.
    qid2cds = ids2cds(list(fs['elements'].keys()))
    # parse networks object
    nc = Node(net['nodes'], net['edges'])

    meth.advance("Execute Enrichment Test")
    qcdss = set(qid2cds.values())
    enr_dict = oc.association_test(list(qcdss), ref_wsid, ref_network, '',
                                   'hypergeometric', 'none', p_value)
    # Convert to float BEFORE sorting. The values are passed through float()
    # wherever they are used, so they may arrive as text; sorting the raw
    # values would then order them lexicographically ("1e-05" after "0.01")
    # and the top-three summary below would pick the wrong entries.
    enr_list = sorted(
        (float(value), key) for (key, value) in enr_dict.items())

    nid2name = {ne['entity_id']: ne['name'] for ne in net['nodes']}

    pwy_enr_smry = ""
    header = ["Pathway ID", "Name", "p-value", "FeatureSet ID (# genes)"]
    fields = []
    objects = []
    for rank, (pvalue, pwy_id) in enumerate(enr_list):
        if pvalue > float(p_value):
            continue
        # Genes of this network node that are also in the query set.
        cgenes = list(set(nc.get_gene_list(pwy_id)).intersection(qcdss))
        cfs = genelist2fs(cgenes)
        set_name = out_id + "_to_" + pwy_id
        fields.append([
            pwy_id, nid2name[pwy_id],
            "{:12.10f}".format(pvalue),
            set_name + "({})".format(len(cgenes))
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': set_name,
            'meta': {
                'original': feature_set_id,
                'ref_wsid': ref_wsid,
                'ref_net': ref_network,
                'pwy_id': pwy_id
            }
        })
        # enr_list is in ascending p-value order: rank < 3 is the top three.
        if rank < 3:
            pwy_enr_smry += (pwy_id + "(" + "{:6.4f}".format(pvalue) + ")" +
                             nid2name[pwy_id] + "\n")

    data = {'table': [header] + fields}
    meth.advance("Saving output to Workspace")

    # The input FeatureSet itself is stored under out_id with the summary.
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'ref_wsid': ref_wsid,
            'ref_net': ref_network,
            'pwy_enr_summary': pwy_enr_smry
        }
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})

    meth.advance("Returning object")
    return json.dumps(data)
Ejemplo n.º 3
0
def go_anno_net(meth, net_obj_id=None):
    """Add Gene Ontology annotation to network gene nodes

    :param net_obj_id: Network object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the field-style docstring above looks machine-readable, so
    # its text is left exactly as it was.
    #
    # Flow: load the network from the current workspace, collect its gene-like
    # node ids, look up external ids, CDS ids, locus ids, GO terms and
    # functions for them, hand everything to annotate_nodes(), then save the
    # network as ``<net_obj_id>.ano`` in the same workspace.
    meth.stages = 5

    meth.advance("Prepare annotation service")

    # The object is read from and written back to the current workspace.
    source_ws = meth.workspace_id
    target_ws = meth.workspace_id

    meth.advance("Load network object")
    workspace = Workspace2(token=meth.token, wsid=source_ws)
    ontology = Ontology(url=URLS.ontology)

    network = workspace.get(net_obj_id)
    node_index = Node(network['nodes'], network['edges'])

    id_server = IDServerAPI(URLS.ids)
    cdmi = CDMI_API(URLS.cdmi)
    # Constructed exactly as before although nothing below reads it.
    cdmi_entities = CDMI_EntityAPI(URLS.cdmi)

    # An id is kept when it names a CDS/locus, or when it does not look like
    # a cluster ('clst', 'cluster...') or 'ps.' node.
    gene_ids = []
    for node_id in sorted(node_index.ugids.keys()):
        names_gene = 'CDS' in node_id or 'locus' in node_id
        names_group = ('clst' in node_id or node_id.startswith('cluster') or
                       'ps.' in node_id)
        if names_gene or not names_group:
            gene_ids.append(node_id)

    meth.advance("Get relationships from central data model")
    external_ids = kb_id2ext_id(id_server, gene_ids, 100)
    gene_to_cds = ids2cds(gene_ids)
    cds_ids = gene_to_cds.values()
    cds_to_locus = cds2locus(cds_ids)
    locus_ids = cds_to_locus.values()

    node_total = len(network['nodes'])
    edge_total = len(network['edges'])
    meth.advance("Annotate ({:d} nodes, {:d} edges)".format(
        node_total, edge_total))
    # Empty lists are passed for the two filter arguments.
    go_terms = ontology.get_goidlist(cds_ids, [], [])
    no_go_annotation = ()
    locus_functions = cdmi.fids_to_functions(locus_ids)
    cds_functions = cdmi.fids_to_functions(cds_ids)
    annotate_nodes(network,
                   ots=go_terms,
                   oan=no_go_annotation,
                   funcs=locus_functions,
                   funcs_org=cds_functions,
                   eids=external_ids,
                   gids2cds=gene_to_cds,
                   cds2l=cds_to_locus)

    meth.advance("Save annotated object to workspace {}".format(target_ws))
    annotated_name = net_obj_id + ".ano"
    workspace.save_objects({
        'workspace': target_ws,
        'objects': [{
            'type': 'KBaseNetworks.Network',
            'data': network,
            'name': annotated_name,
            'meta': {
                'original': net_obj_id
            }
        }]
    })

    return _workspace_output(net_obj_id + ".ano")
Ejemplo n.º 4
0
def featureset_go_anal(meth,
                       feature_set_id=None,
                       p_value=0.05,
                       ec='IEA',
                       domain='biological_process',
                       out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA,ISS,IDA,IEP,IPI,RCA ..)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process,molecular_function,cellular_component)
    :type domain: kbtypes.Unicode
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE(review): the field-style docstring above looks machine-readable, so
    # its text is left exactly as it was.
    #
    # Flow: load the FeatureSet, write GO term details into each element's
    # ``metadata`` (and fill in an empty ``function``), run a GO enrichment
    # test over all of its CDS ids, and for every term at or below the cutoff
    # save a FeatureSet with the genes carrying that term. The annotated input
    # is saved under ``out_id``. Returns a JSON table of the enriched terms.
    meth.stages = 4
    meth.advance("Prepare Enrichment Test")

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    elements = fs['elements']
    # list() so real lists are handed over on Python 3 as well.
    qid2cds = ids2cds(list(elements.keys()))
    cds2l = cds2locus(list(qid2cds.values()))
    cdmic = CDMI_API(URLS.cdmi)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))
    # De-duplicated query ids, built once and used for both ontology calls.
    query_cds = list(set(qid2cds.values()))

    meth.advance("Annotate GO Term")
    # ec/domain stay bound to the blank-free text: it is stored in the
    # metadata of every saved FeatureSet below.
    ec = ec.replace(" ", "")
    domain = domain.replace(" ", "")
    ec_list = ec.split(',')
    domain_list = domain.split(',')
    ots = oc.get_goidlist(query_cds, domain_list, ec_list)
    go2cds = {}  # GO id -> set of CDS ids annotated with it
    for gid in elements:
        element = elements[gid]
        lid = qid2cds[gid]
        if 'data' in element:
            if not element['data']['function']:
                element['data']['function'] = lfunc[cds2l[lid]]
        if 'metadata' not in element:
            element['metadata'] = {}
        if lid in ots:
            for go in ots[lid].keys():
                go2cds.setdefault(go, set()).add(lid)
                for i, goen in enumerate(ots[lid][go]):
                    for ext in ("domain", "ec", "desc"):
                        # One write per key (the original assigned it twice).
                        element['metadata']["go.{}.{:d}.{}".format(
                            go, i, ext)] = goen[ext]

    meth.advance("Execute Enrichment Test")
    enr_list = oc.get_go_enrichment(query_cds, domain_list, ec_list,
                                    'hypergeometric', 'GO')
    # Ascending p-value, so index < 3 means "one of the three best".
    enr_list = sorted(enr_list, key=itemgetter('pvalue'))
    header = [
        "GO ID", "Description", "Domain", "p-value", "FeatureSet ID (# genes)"
    ]
    fields = []
    objects = []
    go_enr_smry = ""
    for rank, goen in enumerate(enr_list):
        if goen['pvalue'] > float(p_value):
            continue
        members = go2cds[goen['goID']]
        cfs = genelist2fs(list(members))
        # The colon is dropped from the GO id when building object names.
        goid = goen['goID'].replace(":", "")
        fields.append([
            goen['goID'], goen['goDesc'][0], goen['goDesc'][1],
            "{:12.10f}".format(goen['pvalue']),
            "{}_to_{} ({})".format(out_id, goid, len(members))
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_id + "_to_" + goid,
            'meta': {
                'original': feature_set_id,
                'domain': domain,
                'ec': ec,
                'GO_ID': goen['goID']
            }
        })
        if rank < 3:
            go_enr_smry += goen['goID'] + "(" + "{:6.4f}".format(
                goen['pvalue']) + ")" + goen['goDesc'][0] + "\n"
    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    # The annotated input FeatureSet is stored under out_id with the summary.
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'enr_summary': go_enr_smry
        }
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})
    return json.dumps(data)
Ejemplo n.º 5
0
def _go_enr_label(goen):
    """Format one enrichment entry as ``<GO id>(<p-value>)<description>``."""
    return (goen['goID'] + "(" + "{:6.4f}".format(goen['pvalue']) + ")" +
            goen['goDesc'][0])


def go_enrch_net(meth, net_obj_id=None, p_value=0.05, ec=None, domain=None):
    """Identify Gene Ontology terms enriched in individual network clusters

    :param net_obj_id: Cluster object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA, ...)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process, ...)
    :type domain: kbtypes.Unicode
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    #    :default p_value: 0.05
    # NOTE(review): the field-style docstring above looks machine-readable, so
    # its text is left exactly as it was.
    #
    # For each cluster node of the network: test its genes for enriched GO
    # terms, store the significant ones in the node's ``user_annotations``,
    # and emit one table row (id, gene count, three best terms). The modified
    # network is saved as ``<net_obj_id>.cenr``; the table is returned as
    # JSON. ``ec`` and ``domain`` have to be strings because ``.replace`` is
    # called on them; the ``None`` defaults raise AttributeError.
    meth.stages = 3

    meth.advance("Prepare enrichment test")

    # Comma separated text -> lists, ignoring blanks.
    ec_list = ec.replace(" ", "").split(',')
    domain_list = domain.replace(" ", "").split(',')

    wsd = Workspace2(token=meth.token, wsid=meth.workspace_id)
    oc = Ontology(url=URLS.ontology)

    net_object = wsd.get_objects([{'workspace': meth.workspace_id,
                                   'name': net_obj_id}])
    network = net_object[0]['data']
    nc = Node(network['nodes'], network['edges'])

    # Gene-like ids: CDS / locus ids, plus any id not named like a cluster.
    gids = [i for i in sorted(nc.ugids.keys())
            if 'CDS' in i or 'locus' in i
            or ('clst' not in i and not i.startswith('cluster'))]

    gids2cds = ids2cds(gids)

    meth.advance("Run enrichment test for each clusters")
    rows = []
    for hr_nd in network['nodes']:
        gid = hr_nd['entity_id']
        if not (gid.startswith('cluster.') or 'clst' in gid):
            continue  # not a cluster node
        glist = nc.get_gene_list(gid)
        cds_gl = [gids2cds[i] for i in glist]

        enr_list = oc.get_go_enrichment(cds_gl, domain_list, ec_list,
                                        'hypergeometric', 'GO')
        # Best (smallest) p-value first.
        enr_list = sorted(enr_list, key=itemgetter('pvalue'))

        # A node without an annotation mapping gets an empty one rather than
        # triggering KeyError.
        notes = hr_nd.setdefault('user_annotations', {})

        go_enr_smry = ""
        go_enr_anns = ["", "", ""]
        for position, goen in enumerate(enr_list):
            if goen['pvalue'] > float(p_value):
                continue
            base_key = 'gce.' + goen['goID']
            notes[base_key + ".desc"] = goen['goDesc'][0]
            notes[base_key + ".domain"] = goen['goDesc'][1]
            # Backticks were Python 2 only; repr() gives the same text.
            notes[base_key + ".p_value"] = repr(goen['pvalue'])
            if position < 3:
                label = _go_enr_label(goen)
                go_enr_smry += label + "\n"
                go_enr_anns[position] = label
        # NOTE(review): key spelling kept as is; it is part of the saved data.
        notes['go_enrichnment_annotation'] = go_enr_smry
        rows.append([gid, len(glist)] + go_enr_anns)

    wsd.save_objects({
        'workspace': meth.workspace_id,
        'objects': [{
            'type': 'KBaseNetworks.Network',
            'data': network,
            'name': net_obj_id + ".cenr",
            # NOTE(review): 'orginal' is probably a typo for 'original';
            # unchanged here because renaming alters the stored metadata.
            'meta': {'orginal': net_obj_id},
        }],
    })

    # Clusters with the most genes come first.
    rows = sorted(rows, key=lambda x: x[1], reverse=True)

    header = ["Cluster ID", "# of Genes", "Annotation1", "Annotation2",
              "Annotation3"]
    data = {'table': [header] + rows}
    return json.dumps(data)
Ejemplo n.º 6
0
def go_anno_net(meth, net_obj_id=None):
    """Add Gene Ontology annotation to network gene nodes

    :param net_obj_id: Network object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the field-style docstring above looks machine-readable, so
    # its text is left exactly as it was.
    #
    # Five reported stages: prepare, load the network, resolve id
    # relationships, annotate the nodes, save ``<net_obj_id>.ano``.

    def keep_id(ident):
        # CDS / locus ids are always kept; any other id is kept only when it
        # carries none of the cluster-style markers.
        if 'CDS' in ident or 'locus' in ident:
            return True
        if 'clst' in ident:
            return False
        if ident.startswith('cluster'):
            return False
        return 'ps.' not in ident

    meth.stages = 5

    meth.advance("Prepare annotation service")

    # Both reading and saving use the current workspace.
    read_ws = meth.workspace_id
    save_ws = meth.workspace_id

    meth.advance("Load network object")
    ws_client = Workspace2(token=meth.token, wsid=read_ws)
    go_client = Ontology(url=URLS.ontology)

    net = ws_client.get(net_obj_id)
    nodes = Node(net['nodes'], net['edges'])

    ids_client = IDServerAPI(URLS.ids)
    cdmi_client = CDMI_API(URLS.cdmi)
    # Still instantiated, as before; it is not referenced afterwards.
    entity_client = CDMI_EntityAPI(URLS.cdmi)

    wanted = [ident for ident in sorted(nodes.ugids.keys())
              if keep_id(ident)]

    meth.advance("Get relationships from central data model")
    ext_ids = kb_id2ext_id(ids_client, wanted, 100)
    to_cds = ids2cds(wanted)
    cds_list = to_cds.values()
    to_locus = cds2locus(cds_list)
    locus_list = to_locus.values()

    progress = "Annotate ({:d} nodes, {:d} edges)".format(
        len(net['nodes']), len(net['edges']))
    meth.advance(progress)
    annotation_inputs = {
        # GO ids for the CDS ids; both filter lists are left empty.
        'ots': go_client.get_goidlist(cds_list, [], []),
        'oan': (),
    }
    annotation_inputs['funcs'] = cdmi_client.fids_to_functions(locus_list)
    annotation_inputs['funcs_org'] = cdmi_client.fids_to_functions(cds_list)
    annotation_inputs['eids'] = ext_ids
    annotation_inputs['gids2cds'] = to_cds
    annotation_inputs['cds2l'] = to_locus
    annotate_nodes(net, **annotation_inputs)

    meth.advance("Save annotated object to workspace {}".format(save_ws))
    record = {'type': 'KBaseNetworks.Network', 'data': net}
    record['name'] = net_obj_id + ".ano"
    record['meta'] = {'original': net_obj_id}
    ws_client.save_objects({'workspace': save_ws, 'objects': [record]})

    return _workspace_output(net_obj_id + ".ano")
Ejemplo n.º 7
0
def featureset_net_enr(meth, feature_set_id=None, p_value=None, ref_wsid="KBasePublicNetwork", ref_network=None, out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ref_wsid: Reference Network workspace id (optional, default to current workspace)
    :type ref_wsid: kbtypes.Unicode
    :param ref_network: Reference Network object name
    :type ref_network:kbtypes.KBaseNetworks.Network
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE(review): the field-style docstring above looks machine-readable, so
    # its text is left exactly as it was.
    #
    # Tests the genes of a FeatureSet against a reference network, saves one
    # FeatureSet per result at or below the cutoff (the genes shared with that
    # network node), re-saves the input under ``out_id`` with a summary of the
    # best hits, and returns the result table as JSON. Returns "{}" when the
    # loaded objects lack 'edges' / 'nodes' / 'elements'.
    # NOTE(review): ``p_value=None`` (the default) makes ``float(p_value)``
    # raise TypeError once the test yields any result.
    meth.stages = 3
    meth.advance("Prepare Enrichment Test")

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    if not ref_wsid:
        ref_wsid = meth.workspace_id
    ws2 = Workspace2(token=meth.token, wsid=ref_wsid)
    net = ws2.get(ref_network)

    # checking user input
    if 'edges' not in net or 'nodes' not in net or 'elements' not in fs:
        return "{}"

    # Materialise the keys so a plain list is passed on Python 3 too.
    qid2cds = ids2cds(list(fs['elements'].keys()))
    # parse networks object
    nc = Node(net['nodes'], net['edges'])

    meth.advance("Execute Enrichment Test")
    qcdss = set(qid2cds.values())
    enr_dict = oc.association_test(list(qcdss), ref_wsid, ref_network, '', 'hypergeometric', 'none', p_value)
    # Rank by the NUMERIC p-value (ties broken by id). The values go through
    # float() everywhere they are used, so they may be text; ordering the raw
    # values would then be lexicographic and the "first three" summary below
    # would not be the three most significant results.
    ranked = sorted(enr_dict.items(),
                    key=lambda item: (float(item[1]), item[0]))

    nid2name = {}
    for ne in net['nodes']:
        nid2name[ne['entity_id']] = ne['name']

    pwy_enr_smry = ""
    header = ["Pathway ID", "Name", "p-value", "FeatureSet ID (# genes)"]
    fields = []
    objects = []
    for position, (pwy_id, raw_pvalue) in enumerate(ranked):
        pvalue = float(raw_pvalue)
        if pvalue > float(p_value):
            continue
        # Query genes that belong to this network node.
        shared = set(nc.get_gene_list(pwy_id))
        shared = list(shared.intersection(qcdss))
        cfs = genelist2fs(shared)
        object_name = out_id + "_to_" + pwy_id
        fields.append([pwy_id,
                       nid2name[pwy_id],
                       "{:12.10f}".format(pvalue),
                       object_name + "({})".format(len(shared))])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': object_name,
            'meta': {
                'original': feature_set_id,
                'ref_wsid': ref_wsid,
                'ref_net': ref_network,
                'pwy_id': pwy_id,
            },
        })
        # ranked is ascending, so the first three positions are the best hits.
        if position < 3:
            pwy_enr_smry += pwy_id + "(" + "{:6.4f}".format(pvalue) + ")" + nid2name[pwy_id] + "\n"

    data = {'table': [header] + fields}
    meth.advance("Saving output to Workspace")

    # Input FeatureSet, stored again under out_id together with the summary.
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'ref_wsid': ref_wsid,
            'ref_net': ref_network,
            'pwy_enr_summary': pwy_enr_smry,
        },
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})

    meth.advance("Returning object")
    return json.dumps(data)
Ejemplo n.º 8
0
def featureset_go_anal(meth, feature_set_id=None, p_value=0.05, ec='IEA', domain='biological_process', out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA,ISS,IDA,IEP,IPI,RCA ..)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process,molecular_function,cellular_component)
    :type domain: kbtypes.Unicode
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE(review): the field-style docstring above looks machine-readable, so
    # its text is left exactly as it was.
    #
    # Stage 1 loads the FeatureSet and resolves CDS / locus ids and functions.
    # Stage 2 copies GO term details into each element's ``metadata``.
    # Stage 3 runs the enrichment test and builds one FeatureSet per term at
    # or below the cutoff. Stage 4 saves those plus the annotated input (under
    # ``out_id``). The enriched terms are returned as a JSON table.
    meth.stages = 4
    meth.advance("Prepare Enrichment Test")

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    features = fs['elements']
    # list() keeps these arguments plain lists on Python 3 as well.
    qid2cds = ids2cds(list(features.keys()))
    cds2l = cds2locus(list(qid2cds.values()))
    cdmic = CDMI_API(URLS.cdmi)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))
    # Unique CDS ids; computed once instead of once per ontology call.
    unique_cds = list(set(qid2cds.values()))

    meth.advance("Annotate GO Term")
    # The blank-free ec/domain text is reused in the saved metadata below.
    ec = ec.replace(" ", "")
    domain = domain.replace(" ", "")
    ec_list = ec.split(',')
    domain_list = domain.split(',')
    ots = oc.get_goidlist(unique_cds, domain_list, ec_list)
    go2cds = {}  # GO id -> CDS ids that carry it
    for gid, feature in features.items():
        lid = qid2cds[gid]
        if 'data' in feature and not feature['data']['function']:
            # Empty function text is filled in from the locus lookup.
            feature['data']['function'] = lfunc[cds2l[lid]]
        if 'metadata' not in feature:
            feature['metadata'] = {}
        if lid not in ots:
            continue
        for go, go_entries in ots[lid].items():
            go2cds.setdefault(go, set()).add(lid)
            for i, goen in enumerate(go_entries):
                for ext in ("domain", "ec", "desc"):
                    # Written once; the original repeated this assignment.
                    meta_key = "go.{}.{:d}.{}".format(go, i, ext)
                    feature['metadata'][meta_key] = goen[ext]

    meth.advance("Execute Enrichment Test")
    enr_list = oc.get_go_enrichment(unique_cds, domain_list, ec_list, 'hypergeometric', 'GO')
    # Smallest p-value first, so positions 0-2 are the three best terms.
    enr_list = sorted(enr_list, key=itemgetter('pvalue'))
    header = ["GO ID", "Description", "Domain", "p-value", "FeatureSet ID (# genes)"]
    fields = []
    objects = []
    go_enr_smry = ""
    for position, goen in enumerate(enr_list):
        if goen['pvalue'] > float(p_value):
            continue
        go_id = goen['goID']
        carriers = go2cds[go_id]
        cfs = genelist2fs(list(carriers))
        # Object names use the GO id without its colon.
        goid = go_id.replace(":", "")
        fields.append([
            go_id,
            goen['goDesc'][0],
            goen['goDesc'][1],
            "{:12.10f}".format(goen['pvalue']),
            "{}_to_{} ({})".format(out_id, goid, len(carriers)),
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_id + "_to_" + goid,
            'meta': {
                'original': feature_set_id,
                'domain': domain,
                'ec': ec,
                'GO_ID': go_id,
            },
        })
        if position < 3:
            go_enr_smry += go_id + "(" + "{:6.4f}".format(goen['pvalue']) + ")" + goen['goDesc'][0] + "\n"
    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    # Annotated input FeatureSet, saved under out_id with the summary text.
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'enr_summary': go_enr_smry,
        },
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})
    return json.dumps(data)