Esempio n. 1
0
def genelist2fs(gl):
    """Build a feature-set dictionary for a list of gene identifiers.

    The identifiers are resolved through ``ids2cds`` and ``cds2locus``
    (presumably query id -> CDS id -> locus id; confirm against those
    helpers), then the feature attributes are fetched for every resolved id.

    :param gl: iterable of gene identifier strings; they are also joined
        with "," to form the description text.
    :return: dict with a ``"description"`` string and an ``"elements"``
        dict keyed by the resolved id. Ids that the entity API returns no
        record for are omitted.
    """
    qid2cds = ids2cds(gl)
    fs = {
        "description": "Feature set generated by " + ",".join(gl),
        "elements": {},
    }
    cdmie = CDMI_EntityAPI(URLS.cdmi)
    cdmic = CDMI_API(URLS.cdmi)

    # Materialise the dict views once: the same ids are passed to several
    # service calls and iterated below, and a plain list behaves the same on
    # Python 2 and Python 3.
    cds_ids = list(qid2cds.values())
    cds2l = cds2locus(cds_ids)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))

    fm = cdmie.get_entity_Feature(
        cds_ids,
        ['feature_type', 'source_id', 'sequence_length', 'function', 'alias'])

    elements = fs['elements']
    for cds_id in cds_ids:
        if cds_id not in fm:
            continue
        feature = fm[cds_id]
        function = feature['function']
        if not function:
            # Fall back to the function recorded for the locus. ``.get``
            # keeps a CDS without a locus entry from raising KeyError; such
            # a feature simply keeps its empty function.
            locus_id = cds2l.get(cds_id)
            if locus_id in lfunc:
                function = lfunc[locus_id]
        elements[cds_id] = {
            "data": {
                'type': feature['feature_type'],
                'id': cds_id,
                'dna_sequence_length': int(feature['sequence_length']),
                'function': function,
                'aliases': feature['alias'],
            }
        }
    return fs
Esempio n. 2
0
def genelist2fs(gl):
    """Build a feature-set dictionary for a list of gene identifiers.

    The identifiers are resolved through ``ids2cds`` and ``cds2locus``
    (presumably query id -> CDS id -> locus id; confirm against those
    helpers), then the feature attributes are fetched for every resolved id.

    :param gl: iterable of gene identifier strings; they are also joined
        with "," to form the description text.
    :return: dict with a ``"description"`` string and an ``"elements"``
        dict keyed by the resolved id. Ids that the entity API returns no
        record for are omitted.
    """
    qid2cds = ids2cds(gl)
    fs = {
        "description": "Feature set generated by " + ",".join(gl),
        "elements": {},
    }
    cdmie = CDMI_EntityAPI(URLS.cdmi)
    cdmic = CDMI_API(URLS.cdmi)

    # Materialise the dict views once: the same ids are passed to several
    # service calls and iterated below, and a plain list behaves the same on
    # Python 2 and Python 3.
    cds_ids = list(qid2cds.values())
    cds2l = cds2locus(cds_ids)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))

    wanted_fields = [
        'feature_type', 'source_id', 'sequence_length', 'function', 'alias'
    ]
    fm = cdmie.get_entity_Feature(cds_ids, wanted_fields)

    elements = fs['elements']
    for cds_id in cds_ids:
        if cds_id not in fm:
            continue
        feature = fm[cds_id]
        function = feature['function']
        if not function:
            # Fall back to the function recorded for the locus. ``.get``
            # keeps a CDS without a locus entry from raising KeyError; such
            # a feature simply keeps its empty function.
            locus_id = cds2l.get(cds_id)
            if locus_id in lfunc:
                function = lfunc[locus_id]
        elements[cds_id] = {
            "data": {
                'type': feature['feature_type'],
                'id': cds_id,
                'dna_sequence_length': int(feature['sequence_length']),
                'function': function,
                'aliases': feature['alias'],
            }
        }
    return fs
Esempio n. 3
0
def go_anno_net(meth, net_obj_id=None):
    """Add Gene Ontology annotation to network gene nodes

    :param net_obj_id: Network object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the docstring is kept verbatim; its reST fields (e.g.
    # ``:output_widget:``) look like they are read by tooling -- confirm.
    #
    # Flow: load the network object, pick the gene-like node ids, resolve
    # them to CDS and locus ids, fetch GO terms and functions, hand all of it
    # to ``annotate_nodes`` and save the network as "<net_obj_id>.ano".

    def is_gene_like(node_id):
        # CDS / locus ids always qualify; anything else qualifies only when
        # it carries none of the cluster / "ps." markers.
        if 'CDS' in node_id or 'locus' in node_id:
            return True
        return not ('clst' in node_id
                    or node_id.startswith('cluster')
                    or 'ps.' in node_id)

    meth.stages = 5

    meth.advance("Prepare annotation service")
    # The object is read from and written back to the current workspace.
    source_ws = meth.workspace_id
    target_ws = meth.workspace_id

    meth.advance("Load network object")
    workspace = Workspace2(token=meth.token, wsid=source_ws)
    ontology = Ontology(url=URLS.ontology)

    network = workspace.get(net_obj_id)
    node_index = Node(network['nodes'], network['edges'])

    id_server = IDServerAPI(URLS.ids)
    cdmi = CDMI_API(URLS.cdmi)
    # Constructed exactly as before although nothing below reads it.
    entity_api = CDMI_EntityAPI(URLS.cdmi)
    gene_ids = [
        node_id for node_id in sorted(node_index.ugids.keys())
        if is_gene_like(node_id)
    ]

    meth.advance("Get relationships from central data model")
    external_ids = kb_id2ext_id(id_server, gene_ids, 100)
    gene2cds = ids2cds(gene_ids)
    cds_ids = gene2cds.values()
    cds2locus_map = cds2locus(cds_ids)
    locus_ids = cds2locus_map.values()

    node_count = len(network['nodes'])
    edge_count = len(network['edges'])
    meth.advance("Annotate ({:d} nodes, {:d} edges)".format(
        node_count, edge_count))
    go_terms = ontology.get_goidlist(cds_ids, [], [])
    # The GO annotation lookup is disabled; an empty tuple is passed instead.
    go_annotation = ()
    locus_functions = cdmi.fids_to_functions(locus_ids)
    cds_functions = cdmi.fids_to_functions(cds_ids)
    # The return value is not used; annotate_nodes is expected to update
    # ``network`` in place, since that same object is saved below.
    annotate_nodes(
        network,
        ots=go_terms,
        oan=go_annotation,
        funcs=locus_functions,
        funcs_org=cds_functions,
        eids=external_ids,
        gids2cds=gene2cds,
        cds2l=cds2locus_map)

    meth.advance("Save annotated object to workspace {}".format(target_ws))
    annotated_name = net_obj_id + ".ano"
    record = {
        'type': 'KBaseNetworks.Network',
        'data': network,
        'name': annotated_name,
        'meta': {'original': net_obj_id},
    }
    workspace.save_objects({'workspace': target_ws, 'objects': [record]})

    return _workspace_output(annotated_name)
Esempio n. 4
0
def featureset_go_anal(meth,
                       feature_set_id=None,
                       p_value=0.05,
                       ec='IEA',
                       domain='biological_process',
                       out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA,ISS,IDA,IEP,IPI,RCA ..)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process,molecular_function,cellular_component)
    :type domain: kbtypes.Unicode
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE(review): the docstring is kept verbatim; its reST fields (e.g.
    # ``:output_widget:``) look like they are read by tooling -- confirm.
    #
    # What the body does:
    #   1. loads the feature set and resolves its element ids via ids2cds /
    #      cds2locus;
    #   2. writes "go.<GO id>.<n>.<domain|ec|desc>" metadata onto every
    #      element and fills in an empty 'function' from the locus function;
    #   3. runs the enrichment test and, for every term at or below the
    #      cutoff, builds one feature set of the member genes;
    #   4. saves all of those plus the annotated input set under ``out_id``
    #      and returns the result table as a JSON string.
    meth.stages = 4
    meth.advance("Prepare Enrichment Test")

    # Parse the cutoff once, before any remote call, so a malformed value
    # fails immediately instead of after the enrichment request.
    cutoff = float(p_value)

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    elements = fs['elements']
    qid2cds = ids2cds(list(elements.keys()))
    cds2l = cds2locus(list(qid2cds.values()))
    cdmic = CDMI_API(URLS.cdmi)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))

    meth.advance("Annotate GO Term")
    ec = ec.replace(" ", "")
    domain = domain.replace(" ", "")
    ec_list = ec.split(',')
    domain_list = domain.split(',')
    # De-duplicated gene list shared by the annotation and enrichment calls.
    query_ids = list(set(qid2cds.values()))
    ots = oc.get_goidlist(query_ids, domain_list, ec_list)

    go2cds = {}  # GO id -> set of resolved ids annotated with that term
    for gid, element in elements.items():
        lid = qid2cds[gid]
        if 'data' in element:
            data = element['data']
            if not data.get('function'):
                # Same guarded fallback as genelist2fs: only overwrite when
                # a locus and a function for it are actually known, instead
                # of raising KeyError.
                locus_id = cds2l.get(lid)
                if locus_id in lfunc:
                    data['function'] = lfunc[locus_id]
        metadata = element.setdefault('metadata', {})
        if lid not in ots:
            continue
        for go, annotations in ots[lid].items():
            go2cds.setdefault(go, set()).add(lid)
            for idx, goen in enumerate(annotations):
                for ext in ("domain", "ec", "desc"):
                    key = "go.{}.{:d}.{}".format(go, idx, ext)
                    metadata[key] = goen[ext]

    meth.advance("Execute Enrichment Test")
    enr_list = oc.get_go_enrichment(query_ids, domain_list, ec_list,
                                    'hypergeometric', 'GO')
    enr_list = sorted(enr_list, key=itemgetter('pvalue'))
    header = [
        "GO ID", "Description", "Domain", "p-value", "FeatureSet ID (# genes)"
    ]
    fields = []
    objects = []
    summary_lines = []
    for rank, goen in enumerate(enr_list):
        if goen['pvalue'] > cutoff:
            continue
        go_id = goen['goID']
        members = go2cds[go_id]
        cfs = genelist2fs(list(members))
        goid = go_id.replace(":", "")
        fields.append([
            go_id, goen['goDesc'][0], goen['goDesc'][1],
            "{:12.10f}".format(goen['pvalue']),
            "{}_to_{} ({})".format(out_id, goid, len(members))
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_id + "_to_" + goid,
            'meta': {
                'original': feature_set_id,
                'domain': domain,
                'ec': ec,
                'GO_ID': go_id
            }
        })
        # Only the three best-ranked terms go into the summary text.
        if rank < 3:
            summary_lines.append(
                go_id + "(" + "{:6.4f}".format(goen['pvalue']) + ")" +
                goen['goDesc'][0] + "\n")
    go_enr_smry = "".join(summary_lines)
    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'enr_summary': go_enr_smry
        }
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})
    return json.dumps(data)
Esempio n. 5
0
def go_anno_net(meth, net_obj_id=None):
    """Add Gene Ontology annotation to network gene nodes

    :param net_obj_id: Network object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the docstring is kept verbatim; its reST fields (e.g.
    # ``:output_widget:``) look like they are read by tooling -- confirm.
    #
    # Steps: read the network from the workspace, keep the node ids that
    # look like genes, map them to CDS and locus ids, collect GO terms and
    # function strings, pass everything to ``annotate_nodes`` and store the
    # network again under "<net_obj_id>.ano".

    def keep_node(node_id):
        # CDS / locus ids are always kept; other ids are kept only when
        # they show none of the cluster / "ps." markers.
        has_gene_tag = 'CDS' in node_id or 'locus' in node_id
        if has_gene_tag:
            return True
        has_cluster_tag = ('clst' in node_id
                           or node_id.startswith('cluster')
                           or 'ps.' in node_id)
        return not has_cluster_tag

    meth.stages = 5

    meth.advance("Prepare annotation service")
    # Both the load and the save use the current workspace.
    load_ws = meth.workspace_id
    save_ws = meth.workspace_id

    meth.advance("Load network object")
    ws_client = Workspace2(token=meth.token, wsid=load_ws)
    go_client = Ontology(url=URLS.ontology)

    net = ws_client.get(net_obj_id)
    nodes = Node(net['nodes'], net['edges'])

    id_client = IDServerAPI(URLS.ids)
    cdm_client = CDMI_API(URLS.cdmi)
    # Still constructed, as in the original, even though it is never read.
    cdm_entity_client = CDMI_EntityAPI(URLS.cdmi)
    selected_ids = [
        node_id for node_id in sorted(nodes.ugids.keys())
        if keep_node(node_id)
    ]

    meth.advance("Get relationships from central data model")
    ext_ids = kb_id2ext_id(id_client, selected_ids, 100)
    id2cds = ids2cds(selected_ids)
    cds_list = id2cds.values()
    cds2loc = cds2locus(cds_list)
    locus_list = cds2loc.values()

    progress_text = "Annotate ({:d} nodes, {:d} edges)".format(
        len(net['nodes']), len(net['edges']))
    meth.advance(progress_text)
    go_ids = go_client.get_goidlist(cds_list, [], [])
    # GO annotation retrieval is switched off; an empty tuple stands in.
    no_annotation = ()
    functions_by_locus = cdm_client.fids_to_functions(locus_list)
    functions_by_cds = cdm_client.fids_to_functions(cds_list)
    # The call's result is ignored; ``net`` itself is what gets saved below,
    # so annotate_nodes is expected to modify it in place.
    annotate_nodes(
        net,
        ots=go_ids,
        oan=no_annotation,
        funcs=functions_by_locus,
        funcs_org=functions_by_cds,
        eids=ext_ids,
        gids2cds=id2cds,
        cds2l=cds2loc)

    meth.advance("Save annotated object to workspace {}".format(save_ws))
    saved_name = net_obj_id + ".ano"
    payload = {
        'type': 'KBaseNetworks.Network',
        'data': net,
        'name': saved_name,
        'meta': {'original': net_obj_id},
    }
    ws_client.save_objects({'workspace': save_ws, 'objects': [payload]})

    return _workspace_output(saved_name)
Esempio n. 6
0
def featureset_go_anal(meth, feature_set_id=None, p_value=0.05, ec='IEA',
                       domain='biological_process', out_id=None):
    """This method annotate GO terms and execute GO enrichment test

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA,ISS,IDA,IEP,IPI,RCA ..)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process,molecular_function,cellular_component)
    :type domain: kbtypes.Unicode
    :param out_id: Output FeatureSet object identifier
    :type out_id: kbtypes.KBaseSearch.FeatureSet
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # NOTE(review): the docstring is kept verbatim; its reST fields (e.g.
    # ``:output_widget:``) look like they are read by tooling -- confirm.
    #
    # What the body does:
    #   1. loads the feature set and resolves its element ids via ids2cds /
    #      cds2locus;
    #   2. writes "go.<GO id>.<n>.<domain|ec|desc>" metadata onto every
    #      element and fills in an empty 'function' from the locus function;
    #   3. runs the enrichment test and, for every term at or below the
    #      cutoff, builds one feature set of the member genes;
    #   4. saves all of those plus the annotated input set under ``out_id``
    #      and returns the result table as a JSON string.
    meth.stages = 4
    meth.advance("Prepare Enrichment Test")

    # Parse the cutoff once, before any remote call, so a malformed value
    # fails immediately instead of after the enrichment request.
    cutoff = float(p_value)

    oc = Ontology(url=URLS.ontology)
    ws = Workspace2(token=meth.token, wsid=meth.workspace_id)
    fs = ws.get(feature_set_id)
    elements = fs['elements']
    qid2cds = ids2cds(list(elements.keys()))
    cds2l = cds2locus(list(qid2cds.values()))
    cdmic = CDMI_API(URLS.cdmi)
    lfunc = cdmic.fids_to_functions(list(cds2l.values()))

    meth.advance("Annotate GO Term")
    ec = ec.replace(" ", "")
    domain = domain.replace(" ", "")
    ec_list = ec.split(',')
    domain_list = domain.split(',')
    # De-duplicated gene list shared by the annotation and enrichment calls.
    query_ids = list(set(qid2cds.values()))
    ots = oc.get_goidlist(query_ids, domain_list, ec_list)

    go2cds = {}  # GO id -> set of resolved ids annotated with that term
    for gid, element in elements.items():
        lid = qid2cds[gid]
        if 'data' in element:
            data = element['data']
            if not data.get('function'):
                # Same guarded fallback as genelist2fs: only overwrite when
                # a locus and a function for it are actually known, instead
                # of raising KeyError.
                locus_id = cds2l.get(lid)
                if locus_id in lfunc:
                    data['function'] = lfunc[locus_id]
        metadata = element.setdefault('metadata', {})
        if lid not in ots:
            continue
        for go, annotations in ots[lid].items():
            go2cds.setdefault(go, set()).add(lid)
            for idx, goen in enumerate(annotations):
                for ext in ("domain", "ec", "desc"):
                    key = "go.{}.{:d}.{}".format(go, idx, ext)
                    metadata[key] = goen[ext]

    meth.advance("Execute Enrichment Test")
    enr_list = oc.get_go_enrichment(query_ids, domain_list, ec_list,
                                    'hypergeometric', 'GO')
    enr_list = sorted(enr_list, key=itemgetter('pvalue'))
    header = ["GO ID", "Description", "Domain", "p-value",
              "FeatureSet ID (# genes)"]
    fields = []
    objects = []
    summary_lines = []
    for rank, goen in enumerate(enr_list):
        if goen['pvalue'] > cutoff:
            continue
        go_id = goen['goID']
        members = go2cds[go_id]
        cfs = genelist2fs(list(members))
        goid = go_id.replace(":", "")
        fields.append([
            go_id,
            goen['goDesc'][0],
            goen['goDesc'][1],
            "{:12.10f}".format(goen['pvalue']),
            "{}_to_{} ({})".format(out_id, goid, len(members)),
        ])
        objects.append({
            'type': 'KBaseSearch.FeatureSet',
            'data': cfs,
            'name': out_id + "_to_" + goid,
            'meta': {
                'original': feature_set_id,
                'domain': domain,
                'ec': ec,
                'GO_ID': go_id,
            },
        })
        # Only the three best-ranked terms go into the summary text.
        if rank < 3:
            summary_lines.append(
                go_id + "(" + "{:6.4f}".format(goen['pvalue']) + ")" +
                goen['goDesc'][0] + "\n")
    go_enr_smry = "".join(summary_lines)
    data = {'table': [header] + fields}

    meth.advance("Saving output to Workspace")
    objects.append({
        'type': 'KBaseSearch.FeatureSet',
        'data': fs,
        'name': out_id,
        'meta': {
            'original': feature_set_id,
            'enr_summary': go_enr_smry,
        },
    })
    ws.save_objects({'workspace': meth.workspace_id, 'objects': objects})
    return json.dumps(data)