def gene_network2ws(meth, feature_set_id=None, out_id=None):
    """ Query all available network data in KBase central store.

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param out_id: Output Networks object identifier
    :type out_id: kbtypes.KBaseNetworks.Network
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the :param/:type/:output_widget fields above look like
    # machine-readable metadata, so the docstring wording is left untouched --
    # TODO confirm before rephrasing it.
    meth.stages = 3

    meth.advance("init GWAS service")
    gwas_client = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Retrieve genes from workspace")
    workspace = Workspace2(token=meth.token, wsid=meth.workspace_id)
    feature_set = workspace.get(feature_set_id)
    # ids2cds returns a mapping; only its values are forwarded, with
    # duplicates collapsed through a set before joining.
    id_to_cds = ids2cds(feature_set['elements'].keys())
    unique_cds = set(id_to_cds.values())
    gene_csv = ",".join(unique_cds)

    meth.advance("Running GeneList to Networks")
    request = dict(ws_id=meth.workspace_id,
                   inobj_id=gene_csv,
                   outobj_id=out_id)
    try:
        # The value returned by the service is not used afterwards.
        gwas_client.genelist_to_networks(request)
    except Exception as exc:
        failure = "submit job failed: {}".format(exc)
        raise GWASException(failure)

    meth.advance("Returning object")
    return _workspace_output(out_id)
def gwas_run_gwas2(meth, genotype=None, kinship_matrix=None, traits=None, out=None):
    """Computes association between each SNP and a trait of interest that has been scored
    across a large number of individuals. This method takes Filtered SNP object,
    kinship matrix, trait object as input and computes association.

    :param genotype: Population variation object
    :type genotype: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param kinship_matrix: Kinship matrix object id
    :type kinship_matrix: kbtypes.KBaseGwasData.GwasPopulationKinship
    :param traits: Trait object id
    :type traits: kbtypes.KBaseGwasData.GwasPopulationTrait
    :param out: Output
    :type out: kbtypes.KBaseGwasData.GwasTopVariations
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): docstring fields look machine-parsed; wording kept as-is.
    meth.stages = 3

    meth.advance("init GWAS service")
    gwas_client = GWAS(URLS.gwas, token=meth.token)
    request = dict(ws_id=meth.workspace_id,
                   variation_id=genotype,
                   trait_id=traits,
                   kinship_id=kinship_matrix,
                   out_id=out,
                   comment="comment")

    meth.advance("submit job to run GWAS analysis")
    try:
        job_ids = gwas_client.run_gwas(request)
    except Exception as exc:
        failure = "submit job failed: {}".format(exc)
        raise GWASException(failure)

    # An empty/None reply means the service handed back no job to follow.
    if not job_ids:
        raise GWASException(2, "submit job failed, no job id")

    # AweJob reads its endpoint from a class attribute, so set it first.
    AweJob.URL = URLS.awe
    awe_job = AweJob(meth,
                     started="GWAS analysis using emma",
                     running="GWAS analysis using emma")
    awe_job.run(job_ids[0])
    return _workspace_output(out)
def gwas_run_kinship(meth, filtered_variation=None, out=None, comment=None):
    """Computes the n by n kinship matrix for a set of n related subjects.
    The kinship matrix defines pairwise genetic relatedness among individuals and
    is estimated by using all genotyped markers. This requires the filtered SNPs as input.

    :param filtered_variation: Population variation, filtered
    :type filtered_variation: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param out: Computed Kinship matrix
    :type out: kbtypes.KBaseGwasData.GwasPopulationKinship
    :param comment: Comment
    :type comment: kbtypes.Unicode
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the submission fails or yields no job id.
    meth.stages = 3
    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    argsx = {
        "ws_id": meth.workspace_id,
        "inobj_id": filtered_variation,
        "outobj_id": out,
        # Forward the caller's comment. The literal "comment" that used to be
        # sent unconditionally remains the fallback when none is supplied.
        "comment": comment if comment else "comment",
    }

    # The progress text now names the call actually made below; it previously
    # mentioned select_random_snps.
    meth.advance("submit job to calculate_kinship_matrix")
    try:
        jid = gc.calculate_kinship_matrix(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    if not jid:
        raise GWASException(2, "submit job failed, no job id")

    # AweJob takes its endpoint from a class attribute, so set it before use.
    AweJob.URL = URLS.awe
    AweJob(meth, started="Calculate Kinship matrix", running="Kinship matrix").run(jid[0])
    return _workspace_output(out)
def maf(meth, maf=0.05, variation=None, out=None, comment=None):
    """Perform filtering on Minor allele frequency (MAF).
    Minor allele frequency (MAF) refers to the frequency at which the least common
    <a href="http://en.wikipedia.org/wiki/Allele">allele</a> occurs in a given population.

    :param maf: Minor allele frequency
    :type maf: kbtypes.Numeric
    :param variation: Population variation object
    :type variation: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param out: Population variation, filtered
    :type out: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param comment: Comment
    :type comment: kbtypes.Unicode
    :return: Workspace ID of filtered data
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the submission fails or yields no job id.
    # Inside this body the name `maf` is the threshold parameter, not the
    # function itself.
    meth.stages = 3
    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    argsx = {
        "ws_id": meth.workspace_id,
        "inobj_id": variation,
        "outobj_id": out,
        "minor_allele_frequency": maf,
        # Forward the caller's comment. The literal "comment" that used to be
        # sent unconditionally remains the fallback when none is supplied.
        "comment": comment if comment else "comment",
    }

    meth.advance("submit job to filter VCF")
    try:
        jid = gc.prepare_variation(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    if not jid:
        raise GWASException(2, "submit job failed, no job id")

    # AweJob takes its endpoint from a class attribute, so set it before use.
    AweJob.URL = URLS.awe
    AweJob(meth, started="run VCF", running="VCF").run(jid[0])
    return _workspace_output(out)
def gwas_variation_to_genes(meth, workspaceID=None, gwasObjectID=None, num2snps=None,
                            pmin=None, distance=None, gl_out=None, fs_out=None):
    """This method takes the top SNPs obtained after GWAS analysis as input
    (TopVariations) object, -log (pvalue) cutoff and a distance parameter as input.
    For each significant SNP that passes the p-value cutoff, genes are searched in
    the window specified by the distance parameter.

    :param workspaceID: Workspace (use current if empty)
    :type workspaceID: kbtypes.Unicode
    :param gwasObjectID: GWAS analysis MLM result object
    :type gwasObjectID: kbtypes.KBaseGwasData.GwasTopVariations
    :param num2snps: Number to snps
    :type num2snps: kbtypes.Numeric
    :default num2snps: 100
    :param pmin: Minimum pvalue (-log10)
    :type pmin: kbtypes.Numeric
    :default pmin: 4
    :param distance: Distance in bp around SNP to look for genes
    :type distance: kbtypes.Numeric
    :default distance: 10000
    :param gl_out: Output GwasGeneList workspace object name
    :type gl_out: kbtypes.KBaseGwasData.GwasGeneList
    :param fs_out: Output FeatureSet workspace object name
    :type fs_out: kbtypes.KBaseSearch.FeatureSet
    :return: Workspace objectID of gwas results
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the variations_to_genes call fails.
    meth.stages = 3

    # Fall back to the current workspace when none was given. The resolved
    # value is now used for every call below; previously it was computed and
    # then ignored in favour of meth.workspace_id.
    if not workspaceID:
        workspaceID = meth.workspace_id

    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Running Variations to Genes")
    argsx = {
        "ws_id": workspaceID,
        "variation_id": gwasObjectID,
        "out_id": gl_out,
        "num2snps": num2snps,
        "pmin": pmin,
        "distance": distance,
        "comment": "comment",
    }
    try:
        # The return value is not needed; the result is read back by name.
        gc.variations_to_genes(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))

    # Read back the gene list and derive a FeatureSet from the third field of
    # each record under 'genes'.
    ws = Workspace2(token=meth.token, wsid=workspaceID)
    raw_data = ws.get(gl_out)
    gl = [gr[2] for gr in raw_data['genes']]
    fs = genelist2fs(gl)
    ws.save_objects({
        'workspace': workspaceID,
        'objects': [{
            'type': 'KBaseSearch.FeatureSet',
            'data': fs,
            'name': fs_out,
            'meta': {'original': gl_out},
        }],
    })

    meth.advance("Returning object")
    return json.dumps({
        'values': [
            ["Workspace GwasGeneList object", gl_out],
            ["Workspace FeatureSet object", fs_out],
        ]
    })
def gwas_run_gwas2(meth, genotype=None, kinship_matrix=None, traits=None, out=None):
    """Computes association between each SNP and a trait of interest that has been scored
    across a large number of individuals. This method takes Filtered SNP object,
    kinship matrix, trait object as input and computes association.

    :param genotype: Population variation object
    :type genotype: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param kinship_matrix: Kinship matrix object id
    :type kinship_matrix: kbtypes.KBaseGwasData.GwasPopulationKinship
    :param traits: Trait object id
    :type traits: kbtypes.KBaseGwasData.GwasPopulationTrait
    :param out: Output
    :type out: kbtypes.KBaseGwasData.GwasTopVariations
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): docstring fields look machine-parsed; wording kept as-is.
    meth.stages = 3

    meth.advance("init GWAS service")
    service = GWAS(URLS.gwas, token=meth.token)

    # Everything the service needs travels in one dictionary.
    payload = dict(
        ws_id=meth.workspace_id,
        variation_id=genotype,
        trait_id=traits,
        kinship_id=kinship_matrix,
        out_id=out,
        comment="comment",
    )

    meth.advance("submit job to run GWAS analysis")
    try:
        submitted = service.run_gwas(payload)
    except Exception as problem:
        text = "submit job failed: {}".format(problem)
        raise GWASException(text)

    # A falsy reply carries no job id to hand over to AweJob.
    if not submitted:
        raise GWASException(2, "submit job failed, no job id")

    # The endpoint is a class-level setting on AweJob; assign it before use.
    AweJob.URL = URLS.awe
    tracker = AweJob(meth,
                     started="GWAS analysis using emma",
                     running="GWAS analysis using emma")
    tracker.run(submitted[0])
    return _workspace_output(out)
def gene_network2ws(meth, obj_id=None, out_id=None):
    """This method displays a gene list along with functional annotation in a table.

    :param obj_id: Gene List workspace object identifier.
    :type obj_id: kbtypes.KBaseGwasData.GwasGeneList
    :param out_id: Output Networks object identifier
    :type out_id: kbtypes.KBaseNetworks.Network
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the summary line above does not match the body, which
    # submits a gene list to genelist_to_networks. The docstring fields look
    # machine-parsed, so the text is left as-is -- TODO confirm and reword.
    meth.stages = 3

    meth.advance("init GWAS service")
    gwas_client = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Retrieve genes from workspace")
    workspace = Workspace2(token=meth.token, wsid=meth.workspace_id)
    gene_list_obj = workspace.get(obj_id)
    # Only the third field of every record under 'genes' is forwarded.
    gene_ids = []
    for record in gene_list_obj['genes']:
        gene_ids.append(record[2])
    gene_csv = ",".join(gene_ids)

    meth.advance("Running GeneList to Networks")
    request = dict(ws_id=meth.workspace_id,
                   inobj_id=gene_csv,
                   outobj_id=out_id)
    try:
        # The reply is deliberately not checked: per the original author's
        # note the call may return an empty string.
        gwas_client.genelist_to_networks(request)
    except Exception as exc:
        failure = "submit job failed: {}".format(exc)
        raise GWASException(failure)

    meth.advance("Returning object")
    return _workspace_output(out_id)
def gene_network2ws(meth, obj_id=None, out_id=None):
    """This method displays a gene list along with functional annotation in a table.

    :param obj_id: Gene List workspace object identifier.
    :type obj_id: kbtypes.KBaseGwasData.GwasGeneList
    :param out_id: Output Networks object identifier
    :type out_id: kbtypes.KBaseNetworks.Network
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the summary line above reads like a copy-paste; the body
    # sends a gene list to genelist_to_networks. The docstring is left
    # unchanged because its fields look machine-parsed -- TODO confirm.
    meth.stages = 3

    meth.advance("init GWAS service")
    service = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Retrieve genes from workspace")
    store = Workspace2(token=meth.token, wsid=meth.workspace_id)
    stored_genes = store.get(obj_id)['genes']
    # Field index 2 of each gene record is the identifier that gets joined.
    joined_ids = ",".join([entry[2] for entry in stored_genes])

    meth.advance("Running GeneList to Networks")
    payload = {
        "ws_id": meth.workspace_id,
        "inobj_id": joined_ids,
        "outobj_id": out_id,
    }
    try:
        # No check on the reply: the original author noted that the call may
        # return an empty string.
        service.genelist_to_networks(payload)
    except Exception as problem:
        raise GWASException("submit job failed: {}".format(problem))

    meth.advance("Returning object")
    return _workspace_output(out_id)
def maf(meth, maf=0.05, variation=None, out=None, comment=None):
    """Perform filtering on Minor allele frequency (MAF).
    Minor allele frequency (MAF) refers to the frequency at which the least common
    <a href="http://en.wikipedia.org/wiki/Allele">allele</a> occurs in a given population.

    :param maf: Minor allele frequency
    :type maf: kbtypes.Numeric
    :param variation: Population variation object
    :type variation: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param out: Population variation, filtered
    :type out: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param comment: Comment
    :type comment: kbtypes.Unicode
    :return: Workspace ID of filtered data
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the submission fails or yields no job id.
    # Inside this body the name `maf` is the threshold parameter, not the
    # function itself.
    meth.stages = 3
    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    argsx = {
        "ws_id": meth.workspace_id,
        "inobj_id": variation,
        "outobj_id": out,
        "minor_allele_frequency": maf,
        # The `comment` argument was previously ignored in favour of the
        # literal "comment"; that literal is now only the fallback.
        "comment": comment if comment else "comment",
    }

    meth.advance("submit job to filter VCF")
    try:
        jid = gc.prepare_variation(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    if not jid:
        raise GWASException(2, "submit job failed, no job id")

    # AweJob takes its endpoint from a class attribute, so set it before use.
    AweJob.URL = URLS.awe
    AweJob(meth, started="run VCF", running="VCF").run(jid[0])
    return _workspace_output(out)
def gwas_run_kinship(meth, filtered_variation=None, out=None, comment=None):
    """Computes the n by n kinship matrix for a set of n related subjects.
    The kinship matrix defines pairwise genetic relatedness among individuals and
    is estimated by using all genotyped markers. This requires the filtered SNPs as input.

    :param filtered_variation: Population variation, filtered
    :type filtered_variation: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param out: Computed Kinship matrix
    :type out: kbtypes.KBaseGwasData.GwasPopulationKinship
    :param comment: Comment
    :type comment: kbtypes.Unicode
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the submission fails or yields no job id.
    meth.stages = 3
    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    argsx = {
        "ws_id": meth.workspace_id,
        "inobj_id": filtered_variation,
        "outobj_id": out,
        # The `comment` argument was previously ignored in favour of the
        # literal "comment"; that literal is now only the fallback.
        "comment": comment if comment else "comment",
    }

    # The progress text now names the call actually made below; it previously
    # mentioned select_random_snps.
    meth.advance("submit job to calculate_kinship_matrix")
    try:
        jid = gc.calculate_kinship_matrix(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    if not jid:
        raise GWASException(2, "submit job failed, no job id")

    # AweJob takes its endpoint from a class attribute, so set it before use.
    AweJob.URL = URLS.awe
    AweJob(meth, started="Calculate Kinship matrix", running="Kinship matrix").run(jid[0])
    return _workspace_output(out)
def gene_network2ws(meth, feature_set_id=None, out_id=None):
    """ Query all available network data in KBase central store.

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param out_id: Output Networks object identifier
    :type out_id: kbtypes.KBaseNetworks.Network
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): docstring fields look machine-parsed; wording kept as-is.
    meth.stages = 3

    meth.advance("init GWAS service")
    service = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Retrieve genes from workspace")
    store = Workspace2(token=meth.token, wsid=meth.workspace_id)
    element_ids = store.get(feature_set_id)['elements'].keys()
    # The values of the ids2cds mapping are de-duplicated with a set and
    # joined into one comma-separated string.
    distinct_cds = set(ids2cds(element_ids).values())
    joined_ids = ",".join(distinct_cds)

    meth.advance("Running GeneList to Networks")
    payload = {
        "ws_id": meth.workspace_id,
        "inobj_id": joined_ids,
        "outobj_id": out_id,
    }
    try:
        # The reply from the service is not used afterwards.
        service.genelist_to_networks(payload)
    except Exception as problem:
        raise GWASException("submit job failed: {}".format(problem))

    meth.advance("Returning object")
    return _workspace_output(out_id)
def go_enrch_net(meth, net_obj_id=None, p_value=0.05, ec=None, domain=None):
    """Identify Gene Ontology terms enriched in individual network clusters

    :param net_obj_id: Cluster object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA, ...)
    :type ec:kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process, ...)
    :type domain: kbtypes.Unicode
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    # :default p_value: 0.05
    # NOTE(review): ":type ec:kbtypes.Unicode" lacks the space the other
    # fields have; left untouched because the fields look machine-parsed.
    #
    # Returns a JSON string {'table': [header] + rows} with one row per
    # cluster node, and saves the annotated network as net_obj_id + ".cenr".
    meth.stages = 3
    meth.advance("Prepare enrichment test")

    # Convert the cutoff once, up front, so a malformed value fails before
    # any remote call instead of midway through the cluster loop.
    cutoff = float(p_value)

    # ec and domain default to None; treat that like an empty string rather
    # than failing on None.replace(). Non-None input is parsed as before.
    ec_list = (ec or "").replace(" ", "").split(',')
    domain_list = (domain or "").replace(" ", "").split(',')

    wsd = Workspace2(token=meth.token, wsid=meth.workspace_id)
    oc = Ontology(url=URLS.ontology)
    net_object = wsd.get_objects([{
        'workspace': meth.workspace_id,
        'name': net_obj_id
    }])
    net_data = net_object[0]['data']
    nc = Node(net_data['nodes'], net_data['edges'])

    # Keep ids that mention CDS/locus, plus anything that is not a cluster id.
    gids = [
        i for i in sorted(nc.ugids.keys())
        if 'CDS' in i or 'locus' in i or
        ('clst' not in i and not i.startswith('cluster'))
    ]
    gids2cds = ids2cds(gids)

    meth.advance("Run enrichment test for each clusters")
    rows = []
    for hr_nd in net_data['nodes']:
        gid = hr_nd['entity_id']
        # Only cluster nodes are tested for enrichment.
        if not (gid.startswith('cluster.') or 'clst' in gid):
            continue

        glist = nc.get_gene_list(gid)
        cds_gl = [gids2cds[i] for i in glist]

        enr_list = oc.get_go_enrichment(cds_gl, domain_list, ec_list,
                                        'hypergeometric', 'GO')
        # Ascending p-value, so the first three entries are the three best.
        enr_list = sorted(enr_list, key=itemgetter('pvalue'))

        go_enr_smry = ""
        go_enr_anns = ["", "", ""]
        annotations = hr_nd['user_annotations']
        for i, goen in enumerate(enr_list):
            if goen['pvalue'] > cutoff:
                continue
            key_prefix = 'gce.' + goen['goID']
            annotations[key_prefix + ".desc"] = goen['goDesc'][0]
            annotations[key_prefix + ".domain"] = goen['goDesc'][1]
            # repr() replaces the Python-2-only backtick syntax; same result.
            annotations[key_prefix + ".p_value"] = repr(goen['pvalue'])
            if i < 3:
                label = (goen['goID'] + "(" +
                         "{:6.4f}".format(goen['pvalue']) + ")" +
                         goen['goDesc'][0])
                go_enr_smry += label + "\n"
                go_enr_anns[i] = label

        # NOTE(review): the misspelled keys 'go_enrichnment_annotation' and
        # 'orginal' below are stored data, so they are kept byte-for-byte.
        annotations['go_enrichnment_annotation'] = go_enr_smry
        rows.append(
            [gid, len(glist), go_enr_anns[0], go_enr_anns[1], go_enr_anns[2]])

    wsd.save_objects({
        'workspace': meth.workspace_id,
        'objects': [{
            'type': 'KBaseNetworks.Network',
            'data': net_data,
            'name': net_obj_id + ".cenr",
            'meta': {
                'orginal': net_obj_id
            }
        }]
    })

    # Largest clusters first.
    rows = sorted(rows, key=lambda x: x[1], reverse=True)
    header = [
        "Cluster ID", "# of Genes", "Annotation1", "Annotation2", "Annotation3"
    ]
    data = {'table': [header] + rows}
    return json.dumps(data)
def gwas_variation_to_genes(meth, workspaceID=None, gwasObjectID=None, num2snps=None,
                            pmin=None, distance=None, gl_out=None, fs_out=None):
    """This method takes the top SNPs obtained after GWAS analysis as input
    (TopVariations) object, -log (pvalue) cutoff and a distance parameter as input.
    For each significant SNP that passes the p-value cutoff, genes are searched in
    the window specified by the distance parameter.

    :param workspaceID: Workspace (use current if empty)
    :type workspaceID: kbtypes.Unicode
    :param gwasObjectID: GWAS analysis MLM result object
    :type gwasObjectID: kbtypes.KBaseGwasData.GwasTopVariations
    :param num2snps: Number to snps
    :type num2snps: kbtypes.Numeric
    :default num2snps: 100
    :param pmin: Minimum pvalue (-log10)
    :type pmin: kbtypes.Numeric
    :default pmin: 4
    :param distance: Distance in bp around SNP to look for genes
    :type distance: kbtypes.Numeric
    :default distance: 10000
    :param gl_out: Output GwasGeneList workspace object name
    :type gl_out: kbtypes.KBaseGwasData.GwasGeneList
    :param fs_out: Output FeatureSet workspace object name
    :type fs_out: kbtypes.KBaseSearch.FeatureSet
    :return: Workspace objectID of gwas results
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the variations_to_genes call fails.
    meth.stages = 3

    # Resolve the target workspace once. The resolved value was previously
    # computed and then ignored; it is now used for the service call, the
    # read-back and the save.
    if not workspaceID:
        workspaceID = meth.workspace_id

    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Running Variations to Genes")
    argsx = {
        "ws_id": workspaceID,
        "variation_id": gwasObjectID,
        "out_id": gl_out,
        "num2snps": num2snps,
        "pmin": pmin,
        "distance": distance,
        "comment": "comment",
    }
    try:
        # The return value is not needed; the result is read back by name.
        gc.variations_to_genes(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))

    # Read back the gene list and build a FeatureSet from the third field of
    # each record under 'genes'.
    ws = Workspace2(token=meth.token, wsid=workspaceID)
    raw_data = ws.get(gl_out)
    gl = [gr[2] for gr in raw_data['genes']]
    fs = genelist2fs(gl)
    ws.save_objects({
        'workspace': workspaceID,
        'objects': [{
            'type': 'KBaseSearch.FeatureSet',
            'data': fs,
            'name': fs_out,
            'meta': {
                'original': gl_out
            }
        }]
    })

    meth.advance("Returning object")
    return json.dumps({
        'values': [["Workspace GwasGeneList object", gl_out],
                   ["Workspace FeatureSet object", fs_out]]
    })