def load_kegg(gene, organism):
    """Return the KEGG pathway names for *gene* as one comma-separated string.

    Args:
        gene: gene identifier passed to ``KEGG.get_pathway_by_gene``.
        organism: KEGG organism code passed to the same call.

    Returns:
        The values of the mapping returned by ``get_pathway_by_gene`` joined
        with ``', '``. An empty string is returned when the lookup yields
        nothing or fails.
    """
    k = KEGG()
    result_line = ''
    try:
        pathways = k.get_pathway_by_gene(gene, organism)
        if pathways:
            result_line = ', '.join(pathways.values())
    except Exception:
        # Best-effort lookup: any failure is reported and treated as "no
        # pathways". ``Exception`` (rather than a bare ``except``) keeps
        # Ctrl-C and interpreter shutdown working.
        print(" Gene '{0}' is not in KEGG database".format(gene))
    return result_line
def kegg(inputInteractions):
    """Pair the first element of every input item with its KEGG pathways.

    Each item is indexed as ``item[0]`` (kept as-is in the output) and
    ``item[1]`` (an object exposing ``getName()``). The name is looked up
    for organism ``"hsa"`` and one ``[item[0], pathway_value]`` pair is
    emitted per pathway returned.

    Returns:
        List of two-element lists, in input order.
    """
    from bioservices.kegg import KEGG

    client = KEGG()
    pairs = []
    for entry in inputInteractions:
        source = entry[0]
        gene_name = entry[1].getName()
        print(gene_name)
        try:
            found = client.get_pathway_by_gene(gene_name, "hsa")
            if not found:
                continue
            pairs.extend([source, description]
                         for description in found.values())
        except AttributeError:
            print("Gene name error!!!!!!!!!")
    return pairs
else: blast_text = 'NULL' if pfamHits.get(ids) != None: pfam_text = pfamHits[ids] else: pfam_text = 'NULL' if prositeHits.get(ids) != None: prosite_text = prositeHits[ids] else: prosite_text = 'NULL' # Get the KEGG hits kegg = KEGG() kegg_text = '' gene_id = gene_ids[ids] KEGG_IDs = kegg.get_pathway_by_gene(gene_id, "acb") if KEGG_IDs != None: for KEGG_ID in KEGG_IDs: kegg_text += KEGG_IDs[KEGG_ID] + ' [' + KEGG_ID + ']; ' kegg_text = kegg_text[:-2] else: kegg_text = 'NULL' comments = 'NULL' row = ids + '\t' + blast_text + '\t' + pfam_text + '\t' + prosite_text + '\t' + kegg_text + '\t' + GO_IDs + '\t' + comments + '\n' output.write(row) output.close()
# extract all relations from all pathways
#
# Reads the RNA-Seq count table, looks up every gene ID in KEGG (organism
# "lpl") and writes one "<ID>\t<pathways>" row per gene to Kegg.txt.
import sys

from bioservices.kegg import KEGG

kegg = KEGG()

COUNTS_PATH = "/home/enrico/Desktop/Snakemake/RNA-Seq-counts.txt"
KEGG_OUT_PATH = "/home/enrico/Desktop/Snakemake/Kegg.txt"
# ID column followed by the eight sample columns.
EXPECTED_COLUMNS = 9

# ``with`` guarantees both files are closed even when a lookup fails.
with open(COUNTS_PATH, "r", encoding="latin-1") as counts, \
        open(KEGG_OUT_PATH, "w") as output:
    # The first two lines of the table are not data rows and are skipped.
    next(counts, None)
    next(counts, None)

    for line_number, raw_line in enumerate(counts, start=3):
        line = raw_line.strip()
        if not line:
            continue

        fields = line.split("\t")
        if len(fields) != EXPECTED_COLUMNS:
            # Report and skip instead of silently aborting the whole run.
            print("line {0}: expected {1} tab-separated columns, got {2}; "
                  "skipped".format(line_number, EXPECTED_COLUMNS,
                                   len(fields)),
                  file=sys.stderr)
            continue

        ID = fields[0]
        try:
            pathways = kegg.get_pathway_by_gene(ID, "lpl")
        except Exception as err:
            # Best-effort: one failed lookup must not truncate the output.
            print("line {0}: KEGG lookup failed for {1!r}: {2}".format(
                line_number, ID, err), file=sys.stderr)
            continue

        if pathways is None:
            output.write(ID + "\t" + "No pathway found" + "\n")
        else:
            # Same textual form as before: str() of the result with any
            # surrounding square brackets removed.
            output.write(ID + "\t" + str(pathways).strip('[]') + "\n")
else: dr['refseq_genomic'] = 'NA' dr['refseq_rna'] = 'NA' dr['refseq_protein'] = 'NA' # go if dq.has_key('go'): dr['go.cc'] = str(ld2dl(dq['go']['CC'])).replace('u"', "").replace( '"', "").replace("u'", "").replace( "'", "") if dq['go'].has_key('CC') else 'NA' dr['go.mf'] = str(ld2dl(dq['go']['MF'])).replace('u"', "").replace( '"', "").replace("u'", "").replace( "'", "") if dq['go'].has_key('MF') else 'NA' dr['go.bp'] = str(ld2dl(dq['go']['BP'])).replace('u"', "").replace( '"', "").replace("u'", "").replace( "'", "") if dq['go'].has_key('BP') else 'NA' else: dr['go.cc'] = 'NA' dr['go.mf'] = 'NA' dr['go.bp'] = 'NA' # kegg if len(re.findall(kt + ':' + dr['entrezgene'] + "\t", kge)) != 0: dr['kegg'] = str(kg.get_pathway_by_gene(dr['entrezgene'], kt)).replace( 'u"', "").replace('"', "").replace("u'", "").replace("'", "") else: dr['kegg'] = 'NA' sys.stderr.write("\t".join([dr[kr] for kr in lr]) + "\n") # write if fi != "-": f.close()
class KEGGPathways:
    """
    KEGG PATHWAY Database API

    Wraps a ``KEGG`` client (``self.database``) together with the KEGG
    organism code (``self.organism``) that gene and pathway queries use.
    """

    def __init__(self, organism="Homo sapiens"):
        """
        Args:
            organism: organism name (ex. 'Homo sapiens', 'human'),
                case-insensitive

        Raises:
            KeyError: if the organism name is not known to KEGG.
        """
        self.database = KEGG()
        self.organism = self.get_organism_code(organism.lower())

    def search_by_gene(self, gene_name: str):
        """
        Args:
            gene_name: gene name (ex. 'BRCA2')

        Returns:
            Dictionary with ids of all pathways containing given gene as keys
            and their full names as values. Empty dictionary when nothing is
            found or the lookup raises ``AttributeError``.
        """
        try:
            pathways = self.database.get_pathway_by_gene(
                gene_name, self.organism)
        except AttributeError:
            return {}
        return pathways if pathways else {}

    def get_pathway(self, pathway_id: str, self_loops: bool = False):
        """
        Args:
            pathway_id: KEGG pathway id (ex. 'hsa04110')
            self_loops: information about whether or not include self loops
                in returned graph

        Returns:
            `networkx.DiGraph` object: Directed graph depicting pathway,
            with a comma-separated string containing gene names as graph
            nodes and directed edges representing interactions between genes.
            Each edge has weight 'type', which is a list of interaction types
            between two nodes. The graph is empty when the pathway cannot be
            parsed.
        """
        G = nx.DiGraph()
        try:
            pathway = self.database.parse_kgml_pathway(pathway_id)
        except TypeError:  # incorrect pathway_id
            pathway = None
        if not pathway:
            return G

        # Map KGML entry id -> node label and entry type.
        names = {}
        for entry in pathway['entries']:
            # only intra-pathway interactions taken into account
            if entry['gene_names']:
                names[entry['id']] = {
                    'name': entry['gene_names'],
                    'type': entry['type'],
                }

        for rel in pathway['relations']:
            if rel['entry1'] not in names or rel['entry2'] not in names:
                continue
            source = names[rel['entry1']]
            target = names[rel['entry2']]
            e1, e2 = source['name'], target['name']
            G.add_node(e1, type=source['type'])
            G.add_node(e2, type=target['type'])
            if G.has_edge(e1, e2):
                G[e1][e2]['type'] = G[e1][e2]['type'] + [rel['name']]
            # assumption of interaction direction entry1 -> entry2
            # TODO: validate
            elif e1 != e2 or self_loops:
                G.add_edge(e1, e2, type=[rel['name']])

        # only interactions between genes: every non-gene node is replaced
        # by 'indirect' edges from each of its predecessors to each of its
        # successors, then removed.
        not_gene_nodes = []
        for node in G.nodes():
            # ``G.nodes[...]``: the ``G.node`` alias no longer exists in
            # current NetworkX releases.
            if G.nodes[node]['type'] == 'gene':
                continue
            # Snapshot the neighbours so the loops do not walk live edge
            # views while edges are being added.
            predecessors = [edge[0] for edge in G.in_edges(node)]
            successors = [edge[1] for edge in G.out_edges(node)]
            for pred in predecessors:
                for succ in successors:
                    if pred != succ or self_loops:
                        G.add_edge(pred, succ, type=['indirect'])
            not_gene_nodes.append(node)
        G.remove_nodes_from(not_gene_nodes)
        return G

    def fetch_organism_codes(self):
        """
        Returns:
            Dictionary with organisms as keys, and KEGG organism codes as
            values. All keys are lowercase.

            { 'homo sapiens' : 'hsa', 'human' : 'hsa', ... }
        """
        codes = {}
        for line in self.database.list('organism').split('\n'):
            if not line:
                continue
            fields = line.split('\t')
            if len(fields) < 3:
                # Not an "<id>\t<code>\t<name>..." row; nothing to index.
                continue
            code, org = fields[1], fields[2]
            if '(' in org:
                # "Name (alias)" -> both the name and the alias map to code.
                for alias in org[:-1].split('('):
                    codes[alias.strip().lower()] = code
            else:
                # Lowercased like the aliased names, so lookups done through
                # __init__ (which lowercases its argument) can match.
                codes[org.strip().lower()] = code
        return codes

    def get_organism_code(self, org: str):
        """
        Args:
            org: organism name (ex. 'Homo sapiens', 'human') - lowercase and
                uppercase optional

        Returns:
            str: KEGG organism code

        Raises:
            KeyError: if the name is not present in the KEGG organism list.
        """
        codes = self.fetch_organism_codes()
        try:
            return codes[org.strip().lower()]
        except KeyError:
            print('Invalid organism name.')
            raise

    def get_gene_code(self, gen: str):
        """
        Args:
            gen: gene name (ex. 'FGR', 'NIPAL1')

        Returns:
            KEGG gene code as returned by the client's ``find`` call, or an
            empty string when that call returns only a newline.
        """
        code_gen = self.database.find(self.organism, gen)
        if code_gen == '\n':
            code_gen = ''
            print('Invalid gene name: ' + str(gen))
        return code_gen
len(A)

# In[79]:

#pwd

# In[81]:

# For every entry of A, take the first element of Symbol.get(entry) as the
# gene symbol and print one tab-separated "symbol, pathway id, pathway name"
# row per KEGG pathway (organism "hsa").
# NOTE: the output file is opened (and therefore truncated) but nothing is
# written to it; the f.write calls below are commented out and the rows go
# to stdout instead.
with open('./pathway_baicalin_386_genes.txt', 'w') as f:
    for i in A:
        try:
            listname = Symbol.get(i)
            n = str(listname[0])
            if n != 'nan':
                j = k.get_pathway_by_gene(n, "hsa")
                if j is not None:
                    for keys, values in j.items():
                        print(n, keys, values, sep="\t")
                    #f.write("%s \n" %n)
                    #for keys,values in j.items():
                    #    f.write("%s %s %s\n" %(n,keys,values))
        except AttributeError:
            print('error')

# In[134]:

#Print algorithms
#for i in A:
#    try:
#        listname=Symbol.get(i)