def create_keggids_csv(filename, org, output="ecoli_keggids.csv"):
    """Look up a KEGG id for every gene of an organism and save them as CSV.

    Args:
        filename: Tab-separated file with one row per gene. It must contain
            the columns 'Locus' (written out as 'gene') and 'Locus tag'
            (written out as 'b_id' and used as the KEGG search term).
        org: Abbreviation of the organism in KEGG; passed to ``KEGG.find``
            as the database to search.
        output: Path of the tab-separated file to write. Defaults to
            "ecoli_keggids.csv", the name that used to be hard-coded.

    The function returns nothing; its result is the file at ``output`` with
    the columns 'gene', 'b_id' and 'kegg_id'.
    """
    # `tupleize_cols` was dropped from this call: pandas 1.0 removed the
    # keyword (passing it raises TypeError) and it only ever affected
    # multi-row headers, which this file format does not use.
    df = pd.read_csv(filename, sep="\t")
    gene_list = df['Locus'].tolist()
    bid_list = df['Locus tag'].tolist()

    k = KEGG()
    kid_list = []
    # Find the KEGG id of each gene.
    for gene in bid_list:
        fields = k.find(org, gene).split()
        # NOTE(review): the second whitespace-separated token is kept, as in
        # the original code -- confirm this is the intended field of the
        # `find` result line.
        # A locus tag with no hit yields fewer than two tokens; record an
        # empty cell instead of aborting the whole export with IndexError.
        kid_list.append(fields[1] if len(fields) > 1 else None)

    # Build the result table in one step and save it.
    new_df = pd.DataFrame(
        {'gene': gene_list, 'b_id': bid_list, 'kegg_id': kid_list},
        columns=['gene', 'b_id', 'kegg_id'],
    )
    new_df.to_csv(output, sep="\t", index=False)
def queryKegg(theIDs):
    """Fetch and parse the KEGG entry for each identifier in ``theIDs``.

    The first three characters of every identifier are stripped before it is
    used as the search term in the "acb" KEGG database (presumably an
    organism code -- verify against the caller). The first tab-separated
    field of the search result is then fetched and parsed.

    Args:
        theIDs: Iterable of identifier strings.

    Returns:
        A ``(keggData, IDlist)`` tuple: the parsed KEGG entries and the
        stripped identifiers, in input order.
    """
    print("Currently querying KEGG...")
    kegg = KEGG()
    parsed_entries = []
    stripped_ids = []
    for raw_id in theIDs:
        short_id = raw_id[3:]
        # Only the first tab-separated field of the search result is used
        # as the entry to fetch.
        first_hit = kegg.find("acb", short_id).split('\t')[0]
        parsed_entries.append(kegg.parse(kegg.get(first_hit)))
        stripped_ids.append(short_id)
    return parsed_entries, stripped_ids
def _search_wikipathways(query, species=None):
    """Return Pathway objects for a WikiPathways full-text search."""
    namespace = "{http://www.wikipathways.org/webservice}"

    # Let requests build the query string so that spaces, '&' and other
    # special characters in the query or species are URL-encoded.
    params = {"query": str(query)}
    if species is not None:
        params["species"] = str(species)
    r = requests.get("http://webservice.wikipathways.org/findPathwaysByText",
                     params=params,
                     headers={"Content-Type": "application/json"})
    # Raises requests.HTTPError for 4xx/5xx responses and is a no-op
    # otherwise, so no separate `r.ok` check (or sys.exit) is needed.
    r.raise_for_status()

    pathways = []
    seen_identifiers = set()
    for result in ET.fromstring(r.text):
        # Collect the namespaced child elements (id, score, url, name,
        # species, revision) keyed by their local tag name.
        fields = {}
        for item in result:
            if item.tag.startswith(namespace):
                fields[item.tag[len(namespace):]] = item.text

        identifier = fields.get("id")
        # Skip results without an id and pathways that were already
        # returned. The previous check compared the id string with a list
        # of Pathway objects, so it never filtered anything.
        if identifier is None or identifier in seen_identifiers:
            continue
        seen_identifiers.add(identifier)
        pathways.append(gnomics.objects.pathway.Pathway(
            identifier=identifier,
            identifier_type="WikiPathways ID",
            name=fields.get("name"),
            taxon=fields.get("species"),
            source="WikiPathways"))
    return pathways


def _search_kegg(query):
    """Return Pathway objects for a KEGG PATHWAY full-text search."""
    identifier_types = {
        "map": "KEGG MAP PATHWAY ID",
        "ko": "KEGG KO PATHWAY ID",
        "ec": "KEGG EC PATHWAY ID",
        "rn": "KEGG RN PATHWAY ID",
    }

    k = KEGG()
    pathways = []
    for line in k.find("pathway", query).split("\n"):
        columns = line.split("\t")
        if len(columns) == 1:
            # Blank line or a line without a description column.
            continue
        # Drop a leading database prefix ("path:map00010") when present and
        # accept ids that come without one.
        path_id = columns[0].strip().split(":", 1)[-1]
        path_name = columns[1].strip()

        # Classify by the exact alphabetic prefix; a substring test would
        # label e.g. "eco00010" as an EC pathway.
        identifier_type = identifier_types.get(path_id.rstrip("0123456789"))
        if identifier_type is None:
            # Unrecognised id kind: dump the KEGG entry, as before.
            print(k.get(path_id))
            continue
        pathways.append(gnomics.objects.pathway.Pathway(
            identifier=path_id,
            identifier_type=identifier_type,
            source="KEGG",
            name=path_name))
    return pathways


def search(query, source="wikipathways", result_format="xml", species=None,
           genes=None, user=None):
    """Search pathway databases for pathways matching a text query.

    Args:
        query: Search text.
        source: "wikipathways", "kegg" or "all" (case-insensitive). Any
            other value yields an empty list.
        result_format: Not used by this function; kept for compatibility.
        species: Optional species name sent to WikiPathways as the
            ``species`` request parameter.
        genes: When not None the KEGG search is skipped; no gene-based KEGG
            search is implemented here.
        user: Not used by this function; kept for compatibility.

    Returns:
        List of ``gnomics.objects.pathway.Pathway`` objects, WikiPathways
        results first, then KEGG results.

    Raises:
        requests.HTTPError: If the WikiPathways request fails.
    """
    source_key = source.lower()
    path_array = []
    if source_key in ("wikipathways", "all"):
        path_array.extend(_search_wikipathways(query, species))
    if source_key in ("kegg", "all") and genes is None:
        path_array.extend(_search_kegg(query))
    return path_array
class KEGGPathways:
    """
    KEGG PATHWAY Database API
    """

    def __init__(self, organism="Homo sapiens"):
        """
        Args:
            organism: organism name (ex. 'Homo sapiens', 'human'); it is
                translated to a KEGG organism code, which requires a call
                to the KEGG service.

        Raises:
            KeyError: if the organism name is not known to KEGG.
        """
        self.database = KEGG()
        self.organism = self.get_organism_code(organism.lower())

    def search_by_gene(self, gene_name: str):
        """
        Args:
            gene_name: gene name (ex. 'BRCA2')

        Returns:
            Dictionary with ids of all pathways containing given gene as
            keys and their full names as values. Empty dictionary when the
            lookup yields nothing or raises AttributeError.
        """
        try:
            pathways = self.database.get_pathway_by_gene(
                gene_name, self.organism)
            return pathways if pathways else {}
        except AttributeError:
            return {}

    def get_pathway(self, pathway_id: str, self_loops: bool = False):
        """
        Args:
            pathway_id: KEGG pathway id (ex. 'hsa04110')
            self_loops: information about whether or not include self loops
                in returned graph

        Returns:
            `networkx.DiGraph` object: Directed graph depicting pathway,
            with a comma-separated string containing gene names as graph
            nodes and directed edges representing interactions between
            genes. Each edge has weight 'type', which is a list of
            interaction types between two nodes. The graph is empty when
            the pathway cannot be parsed.
        """
        G = nx.DiGraph()
        try:
            pathway = self.database.parse_kgml_pathway(pathway_id)
        except TypeError:  # incorrect pathway_id
            pathway = None
        if not pathway:
            return G

        # only intra-pathway interactions taken into account
        names = {}
        for entry in pathway['entries']:
            if entry['gene_names']:
                names[entry['id']] = {
                    'name': entry['gene_names'],
                    'type': entry['type']
                }

        for rel in pathway['relations']:
            if rel['entry1'] not in names or rel['entry2'] not in names:
                continue
            source = names[rel['entry1']]
            target = names[rel['entry2']]
            e1, e2 = source['name'], target['name']
            G.add_node(e1, type=source['type'])
            G.add_node(e2, type=target['type'])
            if G.has_edge(e1, e2):
                G[e1][e2]['type'] = G[e1][e2]['type'] + [rel['name']]
            elif e1 != e2 or self_loops:
                # assumption of interaction direction entry1 -> entry2
                # TODO: validate
                G.add_edge(e1, e2, type=[rel['name']])

        # only interactions between genes: every non-gene node is replaced
        # by 'indirect' edges from each of its predecessors to each of its
        # successors, then removed
        not_gene_nodes = []
        for node in list(G.nodes()):
            # `G.nodes[...]` replaces `G.node[...]`, which was removed in
            # networkx 2.4 (requires networkx >= 2.0).
            if G.nodes[node]['type'] != 'gene':
                # Snapshot the neighbours so the graph is not iterated
                # while edges are being added to it.
                predecessors = [u for u, _ in G.in_edges(node)]
                successors = [v for _, v in G.out_edges(node)]
                for pred in predecessors:
                    for succ in successors:
                        if pred != succ or self_loops:
                            G.add_edge(pred, succ, type=['indirect'])
                not_gene_nodes.append(node)
        G.remove_nodes_from(not_gene_nodes)
        return G

    def fetch_organism_codes(self):
        """
        Returns:
            Dictionary with lowercase organism names as keys, and KEGG
            organism codes as values
            { 'homo sapiens' : 'hsa', 'human' : 'hsa', ... }
        """
        codes = {}
        for line in self.database.list('organism').split('\n'):
            fields = line.split('\t')
            if len(fields) < 3:
                # blank or malformed line
                continue
            code, org = fields[1], fields[2]
            # "Homo sapiens (human)" registers both 'homo sapiens' and
            # 'human'. Names without parentheses are lower-cased as well,
            # so that the lower-cased lookup done by __init__ can find them.
            for part in org.split('('):
                alias = part.strip().rstrip(')').strip().lower()
                if alias:
                    codes[alias] = code
        return codes

    def get_organism_code(self, org: str):
        """
        Args:
            org: organism name (ex. 'Homo sapiens', 'human') - lowercase
                and uppercase optional

        Returns:
            str: KEGG organism code

        Raises:
            KeyError: if the organism name is not known to KEGG.
        """
        codes = self.fetch_organism_codes()
        try:
            return codes[org.strip().lower()]
        except KeyError:
            print('Invalid organism name.')
            raise

    def get_gene_code(self, gen: str):
        """
        Args:
            gen: gene name (ex. 'FGR', 'NIPAL1')

        Returns:
            KEGG gene code: the raw search result for the gene in the
            organism's database, or an empty string (after printing a
            message) when the search result is a bare newline.
        """
        code_gen = self.database.find(self.organism, gen)
        if code_gen == '\n':
            code_gen = ''
            print('Invalid gene name: ' + str(gen))
        return code_gen
def _find_wikipathways(query, species=None):
    """Run a WikiPathways text search and return the hits as Pathway objects."""
    ns = "{http://www.wikipathways.org/webservice}"

    # Pass the search terms as request parameters so requests URL-encodes
    # them; concatenating them into the URL breaks on '&', '#', etc.
    request_params = {"query": str(query)}
    if species is not None:
        request_params["species"] = str(species)
    response = requests.get(
        "http://webservice.wikipathways.org/findPathwaysByText",
        params=request_params,
        headers={"Content-Type": "application/json"})
    # No-op on success, raises requests.HTTPError on 4xx/5xx; the old
    # sys.exit() after it could never run.
    response.raise_for_status()

    hits = []
    known_ids = set()
    for result in ET.fromstring(response.text):
        # Map local tag name -> text for the namespaced children of a hit.
        record = {
            element.tag[len(ns):]: element.text
            for element in result
            if element.tag.startswith(ns)
        }
        pathway_id = record.get("id")
        if pathway_id is None:
            # Nothing to identify the pathway by.
            continue
        if pathway_id in known_ids:
            # Deduplicate on the identifier itself; the earlier test
            # looked the id string up in a list of Pathway objects and
            # therefore never matched.
            continue
        known_ids.add(pathway_id)
        hits.append(gnomics.objects.pathway.Pathway(
            identifier=pathway_id,
            identifier_type="WikiPathways ID",
            name=record.get("name"),
            taxon=record.get("species"),
            source="WikiPathways"))
    return hits


def _find_kegg_pathways(query):
    """Run a KEGG PATHWAY text search and return the hits as Pathway objects."""
    type_by_prefix = {
        "map": "KEGG MAP PATHWAY ID",
        "ko": "KEGG KO PATHWAY ID",
        "ec": "KEGG EC PATHWAY ID",
        "rn": "KEGG RN PATHWAY ID",
    }

    k = KEGG()
    hits = []
    for row in k.find("pathway", query).split("\n"):
        cells = row.split("\t")
        if len(cells) == 1:
            # Empty line, or no tab-separated description.
            continue
        # Works for both "path:map00010" and a bare "map00010".
        path_id = cells[0].strip().split(":", 1)[-1]
        path_name = cells[1].strip()

        # Match the whole alphabetic prefix: with a substring test an
        # organism id such as "eco00010" would count as an EC pathway.
        prefix = path_id.rstrip("0123456789")
        if prefix in type_by_prefix:
            hits.append(gnomics.objects.pathway.Pathway(
                identifier=path_id,
                identifier_type=type_by_prefix[prefix],
                source="KEGG",
                name=path_name))
        else:
            # Unknown id kind: print the KEGG entry, as the code always did.
            print(k.get(path_id))
    return hits


def search(query, source="wikipathways", result_format="xml", species=None,
           genes=None, user=None):
    """Search pathway databases for pathways matching a text query.

    Args:
        query: Search text.
        source: "wikipathways", "kegg" or "all" (case-insensitive). Any
            other value yields an empty list.
        result_format: Not used by this function; kept for compatibility.
        species: Optional species name sent to WikiPathways as the
            ``species`` request parameter.
        genes: When not None the KEGG search is skipped; no gene-based KEGG
            search is implemented here.
        user: Not used by this function; kept for compatibility.

    Returns:
        List of ``gnomics.objects.pathway.Pathway`` objects, WikiPathways
        results first, then KEGG results.

    Raises:
        requests.HTTPError: If the WikiPathways request fails.
    """
    wanted = source.lower()
    path_array = []
    if wanted in ("wikipathways", "all"):
        path_array += _find_wikipathways(query, species)
    if wanted in ("kegg", "all") and genes is None:
        path_array += _find_kegg_pathways(query)
    return path_array