Python KEGG.find Examples

Programming Language: Python

Namespace/Package Name: bioservices.kegg

Class/Type: KEGG

Method/Function: find

Examples at hotexamples.com: 5

Python KEGG.find - 5 examples found. These are the top rated real world Python examples of bioservices.kegg.KEGG.find extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

KEGG(30)

get(28)

parse(19)

get_pathway_by_gene(7)

find(4)

organism(3)

parse_kgml_pathway(3)

list(2)

lookfor_organism(1)

lookfor_pathway(1)

show_pathway(1)

Example #1

Show file

def create_keggids_csv(filename, org, output="ecoli_keggids.csv"):
    '''
    Look up a KEGG id for every gene of an organism and save the result
    to a tab-separated csv file

        args: filename is the tab-separated file containing gene name/ locus
                  for all the organism genes; the columns 'Locus' and
                  'Locus tag' are read from it
              org is the abbreviation of the organism in kegg
              output is the path of the file that is written; the default
                  keeps the file name this function has always used

        The written table has the columns 'gene', 'b_id' and 'kegg_id',
        one row per row of the input file.
    '''

    # Open csv as panda dataframe (df).
    # The former tupleize_cols argument is no longer passed: it was removed
    # from pandas.read_csv in pandas 1.0 and now raises a TypeError.
    df = pd.read_csv(filename, sep="\t")
    gene_list = df['Locus'].tolist()
    bid_list = df['Locus tag'].tolist()

    k = KEGG()

    # find keggid for each gene (one KEGG query per locus tag)
    kid_list = []
    for gene in bid_list:
        reply = k.find(org, gene)
        # As before, the second whitespace-separated token of the reply is
        # stored.  A reply that is not text or has fewer than two tokens
        # (e.g. no match) is recorded as a missing value instead of raising,
        # so the three columns stay aligned.
        tokens = reply.split() if isinstance(reply, str) else []
        kid_list.append(tokens[1] if len(tokens) > 1 else None)

    # create new df and save it to csv
    new_df = pd.DataFrame(
        {'gene': gene_list, 'b_id': bid_list, 'kegg_id': kid_list},
        columns=['gene', 'b_id', 'kegg_id'])

    new_df.to_csv(output, sep="\t", index=False)

Example #2

Show file

File: queryDatabases.py Project: stsmr95/CSE182-Biological-Databases

def queryKegg(theIDs):
    """Fetch and parse the KEGG entry found for each given identifier.

    The first three characters of every identifier are dropped and the
    remainder is searched with ``find`` in the KEGG database "acb".  The text
    before the first tab of the reply is passed to ``get`` and the fetched
    entry is run through ``parse``.

    Args:
        theIDs: iterable of identifier strings.

    Returns:
        A pair ``(entries, trimmed_ids)``: the parsed entries and the
        shortened identifiers, both in input order.
    """
    print("Currently querying KEGG...")
    kegg = KEGG()
    entries = []
    trimmed_ids = []

    for raw_id in theIDs:
        trimmed = raw_id[3:]
        # Only the text before the first tab of the reply is used.
        first_field = kegg.find("acb", trimmed).split('\t')[0]
        entries.append(kegg.parse(kegg.get(first_field)))
        trimmed_ids.append(trimmed)

    return entries, trimmed_ids

Example #3

Show file

def search(query,
           source="wikipathways",
           result_format="xml",
           species=None,
           genes=None,
           user=None):
    """Search pathway databases for pathways matching a text query.

    Args:
        query: Search text.
        source: "wikipathways", "kegg" or "all" (case-insensitive).  Any
            other value searches nothing and yields an empty list.
        result_format: Accepted for interface compatibility; not used here.
        species: Optional species name; when given, it is sent to
            WikiPathways as the ``species`` query argument.
        genes: When not None the KEGG search is skipped (this function
            implements no gene-restricted KEGG search).
        user: Accepted for interface compatibility; not used here.

    Returns:
        A list of ``gnomics.objects.pathway.Pathway`` objects: WikiPathways
        results first (one per distinct WikiPathways id), then KEGG results.

    Raises:
        requests.HTTPError: if WikiPathways answers with an error status.
        KeyError: if a WikiPathways result lacks an id, name or species.
    """
    path_array = []
    source_key = source.lower()

    if source_key in ("wikipathways", "all"):
        ns = "{http://www.wikipathways.org/webservice}"
        # Passing the arguments through ``params`` lets requests URL-encode
        # them (queries with spaces, '&', ... used to be concatenated raw).
        params = {"query": str(query)}
        if species is not None:
            params["species"] = str(species)
        r = requests.get(
            "http://webservice.wikipathways.org/findPathwaysByText",
            params=params,
            headers={"Content-Type": "application/json"})
        # Raises for every non-OK status and does nothing otherwise.
        r.raise_for_status()

        # De-duplicate on the identifier itself; comparing the identifier
        # string against the list of Pathway objects did not filter anything.
        seen_ids = set()
        for child in ET.fromstring(r.text):
            fields = {}
            for subchild in child:
                if subchild.tag.startswith(ns):
                    fields[subchild.tag[len(ns):]] = subchild.text

            identifier = fields["id"]
            name = fields["name"]
            taxon = fields["species"]
            if identifier in seen_ids:
                continue
            seen_ids.add(identifier)
            path_array.append(
                gnomics.objects.pathway.Pathway(
                    identifier=identifier,
                    identifier_type="WikiPathways ID",
                    name=name,
                    taxon=taxon,
                    source="WikiPathways"))

    if source_key in ("kegg", "all") and genes is None:
        k = KEGG()
        # Checked in this order; the first marker contained in the id wins.
        kegg_id_types = (
            ("map", "KEGG MAP PATHWAY ID"),
            ("ko", "KEGG KO PATHWAY ID"),
            ("ec", "KEGG EC PATHWAY ID"),
            ("rn", "KEGG RN PATHWAY ID"),
        )

        for line in k.find("pathway", query).split("\n"):
            columns = line.split("\t")
            if len(columns) == 1:
                # Blank line or a line without a tab: nothing to parse.
                continue
            # The id column looks like "<prefix>:<id>"; keep the part after
            # the colon.
            path_id = columns[0].strip().split(":")[1]
            path_name = columns[1].strip()

            id_type = next(
                (label for marker, label in kegg_id_types
                 if marker in path_id),
                None)
            if id_type is None:
                # Unrecognised id family: print the raw entry, add nothing.
                print(k.get(path_id))
                continue
            path_array.append(
                gnomics.objects.pathway.Pathway(
                    identifier=path_id,
                    identifier_type=id_type,
                    source="KEGG",
                    name=path_name))

    return path_array

Example #4

Show file

File: databases.py Project: vaira123/pathways-analysis

class KEGGPathways:
    """
    KEGG PATHWAY Database API

    Thin wrapper around a ``KEGG`` service object that resolves an organism
    name to its KEGG organism code once, at construction time, and then
    offers gene/pathway lookups for that organism.
    """
    def __init__(self, organism="Homo sapiens"):
        """

        Args:
            organism: organism name (ex. 'Homo sapiens', 'human'), in any
                letter case

        Raises:
            KeyError: if the organism name is not known to KEGG.

        """
        self.database = KEGG()
        self.organism = self.get_organism_code(organism)

    def search_by_gene(self, gene_name: str):
        """

        Args:
            gene_name: gene name (ex. 'BRCA2')

        Returns:
            Dictionary with ids of all pathways containing given gene as keys and their full names as values.
            An empty dictionary is returned when nothing is found or when
            the lookup raises AttributeError.

        """
        try:
            pathways = self.database.get_pathway_by_gene(
                gene_name, self.organism)
        except AttributeError:
            return {}
        return pathways if pathways else {}

    def get_pathway(self, pathway_id: str, self_loops: bool = False):
        """

        Args:
            pathway_id: KEGG pathway id (ex. 'hsa04110')
            self_loops: information about whether or not include self loops in returned graph

        Returns:
            `networkx.DiGraph` object: Directed graph depicting pathway, with a comma-separated string
            containing gene names as graph nodes and directed edges representing interactions between genes.
            Each edge has attribute 'type', which is a list of interaction types between two nodes.
            The graph is empty when the pathway cannot be parsed.

        """

        G = nx.DiGraph()
        try:
            pathway = self.database.parse_kgml_pathway(pathway_id)
        except TypeError:
            # incorrect pathway_id
            pathway = None

        if pathway:
            # entry id -> name/type of every entry that carries gene names
            names = {}
            for entry in pathway['entries']:
                # only intra-pathway interactions taken into account
                if entry['gene_names']:
                    names[entry['id']] = {
                        'name': entry['gene_names'],
                        'type': entry['type']
                    }

            for rel in pathway['relations']:
                if rel['entry1'] not in names or rel['entry2'] not in names:
                    continue
                first = names[rel['entry1']]
                second = names[rel['entry2']]
                e1 = first['name']
                e2 = second['name']
                G.add_node(e1, type=first['type'])
                G.add_node(e2, type=second['type'])
                if G.has_edge(e1, e2):
                    G[e1][e2]['type'] = G[e1][e2]['type'] + [rel['name']]
                elif e1 != e2 or self_loops:
                    # assumption of interaction direction entry1 -> entry2 #TODO: validate
                    G.add_edge(e1, e2, type=[rel['name']])

        # Replace every non-gene node by direct 'indirect' edges between its
        # predecessors and successors, then drop the node itself.
        not_gene_nodes = []
        for node in list(G.nodes()):
            # only interactions between genes
            # (G.nodes[...] replaces G.node[...], removed in networkx 2.4)
            if G.nodes[node]['type'] != 'gene':
                # Snapshot both edge lists: edges are added inside the loops
                # and the live views must not change while being iterated.
                in_edges = list(G.in_edges(node))
                out_edges = list(G.out_edges(node))
                for in_edge in in_edges:
                    for out_edge in out_edges:
                        if in_edge[0] != out_edge[1] or self_loops:
                            G.add_edge(in_edge[0],
                                       out_edge[1],
                                       type=['indirect'])
                not_gene_nodes.append(node)
        G.remove_nodes_from(not_gene_nodes)

        return G

    def fetch_organism_codes(self):
        """

        Returns:
            Dictionary with lowercase organism names as keys, and KEGG organism codes as values
            {   'homo sapiens' : 'hsa',
                'human' : 'hsa',
                ...
            }

        """
        codes = {}
        for line in self.database.list('organism').split('\n'):
            fields = line.split('\t')
            if len(fields) < 3:
                # blank or incomplete line: no code/name pair to record
                continue
            code = fields[1]
            org = fields[2]
            if '(' in org:
                # "name (alias)" -> register the name and the alias; the
                # last character is dropped as the closing parenthesis
                for alias in org[:-1].split('('):
                    codes[alias.strip().lower()] = code
            else:
                # lower-cased like the aliased names, so that lookups
                # (which use lowercase) can find these organisms too
                codes[org.strip().lower()] = code
        return codes

    def get_organism_code(self, org: str):
        """

        Args:
            org: organism name (ex. 'Homo sapiens', 'human') - lowercase and uppercase optional

        Returns:
            str: KEGG organism code

        Raises:
            KeyError: if the name is not among the fetched organism names
                (a message is printed before the error is re-raised).

        """
        codes = self.fetch_organism_codes()
        try:
            return codes[org.strip().lower()]
        except KeyError:
            print('Invalid organism name.')
            raise

    def get_gene_code(self, gen: str):
        """

        Args:
            gen: gene name (ex. 'FGR', 'NIPAL1')

        Returns:
            The reply of the KEGG ``find`` query for the gene in this
            organism, or an empty string (after printing a message) when
            the reply is a single newline, i.e. nothing was found.

        """
        code_gen = self.database.find(self.organism, gen)

        if code_gen == '\n':
            code_gen = ''
            print('Invalid gene name: ' + str(gen))
        return code_gen

Example #5

Show file

File: search.py Project: Superraptor/Gnomics

def search(query, source="wikipathways", result_format="xml", species=None, genes=None, user=None):
    """Search pathway databases for pathways matching a text query.

    Args:
        query: Search text.
        source: "wikipathways", "kegg" or "all" (case-insensitive).  Any
            other value searches nothing and yields an empty list.
        result_format: Accepted for interface compatibility; not used here.
        species: Optional species name; when given, it is sent to
            WikiPathways as the ``species`` query argument.
        genes: When not None the KEGG search is skipped (this function
            implements no gene-restricted KEGG search).
        user: Accepted for interface compatibility; not used here.

    Returns:
        A list of ``gnomics.objects.pathway.Pathway`` objects: WikiPathways
        results first (one per distinct WikiPathways id), then KEGG results.

    Raises:
        requests.HTTPError: if WikiPathways answers with an error status.
        KeyError: if a WikiPathways result lacks an id, name or species.
    """
    path_array = []
    source_key = source.lower()

    if source_key in ("wikipathways", "all"):
        ns = "{http://www.wikipathways.org/webservice}"
        # Passing the arguments through ``params`` lets requests URL-encode
        # them (queries with spaces, '&', ... used to be concatenated raw).
        params = {"query": str(query)}
        if species is not None:
            params["species"] = str(species)
        r = requests.get("http://webservice.wikipathways.org/findPathwaysByText", params=params, headers={"Content-Type": "application/json"})
        # Raises for every non-OK status and does nothing otherwise.
        r.raise_for_status()

        # De-duplicate on the identifier itself; comparing the identifier
        # string against the list of Pathway objects did not filter anything.
        seen_ids = set()
        for child in ET.fromstring(r.text):
            fields = {}
            for subchild in child:
                if subchild.tag.startswith(ns):
                    fields[subchild.tag[len(ns):]] = subchild.text

            identifier = fields["id"]
            name = fields["name"]
            taxon = fields["species"]
            if identifier in seen_ids:
                continue
            seen_ids.add(identifier)
            path_array.append(gnomics.objects.pathway.Pathway(identifier=identifier, identifier_type="WikiPathways ID", name=name, taxon=taxon, source="WikiPathways"))

    if source_key in ("kegg", "all") and genes is None:
        k = KEGG()
        # Checked in this order; the first marker contained in the id wins.
        kegg_id_types = (
            ("map", "KEGG MAP PATHWAY ID"),
            ("ko", "KEGG KO PATHWAY ID"),
            ("ec", "KEGG EC PATHWAY ID"),
            ("rn", "KEGG RN PATHWAY ID"),
        )

        for line in k.find("pathway", query).split("\n"):
            columns = line.split("\t")
            if len(columns) == 1:
                # Blank line or a line without a tab: nothing to parse.
                continue
            # The id column looks like "<prefix>:<id>"; keep the part after
            # the colon.
            path_id = columns[0].strip().split(":")[1]
            path_name = columns[1].strip()

            id_type = next((label for marker, label in kegg_id_types if marker in path_id), None)
            if id_type is None:
                # Unrecognised id family: print the raw entry, add nothing.
                print(k.get(path_id))
                continue
            path_array.append(gnomics.objects.pathway.Pathway(identifier=path_id, identifier_type=id_type, source="KEGG", name=path_name))

    return path_array