예제 #1
0
파일: kegg.py 프로젝트: lv601/WS2016
 def get(dbentry, option=None):
     """Fetch a KEGG entry through Biopython's ``REST.kegg_get``.

     :param dbentry: identifier handed unchanged to ``REST.kegg_get``.
     :param option: only selects the return type; it is NOT forwarded to
         KEGG.  ``None`` wraps the response text in ``KEGG_Entry``; any
         other value returns the response text encoded to ``bytes``.
     :return: a ``KEGG_Entry`` instance or a ``bytes`` object (see above).
     """
     # Both branches need the same payload, so download it exactly once.
     payload = REST.kegg_get(dbentry).read()
     if option is None:
         return KEGG_Entry(payload)
     return str.encode(payload)
예제 #2
0
def extractGeneFromPathway(pathway):
    """Download one KEGG pathway record and extract its genes.

    The text returned by ``REST.kegg_get`` is passed unchanged to
    ``extractGeneFromLocalPathway`` and that function's result is returned.

    :param pathway: KEGG pathway identifier (the original author's example
        value is ``'path:hsa00230'``).
    """
    print('query http://rest.kegg.jp/get/path:%s' % pathway)
    handle = REST.kegg_get(pathway)
    # Section tracking and gene parsing live in the local-file helper.
    return extractGeneFromLocalPathway(handle.read())
예제 #3
0
def main():
    """Search KEGG PATHWAY for ``sys.argv[1]`` and print the pathway's genes.

    Spaces in the search term are replaced by ``+``.  When the response has
    more than two lines the user is asked to pick one by index; otherwise
    the first line is used.  The ``map`` prefix of the chosen identifier is
    replaced by ``hsa`` before the KGML is downloaded and parsed.
    """
    search_term = sys.argv[1].replace(" ", "+")
    hits = KEGG_REST.kegg_find('PATHWAY', search_term).read().split('\n')

    # A response that splits into a single piece carries no result line.
    if len(hits) == 1:
        print("Search found no results")
        return

    if len(hits) > 2:
        print("More than 1 result:")
        for number, hit in enumerate(hits):
            columns = hit.split("\t")
            if len(columns) == 2:
                print(str(number) + "\t" + columns[1])
        selected = int(input("Which one? "))
    else:
        selected = 0

    pathway_id = hits[selected].split("\t")[0].strip().replace("map", "hsa")
    pathway = KEGG_KGML_PARSER.read(KEGG_REST.kegg_get(pathway_id, "kgml"))

    for gene in pathway.genes:
        print(gene.name)
예제 #4
0
def ecnumber_to_genes(ECnumber):
    """
    Dictionary of organism => gene for a reaction.

    Downloads the KEGG ``ec:<ECnumber>`` record and parses the text between
    the ``GENES`` and ``DBLINKS`` keywords.  Each line is tokenised on
    commas, spaces, hyphens, colons and parentheses; the first token becomes
    the key and every purely numeric token on that line becomes a value.

    If either keyword is missing, the record is searched for ``'Now EC '``
    and, when present, the lookup is repeated for the EC number that
    follows it.

    :param ECnumber: EC number without the ``ec:`` prefix, e.g. ``'5.4.2.2'``.
    :return: dict mapping the first token of each GENES line to a list of
        numeric gene-id strings; ``{}`` when nothing can be resolved.
    """
    # TODO: Make sure the genes addresses are stripped correctly.
    # (Non-numeric identifiers such as locus tags are currently dropped.)

    text = REST.kegg_get('ec:' + ECnumber).read()

    try:
        start_index = text.index('GENES') + 5
        end_index = text.index('DBLINKS')
    except ValueError:
        marker = 'Now EC '
        ec_index = text.find(marker)
        if ec_index == -1:
            return {}
        # The successor number follows the marker directly.  The original
        # code sliced relative to ``start_index``, which is unbound (or
        # unrelated) on this path.
        successor_start = ec_index + len(marker)
        candidates = text[successor_start:successor_start + 32]
        candidates = candidates.replace(',', ' ').split()
        if not candidates:
            return {}
        new_ec = candidates[0].rstrip('.')
        # Guard against an entry that points at itself (endless recursion).
        if not new_ec or new_ec == ECnumber:
            return {}
        return ecnumber_to_genes(new_ec)

    gene_dict = {}
    for line in text[start_index:end_index].split('\n'):
        # Tokenise the raw line; re.split needs a string, not the list the
        # previous ': ' pre-split produced.
        tokens = [tok for tok in re.split(r"[, \-:()]+", line.strip()) if tok]
        if not tokens:
            continue
        gene_dict[tokens[0]] = [tok for tok in tokens if tok.isdigit()]

    return gene_dict
예제 #5
0
def main():
    """Append every record of the selected KEGG database to a text file.

    Reads the module-level ``args`` (``args.database``, ``args.entrylist``).
    The entry table comes from ``fetch_entry_list``; presumably a pandas
    DataFrame, since ``.id`` and ``.to_csv`` are used -- confirm against
    that helper.  Records are appended to ``KEGG_<db>_db_entries.txt``.
    """
    db = args.database

    entries_df = fetch_entry_list(db)
    id_list = entries_df.id
    handle = 'KEGG_{}_db_entries'.format(db)

    # Optional CSV dump of the entry table itself.
    if args.entrylist:
        print('\nwriting list of {} database entries to csv file'.format(db))
        entries_df.to_csv(handle + '.csv')

    total_entries = len(id_list)
    print('\nbeginning retrieval of {} records from KEGG {} database\n'.format(
        total_entries, db))

    # Each record is written as soon as it is fetched.
    with open(handle + '.txt', 'a') as out_file:
        for position, entry_id in enumerate(id_list, start=1):
            print('retrieving record {} of {}: {}'.format(
                position, total_entries, entry_id))
            out_file.writelines(REST.kegg_get(entry_id))
예제 #6
0
파일: kegg.py 프로젝트: LSARP/lrg-omics
def get(ID="K18766", db_id=None):
    """
    Fetch data from the KEGG database.
    -----
    Args:
    ID: str
    db_id: str, default None
        Either the short KEGG database code or its long name:
        - 'ko'  / 'orthology': request to orthology database
        - 'ec'  / 'enzyme'   : request to enzyme database
        - 'cpd' / 'compound' : request to compound database
        - 'rn'  / 'reaction' : request to reaction database

    If db_id is None the request will be directed
    based on the first letter of the ID.
      C -> compound
      K -> orthology
      R -> reaction
    all other requests will be directed to the enzyme database

    Returns:
    list of str: the lines of the KEGG record (split on newlines).

    Raises:
    KeyError: if db_id is neither a known long name nor a known short code.
    """

    db_keys = {"orthology": "ko", "enzyme": "ec", "compound": "cpd", "reaction": "rn"}
    if db_id is not None:
        if db_id in db_keys:
            db_key = db_keys[db_id]
        elif db_id in db_keys.values():
            # The documented short codes used to raise KeyError here.
            db_key = db_id
        else:
            raise KeyError(db_id)
    elif ID.startswith("C"):
        db_key = db_keys["compound"]
    elif ID.startswith("K"):
        db_key = db_keys["orthology"]
    elif ID.startswith("R"):
        db_key = db_keys["reaction"]
    else:
        db_key = db_keys["enzyme"]

    data = REST.kegg_get(f"{db_key}:{ID}").read().split("\n")

    return data
예제 #7
0
def kegggene_to_uniprotid(organism, kegggene):
    """Return the first UniProt accession listed in a KEGG gene record.

    :param organism: KEGG organism code; lower-cased before the request.
    :param kegggene: gene identifier within that organism.
    :return: the first whitespace-delimited token after ``'UniProt: '``,
        or ``None`` when the record has no ``UniProt`` entry or nothing
        follows it.
    """
    text = REST.kegg_get(organism.lower() + ':' + kegggene).read()
    uni_index = text.find('UniProt')
    if uni_index == -1:
        return None

    # Skip the 9 characters of "UniProt: ".  Take the whole token rather
    # than a fixed 6-character slice: accessions are 6 or 10 characters
    # long and the old slice truncated the long form.
    fields = text[uni_index + 9:].split(None, 1)
    return fields[0] if fields else None
예제 #8
0
def get_kegg_annotation():
    """
    Get annotation ec:5.4.2.2 from kegg database and write to txt file.
    Print the class name of the first parsed record.
    :return: None
    """
    request = REST.kegg_get("ec:5.4.2.2")
    with open("ec_5.4.2.2.txt", "w") as out_file:
        out_file.write(request.read())

    # Enzyme.parse is consumed inside the ``with`` so the handle is still
    # open while the parser reads from it.
    with open("ec_5.4.2.2.txt") as in_file:
        record = list(Enzyme.parse(in_file))[0]
    print(record.classname)
def make_kegg(ec_list):
    """Download each KEGG entry in *ec_list* to ``Kegg\\<entry>.txt``.

    Entries whose target file already exists are skipped.  The download is
    best effort: any failure for one entry is reported and the loop goes on
    with the next one.

    :param ec_list: iterable of KEGG identifiers accepted by
        ``REST.kegg_get``.
    """
    from Bio.KEGG import REST
    for ec in ec_list:
        print("Tratar de kegg...")
        try:
            keggname = "Kegg\\" + ec + ".txt"
            if os.path.isfile(keggname):
                continue
            data = REST.kegg_get(ec).read()
            # The file is opened in binary mode, so text responses must be
            # encoded first; otherwise the write fails and the error used
            # to be swallowed silently.
            if not isinstance(data, bytes):
                data = data.encode("utf-8")
            with open(keggname, 'wb') as out_file:
                out_file.write(data)
        except Exception:
            print('kegg request failed or file already exists')
        else:
            print("Kegg SUCCESS!!!")
예제 #10
0
def query_reversible_reaction(reaction_list):
    """
    Select the reactions whose KEGG record marks them as reversible.

    A reaction is reported once for every record line that starts with
    "EQUATION" and contains "<=>".

    input:list of reactions(list) eg)["R00709"]
    output:list of reversible reactions(list)
    """
    hits = []
    for reaction_id in reaction_list:
        record_lines = REST.kegg_get(reaction_id).read().rstrip().split("\n")
        hits.extend(
            reaction_id
            for record_line in record_lines
            if record_line.startswith("EQUATION") and "<=>" in record_line
        )
    return hits
예제 #11
0
def get_network(org, opt='ec'):
    """Build a network for organism *org* from its KEGG pathways.

    For every pathway listed by ``r.kegg_list('pathway', org)`` two KGML
    documents are parsed and stored on a ``k.KeggParser`` object: the
    pathway itself and the variant whose id is ``"path:" + opt`` followed
    by the last five characters of the pathway id.  Pathways for which
    either download/parse raises are skipped.

    :param org: organism code passed to ``r.kegg_list``.
    :param opt: prefix of the second KGML request; also stored as
        ``graph.ref``.
    :return: the populated ``k.KeggParser`` object after
        ``building_graph(2)`` has been called on it.
    """
    # Creating a Parser Object
    graph = k.KeggParser()
    # Identifiers of pathways whose KGML could not be retrieved/parsed.
    # NOTE(review): the list is collected but never returned or printed.
    error = []
    # Getting the pathway listing of the organism, one line per pathway.
    list1 = r.kegg_list('pathway', org).read()
    list1 = list1.split('\n')
    # Drops the first empty string (trailing newline); raises ValueError
    # if the listing contains no empty line at all.
    list1.remove('')
    print('Retrieving data from KEGG PATHWAY database. ' + str(time.ctime()))

    # For each path getting enzymes and reactions
    for path in list1:
        try:
            # path becomes [identifier, description, ...] (tab-separated).
            path = path.split('\t')
            # print (path[0])
            graph.genes = parse.read(r.kegg_get(path[0], 'kgml'))
            # Same map number under the ``opt`` prefix.
            graph.genes_default = parse.read(
                r.kegg_get("path:" + opt + path[0][-5:], 'kgml'))
            graph.path = path
        except Exception:
            error.append(path[0])
            continue
        # Both calls operate on the genes/genes_default/path just stored.
        # print ("getting relations")
        graph.get_relations()
        # print ("getting reaction")
        graph.get_reactions()
    # print ('Unretrieved data',error)
    graph.ref = opt
    # Counts whitespace-separated tokens over all ec_org_target values.
    # NOTE(review): ``genes`` is not used afterwards.
    genes = 0
    for i in graph.ec_org_target.items():
        genes += len(i[1].split())

    # print (graph.ec_org_target.keys())
    # Building Graph
    graph.building_graph(2)
    return (graph)
예제 #12
0
    def __retrieve_equations(self, pathway):
        """Collect the EQUATION lines of every reaction in *pathway*.

        The reaction ids come from ``self.__retrieve_reactions(pathway)``;
        each reaction's KEGG flat file is fetched and scanned line by line.

        :return: list of strings of the form ``"<reaction> : <equation>\\n"``.
        """
        equations = []
        for reaction in self.__retrieve_reactions(pathway):
            for line in REST.kegg_get(reaction).readlines():
                if not line.startswith('EQUATION'):
                    continue
                equation = line.replace('EQUATION', '').strip()
                equations.append(reaction + ' : ' + equation + '\n')
        return equations
예제 #13
0
 def dictionary(self, name):
     """
     Record the lower-cased names of a KEGG reaction/metabolite under its id.

     Ids already present in ``self.metabolites`` or ``self.reactions`` are
     left untouched.  Otherwise the entry is fetched from KEGG and stored in
     ``self.metabolites`` when the id starts with ``'cpd:'`` and in
     ``self.reactions`` in every other case.

     :param name: KEGG id of the reaction/substrate
     """
     if name in self.metabolites.keys() or name in self.reactions.keys():
         return
     for parsed in Compound.parse(REST.kegg_get(name)):
         lowered = [label.lower() for label in parsed.name]
         target = self.metabolites if name.startswith('cpd:') else self.reactions
         target[name] = lowered
예제 #14
0
def kegggene_to_sequence(organism, kegggene):
    """
    Return the text between the AASEQ and NTSEQ keywords of a KEGG gene
    record as one string.

    The line carrying the AASEQ keyword itself is dropped; every following
    line is stripped and the pieces are concatenated.  ``str.index`` raises
    ``ValueError`` when either keyword is missing from the record.
    """
    text = REST.kegg_get(organism.lower() + ':' + kegggene).read()

    aa_start = text.index('AASEQ')
    aa_end = text.index('NTSEQ')

    # Everything after the first newline is sequence data.
    body = text[aa_start:aa_end].split('\n', 1)[1]
    return ''.join(row.strip() for row in body.split('\n'))
예제 #15
0
 def _extract_cat_kegg(self, id):
     '''
     Parse the raw database file
     '''
     out = id + '|'
     try:
         record = REST.kegg_get(id).read()
         if record:
             pattern = re.search(r'DEFINITION\s*([^\n]*)', record,
                                 re.I | re.M)
             out += pattern[1] if pattern else ''
             pattern = re.search(r'PATHWAY\s*([^\n]*)', record, re.I | re.M)
             out += ";" + pattern[1] if pattern else ''
         pass
     except:
         pass
     return out
예제 #16
0
 def get_genes_for(pathways):
     """Fill in the genes of every pathway in *pathways* (in place).

     For each key the KEGG record is fetched and its GENE section parsed;
     ``pathways[key]['geneid']`` becomes a set of ints and
     ``pathways[key]['gene_symbol']`` a set of strings.  GENE lines that do
     not have the ``"<id> <symbol>; <description>"`` shape, or whose id is
     not an integer, are skipped.

     :param pathways: dict mapping KEGG pathway ids to dicts.
     :return: None
     """
     for pathway in pathways:
         pathways[pathway]['geneid'] = set()
         pathways[pathway]['gene_symbol'] = set()
         pathway_file = REST.kegg_get(pathway).read()  # query and read each pathway
         # Walk the flat file and remember which section we are in; only
         # GENE lines are parsed.
         current_section = None
         for line in pathway_file.rstrip().split("\n"):
             section = line[:12].strip()  # section names are within 12 columns
             if section:
                 current_section = section
             if current_section != "GENE":
                 continue
             try:
                 gene_identifiers, _ = line[12:].split("; ")[:2]
                 geneid, gene_symbol = gene_identifiers.split()
                 geneid = int(geneid)
             except ValueError:
                 # Malformed line: wrong number of fields or non-numeric id.
                 continue
             pathways[pathway]['geneid'].add(geneid)
             pathways[pathway]['gene_symbol'].add(gene_symbol)
def _copy_kegg_section(line, start, stops, active, out):
    """Advance one section tracker by one line and return its new state.

    A section opens on a line containing *start* (that line is written),
    stays open while lines are written, and closes on a line containing any
    of *stops* (that line is not written).
    """
    if not active:
        if start in line:
            out.write(line)
            return True
        return False
    if any(stop in line for stop in stops):
        return False
    out.write(line)
    return True


def keggMet(tag):
    """Download KEGG entry ``lpn:<tag>`` and write selected sections.

    The raw record is saved to the file ``lpn:<tag>``; a filtered copy goes
    to ``kegg/lpn:<tag>``.  The copy contains the ORTHOLOGY, PATHWAY, MOTIF
    and REACTIONS sections (each up to the keyword that closes it) plus a
    ``GENE NAME: ...`` line for every line containing ``NAME``.

    :param tag: gene identifier within the ``lpn`` organism.
    """
    entry_id = "lpn:" + tag
    request = REST.kegg_get(entry_id)
    with open(entry_id, "w") as raw_file:
        raw_file.write(request.read())

    # One tracker per section.  The original never initialised the
    # REACTIONS flag (UnboundLocalError) and tested the PATHWAY flag in the
    # ORTHOLOGY branch.
    ortho_active = path_active = motif_active = reactions_active = False
    with open(entry_id, "r") as ofile, open("kegg/" + entry_id, "w") as owrite:
        for line in ofile:
            ortho_active = _copy_kegg_section(
                line, "ORTHOLOGY", ("ORGANISM",), ortho_active, owrite)
            path_active = _copy_kegg_section(
                line, "PATHWAY", ("BRITE", "MODULE"), path_active, owrite)
            motif_active = _copy_kegg_section(
                line, "MOTIF", ("DBLINKS",), motif_active, owrite)
            if "NAME" in line:
                # Last space-separated field of the line (keeps its newline).
                name = re.split(r' ', line)
                owrite.write("GENE NAME: " + name[-1])
            reactions_active = _copy_kegg_section(
                line, "REACTIONS", ("COMPOUND",), reactions_active, owrite)
예제 #18
0
def queryPathwayByKid(kid):
    """Return the pathways listed in the KEGG record of *kid*.

    :param kid: KEGG identifier, e.g. ``'ath:ArthCp053'``.
    :return: list of ``(kid, pathway_id, pathway_name)`` tuples; empty when
        the record cannot be fetched or has no PATHWAY section.
    """
    kid_pathway = []
    try:
        result = REST.kegg_get(kid).read()
    except Exception:
        # Best-effort lookup: report and return nothing, but let
        # KeyboardInterrupt/SystemExit through.
        print('not found %s' % kid)
        return kid_pathway
    if 'PATHWAY' not in result:
        return kid_pathway

    current_section = None
    for line in result.rstrip().split("\n"):
        section = line[:12].strip()  # section names are within 12 columns
        if section:
            current_section = section
        if current_section == "PATHWAY":
            # Split on the first double space only, so a name that itself
            # contains a double space no longer breaks the unpacking.
            pid, _, pname = line[12:].partition("  ")
            kid_pathway.append((kid, pid, pname))
    return kid_pathway
예제 #19
0
def kegg_rest(type: str,
              argument: str,
              optional_argument: str = "",
              sleep_time: float = .5) -> List[str]:
    """This function calls Biopython's KEGG REST function and returns the lines as a string list.

    All empty lines are deleted from the list as they do not contain any information.

    Arguments
    ----------
    * type: str ~ The KEGG REST action. Can be either 'info', 'get', 'link', 'list' or 'find'.
    * argument: str ~ The argument for the KEGG order.
    * optional_argument: str="" ~ The second argument which is necessary for 'link', 'list'
      and 'find' actions to work correctly.
    * sleep_time: float=0.5 ~ The time in seconds that shall be waited after a REST action
      is performed, to avoid contacting the server too frequently. The original author
      did not find a documented minimum interval for KEGG.

    Raises
    ----------
    * ValueError ~ If type is not one of the supported actions.
    """
    # Reject unknown actions up front; previously this surfaced later as an
    # UnboundLocalError on kegg_data (after sleeping).
    if type not in ("info", "get", "link", "list", "find"):
        raise ValueError("Unknown KEGG REST action: {!r}".format(type))

    # Execute correct Biopython KEGG REST function.
    if type == "info":
        kegg_data = REST.kegg_info(argument)
    elif type == "get":
        kegg_data = REST.kegg_get(argument)
    elif type == "link":
        kegg_data = REST.kegg_link(argument, optional_argument)
    elif type == "list":
        kegg_data = REST.kegg_list(argument, optional_argument)
    else:  # "find"
        kegg_data = REST.kegg_find(argument, optional_argument)

    # Wait the sleep time doing nothing.
    time.sleep(sleep_time)

    # Get one string per line of the KEGG REST result, dropping empty lines.
    lines: List[str] = kegg_data.read().split("\n")
    return [line for line in lines if len(line) > 0]
예제 #20
0
def get(ID='K18766', db=None):
    """Fetch a KEGG record and return it as a list of lines.

    :param ID: KEGG entry id without database prefix.
    :param db: one of 'orthology', 'enzyme', 'compound', 'reaction'
        (anything else raises ``KeyError``).  When ``None`` the database is
        picked from the first letter of *ID*: C -> compound, K -> orthology,
        R -> reaction, everything else -> enzyme.
    """
    db_keys = {
        'orthology': 'ko',
        'enzyme': 'ec',
        'compound': 'cpd',
        'reaction': 'rn'
    }
    if db is not None:
        db_key = db_keys[db]
    else:
        # First matching prefix wins; enzyme is the fallback.
        for prefix, database in (('C', 'compound'),
                                 ('K', 'orthology'),
                                 ('R', 'reaction')):
            if ID.startswith(prefix):
                db_key = db_keys[database]
                break
        else:
            db_key = db_keys['enzyme']

    response = REST.kegg_get(f"{db_key}:{ID}")
    return response.read().split('\n')
예제 #21
0
def parse_reversible_reactions(reaction_list: list):
    """
    Query KEGG for each reaction and keep those annotated as reversible.

    A reaction counts as reversible when a line of its record starts with
    "EQUATION" and contains "<=>"; it is appended once per such line.

    Args:
        reaction_list (list): contains KEGG reaction IDs (e.g. 'R00709')

    Returns:
        list: contains KEGG IDs of reversible reactions
    """

    def _reversible_lines(record_text):
        # Number of EQUATION lines carrying the reversible arrow.
        return sum(
            1
            for row in record_text.rstrip().split("\n")
            if row.startswith("EQUATION") and "<=>" in row
        )

    reversible_reaction = []
    for reaction in reaction_list:
        matches = _reversible_lines(REST.kegg_get(reaction).read())
        reversible_reaction += [reaction] * matches
    return reversible_reaction
예제 #22
0
파일: groupings.py 프로젝트: irbraun/oats
    def save_all_kegg_pathway_files(paths):
        """Download every KEGG pathway file of each species to disk.

        For each species the pathway listing is fetched and every pathway
        record is written to ``<path>/<pathway id with ':' -> '_'>.txt``.
        The output directory is created on demand.

        Args:
            paths (dict of str:str): A mapping between strings referencing species and paths to the output directory for each.
        """
        for species, path in paths.items():
            for listing_line in REST.kegg_list("pathway", species):
                # The identifier is the first whitespace-separated field.
                pathway_id = listing_line.split()[0]
                contents = REST.kegg_get(dbentries=pathway_id).read()

                target = os.path.join(
                    path, "{}.txt".format(pathway_id.replace(":", "_")))
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(target, "w") as outfile:
                    outfile.write(contents)
예제 #23
0
def main():
    """Write one gene-set file per organism from its KEGG pathways.

    For each organism code the pathway listing is fetched, every pathway
    record is parsed for the gene symbols in its GENE section, and the sets
    with more than four genes are written to ``in/<code>.gmt`` as
    ``<pathway name>\\t\\t<sorted symbols joined by tabs>``.
    """
    # 'hsa', 'dme'
    for pwid in ['dre', 'cel', 'sce', 'mmu']:
        listing = REST.kegg_list('pathway', pwid).read()

        # pathway name -> gene symbols (duplicates kept, in file order)
        pathways_dict = defaultdict(list)
        for pathway in listing.strip().split('\n'):
            entry, description = pathway.split('\t')
            pathway_file = REST.kegg_get(entry).read()
            # Text before the first ' - ' of the description.
            pathway_name = description.split(' - ')[0]

            current_section = None
            for line in pathway_file.rstrip().split('\n'):
                section = line[:12].strip()  # section names are within 12 columns
                if section != '':
                    current_section = section
                if current_section != 'GENE':
                    continue

                fields = line[12:].split('; ')
                if len(fields) <= 1:
                    continue
                gene_id, gene_symbol = fields[0].split()
                pathways_dict[pathway_name].append(gene_symbol)

        with open('in/{0}.gmt'.format(pwid), 'w') as pw_file:
            for desc in sorted(pathways_dict):
                symbols = pathways_dict[desc]
                if len(symbols) > 4:
                    pw_file.write(
                        '{0}\t\t{1}\n'.format(desc, '\t'.join(sorted(symbols))))
    return None
예제 #24
0
def find_kegg(genes):
    """Map each gene to the ``lpl`` KEGG pathways whose record mentions it.

    A gene matches a pathway when it occurs as a substring of any line of
    the pathway record.  Progress is printed while searching.

    :param genes: iterable of gene identifier strings.
    :return: dict keyed by gene.  The value is the pathway id (``str``)
        when exactly one pathway matched and a ``list`` of pathway ids when
        several matched; genes without a match are absent.
    """
    count = 0
    lpl_pathways = REST.kegg_list("pathway", "lpl").read()
    entries = []
    for line in lpl_pathways.rstrip().split("\n"):
        entry, _description = line.split("\t")
        entries.append(entry)
    print(entries)

    pathway = {}
    # Each pathway record is downloaded once and reused for every gene;
    # the original re-fetched it for each (gene, pathway) pair.
    records = {}
    for gene in genes:
        for entry in entries:
            count += 1
            if entry not in records:
                records[entry] = REST.kegg_get(entry, option=None).readlines()
            if any(gene in record_line for record_line in records[entry]):
                print(entry)
                print(gene)
                if gene in pathway:
                    # Already known: turn the stored value into a list and
                    # add the new pathway id if it is not there yet.
                    known = pathway.get(gene)
                    hits = list(known) if isinstance(known, list) else [known]
                    if entry not in hits:
                        hits.append(entry)
                    pathway[gene] = hits
                else:
                    # First match: store the bare pathway id.
                    pathway[gene] = entry
            print(pathway, count)
    print(pathway)
    return pathway
예제 #25
0
def main(pathway):
    """Return promoter-region accession names for the genes of a pathway.

    The KEGG record of *pathway* is fetched and its GENE section parsed.
    Every well-formed gene line yields
    ``"promotor_region_<symbol>_<gene id>"``; duplicates are dropped and
    lines without the ``"<id> <symbol>; <description>"`` shape are skipped.

    :param pathway: KEGG pathway identifier.
    :return: list of accession strings in record order.
    """
    # Parenthesised single-argument print works on Python 2 and 3 alike.
    print("Fetching gene names related to pathway %s from the current KEGG database..." % pathway)
    promotor_gene_accessions = []

    pathway_file = REST.kegg_get(pathway).read()  # query and read each pathway

    # iterate through the KEGG pathway file, keeping track of which section
    # of the file we're in, only read the genes
    current_section = None
    for line in pathway_file.rstrip().split("\n"):
        section = line[:12].strip()  # section names are within 12 columns
        if section:
            current_section = section
        if current_section != "GENE":
            continue

        gene_identifiers, separator, _ = line[12:].partition("; ")
        identifier_parts = gene_identifiers.split()
        if not separator or len(identifier_parts) != 2:
            # No symbol/description on this line; nothing to build.
            continue
        gene_id, gene_symbol = identifier_parts

        # Compare the full accession: the list holds prefixed names, so the
        # old check against the bare symbol never matched.
        accession = "promotor_region_" + gene_symbol + "_" + gene_id
        if accession not in promotor_gene_accessions:
            promotor_gene_accessions.append(accession)

    return promotor_gene_accessions
예제 #26
0
    def update_files(base_dir="/data/databases/kegg/"):
        """Refresh the local KEGG listings and download missing ko KGML files.

        Writes ``<base_dir><db>.txt`` for the pathway, ko, cpd and brite
        listings, then fetches ``<base_dir>ko/koNNNNN.kgml`` for every
        listed pathway that is not on disk yet.

        A KGML file is only created after its download succeeded, so a
        failed request no longer leaves an empty file that would block
        every later retry.

        :param base_dir: target directory, including the trailing slash.
        """
        for db in ["pathway", "ko", "cpd", "brite"]:
            # Download first: a failed request must not truncate the
            # listing that is already on disk.
            data = REST.kegg_list(db).read()
            with open(base_dir + db + ".txt", "w") as h:
                h.write(data)
        # wget http://www.kegg.jp/kegg-bin/download_htext?htext=br08901.keg&format=json&filedir=
        # wget http://www.kegg.jp/kegg-bin/download_htext?htext=br08001.keg&format=json&filedir=

        with open(base_dir + "pathway.txt") as h:
            L = list(h)

        for pathway in tqdm(L):
            # Accepts both "path:map00010" and a bare "map00010" id.
            # NOTE(review): newer KEGG listings appear to omit the "path:"
            # prefix -- confirm against the current API output.
            pw = "ko" + pathway.split()[0].split("map")[-1]
            kgmlpath = base_dir + "ko/" + pw + ".kgml"
            if os.path.exists(kgmlpath):
                continue
            try:
                data = REST.kegg_get(pw, option="kgml").read()
            except Exception:
                # Best effort: skip this pathway without creating a file,
                # so the next run tries again.
                continue
            with open(kgmlpath, "w") as h:
                h.write(data)
            sleep(1)
예제 #27
0
def download_data(entry, fw):
    """Write one tab-separated summary line per requested KEGG entry.

    The combined response is split on ``///``; record *i* is paired with
    ``entry[i]``.  Each output line holds the entry id, the text after the
    record's DEFINITION keyword and the ``;``-joined pathway names from its
    PATHWAY section.

    :param entry: sequence of KEGG ids, passed as a whole to
        ``REST.kegg_get``.
    :param fw: writable text file object.
    """
    data = REST.kegg_get(entry).read()
    splitted = data.split("///")

    # DEFINITION text of every non-blank record, in response order.
    definitions = []
    for chunk in splitted:
        if chunk.strip() == "":
            continue
        keyword_at = chunk.find("DEFINITION")
        line_end = chunk.find("\n", keyword_at)
        definitions.append(
            chunk[keyword_at + len("DEFINITION"):line_end].strip())

    for i, identifier in enumerate(entry):
        print(identifier, end="\t", file=fw)
        print(definitions[i], end="\t", file=fw)

        pathways = []
        current_section = None
        for line in splitted[i].rstrip().split("\n"):
            section = line[:12].strip()  # section names are within 12 columns
            if section != "":
                current_section = section
            if current_section == "PATHWAY":
                # Drop the pathway id: keep what follows the first space.
                remainder = line[12:]
                pathways.append(remainder[remainder.find(" "):].strip())
        print(";".join(pathways), file=fw)
예제 #28
0
def amount_pathway_genes():
    """
    Count and print the genes of all human KEGG pathways whose description
    contains "repair".
    :return: None
    """
    human_pathways = REST.kegg_list("pathway", "hsa").read()

    # Keep the ids of the pathways described as repair pathways.
    repair_pathways = [
        entry
        for entry, description in (
            row.split("\t") for row in human_pathways.rstrip().split("\n")
        )
        if "repair" in description
    ]

    # Unique gene symbols, in order of first appearance.
    repair_genes = []
    for pathway in repair_pathways:
        pathway_file = REST.kegg_get(pathway).read()

        current_section = None
        for line in pathway_file.rstrip().split("\n"):
            section = line[:12].strip()  # section names are within 12 columns
            if section != "":
                current_section = section
            if current_section != "GENE":
                continue

            gene_identifiers, gene_description = line[12:].split("; ")
            gene_id, gene_symbol = gene_identifiers.split()
            if gene_symbol not in repair_genes:
                repair_genes.append(gene_symbol)

    summary = "There are %d repair pathways and %d repair genes. The genes are:" % \
        (len(repair_pathways), len(repair_genes))
    print(summary)
    print(", ".join(repair_genes))
예제 #29
0
def t_KEGG_Query():
    """Tests Bio.KEGG API Wrapper

    Issues a fixed series of info/list/find/get/conv/link requests; each
    response is read completely and its URL printed.
    """
    print("Testing Bio.KEGG.query\n\n")

    joined_genes = "hsa:10458+ece:Z5100"
    listed_genes = ["hsa:10458", "ece:Z5100"]

    # (REST function name, positional arguments), in execution order.
    requests = (
        # info tests
        ("kegg_info", ("kegg",)),
        ("kegg_info", ("pathway",)),
        # list tests
        ("kegg_list", ("pathway",)),
        ("kegg_list", ("pathway", "hsa")),
        ("kegg_list", ("organism",)),
        ("kegg_list", ("hsa",)),
        ("kegg_list", ("T01001",)),
        ("kegg_list", (joined_genes,)),
        ("kegg_list", (list(listed_genes),)),
        ("kegg_list", ("cpd:C01290+gl:G00092",)),
        ("kegg_list", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_list", ("C01290+G00092",)),
        ("kegg_list", (["C01290", "G00092"],)),
        # find tests
        ("kegg_find", ("genes", "shiga+toxin")),
        ("kegg_find", ("genes", ["shiga", "toxin"])),
        ("kegg_find", ("compound", "C7H10O5", "formula")),
        ("kegg_find", ("compound", "O5C7", "formula")),
        ("kegg_find", ("compound", "174.05", "exact_mass")),
        ("kegg_find", ("compound", "300-310", "mol_weight")),
        # get tests
        ("kegg_get", ("cpd:C01290+gl:G00092",)),
        ("kegg_get", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_get", ("C01290+G00092",)),
        ("kegg_get", (["C01290", "G00092"],)),
        ("kegg_get", (joined_genes,)),
        ("kegg_get", (list(listed_genes),)),
        ("kegg_get", (joined_genes, "aaseq")),
        ("kegg_get", (list(listed_genes), "aaseq")),
        ("kegg_get", ("hsa05130", "image")),
        # conv tests
        ("kegg_conv", ("eco", "ncbi-geneid")),
        ("kegg_conv", ("ncbi-geneid", "eco")),
        ("kegg_conv", ("ncbi-gi", joined_genes)),
        ("kegg_conv", ("ncbi-gi", list(listed_genes))),
        # link tests
        ("kegg_link", ("pathway", "hsa")),
        ("kegg_link", ("hsa", "pathway")),
        ("kegg_link", ("pathway", joined_genes)),
        ("kegg_link", ("pathway", list(listed_genes))),
    )

    for function_name, arguments in requests:
        # Resolve the function at call time, as the unrolled version did.
        resp = getattr(REST, function_name)(*arguments)
        resp.read()
        print(resp.url)
예제 #30
0
def main():
    """Load the KGML of a KEGG pathway into the configured Neo4j server.

    The pathway is searched by name (``sys.argv[1]``); with more than two
    response lines the user picks one by index.  The ``map`` prefix of the
    chosen id is replaced by ``ORGANISM``.  Connection settings are read
    from the ``KGML2NEO4J`` section of the file ``server_config``.

    WARNING: every node of the target database is deleted before the new
    graph is created.
    """
    # Expects name of pathway as argument
    search_term = sys.argv[1].replace(" ", "+")
    hits = KEGG_REST.kegg_find('PATHWAY', search_term).read().split('\n')
    if len(hits) == 1:
        print("Search found no results")
        return

    selected = 0
    if len(hits) > 2:
        print("More than 1 result:")
        for number, hit in enumerate(hits):
            columns = hit.split("\t")
            if len(columns) == 2:
                print(str(number) + "\t" + columns[1])
        selected = int(input("Which one? "))

    # Get the KGML from KEGG
    pathway_id = hits[selected].split("\t")[0].strip().replace("map", ORGANISM)
    pathway = KEGG_KGML_PARSER.read(KEGG_REST.kegg_get(pathway_id, "kgml"))

    config = configparser.ConfigParser()
    config.read("server_config")
    if "KGML2NEO4J" not in config:
        print("Server config not found!")
        return

    settings = config["KGML2NEO4J"]
    username = settings['username']
    password = settings['password']
    server_uri = settings['uri']

    db = database(server_uri, username, password)

    # Start from an empty graph.
    db.run_query("MATCH (n) DETACH DELETE n")

    fragments = [
        db.make_gene_query(pathway.genes),
        db.make_compound_query(pathway.compounds),
        db.make_reaction_query(pathway.reaction_entries),
        db.make_map_query(pathway.maps),
        db.make_relations_query(pathway.relations)
    ]
    # Non-empty fragments, each followed by a comma; the final character
    # of the assembled statement is then cut off.
    create_query = "CREATE " + "".join(
        fragment + "," for fragment in fragments if len(fragment) > 0)
    db.run_query(create_query[:-1])

    # Merge matching nodes
    merge_query = """MATCH (n1),(n2)
                    WHERE ANY (x IN n1.name WHERE x IN n2.name) and id(n1) < id(n2)
                    WITH [n1,n2] as ns
                    CALL apoc.refactor.mergeNodes(ns) YIELD node
                    RETURN node"""

    db.run_query(merge_query)
예제 #31
0
    entry, description = line.split("\t")
    human_pathways_dict[entry] = description
    if "DNA" in description:
        repair_pathways.append(entry)
        repair_pathways_dict[entry] = description

# Map every gene of interest to the KEGG pathways whose GENE section lists it.
# ``rejected`` collects the pathway id once per GENE line that could not be
# parsed.
rejected = []
gene_dict = dict((gene, []) for gene in gene_list)

i = 0
len_ = len(human_pathways_dict.keys())
for pathway in human_pathways_dict.keys():
    i += 1
    # Progress counter; the parenthesised form works on Python 2 and 3.
    print(str(i) + ' // ' + str(len_))
    pathway_file = REST.kegg_get(pathway).read()
    current_section = None
    for line in pathway_file.rstrip().split("\n"):
        section = line[:12].strip()  # section names are within 12 columns
        if not section == "":
            current_section = section
        if current_section == "GENE":
            try:
                gene_identifiers, gene_description = line[12:].split("; ")
                gene_id, gene_symbol = gene_identifiers.split()
            except ValueError:
                # Line does not have the "<id> <symbol>; <description>" shape.
                rejected.append(pathway)
                continue
            if gene_symbol in gene_dict:
                gene_dict[gene_symbol].append(pathway)

print("!!     KEGG     !!")
예제 #32
0
파일: downloadKEGG.py 프로젝트: yw595/SEED
#open("ec_5.4.2.2.txt",'w').write(request.read())
#records = Enzyme.parse(open("ec_5.4.2.2.txt"))
#record = list(records)[0]
#print(record.classname)
#print(record.entry)
# Download the KEGG organism listing and keep the second tab-separated
# column of every line (presumably the organism code -- confirm against the
# KEGG list format).
organisms = REST.kegg_list("organism").read()
organismlist = []
for line in organisms.rstrip().split("\n"):
    #print(line)
    code = line.split("\t")[1]
    organismlist.append(code)

#print(organismlist)

#parser = KGML_parser.KGMLparser()
#open("human_map.xml",'w').write(REST.kegg_get("hsa05130",option="kgml").read())
# Parse the KGML of pathway hsa01100 and print, for every compound, its
# name followed by the x attribute of each of its graphics elements.
human_map = KGML_parser.read(REST.kegg_get("hsa01100",option="kgml"))
cpds = human_map.compounds
for cpd in cpds:
    print(cpd.name)
    graphics = cpd.graphics
    for graphic in graphics:
        print(graphic.x)

# Same dump for the reaction entries of the map.
rxns = human_map.reaction_entries
for rxn in rxns:
    print(rxn.name)
    graphics = rxn.graphics
    for graphic in graphics:
        print(graphic.x)
예제 #33
0
파일: main.py 프로젝트: fafnir1990/GCA
    human_pathways_dict[entry] = description
    if "DNA" in description:
        repair_pathways.append(entry)
        repair_pathways_dict[entry] = description


# Pathways for which a GENE-section line could not be parsed. A pathway is
# appended once per unparseable line, so it may appear more than once.
rejected = []
# Maps every gene symbol in gene_list to the pathways whose GENE section
# mentions that symbol.
gene_dict = {gene: [] for gene in gene_list}

i = 0
len_ = len(human_pathways_dict)
for pathway in human_pathways_dict:
    i += 1
    # Progress indicator. The single-argument parenthesised form prints the
    # same text under both Python 2 and Python 3.
    print(str(i) + ' // ' + str(len_))
    pathway_file = REST.kegg_get(pathway).read()
    current_section = None
    for line in pathway_file.rstrip().split("\n"):
        section = line[:12].strip()  # section names are within 12 columns
        if section:
            # Lines whose first 12 columns are blank keep the previous section.
            current_section = section
        if current_section != "GENE":
            continue
        try:
            # Both unpackings raise ValueError when the line does not have
            # exactly the "<id> <symbol>; <description>" shape.
            gene_identifiers, gene_description = line[12:].split("; ")
            gene_id, gene_symbol = gene_identifiers.split()
        except ValueError:
            rejected.append(pathway)
            continue
        # Direct dict membership is O(1); `.keys()` builds a list on Python 2.
        if gene_symbol in gene_dict:
            gene_dict[gene_symbol].append(pathway)

print("!!     KEGG     !!")
예제 #34
0
def t_KEGG_Query():
    """Exercise the Bio.KEGG REST wrapper.

    For every (endpoint name, positional arguments) pair listed below, the
    endpoint is looked up on ``REST``, called, its response is read in full,
    and the response's ``url`` attribute is printed.
    """
    print("Testing Bio.KEGG.query\n\n")

    requests = (
        # info tests
        ("kegg_info", ("kegg",)),
        ("kegg_info", ("pathway",)),
        # list tests
        ("kegg_list", ("pathway",)),
        ("kegg_list", ("pathway", "hsa")),
        ("kegg_list", ("organism",)),
        ("kegg_list", ("hsa",)),
        ("kegg_list", ("T01001",)),
        ("kegg_list", ("hsa:10458+ece:Z5100",)),
        ("kegg_list", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_list", ("cpd:C01290+gl:G00092",)),
        ("kegg_list", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_list", ("C01290+G00092",)),
        ("kegg_list", (["C01290", "G00092"],)),
        # find tests
        ("kegg_find", ("genes", "shiga+toxin")),
        ("kegg_find", ("genes", ["shiga", "toxin"])),
        ("kegg_find", ("compound", "C7H10O5", "formula")),
        ("kegg_find", ("compound", "O5C7", "formula")),
        ("kegg_find", ("compound", "174.05", "exact_mass")),
        ("kegg_find", ("compound", "300-310", "mol_weight")),
        # get tests
        ("kegg_get", ("cpd:C01290+gl:G00092",)),
        ("kegg_get", (["cpd:C01290", "gl:G00092"],)),
        ("kegg_get", ("C01290+G00092",)),
        ("kegg_get", (["C01290", "G00092"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100",)),
        ("kegg_get", (["hsa:10458", "ece:Z5100"],)),
        ("kegg_get", ("hsa:10458+ece:Z5100", "aaseq")),
        ("kegg_get", (["hsa:10458", "ece:Z5100"], "aaseq")),
        ("kegg_get", ("hsa05130", "image")),
        # conv tests
        ("kegg_conv", ("eco", "ncbi-geneid")),
        ("kegg_conv", ("ncbi-geneid", "eco")),
        ("kegg_conv", ("ncbi-gi", "hsa:10458+ece:Z5100")),
        ("kegg_conv", ("ncbi-gi", ["hsa:10458", "ece:Z5100"])),
        # link tests
        ("kegg_link", ("pathway", "hsa")),
        ("kegg_link", ("hsa", "pathway")),
        ("kegg_link", ("pathway", "hsa:10458+ece:Z5100")),
        ("kegg_link", ("pathway", ["hsa:10458", "ece:Z5100"])),
    )

    for endpoint, args in requests:
        # Resolve the endpoint only when it is about to be called, so a
        # missing attribute surfaces at the same point as a direct call would.
        resp = getattr(REST, endpoint)(*args)
        resp.read()
        print(resp.url)
예제 #35
0
from Bio.KEGG import REST
from Bio.KEGG import Enzyme

request = REST.kegg_get("ec:7.1.2.2")
# Note: this is the EC number of ATP synthase.
# Write the response to disk; the context manager closes (and thereby flushes)
# the file before it is reopened for parsing.
with open("ec_7.1.2.2.txt", "w") as out_handle:
    out_handle.write(request.read())

# Enzyme.parse is consumed via list() while the handle is still open.
with open("ec_7.1.2.2.txt") as in_handle:
    records = Enzyme.parse(in_handle)
    record = list(records)[0]
print(record.classname)