Beispiel #1
0
def kooo(cccc):
    """Return the KEGG genes linked to the first KO of the first gene hit.

    The query is searched in the KEGG ``genes`` database, the identifier of
    the first hit is linked to the ``ko`` database, and the first ``ko:`` id
    found there is linked back to ``genes``.

    Args:
        cccc: Search term passed to ``REST.kegg_find("genes", ...)``.

    Returns:
        The raw text of the final ``REST.kegg_link("genes", <ko>)`` response,
        or an empty string when the search has no hit or no KO is linked.
    """
    find_text = REST.kegg_find("genes", cccc).read()
    # The gene identifier is the leading run of non-whitespace characters.
    gene_match = re.search(r'^\S+', find_text)
    if gene_match is None:
        # An empty reply used to raise IndexError on ``findall(...)[0]``.
        return ""

    ko_text = REST.kegg_link("ko", gene_match.group(0)).read()
    print("xxx", ko_text)
    # Replies shorter than four characters are treated as "no KO linked".
    if len(ko_text) < 4:
        return ""

    ko_match = re.search(r'ko:\S+', ko_text)
    if ko_match is None:
        return ""
    return REST.kegg_link("genes", ko_match.group(0)).read()
def add_kegg_descript2(hit):
    """Look up the KO number, annotation and module for a KEGG gene query.

    Every lookup is best effort: a field that cannot be determined is
    reported as the string ``"none"``.

    Args:
        hit: Query passed to ``REST.kegg_find("genes", ...)`` and to
            ``REST.kegg_link('module', ...)``.

    Returns:
        The value of ``strip_lines_list([module, KEGG, ann])`` where ``ann``
        has first been passed through ``reduce_func_len``.
    """
    module = "none"
    KEGG = "none"
    ann = "none"

    try:
        desc = REST.kegg_find("genes", hit).read()
    except Exception:
        # The lookup itself failed; keep all three fields at "none".
        desc = None

    if desc is not None:
        try:
            KEGG = re.search(r"K[0-9]{5}", desc).group(0)
        except (AttributeError, TypeError):
            # AttributeError: no K number in the description.
            KEGG = "none"

        try:
            # The annotation is the rest of the line after the K number.
            # The original called .replace() on the Match object itself, which
            # always raised, so this branch never produced a value.
            ann = re.search(r"(?<=K[0-9]{5}).*", desc).group(0).replace("\n", "")
        except (AttributeError, TypeError):
            try:
                # Fallback: second tab field, up to the first ';'.
                ann = desc.split("\t")[1].split(";")[0].replace("\n", "")
            except (AttributeError, IndexError, TypeError):
                ann = "none"

        try:
            mod = REST.kegg_link('module', hit).read()
            module = mod.split(":")[2].split("_")[-1].replace("\n", "")
        except Exception:
            # Covers both a failed request and a reply without a module id.
            module = "none"

    ann = reduce_func_len(ann)
    return strip_lines_list([module, KEGG, ann])
Beispiel #3
0
    def __get_all_information(self):
        """Populate ``self.modules`` or ``self.reactions`` for this entry.

        When ``self.byModules`` is true, the text from ``self.get_raw_data()``
        is scanned line by line. The second whitespace-separated field of the
        ``MODULE`` line and of every following line is appended to
        ``self.modules`` until a line starting with ``DISEASE``, ``DBLINKS``,
        ``REFERENCE`` or ``KO_PATHWAY`` ends the section.

        Otherwise the result of
        ``kegg_api.kegg_link("reaction", "path:" + self.id)`` is iterated and
        the part after the ``:`` in the second tab field of every line is
        appended to ``self.reactions``.
        """
        if self.byModules:
            info = self.get_raw_data().split("\n")
            section_end = ("DISEASE", "DBLINKS", "REFERENCE", "KO_PATHWAY")
            self.modules = []
            in_modules = False
            for text in info:
                if text.startswith("MODULE"):
                    in_modules = True
                elif text.startswith(section_end):
                    in_modules = False
                    continue
                elif not in_modules:
                    continue

                fields = re.sub(r'\s+', "\t", text).split("\t")
                # Blank or single-field lines carry no module id; skipping
                # them avoids the IndexError the unguarded [1] would raise.
                if len(fields) > 1:
                    self.modules.append(fields[1])
        else:
            data = kegg_api.kegg_link("reaction", "path:" + self.id)
            self.reactions = []
            for line in data:
                reaction = line.strip().split("\t")[1].split(":")[1]
                self.reactions.append(reaction)
Beispiel #4
0
    def __retrieve_reactions(self, pathway):
        """Return the reaction ids linked to ``pathway`` by the KEGG API.

        The raw ``REST.kegg_link('rn', pathway)`` response is converted with
        ``self.__to_dataframe`` and the entries under key ``1`` are returned
        with every ``'rn:'`` occurrence removed.
        """
        raw_links = REST.kegg_link('rn', pathway).read()
        reaction_column = self.__to_dataframe(raw_links)[1]

        stripped = []
        for entry in reaction_column:
            stripped.append(entry.replace('rn:', ''))
        return stripped
def add_kegg_descript2(hit):
    """Return module, KO number and annotation for a KEGG gene query.

    All lookups are best effort; any field that cannot be resolved is the
    string ``"none"``.

    Args:
        hit: Query used for ``REST.kegg_find("genes", ...)`` and for
            ``REST.kegg_link('module', ...)``.

    Returns:
        ``strip_lines_list([module, KEGG, ann])`` with ``ann`` first passed
        through ``reduce_func_len``.
    """
    module = KEGG = ann = "none"

    try:
        desc = REST.kegg_find("genes", hit).read()
    except Exception:
        # Request failed: report "none" for every field.
        desc = None

    if desc is not None:
        try:
            KEGG = re.search(r"K[0-9]{5}", desc).group(0)
        except (AttributeError, TypeError):
            # No K number present in the description.
            KEGG = "none"

        try:
            # Text following the K number on the same line. The original
            # applied .replace() to the Match object rather than to the
            # matched text, so it always fell through to the fallback.
            ann = re.search(r"(?<=K[0-9]{5}).*", desc).group(0).replace("\n", "")
        except (AttributeError, TypeError):
            try:
                # Fallback: second tab-separated field up to the first ';'.
                ann = desc.split("\t")[1].split(";")[0].replace("\n", "")
            except (AttributeError, IndexError, TypeError):
                ann = "none"

        try:
            mod = REST.kegg_link('module', hit).read()
            module = mod.split(":")[2].split("_")[-1].replace("\n", "")
        except Exception:
            # Either the request failed or the reply holds no module id.
            module = "none"

    ann = reduce_func_len(ann)
    return strip_lines_list([module, KEGG, ann])
Beispiel #6
0
    def _download_links(self,
                        dbs=("pathway", "enzyme", "reaction", "compound")):
        """Write one JSON link mapping per ordered pair of KEGG databases.

        For every ordered pair ``(sourcedb, targetdb)`` drawn from ``dbs`` the
        link table is fetched with ``REST.kegg_link(targetdb, sourcedb)`` and
        saved to ``<self.path>/links/<sourcedb>_<targetdb>.json``. Each file
        maps a first-column id to the list of second-column ids found with it.

        Args:
            dbs: Iterable of KEGG database names to cross-link. The default is
                an immutable tuple so it cannot be altered between calls.

        Returns:
            None. The mappings are only written to disk.
        """
        links_path = os.path.join(self.path, 'links')

        for sourcedb, targetdb in itertools.permutations(dbs, 2):
            links_raw = REST.kegg_link(targetdb, sourcedb)

            mapping = {}
            for line in links_raw.read().splitlines():
                fields = line.split('\t')
                # Skip lines without both columns instead of raising IndexError.
                if len(fields) < 2:
                    continue
                mapping.setdefault(fields[0], []).append(fields[1])

            # exist_ok avoids the check-then-create race of os.path.exists().
            os.makedirs(links_path, exist_ok=True)
            link_path = os.path.join(links_path,
                                     sourcedb + "_" + targetdb + ".json")
            with open(link_path, 'w') as f:
                json.dump(mapping, f, indent=2)
Beispiel #7
0
def get_kegg_path_to_gene_map(species):
    """Map KEGG pathways to the genes of ``species``.

    Args:
        species: Organism identifier passed as the target of
            ``REST.kegg_link(species, "pathway")``. The same text is stripped
            from the ``path:<species>`` and ``<species>:`` prefixes of the
            returned ids.

    Returns:
        DataFrame with the columns ``kegg_pathway`` and ``kegg_gene``, one
        row per link line. The columns are present even when no link was
        returned.
    """
    kegg_list = REST.kegg_link(species, "pathway")

    # Escape the code so it is matched literally inside the pattern.
    code = re.escape(species)
    clean_kegg_path_to_gene = re.compile(r"path:{0}|{0}:|\n".format(code))

    rowdicts = []
    for kegg_info in kegg_list:
        # Some handles yield bytes; normalise to text before applying the regex.
        if isinstance(kegg_info, bytes):
            kegg_info = kegg_info.decode("utf-8")

        kegg_data = clean_kegg_path_to_gene.sub("", kegg_info).split("\t")
        # Blank or malformed lines have no gene column; skip them.
        if len(kegg_data) < 2:
            continue

        rowdicts.append({"kegg_pathway": kegg_data[0],
                         "kegg_gene": kegg_data[1]})

    return DataFrame(rowdicts, columns=["kegg_pathway", "kegg_gene"])
Beispiel #8
0
def get_interaction(target, source):
    """Fetch the KEGG links between two databases and save them as TSV.

    The table returned by ``REST.kegg_link(target, source)`` is split into
    its two columns, everything up to and including the last ``:`` of each
    value is removed, and the result is written to
    ``./database/<source>_<target>.tsv`` (the directory must already exist).

    Args:
        target: Target database name; also the name of the second column.
        source: Source database name; also the name of the first column.

    Returns:
        The DataFrame that was written to disk.

    Raises:
        SystemExit: If the KEGG request fails with ``HTTPError``.
    """
    # Only the request can raise HTTPError, so keep the try body minimal.
    try:
        interaction = REST.kegg_link(target, source).readlines()
    except HTTPError:
        sys.exit("ERROR: Please select the available database according to the following URL. (http://www.kegg.jp/kegg/rest/keggapi.html) ")

    source_db = []
    target_db = []
    for line in interaction:
        individual = line.strip().split('\t')
        # Blank or single-column lines would raise IndexError below.
        if len(individual) < 2:
            continue
        source_db.append(individual[0])
        target_db.append(individual[1])

    df_interaction = pd.DataFrame({source: source_db, target: target_db})
    # Greedy match up to the last ':' strips the database prefix from each id.
    df_interaction = df_interaction.replace(r'.+(?<=\:)', '', regex=True)
    df_interaction.to_csv("./database/"+source+'_'+target+'.tsv',
                          sep='\t', index=False)

    return df_interaction
Beispiel #9
0
def kegg_rest(type: str,
              argument: str,
              optional_argument: str = "",
              sleep_time: float = .5) -> List[str]:
    """This function calls Biopython's KEGG REST function and returns the lines as a string list.

    All empty lines are deleted from the list as they do not contain any information.

    Arguments
    ----------
    * type: str ~ The KEGG REST action. Can be either 'info', 'get', 'link', 'list' or 'find'.
    * argument: str ~ The argument for the KEGG order.
    * optional_argument: str="" ~ The second argument, which is passed on for the 'link',
      'list' and 'find' actions and ignored for 'info' and 'get'.
    * sleep_time: float=0.5 ~ The time in seconds that shall be waited after a REST action
      is performed, so that successive calls do not contact the server back to back.

    Raises
    ----------
    * ValueError ~ If 'type' is not one of the supported actions.
    """
    # Execute correct Biopython KEGG REST function.
    if type == "info":
        kegg_data = REST.kegg_info(argument)
    elif type == "get":
        kegg_data = REST.kegg_get(argument)
    elif type == "link":
        kegg_data = REST.kegg_link(argument, optional_argument)
    elif type == "list":
        kegg_data = REST.kegg_list(argument, optional_argument)
    elif type == "find":
        kegg_data = REST.kegg_find(argument, optional_argument)
    else:
        # Fail early and clearly instead of hitting an unbound 'kegg_data' below.
        raise ValueError(
            f"Unsupported KEGG REST action {type!r}; "
            "expected 'info', 'get', 'link', 'list' or 'find'."
        )

    # Wait the sleep time doing nothing.
    time.sleep(sleep_time)

    # Get one string per line of the KEGG REST result.
    lines: List[str] = kegg_data.read().split("\n")

    # Delete empty lines.
    return [line for line in lines if line]
Beispiel #10
0
def get_kegg_path_to_gene_map(species):
    """Map KEGG pathways to the genes of ``species``.

    Args:
        species: Organism identifier passed as the target of
            ``REST.kegg_link(species, "pathway")``; it is also stripped from
            the ``path:<species>`` and ``<species>:`` prefixes of the ids.

    Returns:
        DataFrame with the columns ``kegg_pathway`` and ``entrezgene``, one
        row per link line. The columns are present even when the response
        contains no links.
    """
    kegg_list = REST.kegg_link(species, "pathway")

    # Escape the identifier so regex metacharacters in it are matched literally.
    code = re.escape(species)
    clean_kegg_path_to_gene = re.compile(r"path:{0}|{0}:|\n".format(code))

    rowdicts = []
    for kegg_info in kegg_list:
        # Handles may yield bytes or text depending on how they were opened.
        if isinstance(kegg_info, bytes):
            kegg_info = kegg_info.decode("utf-8")

        kegg_data = clean_kegg_path_to_gene.sub("", kegg_info).split("\t")
        # A blank or malformed line has no second column; skip it rather
        # than raising IndexError.
        if len(kegg_data) < 2:
            continue

        rowdicts.append({"kegg_pathway": kegg_data[0],
                         "entrezgene": kegg_data[1]})

    return DataFrame(rowdicts, columns=["kegg_pathway", "entrezgene"])
Beispiel #11
0
def create_link_dicts(target_db, source_db):
    """Build both directions of a KEGG link table as dictionaries of sets.

    The table from ``REST.kegg_link(target_db, source_db)`` is read line by
    line. For each line the part after the first ``:`` of the first column
    is the source id and the same part of the second column is the target id.

    Returns:
        A tuple ``(source_target_dict, target_source_dict)``: the first maps
        each source id to the set of its target ids, the second maps each
        target id to the set of its source ids.
    """
    response = REST.kegg_link(target_db, source_db)
    rows = [line.split('\t') for line in response.read().splitlines()]

    source_target_dict = {}
    target_source_dict = {}
    for columns in rows:
        source_id = columns[0].split(":")[1]
        target_id = columns[1].split(":")[1]
        # setdefault creates the set on first sight of a key.
        source_target_dict.setdefault(source_id, set()).add(target_id)
        target_source_dict.setdefault(target_id, set()).add(source_id)

    return source_target_dict, target_source_dict
Beispiel #12
0
def create_a_to_b_dict(a,b):
    """Map each entry of KEGG database `a` to the set of linked `b` entries.

    The link table comes from ``REST.kegg_link(a, b)``. Whichever column of
    a line starts with the abbreviation of `a` is used as the `a` id; the
    other column is used as the `b` id. Both ids are stored without their
    ``<abbreviation>:`` prefix.
    """
    # Database name -> id prefix used in the link table.
    abbrv_dict = {
        "pathway": "path",
        "brite": "br",
        "module": "md",
        "orthology": "ko",
        "genome": "gn",
        "compound": "cpd",
        "glycan": "gl",
        "reaction": "rn",
        "rclass": "rc",
        "enzyme": "ec",
        "network": "ne",
        "variant": "hsa_var",
        "disease": "ds",
        "drug": "dr",
        "dgroup": "dg",
    }

    link_handle = REST.kegg_link(a, b)
    pairs = [line.split() for line in link_handle]

    ab_dict = {}
    for pair in pairs:
        # Work out which column holds the `a` id.
        if pair[0].startswith(abbrv_dict[a]):
            a_col, b_col = 0, 1
        else:
            a_col, b_col = 1, 0
        aid = pair[a_col].split(abbrv_dict[a] + ":")[1]
        bid = pair[b_col].split(abbrv_dict[b] + ":")[1]
        ab_dict.setdefault(aid, set()).add(bid)

    return ab_dict
            if fields[0] == 'GENE':
                want = 1
                ## The line with GENE is different
                print(fields[2].rstrip(';'))
## We reached the next section of the file
            elif want == 1 and re.match('^\S', line):
                sys.exit()
## We're still in the list of genes
            if want == 1 and len(fields) > 1:
                print(fields[1].rstrip(';'))

# Get genes involved with fatty-acid biosynthesis in Kitasatospora

####Get gene ontology terms#########

# NOTE(review): the headings above mention genes and gene ontology terms, but
# the call below requests the link table between the "compound" database and
# the entry "map00061"; the raw response text is kept in `result`.
result = REST.kegg_link("compound", "map00061").read()
#print(human_pathways)
# Filter all human pathways for repair pathways
####repair_pathways = []
####for line in human_pathways.rstrip().split("n"):
#entry, description = line.split("\t")
####print(line)
"""	if "repair" in description:
		repair_pathways.append(entry)
#print(repair_pathways)
# Get the genes for pathways and add them to a list
repair_genes = []
for pathway in repair_pathways:
	pathway_file = REST.kegg_get(pathway).read()  # query and read each pathway
	print(pathway_file)
    # iterate through each KEGG pathway file, keeping track of which section
Beispiel #14
0
def t_KEGG_Query():
    """Exercise the Bio.KEGG REST wrapper with a fixed series of queries.

    Each query is sent in order; its response is read in full and the URL
    of the response is printed.
    """
    print("Testing Bio.KEGG.query\n\n")

    # (REST function suffix, positional arguments), in execution order.
    queries = [
        # info tests
        ("info", ("kegg",)),
        ("info", ("pathway",)),
        # list tests
        ("list", ("pathway",)),
        ("list", ("pathway", "hsa")),
        ("list", ("organism",)),
        ("list", ("hsa",)),
        ("list", ("T01001",)),
        ("list", ("hsa:10458+ece:Z5100",)),
        ("list", (["hsa:10458", "ece:Z5100"],)),
        ("list", ("cpd:C01290+gl:G00092",)),
        ("list", (["cpd:C01290", "gl:G00092"],)),
        ("list", ("C01290+G00092",)),
        ("list", (["C01290", "G00092"],)),
        # find tests
        ("find", ("genes", "shiga+toxin")),
        ("find", ("genes", ["shiga", "toxin"])),
        ("find", ("compound", "C7H10O5", "formula")),
        ("find", ("compound", "O5C7", "formula")),
        ("find", ("compound", "174.05", "exact_mass")),
        ("find", ("compound", "300-310", "mol_weight")),
        # get tests
        ("get", ("cpd:C01290+gl:G00092",)),
        ("get", (["cpd:C01290", "gl:G00092"],)),
        ("get", ("C01290+G00092",)),
        ("get", (["C01290", "G00092"],)),
        ("get", ("hsa:10458+ece:Z5100",)),
        ("get", (["hsa:10458", "ece:Z5100"],)),
        ("get", ("hsa:10458+ece:Z5100", "aaseq")),
        ("get", (["hsa:10458", "ece:Z5100"], "aaseq")),
        ("get", ("hsa05130", "image")),
        # conv tests
        ("conv", ("eco", "ncbi-geneid")),
        ("conv", ("ncbi-geneid", "eco")),
        ("conv", ("ncbi-gi", "hsa:10458+ece:Z5100")),
        ("conv", ("ncbi-gi", ["hsa:10458", "ece:Z5100"])),
        # link tests
        ("link", ("pathway", "hsa")),
        ("link", ("hsa", "pathway")),
        ("link", ("pathway", "hsa:10458+ece:Z5100")),
        ("link", ("pathway", ["hsa:10458", "ece:Z5100"])),
    ]

    for action, args in queries:
        # Resolve the function lazily so earlier queries still run first.
        resp = getattr(REST, "kegg_" + action)(*args)
        resp.read()
        print(resp.url)
Beispiel #15
0
def t_KEGG_Query():
    """Run a fixed sequence of Bio.KEGG REST queries and print their URLs.

    Every response is consumed with ``read()`` before its ``url`` is printed.
    """

    def show(resp):
        # Consume the body, then report which URL was requested.
        resp.read()
        print(resp.url)

    print("Testing Bio.KEGG.query\n\n")

    # info tests
    show(REST.kegg_info("kegg"))
    show(REST.kegg_info("pathway"))

    # list tests
    show(REST.kegg_list("pathway"))
    show(REST.kegg_list("pathway", "hsa"))
    show(REST.kegg_list("organism"))
    show(REST.kegg_list("hsa"))
    show(REST.kegg_list("T01001"))
    show(REST.kegg_list("hsa:10458+ece:Z5100"))
    show(REST.kegg_list(["hsa:10458", "ece:Z5100"]))
    show(REST.kegg_list("cpd:C01290+gl:G00092"))
    show(REST.kegg_list(["cpd:C01290", "gl:G00092"]))
    show(REST.kegg_list("C01290+G00092"))
    show(REST.kegg_list(["C01290", "G00092"]))

    # find tests
    show(REST.kegg_find("genes", "shiga+toxin"))
    show(REST.kegg_find("genes", ["shiga", "toxin"]))
    show(REST.kegg_find("compound", "C7H10O5", "formula"))
    show(REST.kegg_find("compound", "O5C7", "formula"))
    show(REST.kegg_find("compound", "174.05", "exact_mass"))
    show(REST.kegg_find("compound", "300-310", "mol_weight"))

    # get tests
    show(REST.kegg_get("cpd:C01290+gl:G00092"))
    show(REST.kegg_get(["cpd:C01290", "gl:G00092"]))
    show(REST.kegg_get("C01290+G00092"))
    show(REST.kegg_get(["C01290", "G00092"]))
    show(REST.kegg_get("hsa:10458+ece:Z5100"))
    show(REST.kegg_get(["hsa:10458", "ece:Z5100"]))
    show(REST.kegg_get("hsa:10458+ece:Z5100", "aaseq"))
    show(REST.kegg_get(["hsa:10458", "ece:Z5100"], "aaseq"))
    show(REST.kegg_get("hsa05130", "image"))

    # conv tests
    show(REST.kegg_conv("eco", "ncbi-geneid"))
    show(REST.kegg_conv("ncbi-geneid", "eco"))
    show(REST.kegg_conv("ncbi-gi", "hsa:10458+ece:Z5100"))
    show(REST.kegg_conv("ncbi-gi", ["hsa:10458", "ece:Z5100"]))

    # link tests
    show(REST.kegg_link("pathway", "hsa"))
    show(REST.kegg_link("hsa", "pathway"))
    show(REST.kegg_link("pathway", "hsa:10458+ece:Z5100"))
    show(REST.kegg_link("pathway", ["hsa:10458", "ece:Z5100"]))
Beispiel #16
0
]

# Maps each pathway id to the list of EC numbers linked to it
# (filled by the first loop over pathway_list below).

pathway_ec = {}
# Maps each organism to a dictionary; only empty dictionaries are created in
# the part of the script visible here.
organism_enzymes = {}

# Create one empty entry per organism.
# NOTE(review): organism_list and pathway_list are defined outside this excerpt.
for organism in organism_list:
    organism_enzymes[organism] = {}

# For every pathway, fetch its 'ec' links and store the second column of each
# line with its first three characters removed (presumably the "ec:" prefix).
# NOTE(review): an empty response yields a single empty line, for which the
# two-value unpacking of line.split('\t') raises ValueError.
for pathway in pathway_list:
    ec_read = REST.kegg_link('ec', pathway).read()
    pathway_ec[pathway] = []
    for line in ec_read.rstrip().split('\n'):
        path, ec = line.split('\t')
        pathway_ec[pathway].append(ec[3:])

# Not used in the visible part of the script.
count = 0
# Go through each pathway and collect all the orthologs
for pathway in pathway_list:
    print(pathway)
    orthology_read = REST.kegg_link('ko', pathway).read()
    # For each ortholog, find the ec number and if
    # an organism has the gene for it
    for line in orthology_read.rstrip().split('\n'):
        # NOTE(review): this rebinds the outer loop variable `pathway` to the
        # first column of the line.
        pathway, ortholog = line.split('\t')
        # Query the 'ec' links of the ortholog id without its first three
        # characters (presumably the "ko:" prefix).
        to_ec = REST.kegg_link('ec', ortholog[3:]).read()
Beispiel #17
0
def get_kos(pathway):
    """Return the 'ko' link table of ``pathway`` as a list of column lists.

    The response of ``REST.kegg_link('ko', pathway)`` is split on ``'\\n'``
    and every resulting line is split on tabs. A trailing newline in the
    response therefore produces a final ``['']`` entry.
    """
    response_text = REST.kegg_link('ko', pathway).read()
    rows = []
    for line in response_text.split('\n'):
        rows.append(line.split('\t'))
    return rows
Beispiel #18
0
def get_genes(ko):
    """Return the 'genes' link table of ``ko`` as a list of column lists.

    The response of ``REST.kegg_link('genes', ko)`` is split on ``'\\n'``
    and each line is split on tabs. A trailing newline in the response
    therefore produces a final ``['']`` entry.
    """
    response_text = REST.kegg_link('genes', ko).read()
    table = []
    for record in response_text.split('\n'):
        table.append(record.split('\t'))
    return table