def test_read_and_write_KGML_files(self):
    """Round-trip every KGML fixture through a local file.

    Each input file is parsed and its element counts are compared with the
    fixture's expected counts. The pathway is then written back out as KGML,
    the written file is parsed again, and the counts are checked once more.
    """
    def count_elements(parsed):
        # Tuple layout mirrors what is compared against ``element_counts``.
        return (
            len(parsed.entries),
            len(parsed.orthologs),
            len(parsed.compounds),
            len(parsed.maps),
        )

    for fixture in self.data:
        # Parse the reference input and verify what was read
        with open(fixture.infilename) as handle:
            loaded = read(handle)
        self.assertEqual(count_elements(loaded), fixture.element_counts)

        # Serialise the parsed pathway back to KGML on disk
        with open(fixture.outfilename, "w") as handle:
            handle.write(loaded.get_KGML())

        # Parse our own output and verify the counts survived the round trip
        with open(fixture.outfilename) as handle:
            reloaded = read(handle)
        self.assertEqual(count_elements(reloaded), fixture.element_counts)
def test_read_and_write_KGML_files(self):
    """Read KGML from, and write KGML to, local files.

    Check we read/write the correct number of elements.
    """
    for p in self.data:
        # Test opening file. The default text mode is used: the legacy 'U'
        # flag was removed in Python 3.11 (open() raises ValueError for
        # 'rU'), and universal newlines are already the text-mode default.
        with open(p.infilename) as f:
            pathway = read(f)
        # Do we have the correct number of elements of each type
        self.assertEqual(
            (len(pathway.entries), len(pathway.orthologs),
             len(pathway.compounds), len(pathway.maps)),
            p.element_counts)
        # Test writing file
        with open(p.outfilename, 'w') as f:
            f.write(pathway.get_KGML())
        # Can we read the file we wrote?
        with open(p.outfilename) as f:
            pathway = read(f)
        # Do we have the correct number of elements of each type
        self.assertEqual(
            (len(pathway.entries), len(pathway.orthologs),
             len(pathway.compounds), len(pathway.maps)),
            p.element_counts)
def test_render_KGML_modify(self):
    """Render KGML to PDF after modifying graphics attributes.

    The first fixture is drawn with wider lines for selected orthologs; the
    second is drawn with a distinct background colour per ortholog.
    """
    fixtures = self.data

    def shares_ko_id(entry):
        # True when any identifier in the entry name is in self.ko_ids
        return len(set(entry.name.split()).intersection(self.ko_ids))

    # Original KGML for KO01100: widen the lines of the lipid pathway
    with open(fixtures[0].infilename) as handle:
        pathway = read(handle)
    widened = [entry for entry in pathway.orthologs if shares_ko_id(entry)]
    for entry in widened:
        for graphic in entry.graphics:
            graphic.width = 10
    canvas = KGMLCanvas(pathway)
    canvas.draw(fixtures[0].output_stem + '_widths.pdf')

    # Original KGML for KO3070: recolour the reaction of each ortholog entry
    with open(fixtures[1].infilename) as handle:
        pathway = read(handle)
    recoloured = list(pathway.orthologs)
    # Biopython's ColorSpiral supplies one colour per ortholog
    spiral = ColorSpiral(a=2, b=0.2, v_init=0.85, v_final=0.5, jitter=0.03)
    palette = spiral.get_colors(len(recoloured))
    for entry, colour in zip(recoloured, palette):
        for graphic in entry.graphics:
            graphic.bgcolor = colour
    canvas = KGMLCanvas(pathway)
    pathway.image = fixtures[1].pathway_image
    canvas.import_imagemap = fixtures[1].show_pathway_image
    canvas.draw(fixtures[1].output_stem + '_colors.pdf')
def test_render_KGML_modify(self):
    """Rendering of KGML to PDF, with modification.

    Writes ``<output_stem>_widths.pdf`` for the first fixture and
    ``<output_stem>_colors.pdf`` for the second.
    """
    cases = self.data

    # --- Line widths: original KGML for KO01100, lipid pathway ----------
    with open(cases[0].infilename) as kgml_file:
        pathway = read(kgml_file)
    # Keep only orthologs whose name contains one of self.ko_ids
    selected = []
    for ortholog in pathway.orthologs:
        if len(set(ortholog.name.split()).intersection(self.ko_ids)):
            selected.append(ortholog)
    for ortholog in selected:
        for shape in ortholog.graphics:
            shape.width = 10
    renderer = KGMLCanvas(pathway)
    renderer.draw(cases[0].output_stem + '_widths.pdf')

    # --- Reaction colours: original KGML for KO3070, every ortholog -----
    with open(cases[1].infilename) as kgml_file:
        pathway = read(kgml_file)
    all_orthologs = list(pathway.orthologs)
    # Use Biopython's ColorSpiral to generate colours
    colour_source = ColorSpiral(
        a=2, b=0.2, v_init=0.85, v_final=0.5, jitter=0.03
    )
    colours = colour_source.get_colors(len(all_orthologs))
    for ortholog, colour in zip(all_orthologs, colours):
        for shape in ortholog.graphics:
            shape.bgcolor = colour
    renderer = KGMLCanvas(pathway)
    pathway.image = cases[1].pathway_image
    renderer.import_imagemap = cases[1].show_pathway_image
    renderer.draw(cases[1].output_stem + '_colors.pdf')
def test_render_KGML_transparency(self):
    """Render KGML to PDF using colours that carry an alpha channel.

    Both renderings are written to ``<output_stem>_transparency.pdf`` of
    their respective fixture.
    """
    fixtures = self.data

    def shares_ko_id(entry):
        # True when any identifier in the entry name is in self.ko_ids
        return len(set(entry.name.split()).intersection(self.ko_ids))

    # Original KGML for KO01100: alpha channel on the lipid pathway
    with open(fixtures[0].infilename) as handle:
        pathway = read(handle)
    translucent = [e for e in pathway.orthologs if shares_ko_id(e)]
    for entry in translucent:
        for graphic in entry.graphics:
            # The alpha channel is appended directly to the hex colour string
            graphic.fgcolor = graphic.fgcolor + "77"
            graphic.width = 20
    canvas = KGMLCanvas(pathway)
    canvas.draw(fixtures[0].output_stem + "_transparency.pdf")

    # Original KGML for KO3070: alpha channel on every ortholog entry
    with open(fixtures[1].infilename) as handle:
        pathway = read(handle)
    entries = list(pathway.orthologs)
    # Biopython's ColorSpiral supplies one colour per ortholog
    spiral = ColorSpiral(a=2, b=0.2, v_init=0.85, v_final=0.5, jitter=0.03)
    palette = spiral.get_colors(len(entries))
    for entry, colour in zip(entries, palette):
        # Extend the colour tuple with an alpha component
        with_alpha = colour + (0.5, )
        for graphic in entry.graphics:
            graphic.bgcolor = with_alpha
    canvas = KGMLCanvas(pathway)
    pathway.image = fixtures[1].pathway_image
    canvas.import_imagemap = fixtures[1].show_pathway_image
    canvas.draw(fixtures[1].output_stem + "_transparency.pdf")
def test_render_KGML_transparency(self):
    """Rendering of KGML to PDF, with color alpha channel.

    The first fixture gets a translucent foreground colour on selected
    orthologs; the second gets a translucent background colour on all of
    them.
    """
    cases = self.data

    # --- Foreground alpha: original KGML for KO01100, lipid pathway -----
    with open(cases[0].infilename) as kgml_file:
        pathway = read(kgml_file)
    # Keep only orthologs whose name contains one of self.ko_ids
    selected = []
    for ortholog in pathway.orthologs:
        if len(set(ortholog.name.split()).intersection(self.ko_ids)):
            selected.append(ortholog)
    for ortholog in selected:
        for shape in ortholog.graphics:
            # Modify hex colour directly by appending alpha channel
            # to hex string
            shape.fgcolor = shape.fgcolor + "77"
            shape.width = 20
    renderer = KGMLCanvas(pathway)
    renderer.draw(cases[0].output_stem + '_transparency.pdf')

    # --- Background alpha: original KGML for KO3070, every ortholog -----
    with open(cases[1].infilename) as kgml_file:
        pathway = read(kgml_file)
    all_orthologs = list(pathway.orthologs)
    # Use Biopython's ColorSpiral to generate colours
    colour_source = ColorSpiral(
        a=2, b=0.2, v_init=0.85, v_final=0.5, jitter=0.03
    )
    colours = colour_source.get_colors(len(all_orthologs))
    for ortholog, colour in zip(all_orthologs, colours):
        # Modify color tuples to add alpha channel
        rgba = colour + (0.5, )
        for shape in ortholog.graphics:
            shape.bgcolor = rgba
    renderer = KGMLCanvas(pathway)
    pathway.image = cases[1].pathway_image
    renderer.import_imagemap = cases[1].show_pathway_image
    renderer.draw(cases[1].output_stem + '_transparency.pdf')
def getPathway(pathName):
    """Download a pathway's KGML from the KEGG REST service and parse it.

    Args:
        pathName: pathway identifier inserted into the request URL
            ``http://rest.kegg.jp/get/<pathName>/kgml``.

    Returns:
        Whatever ``read`` returns for the downloaded KGML text.

    Raises:
        SystemExit: if the server answers with an HTTP error status.
    """
    try:
        response = requests.get('http://rest.kegg.jp/get/' + pathName + '/kgml')
        # requests.get() does not raise for 4xx/5xx statuses on its own;
        # without this call the handler below could never run and the body
        # of an error response would be handed to the parser.
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        raise SystemExit(err) from err
    pathway = read(response.text)
    return pathway
def test_render_KGML_basic(self):
    """Basic rendering of KGML: write to PDF without modification."""
    # We test rendering of the original KEGG KGML using only local
    # files.
    for p in self.data:
        # Default text mode: the legacy 'U' flag was removed in Python 3.11
        # (open() raises ValueError for 'rU'), and universal newlines are
        # already the text-mode default.
        with open(p.infilename) as f:
            pathway = read(f)
        pathway.image = p.pathway_image
        kgml_map = KGMLCanvas(pathway)
        kgml_map.import_imagemap = p.show_pathway_image
        kgml_map.draw(p.output_stem + '_original.pdf')
def test_render_KGML_import_map(self):
    """Render each KGML fixture to PDF with an imported imagemap.

    The canvas is created with ``import_imagemap=True``, so the image comes
    from the URL indicated in the .xml file. This test may fail if the
    imagemap is not available (e.g. if there is not a web connection), and
    may look odd if the remote imagemap has changed since the local KGML
    file was downloaded.
    """
    suffix = "_importmap.pdf"
    for fixture in self.data:
        # Parse the local KGML, then draw it over the imported image
        with open(fixture.infilename) as handle:
            parsed = read(handle)
        canvas = KGMLCanvas(parsed, import_imagemap=True)
        canvas.draw(fixture.output_stem + suffix)
def test_render_KGML_import_map(self):
    """Basic rendering of KGML: use imported imagemap.

    Uses the URL indicated in the .xml file.

    This test may fail if the imagemap is not available (e.g. if
    there is not a web connection), and may look odd if the remote
    imagemap has changed since the local KGML file was downloaded.
    """
    # We test rendering of the original KEGG KGML using imported files
    for p in self.data:
        # Default text mode: the legacy 'U' flag was removed in Python 3.11
        # (open() raises ValueError for 'rU'), and universal newlines are
        # already the text-mode default.
        with open(p.infilename) as f:
            pathway = read(f)
        kgml_map = KGMLCanvas(pathway, import_imagemap=True)
        kgml_map.draw(p.output_stem + '_importmap.pdf')
def parse(self, file):
    """Parse one KGML file and fill ``self.path`` with weighted gene nodes.

    The file is read from the hard-coded ``D:/NIR/bernmix_method/hsaFiles/``
    directory. Every entry of type ``'gene'`` becomes a ``Node`` built from
    the identifiers in its name, each node's ``weight`` is incremented once
    for every relation end it occupies, and finally the keys of
    ``self.path`` are renumbered to ``0..n-1`` in their existing order.

    Args:
        file: name of the KGML file inside the hard-coded directory.
    """
    def _parse_name(nodes):
        # "org:1 org:2" -> ["1", "2"]: keep the part after the first colon
        # of every whitespace-separated token.
        return [
            [token.split(':')[1] for token in node.split()]
            for node in nodes
        ]

    def _set_gene(way, path):
        # One Node per gene entry, keyed by the entry id.
        genes = [e for e in way.entries.values() if e.type == 'gene']
        names = _parse_name([e.name for e in genes])
        for entry, name in zip(genes, names):
            path[entry.id] = Node(name)

    def _set_weight(way, path):
        # Each relation contributes +1 to every gene it connects.
        for relation in way.relations:
            if relation.entry1.type == 'gene':
                path[relation.entry1.id].weight += 1
            if relation.entry2.type == 'gene':
                path[relation.entry2.id].weight += 1

    def _change_keys(path):
        # Renumber in one step. Popping and re-inserting key by key loses
        # nodes whenever a new index equals an existing key that has not
        # been moved yet (possible when the keys are small integers).
        renumbered = dict(enumerate(path.values()))
        path.clear()
        path.update(renumbered)

    # The mode belongs to open(), not to the parser, and the handle must be
    # closed once parsing is done.
    with open('D:/NIR/bernmix_method/hsaFiles/' + file, 'r') as f:
        pathway = read(f)
    _set_gene(pathway, self.path)
    _set_weight(pathway, self.path)
    _change_keys(self.path)
def init(self):
    """Populate ``self.ko_dict`` and ``self.pw_dict`` from files on disk.

    First, every line of ``self.ko_list_path`` (tab-separated KO id and
    description) is split into gene names, an optional list of ``ec:``
    numbers and a free-text description, stored under ``self.ko_dict[ko]``.
    Then every ``*.kgml`` file in ``self.kgmls_dir`` larger than 100 bytes
    is parsed; the pathway is recorded in ``self.pw_dict`` and appended to
    the ``"pathways"`` list of each known KO it mentions. Finally
    ``self.ko_dict`` is converted to a plain ``dict``.
    """
    with open(self.ko_list_path) as h:
        for koline in h:
            ko, desc = koline.replace("#160;", "").split("\t")
            # Text before the first ';' holds the gene names, the rest is
            # the description, optionally ending in "[EC:...]".
            gene, _, desc2 = desc.partition(";")
            desc = desc2.strip().split("[EC:")[0]
            ec_parts = desc2.split("[EC:")
            ecs = None
            if len(ec_parts) > 1:
                ecs = ["ec:" + x
                       for x in ec_parts[1].replace("]", "").split()]
            self.ko_dict[ko]["genes"] = gene.split()
            self.ko_dict[ko]["ecs"] = ecs
            self.ko_dict[ko]["desc"] = desc
            self.ko_dict[ko]["pathways"] = []

    for kgml_path in glob.glob(self.kgmls_dir + "/*.kgml"):
        # Files of 100 bytes or less are skipped.
        if os.path.getsize(kgml_path) > 100:
            # Close the handle after parsing instead of leaking one per file.
            with open(kgml_path) as kgml_handle:
                kgml = read(kgml_handle)
            pw_name = "kegg_pw:" + kgml.name.split(":ko")[1]
            ko_react = defaultdict(list)
            for ortholog in kgml.orthologs:
                for ko in ortholog.name.split():
                    if ":" in ko and "pathways" in self.ko_dict[ko]:
                        self.ko_dict[ko]["pathways"].append(pw_name)
                        if ortholog.reaction:
                            ko_react[ko.split(":")[1]].append(
                                ortholog.reaction)
            self.pw_dict[pw_name] = {
                "name": pw_name,
                "title": kgml.title,
                "reactions": dict(ko_react)
            }
    self.ko_dict = dict(self.ko_dict)
def __init__(self, filePath):
    """Load a KGML file and build the compounds graph from it.

    Args:
        filePath: path of the KGML file handed to ``read``.
    """
    self.compoundsGraph = {}
    # Close the file as soon as it has been parsed instead of leaking the
    # handle until garbage collection.
    with open(filePath, 'r') as handle:
        self.bioKGML = read(handle)
    self._fillGraph()
def parse_kegg(org_id, strain, sourcedb, session):
    """Create interactions from the relations of every KEGG pathway of an organism.

    For each pathway listed for ``org_id`` the KGML is fetched and parsed, and
    every relation (except ``maplink`` ones) is turned into interactor pairs:
    protein-protein, protein-metabolite and protein-intermediate compound.
    Missing ``Metabolite`` and ``Interaction`` rows are created, and each
    interaction is linked to an ``InteractionReference`` and to the
    ``InteractionSource`` matching ``sourcedb``.

    Args:
        org_id: organism code used to list pathways and compared with the
            prefix of each relation id; the value 'eco' switches to ortholog
            lookup through ``OrthologEcoli``.
        strain: strain stored on new interactions and used to filter
            ``OrthologEcoli`` rows.
        sourcedb: value matched against ``InteractionSource.data_source``.
        session: object providing ``query``/``add``/``commit``
            (presumably a SQLAlchemy session - TODO confirm).

    NOTE(review): this function was recovered from whitespace-collapsed text;
    the nesting below was reconstructed from syntax and comments and should be
    checked against the upstream file.
    """
    # get pathways for organism specified by org_id
    pathways = kegg_list(database='pathway', org=org_id).read().split('path:')
    path_ids = []

    # make list of path ids to iterate through (first 8 characters of each chunk)
    for path in pathways:
        if path != '':
            path_ids.append(path[:8])

    # iterate through each path and obtain interactions
    for path in path_ids:
        # get kgml representation of path
        kgml_path = read(kegg_get(path, option='kgml'))
        path_name = kgml_path._getname()
        # dictionary of compounds in current path (node_id: kegg_id)
        #   compound._getid() returns node id (only relevant in context of current path)
        #   compound._getname() returns kegg id (relevant in overall KEGG DB)
        compound_ids = {}
        for compound in kgml_path.compounds:
            compound_ids[compound._getid()] = compound._getname()[-6:]
        # go through each relation in path
        for relation in kgml_path.relations:
            relation_type = relation.element.attrib['type']
            # ignore maplink relations
            if relation_type == 'maplink':
                continue
            # relation._getentry1/2() returns protein id (locus) or compound id (KEGG id)
            entries = [relation._getentry1()._getname(), relation._getentry2()._getname()]
            # if one or both interactors are listed as undefined, move on to next interaction
            if (entries[0] == 'undefined') | (entries[1] == 'undefined'):
                continue
            # list to hold existing interactors; each item is [interactor, reference id]
            interactors = [[], []]
            # list to hold new metabolite ids for interactions with metabolites not yet in the database
            new_metabolites = [[], []]
            # go through each entry in the relation
            for num in range(0, 2):
                # each entry may contain >1 id; go through all of them
                for id in entries[num].split(' '):
                    if id == '':
                        continue
                    # if interactor is not protein or compound, continue
                    if (id.split(':')[0] != org_id) & (id.split(':')[1] not in kegg_compounds):
                        continue
                    # check if the id is a kegg id by searching in kegg_compounds
                    kegg_id= None
                    if id.split(':')[1] in kegg_compounds:
                        kegg_id = id.split(':')[1]
                    # check if interactor (protein) already exists
                    if (kegg_id is None) & (org_id != 'eco'):
                        interactor = session.query(Interactor).get(id.split(':')[1])
                        if interactor is not None:
                            # make sure to add None value; this will be needed to create interaction reference later
                            # None is appended rather than the interactor id because the interactor is not an ortholog
                            interactors[num].append([interactor, None])
                    # if it doesn't exist, it's not a valid protein, so check if it is a valid compound
                    elif kegg_id is not None:
                        interactor = session.query(Metabolite).filter_by(kegg = kegg_id).first()
                        # if metabolite with id was not found, append the kegg_id to new_metabolites to create
                        if interactor is None:
                            new_metabolites[num].append(kegg_id)
                        else:
                            # if the metabolite was found, add it to the existing interactor list
                            interactors[num].append([interactor, interactor.id])
                    # if parsing E. coli path, add all orthologs to interactor list
                    elif org_id == 'eco':
                        for ortholog in session.query(OrthologEcoli).filter_by(ortholog_id = id.split(':')[1],
                                                                               strain_protein = strain).all():
                            if ortholog is not None:
                                # add the id of the ecoli protein for the interaction reference later
                                interactors[num].append([ortholog.protein, id.split(':')[1]])

            # create list of interactor pairs from two separate lists
            interactor_pairs = []
            # create interactor pairs from interactors which already exist in db
            # (pairs where both sides have type 'm' are skipped)
            for interactor1 in interactors[0]:
                for interactor2 in interactors[1]:
                    if (interactor1[0].type != 'm') | (interactor2[0].type != 'm'):
                        interactor_pairs.append([interactor1, interactor2])

            # create interactor pair from interactors and new metabolites
            for interactor1 in interactors[0]:
                for id in new_metabolites[1]:
                    # ignore interactor pairs which would result in m-m interactions
                    if interactor1[0].type == 'm':
                        continue
                    # Note: can query metabolite with kegg only because we updated the metabolite info first
                    metabolite = session.query(Metabolite).filter_by(kegg = id).first()
                    if metabolite is None:
                        metabolite = Metabolite(id = id, kegg = id, pubchem = kegg_compounds[id]['pubchem'],
                                                chebi = kegg_compounds[id]['chebi'])
                        session.add(metabolite)
                    interactor_pairs.append([interactor1, [metabolite, metabolite.id]])
            # same as above with the roles of the two relation entries swapped
            for interactor1 in interactors[1]:
                for id in new_metabolites[0]:
                    if interactor1[0].type == 'm':
                        continue
                    metabolite = session.query(Metabolite).filter_by(kegg = id).first()
                    if metabolite is None:
                        metabolite = Metabolite(id = id, kegg = id, pubchem = kegg_compounds[id]['pubchem'],
                                                chebi = kegg_compounds[id]['chebi'])
                        session.add(metabolite)
                    interactor_pairs.append([interactor1, [metabolite, metabolite.id]])

            # if no interactor pairs were found, move on to the next interaction
            if len(interactor_pairs) == 0:
                continue

            # get all intermediates in reaction of type compound
            intermeds = []
            for subtype in relation.element.iter(tag='subtype'):
                # if the subtype element is a compound, get its node id
                if 'compound' in subtype.attrib:
                    compound_node_id = subtype.attrib['compound']
                    if compound_node_id is None:
                        continue
                    # if the node id was not stored in the compound ids for this path, move on to the next subtype
                    if int(compound_node_id) not in compound_ids:
                        continue
                    # if compound id is valid, either add existing matching metabolite or create new one and add
                    kegg_id = compound_ids[int(compound_node_id)]
                    metabolite = session.query(Metabolite).filter_by(kegg = kegg_id).first()
                    if metabolite is None:
                        metabolite = Metabolite(id=kegg_id, name=kegg_compounds[kegg_id]['name'],
                                                pubchem=kegg_compounds[kegg_id]['pubchem'],
                                                chebi=kegg_compounds[kegg_id]['chebi'], kegg=kegg_id)
                        session.add(metabolite)
                    intermeds.append([metabolite, metabolite.id])

            # add protein - intermediate interactor pairs
            for interactor_list in interactors:
                for interactor in interactor_list:
                    if interactor[0].type != 'm':
                        for intermed in intermeds:
                            interactor_pairs.append([interactor, intermed])

            # go through each interaction pair and add interaction if it doesn't exist yet
            for interactor_pair in interactor_pairs:
                # an interaction is homogenous when both sides are the same interactor
                homogenous = (interactor_pair[0][0] == interactor_pair[1][0])
                interaction = session.query(Interaction).filter(Interaction.interactors.contains(interactor_pair[0][0]),
                                                                Interaction.interactors.contains(interactor_pair[1][0]),
                                                                Interaction.homogenous == homogenous).first()

                source = session.query(InteractionSource).filter_by(data_source=sourcedb).first()
                # create interaction if it doesn't exist yet, add source to its sources if it isn't already
                if interaction is None:
                    interaction = Interaction(type=interactor_pair[0][0].type + '-' + interactor_pair[1][0].type,
                                              strain=strain, homogenous=homogenous,
                                              interactors=[interactor_pair[0][0], interactor_pair[1][0]])
                    interaction.sources.append(source)
                    if org_id == 'eco':
                        interaction.ortholog_derived = 'Ecoli'
                    # tuple expression: both calls run, add first and then commit
                    session.add(interaction), session.commit()
                elif source not in interaction.sources:
                    interaction.sources.append(source)

                # in case the interaction already existed, make sure interactor_a and interactor_b variables for
                # new interaction reference match up with the first and second interactors of the existing
                # interaction (so it's easy to see which Pseudomonas interactor matches up with which Ecoli
                # ortholog if the org id is eco)
                interactor_a, interactor_b = None, None
                if org_id == 'eco':
                    if interaction.interactors[0] == interactor_pair[0][0]:
                        interactor_a = interactor_pair[0][1]
                        interactor_b = interactor_pair[1][1]
                    else:
                        interactor_b = interactor_pair[0][1]
                        interactor_a = interactor_pair[1][1]

                # search for reference
                reference = session.query(InteractionReference).filter_by(source_db='kegg',
                                                                          comment='in ' + path_name + ' path',
                                                                          interactor_a=interactor_a,
                                                                          interactor_b=interactor_b).first()
                # if the reference doesn't exist, create it, add it to the interaction's references and add the source
                # to the reference's sources
                if reference is None:
                    reference = InteractionReference(source_db='kegg', comment='in ' + path_name + ' path',
                                                     interactor_a=interactor_a, interactor_b=interactor_b)
                    interaction.references.append(reference)
                    reference.sources.append(source)
                # if the reference does exist, add the interaction to the reference's interaction list and add the
                # source to the reference's source list if they aren't there already
                else:
                    if interaction not in reference.interactions:
                        reference.interactions.append(interaction)
                    if source not in reference.sources:
                        reference.sources.append(source)
    # NOTE(review): placement of this final commit (function level vs. per path) is assumed - confirm
    session.commit()
    # report the data source and the total number of interactions now stored
    print(sourcedb, session.query(Interaction).count())
def pathwayId2PathwayInfo(pid):
    """Download the KGML of a KEGG pathway with curl and parse it.

    The response is saved to ``pathway.xml`` in the current working
    directory (overwriting any existing file) and then parsed with ``read``.

    Args:
        pid: pathway identifier inserted into the request URL
            ``http://rest.kegg.jp/get/<pid>/kgml``.

    Returns:
        The object returned by ``read`` for the downloaded file.

    Raises:
        subprocess.CalledProcessError: if curl exits with a non-zero status.
    """
    import subprocess  # local import: only this function needs it

    url = 'http://rest.kegg.jp/get/' + pid + '/kgml'
    # Run curl with an argument list rather than a shell command string so
    # that characters in ``pid`` are never interpreted by a shell, and fail
    # loudly instead of parsing a truncated or empty download.
    with open("pathway.xml", "wb") as out:
        subprocess.run(["curl", url], stdout=out, check=True)
    # Close the handle once parsing is done instead of leaking it.
    with open("pathway.xml") as handle:
        pathway = read(handle)
    return pathway