Example #1
0
    def test_read_and_write_KGML_files(self):
        """Round-trip every KGML fixture through a local file.

        Each pathway in ``self.data`` is parsed from its input file, written
        back out via ``get_KGML()``, and parsed again from the written file.
        After both parses the numbers of entries, orthologs, compounds and
        maps must equal the fixture's ``element_counts``.
        """

        def tally(parsed):
            # Tuple that is compared against a fixture's element_counts.
            return (
                len(parsed.entries),
                len(parsed.orthologs),
                len(parsed.compounds),
                len(parsed.maps),
            )

        for fixture in self.data:
            # Parse the reference input.
            with open(fixture.infilename) as source:
                pathway = read(source)
                self.assertEqual(tally(pathway), fixture.element_counts)
            # Serialise what was just parsed.
            with open(fixture.outfilename, "w") as sink:
                sink.write(pathway.get_KGML())
            # Parse our own output; the counts must be unchanged.
            with open(fixture.outfilename) as source:
                pathway = read(source)
                self.assertEqual(tally(pathway), fixture.element_counts)
    def test_read_and_write_KGML_files(self):
        """Read KGML from, and write KGML to, local files.

        For every fixture in ``self.data`` the input file is parsed, the
        pathway is written back out with ``get_KGML()``, and the written file
        is parsed again.  After each parse the numbers of entries, orthologs,
        compounds and maps are compared with the fixture's
        ``element_counts``.
        """
        for p in self.data:
            # Test opening file.
            # Default text mode already performs universal-newline
            # translation; the legacy 'U' flag was removed in Python 3.11,
            # where open(..., 'rU') raises ValueError.
            with open(p.infilename) as f:
                pathway = read(f)
                # Do we have the correct number of elements of each type
                self.assertEqual((len(pathway.entries),
                                  len(pathway.orthologs),
                                  len(pathway.compounds),
                                  len(pathway.maps)),
                                 p.element_counts)
            # Test writing file
            with open(p.outfilename, 'w') as f:
                f.write(pathway.get_KGML())
            # Can we read the file we wrote?
            with open(p.outfilename) as f:
                pathway = read(f)
                # Do we have the correct number of elements of each type
                self.assertEqual((len(pathway.entries),
                                  len(pathway.orthologs),
                                  len(pathway.compounds),
                                  len(pathway.maps)),
                                 p.element_counts)
Example #3
0
 def test_render_KGML_modify(self):
     """Render KGML to PDF after editing element graphics.

     The first fixture in ``self.data`` is drawn with a line width of 10 on
     every ortholog whose name shares an identifier with ``self.ko_ids``;
     the second is drawn with one ColorSpiral background colour per
     ortholog.
     """
     fixtures = self.data

     # First fixture (KO01100 per the original test notes): widen the
     # lines of the selected orthologs.
     widths_case = fixtures[0]
     with open(widths_case.infilename) as handle:
         pathway = read(handle)
         for ortholog in pathway.orthologs:
             if not set(ortholog.name.split()).intersection(self.ko_ids):
                 continue
             for glyph in ortholog.graphics:
                 glyph.width = 10
         canvas = KGMLCanvas(pathway)
         canvas.draw(widths_case.output_stem + '_widths.pdf')

     # Second fixture (KO3070 per the original test notes): recolour the
     # background of every ortholog entry.
     colors_case = fixtures[1]
     with open(colors_case.infilename) as handle:
         pathway = read(handle)
         orthologs = list(pathway.orthologs)
         # Biopython's ColorSpiral supplies one colour per ortholog
         spiral = ColorSpiral(a=2, b=0.2, v_init=0.85, v_final=0.5,
                              jitter=0.03)
         palette = spiral.get_colors(len(orthologs))
         for ortholog, shade in zip(orthologs, palette):
             for glyph in ortholog.graphics:
                 glyph.bgcolor = shade
         canvas = KGMLCanvas(pathway)
         pathway.image = colors_case.pathway_image
         canvas.import_imagemap = colors_case.show_pathway_image
         canvas.draw(colors_case.output_stem + '_colors.pdf')
 def test_render_KGML_modify(self):
     """Rendering of KGML to PDF, with modification.

     Draws the first fixture with thicker lines on the orthologs matching
     ``self.ko_ids`` and the second fixture with a distinct background
     colour on every ortholog.
     """
     cases = self.data

     # Line-width modification (first fixture; KO01100 lipid pathway per
     # the original test notes).
     with open(cases[0].infilename) as kgml_file:
         pathway = read(kgml_file)
         # Gather the graphics of every ortholog that names one of ko_ids.
         selected_graphics = [
             graphic
             for entry in pathway.orthologs
             if set(entry.name.split()).intersection(self.ko_ids)
             for graphic in entry.graphics
         ]
         for graphic in selected_graphics:
             graphic.width = 10
         renderer = KGMLCanvas(pathway)
         renderer.draw(cases[0].output_stem + '_widths.pdf')

     # Colour modification (second fixture; KO3070 per the original test
     # notes).
     with open(cases[1].infilename) as kgml_file:
         pathway = read(kgml_file)
         entries = list(pathway.orthologs)
         # Use Biopython's ColorSpiral to generate one colour per entry
         generator = ColorSpiral(
             a=2, b=0.2, v_init=0.85, v_final=0.5, jitter=0.03
         )
         for entry, colour in zip(entries, generator.get_colors(len(entries))):
             for graphic in entry.graphics:
                 graphic.bgcolor = colour
         renderer = KGMLCanvas(pathway)
         pathway.image = cases[1].pathway_image
         renderer.import_imagemap = cases[1].show_pathway_image
         renderer.draw(cases[1].output_stem + '_colors.pdf')
Example #5
0
 def test_render_KGML_transparency(self):
     """Render KGML to PDF using colours that carry an alpha channel.

     The first fixture gets ``"77"`` appended to the foreground hex colour
     (and a width of 20) on orthologs matching ``self.ko_ids``; the second
     gets ColorSpiral background colours extended with an alpha of 0.5.
     """
     fixtures = self.data

     # First fixture (KO01100 per the original test notes): alpha as a hex
     # suffix on the foreground colour.
     hex_case = fixtures[0]
     with open(hex_case.infilename) as handle:
         pathway = read(handle)
         for ortholog in pathway.orthologs:
             if not set(ortholog.name.split()).intersection(self.ko_ids):
                 continue
             for glyph in ortholog.graphics:
                 # The alpha channel is tacked onto the hex string itself
                 glyph.fgcolor += "77"
                 glyph.width = 20
         canvas = KGMLCanvas(pathway)
         canvas.draw(hex_case.output_stem + "_transparency.pdf")

     # Second fixture (KO3070 per the original test notes): alpha as a
     # fourth tuple component on the background colour.
     tuple_case = fixtures[1]
     with open(tuple_case.infilename) as handle:
         pathway = read(handle)
         orthologs = list(pathway.orthologs)
         # Biopython's ColorSpiral supplies the base colours
         spiral = ColorSpiral(
             a=2, b=0.2, v_init=0.85, v_final=0.5, jitter=0.03
         )
         palette = spiral.get_colors(len(orthologs))
         for ortholog, shade in zip(orthologs, palette):
             translucent = shade + (0.5,)
             for glyph in ortholog.graphics:
                 glyph.bgcolor = translucent
         canvas = KGMLCanvas(pathway)
         pathway.image = tuple_case.pathway_image
         canvas.import_imagemap = tuple_case.show_pathway_image
         canvas.draw(tuple_case.output_stem + "_transparency.pdf")
 def test_render_KGML_transparency(self):
     """Rendering of KGML to PDF, with color alpha channel.

     Exercises both ways of supplying transparency: a hex colour string
     with two extra alpha digits, and a colour tuple with a fourth
     component.
     """
     cases = self.data

     # Hex-string alpha (first fixture; KO01100 lipid pathway per the
     # original test notes).
     with open(cases[0].infilename) as kgml_file:
         pathway = read(kgml_file)
         # Gather the graphics of every ortholog that names one of ko_ids.
         selected_graphics = [
             graphic
             for entry in pathway.orthologs
             if set(entry.name.split()).intersection(self.ko_ids)
             for graphic in entry.graphics
         ]
         for graphic in selected_graphics:
             # Append the alpha channel directly to the hex colour string
             graphic.fgcolor = graphic.fgcolor + "77"
             graphic.width = 20
         renderer = KGMLCanvas(pathway)
         renderer.draw(cases[0].output_stem + '_transparency.pdf')

     # Tuple alpha (second fixture; KO3070 per the original test notes).
     with open(cases[1].infilename) as kgml_file:
         pathway = read(kgml_file)
         entries = list(pathway.orthologs)
         # Use Biopython's ColorSpiral to generate the base colours
         generator = ColorSpiral(
             a=2, b=0.2, v_init=0.85, v_final=0.5, jitter=0.03
         )
         for entry, colour in zip(entries, generator.get_colors(len(entries))):
             # Extend the colour tuple with an alpha component
             with_alpha = colour + (0.5, )
             for graphic in entry.graphics:
                 graphic.bgcolor = with_alpha
         renderer = KGMLCanvas(pathway)
         pathway.image = cases[1].pathway_image
         renderer.import_imagemap = cases[1].show_pathway_image
         renderer.draw(cases[1].output_stem + '_transparency.pdf')
Example #7
0
def getPathway(pathName):
    """Download a pathway's KGML from the KEGG REST service and parse it.

    Args:
        pathName: KEGG pathway identifier; it is inserted verbatim into
            ``http://rest.kegg.jp/get/<pathName>/kgml``.

    Returns:
        Whatever ``read`` produces from the response text.

    Raises:
        SystemExit: if the server answers with an HTTP error status
            (4xx/5xx).  Other ``requests`` failures (connection errors,
            etc.) propagate unchanged, as before.
    """
    url = 'http://rest.kegg.jp/get/' + pathName + '/kgml'
    try:
        response = requests.get(url)
        # requests.get() does not raise for 4xx/5xx responses on its own;
        # without this call the HTTPError handler below can never fire and
        # an error page would be handed to the KGML parser.
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        raise SystemExit(err)
    return read(response.text)
Example #8
0
 def test_render_KGML_basic(self):
     """Basic rendering of KGML: write to PDF without modification.

     Every fixture in ``self.data`` is parsed from its local file and drawn
     to ``<output_stem>_original.pdf`` using the fixture's own pathway
     image settings.
     """
     # We test rendering of the original KEGG KGML using only local
     # files.
     for p in self.data:
         # The legacy 'U' flag was removed in Python 3.11 (open() raises
         # ValueError); default text mode already translates newlines.
         with open(p.infilename) as f:
             pathway = read(f)
             pathway.image = p.pathway_image
             kgml_map = KGMLCanvas(pathway)
             kgml_map.import_imagemap = p.show_pathway_image
             kgml_map.draw(p.output_stem + '_original.pdf')
 def test_render_KGML_basic(self):
     """Basic rendering of KGML: write to PDF without modification.

     Parses each fixture in ``self.data`` from its local file, assigns the
     fixture's pathway image and image-map flag, and draws the result to
     ``<output_stem>_original.pdf``.
     """
     # We test rendering of the original KEGG KGML using only local
     # files.
     for p in self.data:
         # Open in plain text mode: 'rU' is rejected with ValueError from
         # Python 3.11 onwards, and universal newlines are the default.
         with open(p.infilename) as f:
             pathway = read(f)
             pathway.image = p.pathway_image
             kgml_map = KGMLCanvas(pathway)
             kgml_map.import_imagemap = p.show_pathway_image
             kgml_map.draw(p.output_stem + '_original.pdf')
    def test_render_KGML_import_map(self):
        """Render each KGML fixture on top of an imported imagemap.

        The canvas is created with ``import_imagemap=True``; the image
        location comes from the URL recorded in the .xml file.

        Expect failures when that imagemap cannot be fetched (for example
        without a web connection), and odd-looking output when the remote
        imagemap has changed since the local KGML file was downloaded.
        """
        for fixture in self.data:
            with open(fixture.infilename) as handle:
                canvas = KGMLCanvas(read(handle), import_imagemap=True)
                canvas.draw(fixture.output_stem + "_importmap.pdf")
    def test_render_KGML_import_map(self):
        """Basic rendering of KGML: use imported imagemap.

        Uses the URL indicated in the .xml file.

        This test may fail if the imagemap is not available (e.g. if
        there is not a web connection), and may look odd if the remote
        imagemap has changed since the local KGML file was downloaded.
        """
        # We test rendering of the original KEGG KGML using imported files
        for p in self.data:
            # 'rU' raises ValueError on Python 3.11+ (the 'U' flag was
            # removed); default text mode gives the same newline handling.
            with open(p.infilename) as f:
                pathway = read(f)
                kgml_map = KGMLCanvas(pathway, import_imagemap=True)
                kgml_map.draw(p.output_stem + '_importmap.pdf')
    def parse(self, file):
        """Load one KGML file and fill ``self.path`` with its gene nodes.

        The file is opened from the hard-coded directory
        ``D:/NIR/bernmix_method/hsaFiles/``.  Every entry of type ``'gene'``
        becomes a ``Node`` built from the list of its names (with the part
        before ``:`` removed), keyed by the entry id.  Each relation then
        adds one to the ``weight`` of every gene entry it references, and
        finally the nodes are re-keyed ``0..n-1`` in their existing order.

        Args:
            file: file name, appended to the directory above.
        """

        def _parse_name(nodes):
            # One list per entry name; every whitespace-separated token keeps
            # only the part after its first ':'.
            return [
                [token.split(':')[1] for token in node.split()]
                for node in nodes
            ]

        def _set_gene(way, path):
            genes = [e for e in way.entries.values() if e.type == 'gene']
            names = _parse_name([e.name for e in genes])
            for entry, aliases in zip(genes, names):
                path[entry.id] = Node(aliases)

        def _set_weight(way, path):
            for relation in way.relations:
                for entry in (relation.entry1, relation.entry2):
                    if entry.type == 'gene':
                        path[entry.id].weight += 1

        def _change_keys(path):
            # Rebuild the mapping in one go.  Moving keys one at a time
            # (path[i] = path.pop(key)) overwrites a node that has not been
            # moved yet whenever a new index equals one of the pending keys.
            nodes = list(path.values())
            path.clear()
            path.update(enumerate(nodes))

        # The context manager guarantees the handle is closed even if
        # parsing or graph construction raises.
        with open('D:/NIR/bernmix_method/hsaFiles/' + file) as f:
            # NOTE(review): 'r' looks like an open() mode passed to read()
            # by mistake; kept unchanged - confirm read() accepts it.
            pathway = read(f, 'r')
            _set_gene(pathway, self.path)
            _set_weight(pathway, self.path)
            _change_keys(self.path)
Example #13
0
    def init(self):
        """Populate ``self.ko_dict`` and ``self.pw_dict`` from local files.

        First, ``self.ko_list_path`` is read as tab-separated lines of
        ``<ko>\\t<genes>; <description> [EC:<numbers>]``.  For every KO the
        keys ``"genes"``, ``"ecs"`` (``None`` when no ``[EC:`` section is
        present), ``"desc"`` and ``"pathways"`` are stored in
        ``self.ko_dict[ko]``.

        Then every ``*.kgml`` file in ``self.kgmls_dir`` larger than 100
        bytes is parsed.  Each ortholog name found there that is a known KO
        gets the pathway appended to its ``"pathways"`` list, and the
        pathway's name, title and KO -> reactions mapping are stored in
        ``self.pw_dict``.

        Finally ``self.ko_dict`` is replaced by a plain ``dict`` copy.

        NOTE(review): ``self.ko_dict`` is indexed with unseen keys, so it is
        presumably a ``defaultdict(dict)``-like mapping on entry - confirm.
        """
        with open(self.ko_list_path) as h:
            # Iterate the handle directly; readlines() would build the
            # complete list of lines first for no benefit.
            for koline in h:
                ko, desc = koline.replace("#160;", "").split("\t")
                # Text before the first ';' lists gene names, the rest is
                # the description with an optional trailing "[EC:...]".
                gene, _, desc2 = desc.partition(";")
                ec_parts = desc2.split("[EC:")
                ecs = None
                if len(ec_parts) > 1:
                    ecs = [
                        "ec:" + x
                        for x in ec_parts[1].replace("]", "").split()
                    ]
                entry = self.ko_dict[ko]
                entry["genes"] = gene.split()
                entry["ecs"] = ecs
                entry["desc"] = desc2.strip().split("[EC:")[0]
                entry["pathways"] = []

        for kgml_path in glob.glob(self.kgmls_dir + "/*.kgml"):
            # Files of 100 bytes or less are skipped.
            if os.path.getsize(kgml_path) <= 100:
                continue
            # Close every KGML file once it has been processed instead of
            # leaking one open handle per pathway.
            with open(kgml_path) as kgml_handle:
                kgml = read(kgml_handle)
                pw_name = "kegg_pw:" + kgml.name.split(":ko")[1]

                ko_react = defaultdict(list)

                for ortholog in kgml.orthologs:
                    for ko in ortholog.name.split():
                        if ":" in ko and "pathways" in self.ko_dict[ko]:
                            self.ko_dict[ko]["pathways"].append(pw_name)
                            if ortholog.reaction:
                                ko_react[ko.split(":")[1]].append(
                                    ortholog.reaction)

                self.pw_dict[pw_name] = {
                    "name": pw_name,
                    "title": kgml.title,
                    "reactions": dict(ko_react)
                }
        self.ko_dict = dict(self.ko_dict)
Example #14
0
 def __init__(self, filePath):
     """Parse the KGML file at *filePath* and build the compound graph.

     Args:
         filePath: path of the KGML file to open for reading.

     The parsed result is stored in ``self.bioKGML``;
     ``self.compoundsGraph`` starts as an empty dict before
     ``self._fillGraph()`` is called.
     """
     self.compoundsGraph = {}
     # Close the handle as soon as parsing is done instead of leaving it
     # to the garbage collector.
     # NOTE(review): assumes read() consumes the handle before returning.
     with open(filePath, 'r') as kgml_file:
         self.bioKGML = read(kgml_file)
     self._fillGraph()
Example #15
0
def parse_kegg(org_id, strain, sourcedb, session):
    """Import interactions from the KEGG pathways of one organism.

    The pathways listed by ``kegg_list`` for ``org_id`` are fetched as KGML
    one by one.  For every relation (other than ``maplink``) the two entries
    are resolved to interactors, interactor pairs are built (never
    metabolite-metabolite), and for each pair an ``Interaction`` and an
    ``InteractionReference`` are created or updated through ``session``.

    Args:
        org_id: KEGG organism code.  It is compared with the prefix of each
            entry name, and the value ``'eco'`` switches on ortholog-based
            lookup via ``OrthologEcoli``.
        strain: stored on newly created interactions and used to filter
            ``OrthologEcoli`` rows by ``strain_protein``.
        sourcedb: ``data_source`` value used to look up the
            ``InteractionSource`` attached to interactions and references.
        session: database session offering ``query``, ``add`` and ``commit``
            (presumably a SQLAlchemy session - TODO confirm).

    Side effects:
        Adds and commits rows through ``session`` and prints ``sourcedb``
        together with the total ``Interaction`` count at the end.

    NOTE(review): relies on a module-level ``kegg_compounds`` mapping that is
    not visible in this block; it is indexed by KEGG compound id and read for
    the keys ``'name'``, ``'pubchem'`` and ``'chebi'``.
    """
    # get pathways for organism specified by org_id
    pathways = kegg_list(database='pathway', org=org_id).read().split('path:')
    path_ids = []

    # make list of path ids to iterate through
    # (the first 8 characters of each non-empty chunk are taken as the id)
    for path in pathways:
        if path != '':
            path_ids.append(path[:8])

    # iterate through each path and obtain interactions
    for path in path_ids:
        # get kgml representation of path
        kgml_path = read(kegg_get(path, option='kgml'))
        path_name = kgml_path._getname()
        # dictionary of compounds in current path (node_id: kegg_id)
        #   compound._getid() returns node id (only relevant in context of current path)
        #   compound._getname() returns kegg id (relevant in overall KEGG DB)
        #   only the last 6 characters of the name are kept
        compound_ids = {}
        for compound in kgml_path.compounds:
            compound_ids[compound._getid()] = compound._getname()[-6:]
        # go through each relation in path
        for relation in kgml_path.relations:
            relation_type = relation.element.attrib['type']

            # ignore maplink relations
            if relation_type == 'maplink': continue
            # relation._getentry1/2() returns  protein id (locus) or compound id (KEGG id)
            entries = [relation._getentry1()._getname(), relation._getentry2()._getname()]
            # if one or both interactors are listed as undefined, move on to next interaction
            # NOTE(review): '|' is the bitwise operator; both comparisons are always evaluated
            if (entries[0] == 'undefined') | (entries[1] == 'undefined'): continue
            # list to hold existing interactors
            interactors = [[], []]
            # list to hold new metabolite ids for interactions with metabolites not yet in the database
            new_metabolites = [[], []]
            # go through each entry in the relation
            for num in range(0, 2):
                # each entry may contain >1 id; go through all of them
                # NOTE(review): the loop variable shadows the builtin id()
                for id in entries[num].split(' '):
                    if id == '': continue
                    # if interactor is not protein or compound, continue
                    # NOTE(review): '&' does not short-circuit, so id.split(':')[1] is always evaluated
                    if (id.split(':')[0] != org_id) & (id.split(':')[1] not in kegg_compounds): continue

                    # check if the id is a kegg id by searching in kegg_compounds
                    kegg_id= None
                    if id.split(':')[1] in kegg_compounds:
                        kegg_id = id.split(':')[1]

                    # check if interactor (protein) already exists
                    if (kegg_id is None) & (org_id != 'eco'):
                        interactor = session.query(Interactor).get(id.split(':')[1])
                        if interactor is not None:
                            # make sure to add None value; this will be needed to create interaction reference later
                            # None is appended rather than the interactor id because the interactor is not an ortholog
                            interactors[num].append([interactor, None])
                    # if it doesn't exist, it's not a valid protein, so check if it is a valid compound
                    elif kegg_id is not None:
                        interactor = session.query(Metabolite).filter_by(kegg = kegg_id).first()
                        # if metabolite with id was not found, append the kegg_id to new_metabolites to create
                        if interactor is None:
                            new_metabolites[num].append(kegg_id)
                        else:
                            # if the metabolite was found, add it to the existing interactor list
                            interactors[num].append([interactor, interactor.id])
                    # if parsing E. coli path, add all orthologs to interactor list
                    elif org_id == 'eco':
                        for ortholog in session.query(OrthologEcoli).filter_by(ortholog_id = id.split(':')[1],
                                                                                strain_protein = strain).all():
                            if ortholog is not None:
                                # add the id of the ecoli protein for the interaction reference later
                                interactors[num].append([ortholog.protein, id.split(':')[1]])

            # create list of interactor pairs from two separate lists
            # each element of a pair is a two-item list: [interactor object, id used for the reference]
            interactor_pairs = []
            # create interactor pairs from interactors which already exist in db
            # (pairs where both interactors have type 'm' are skipped)
            for interactor1 in interactors[0]:
                for interactor2 in interactors[1]:
                    if (interactor1[0].type != 'm') | (interactor2[0].type != 'm'):
                        interactor_pairs.append([interactor1, interactor2])
            # create interactor pair from interactors and new metabolites
            for interactor1 in interactors[0]:
                for id in new_metabolites[1]:
                    # ignore interactor pairs which would result in m-m interactions
                    if interactor1[0].type == 'm': continue
                    # Note: can query metabolite with kegg only because we updated the metabolite info first
                    metabolite = session.query(Metabolite).filter_by(kegg = id).first()
                    if metabolite is None:
                        metabolite = Metabolite(id = id, kegg = id, pubchem = kegg_compounds[id]['pubchem'],
                                                chebi = kegg_compounds[id]['chebi'])
                        session.add(metabolite)
                    interactor_pairs.append([interactor1, [metabolite, metabolite.id]])
            # same as above with the roles swapped: second-entry interactors with first-entry new metabolites
            for interactor1 in interactors[1]:
                for id in new_metabolites[0]:
                    if interactor1[0].type == 'm': continue
                    metabolite = session.query(Metabolite).filter_by(kegg = id).first()
                    if metabolite is None:
                        metabolite = Metabolite(id = id, kegg = id, pubchem = kegg_compounds[id]['pubchem'],
                                                chebi = kegg_compounds[id]['chebi'])
                        session.add(metabolite)
                    interactor_pairs.append([interactor1, [metabolite, metabolite.id]])

            # if no interactor pairs were found, move on to the next interaction
            if len(interactor_pairs) == 0: continue

            # get all intermediates in reaction of type compound
            intermeds = []
            for subtype in relation.element.iter(tag='subtype'):
                # if the subtype element is a compound, get its node id
                if 'compound' in subtype.attrib:
                    compound_node_id = subtype.attrib['compound']
                    if compound_node_id is None: continue
                    # if the node id was not stored in the compound ids for this path, move on to the next subtype
                    if int(compound_node_id) not in compound_ids: continue
                    # if compound id is valid, either add existing matching metabolite or create new one and add
                    kegg_id = compound_ids[int(compound_node_id)]
                    metabolite = session.query(Metabolite).filter_by(kegg = kegg_id).first()
                    if metabolite is None:
                        metabolite = Metabolite(id=kegg_id, name=kegg_compounds[kegg_id]['name'],
                                                pubchem=kegg_compounds[kegg_id]['pubchem'],
                                                chebi=kegg_compounds[kegg_id]['chebi'], kegg=kegg_id)
                        session.add(metabolite)
                    intermeds.append([metabolite, metabolite.id])

            # add protein - intermediate interactor pairs
            for interactor_list in interactors:
                for interactor in interactor_list:
                    if interactor[0].type != 'm':
                        for intermed in intermeds:
                            interactor_pairs.append([interactor, intermed])

            # go through each interaction pair and add interaction if it doesn't exist yet
            for interactor_pair in interactor_pairs:
                # an interaction is "homogenous" when both sides are the same interactor
                homogenous = (interactor_pair[0][0] == interactor_pair[1][0])
                interaction = session.query(Interaction).filter(Interaction.interactors.contains(interactor_pair[0][0]),
                                                                Interaction.interactors.contains(interactor_pair[1][0]),
                                                                Interaction.homogenous == homogenous).first()

                # NOTE(review): .first() yields None when no InteractionSource matches sourcedb;
                # that None would then be appended to the sources lists below - confirm it always exists
                source = session.query(InteractionSource).filter_by(data_source=sourcedb).first()
                # create interaction if it doesn't exist yet, add source to its sources if it isn't already
                if interaction is None:
                    interaction = Interaction(type=interactor_pair[0][0].type + '-' + interactor_pair[1][0].type,
                                              strain=strain, homogenous=homogenous,
                                              interactors=[interactor_pair[0][0], interactor_pair[1][0]])
                    interaction.sources.append(source)
                    if org_id == 'eco':
                        interaction.ortholog_derived = 'Ecoli'
                    # NOTE(review): tuple expression; add() runs first, then commit()
                    session.add(interaction), session.commit()
                elif source not in interaction.sources:
                    interaction.sources.append(source)

                # in case the interaction already existed, make sure interactor_a and interactor_b variables for
                # new interaction reference match up with the first and second interactors of the existing
                # interaction (so it's easy to see which Pseudomonas interactor matches up with which Ecoli
                # ortholog if the org id is eco)
                interactor_a, interactor_b = None, None
                if org_id == 'eco':
                    if interaction.interactors[0] == interactor_pair[0][0]:
                        interactor_a = interactor_pair[0][1]
                        interactor_b = interactor_pair[1][1]
                    else:
                        interactor_b = interactor_pair[0][1]
                        interactor_a = interactor_pair[1][1]

                # search for reference
                reference = session.query(InteractionReference).filter_by(source_db='kegg',
                                                                          comment='in ' + path_name + ' path',
                                                                          interactor_a=interactor_a,
                                                                          interactor_b=interactor_b).first()
                # if the reference doesn't exist, create it, add it to the interaction's references and add the source
                # to the reference's sources
                if reference is None:
                    reference = InteractionReference(source_db='kegg', comment='in ' + path_name + ' path',
                                                     interactor_a=interactor_a, interactor_b=interactor_b)
                    interaction.references.append(reference)
                    reference.sources.append(source)
                # if the reference does exist, add it to the interaction's reference list and add the source to the
                # reference's source list if it isn't there already
                else:
                    if interaction not in reference.interactions:
                        reference.interactions.append(interaction)
                    if source not in reference.sources:
                        reference.sources.append(source)

    # final commit for everything added since the last per-interaction commit
    session.commit()
    print(sourcedb, session.query(Interaction).count())
Example #16
0
def pathwayId2PathwayInfo(pid):
    """Download the KGML for pathway *pid* with curl and parse it.

    The response body is saved to ``pathway.xml`` in the current working
    directory (overwriting any existing file) and then parsed with
    ``read``.

    Args:
        pid: KEGG pathway identifier, inserted into
            ``http://rest.kegg.jp/get/<pid>/kgml``.

    Returns:
        The object returned by ``read`` for the downloaded file.

    Raises:
        subprocess.CalledProcessError: if curl exits with a non-zero
            status, including HTTP errors because of ``--fail``.
        FileNotFoundError: if the ``curl`` executable cannot be found.
    """
    # Function-scope import: the file's import block is not visible here.
    import subprocess

    url = 'http://rest.kegg.jp/get/' + pid + '/kgml'
    # An argument list (no shell) keeps characters in pid from being
    # interpreted as shell syntax, and check=True surfaces a failed
    # download instead of parsing an empty or stale pathway.xml.
    with open("pathway.xml", "wb") as xml_out:
        subprocess.run(["curl", "--fail", url], stdout=xml_out, check=True)
    # NOTE(review): assumes read() consumes the handle before returning.
    with open("pathway.xml") as xml_in:
        return read(xml_in)