Esempio n. 1
0
 def _parse_relation(element):
     """Build a Relation from ``element`` and add it to ``self.pathway``.

     ``entry1`` and ``entry2`` are read from the element's attributes and
     converted to ``int``; ``type`` is copied unchanged. Every child of
     ``element`` contributes one ``(name, value)`` tuple to the relation's
     ``subtypes`` list.

     NOTE(review): ``self`` and ``Relation`` are resolved from the enclosing
     scope, which is not visible here.

     Raises:
         KeyError: if a required attribute is missing.
         ValueError: if an attribute expected to be numeric is not.
     """
     new_relation = Relation()
     new_relation.entry1 = int(element.attrib['entry1'])
     new_relation.entry2 = int(element.attrib['entry2'])
     new_relation.type = element.attrib['type']
     # Iterate the element directly: Element.getchildren() was removed from
     # xml.etree.ElementTree in Python 3.9, and iteration yields the same
     # direct children in document order.
     for subtype in element:
         name, value = subtype.attrib['name'], subtype.attrib['value']
         if name in ('compound', 'hidden compound'):
             # These subtype values are stored as integers
             # (presumably entry ids - confirm against the KGML schema).
             new_relation.subtypes.append((name, int(value)))
         else:
             new_relation.subtypes.append((name, value))
     self.pathway.add_relation(new_relation)
Esempio n. 2
0
 def _parse_relation(element):
     """Turn ``element`` into a Relation and register it on ``self.pathway``.

     The element's ``entry1`` and ``entry2`` attributes are stored as
     integers and ``type`` is stored as given. Each child element adds a
     ``(name, value)`` pair to the relation's ``subtypes``; the value is
     converted to ``int`` when the name is ``"compound"`` or
     ``"hidden compound"``.

     NOTE(review): ``self`` and ``Relation`` come from the enclosing scope,
     which is outside this block.
     """
     relation = Relation()
     attrs = element.attrib
     relation.entry1 = int(attrs["entry1"])
     relation.entry2 = int(attrs["entry2"])
     relation.type = attrs["type"]
     for child in element:
         child_attrs = child.attrib
         # Read both attributes up front so a missing one fails before
         # anything is appended.
         name = child_attrs["name"]
         value = child_attrs["value"]
         if name == "compound" or name == "hidden compound":
             value = int(value)
         relation.subtypes.append((name, value))
     self.pathway.add_relation(relation)
Esempio n. 3
0
 def _parse_relation(element):
     """Create a Relation from ``element`` and add it to ``self.pathway``.

     The ``entry1`` and ``entry2`` attributes are converted to ``int`` and
     ``type`` is stored unchanged. Each child of ``element`` is recorded as
     a ``(name, value)`` tuple in the relation's ``subtypes`` list.

     NOTE(review): ``self`` and ``Relation`` are taken from the enclosing
     scope, which is not part of this block.

     Raises:
         KeyError: if a required attribute is absent.
         ValueError: if a value expected to be an integer cannot be parsed.
     """
     new_relation = Relation()
     new_relation.entry1 = int(element.attrib['entry1'])
     new_relation.entry2 = int(element.attrib['entry2'])
     new_relation.type = element.attrib['type']
     # Element.getchildren() no longer exists in xml.etree.ElementTree on
     # Python 3.9+; iterating the element visits the same direct children.
     for subtype in element:
         name, value = subtype.attrib['name'], subtype.attrib['value']
         if name in ('compound', 'hidden compound'):
             # Values of these subtypes are kept as integers
             # (presumably entry ids - confirm against the KGML schema).
             new_relation.subtypes.append((name, int(value)))
         else:
             new_relation.subtypes.append((name, value))
     self.pathway.add_relation(new_relation)
Esempio n. 4
0
def prune_kgml(pathway):
    """
    Prune the entries and relations of a KGML pathway.

    The function works on copies of ``pathway.entries`` and
    ``pathway.relations`` and performs these steps in order:

      1. For every entry of type 'group', each ordered pair of its
         component ids is offered to ``can_add_relation``; accepted pairs
         are queued as new relations.
      2. Relations touching an entry of type 'ortholog', 'group', 'other'
         or 'map' are dropped, and entries of those types are removed.
      3. For every 'compound' entry that has at least one incoming and one
         outgoing relation, each (source gene, destination gene) pair
         accepted by ``can_add_relation`` is queued as a new relation;
         the relations touching the compound and the compound entry itself
         are then removed.
      4. Every queued pair is turned into a ``Relation`` flagged with
         ``_pamogk = True`` and appended to the relation list.

    Inputs:
      pathway: object exposing ``entries`` (mapping whose values have
          ``id`` and ``type``) and ``relations`` (iterable of objects with
          ``entry1`` / ``entry2``)
    Outputs:
      entries: dict of the remaining entries keyed by entry id
      relations: list of the remaining relations followed by the newly
          created ones

    NOTE(review): a compound with no incoming or no outgoing relation is
    skipped entirely, so it stays in ``entries`` and its relations stay in
    ``relations`` - confirm this is intended.
    """

    # work on private copies: entries as a dict, relations as a list
    entries = {entry.id: entry for entry in pathway.entries.values()}
    relations = list(pathway.relations)
    new_relations = []

    # groups are replaced by relations among their components
    group_entries = [entry for entry in entries.values() if entry.type == 'group']
    for group in group_entries:
        member_ids = [member.id for member in group.components]
        for first_id in member_ids:
            for second_id in member_ids:
                if can_add_relation(first_id, second_id, relations, new_relations):
                    new_relations.append((first_id, second_id))

    # drop every relation that touches an ortholog, group, other or map entry
    pruned_types = ['ortholog', 'group', 'other', 'map']
    surviving = []
    for rel in relations:
        if rel.entry1.type in pruned_types or rel.entry2.type in pruned_types:
            continue
        surviving.append(rel)
    relations = surviving

    # ... and drop the entries of those types as well
    stale_ids = [entry.id for entry in entries.values() if entry.type in pruned_types]
    for stale_id in stale_ids:
        del entries[stale_id]

    # ids of the gene entries; only genes are bridged across compounds
    gene_ids = {entry.id for entry in entries.values() if entry.type == 'gene'}

    compound_ids = [entry.id for entry in entries.values() if entry.type == 'compound']
    for cid in compound_ids:
        # entries pointing at the compound, and entries the compound points at
        source_ids = {rel.entry1.id for rel in relations if rel.entry2.id == cid}
        dest_ids = {rel.entry2.id for rel in relations if rel.entry1.id == cid}

        # a compound lacking either side is left untouched
        if not (source_ids and dest_ids):
            continue
        print('src =', source_ids, 'dst =', dest_ids)

        # keep just the genes on each side, ignore everything else
        gene_sources = source_ids & gene_ids
        gene_dests = dest_ids & gene_ids

        for src in gene_sources:
            for dst in gene_dests:
                if not can_add_relation(src, dst, relations, new_relations):
                    print([src, dst], 'already in relations')
                    continue
                new_relations.append((src, dst))

        # remove the relations containing this compound, then the compound
        relations = [
            rel for rel in relations
            if not (rel.entry1.id == cid or rel.entry2.id == cid)
        ]
        del entries[cid]

    # materialise the queued (source, destination) pairs as Relation objects
    created = []
    for src_id, dst_id in new_relations:
        bridge = Relation()
        bridge._entry1 = entries[src_id]
        bridge._entry2 = entries[dst_id]
        bridge._pamogk = True  # marks the relation as added by PAMOGK
        created.append(bridge)
    relations.extend(created)

    return entries, relations