def scores(self, id1, id2):
    """Returns the semantic similarity score between two given GO terms.

    The score is the sum of the semantic values that both terms give to
    their common ancestors, divided by the sum of all the semantic values
    of both terms.

    :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
        identifier is in your ontology).
    :arg id2, identifier of a GO term (ie: GO:0043229, or whatever
        identifier is in your ontology).
    :return the similarity score, always as a float.
    """
    golib = PyGoLib(self.goterms)

    # goterm['id'] is used rather than the identifier provided so that the
    # identifier stored on the term itself is the one found in the paths.
    goterm1 = self.goterms[id1]
    path1 = golib.get_path(
        goterm1, pred=goterm1['id'], paths=[], details=True)
    ancester1 = _get_all_ancesters(path1)
    semantic_values1 = self.semantic_values(goterm1['id'])

    goterm2 = self.goterms[id2]
    path2 = golib.get_path(
        goterm2, pred=goterm2['id'], paths=[], details=True)
    ancester2 = _get_all_ancesters(path2)
    semantic_values2 = self.semantic_values(goterm2['id'])

    sum_comm_anc = 0
    for ancester in set(ancester1) & set(ancester2):
        sum_comm_anc = sum_comm_anc + semantic_values2[ancester] + \
            semantic_values1[ancester]

    total = sum(semantic_values1.values()) + sum(semantic_values2.values())
    # Semantic values may all be the integer 1 (the term itself, or an
    # ancestor reached through unweighted relations); force a float
    # division so the ratio is not truncated to 0 under Python 2.
    return float(sum_comm_anc) / total
def scores(self, id1, id2):
    """Returns the score between two given GO terms.

    When one term is found on a path of the other, the smallest distance
    between them is returned twice, as a tuple. Otherwise the result of
    the cousin scoring is returned as is.

    :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
        identifier is in your ontology).
    :arg id2, identifier of a GO term (ie: GO:0043229, or whatever
        identifier is in your ontology).
    """
    library = PyGoLib(self.goterms)

    first_term = self.goterms[id1]
    first_paths = library.get_path(
        first_term, pred=first_term['id'], paths=[])
    second_term = self.goterms[id2]
    second_paths = library.get_path(
        second_term, pred=second_term['id'], paths=[])

    # The identifier stored on the term is used rather than the one given
    # by the caller: alt_id are present in the list of goterms but not in
    # the paths, and term['id'] gives the 'normal' GO term identifier.
    first_id = first_term['id']
    second_id = second_term['id']

    parent_distances = self.__score_parents(
        first_id, second_id, first_paths, second_paths)
    if not parent_distances:
        return self.__score_cousins(
            first_id, second_id, first_paths, second_paths)

    shortest = min(parent_distances)
    self.log.debug("%s and %s are parents" % (id1, id2))
    return (shortest, shortest)
def semantic_values(self, id1):
    """ Returns the semantic values of all the parents of a given term.

    The term itself gets the value 1. Every other ancestor gets the
    highest value found among the paths containing it, where the value of
    a path is the product of 0.8 for each 'is_a' relation and 0.6 for each
    'part_of' relation between the term and that ancestor.

    :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
        identifier is in your ontology).
    :return a dictionary of ancestor identifier -> semantic value.
    """
    golib = PyGoLib(self.goterms)
    goterm1 = self.goterms[id1]
    path1 = golib.get_path(
        goterm1, pred=goterm1['id'], paths=[], details=True)

    # Split every path only once instead of once per ancestor.
    split_paths = [item.split(',') for item in path1]

    values = {}
    for ancester in _get_all_ancesters(path1):
        if ancester == goterm1['id']:
            values[ancester] = 1
            continue
        tmp_score = []
        for path_el in split_paths:
            # Test membership on the split elements, not on the joined
            # string: a substring test would also match an identifier
            # that is only a prefix of another one, and the index()
            # lookup below would then fail.
            if ancester not in path_el:
                continue
            ind = path_el.index(ancester)
            tmp_cnt = 1
            # Walk back from the ancestor over the relation labels, which
            # sit at every other position of a detailed path.
            for step in range(ind - 1, 0, -2):
                if path_el[step] == 'is_a':
                    tmp_cnt = tmp_cnt * 0.8
                elif path_el[step] == 'part_of':
                    tmp_cnt = tmp_cnt * 0.6
            tmp_score.append(tmp_cnt)
        values[ancester] = max(tmp_score)
    return values
class GoDistanceCounter(object):
    """
    This class is the main class of the project to compute the distance
    between two GO terms.
    """

    def __init__(self, data=None):
        """ Constructor.

        :arg data, the graph of ontologies, in which terms are looked up
            by identifier. Defaults to an empty dictionary.
        """
        self.goterms = data
        if self.goterms is None:
            self.goterms = {}
        self.log = get_logger()
        self.pygo = PyGoLib(self.goterms)

    @staticmethod
    def _path_distances(paths, required, goid1, goid2):
        """ Returns the distance between goid1 and goid2 on every path
        which contains the identifier `required`.

        :arg paths, list of comma separated paths.
        :arg required, identifier which must be on a path for this path
            to be taken into account.
        :arg goid1, first GO term ID.
        :arg goid2, second GO term ID.
        """
        distances = []
        for path in paths:
            steps = path.split(',')
            if required in steps:
                start = steps.index(goid1)
                stop = steps.index(goid2)
                distances.append(abs(stop - start))
        return distances

    def __score_cousins(self, goid1, goid2, path1=None, path2=None):
        """ For two given GO term ID and the list of their path, return
        the score between them.

        Every path of the first term is compared with every path of the
        second one; the pair with the smallest summed distance to their
        common ancestor is kept.

        :arg goid1, GO term ID (ie: GO:0043231, or whatever identifier is
            in your ontology).
        :arg goid2, GO term ID (ie: GO:0043229, or whatever identifier is
            in your ontology).
        :kwarg path1, the list path from the first GO term to the top of
            the tree, as returned by get_path().
        :kwarg path2, the list path from the second GO term to the top of
            the tree, as returned by get_path().
        :return a tuple (minimal distance, difference of level), or None
            if no pair of paths has a common ancestor.
        """
        # NOTE(review): scores() calls get_path() with explicit pred= and
        # paths=[]; these fallbacks do not -- confirm the defaults of
        # get_path() give equivalent paths.
        if path1 is None:
            path1 = self.pygo.get_path(self.goterms[goid1])
        if path2 is None:
            path2 = self.pygo.get_path(self.goterms[goid2])

        # The second list of paths does not change, split it only once.
        all_step2 = [opath.split(',') for opath in path2]

        mindist = None
        deltalevel = None
        for path in path1:
            step1 = path.split(',')
            for step2 in all_step2:
                inter = _get_ancester(step1, step2)
                if not inter:
                    continue
                index1 = step1.index(inter)
                index2 = step2.index(inter)
                dist = index1 + index2
                # Compare against None explicitly: a distance of 0 is a
                # valid minimum and must not be seen as "not set yet".
                if mindist is None or dist < mindist:
                    mindist = dist
                    deltalevel = abs(index1 - index2)

        if mindist is None:
            return None
        return (mindist, deltalevel)

    def __score_parents(self, goid1, goid2, path1=None, path2=None):
        """ For two given GO term ID and the list of their path, return
        the score between them if one is parent of the other.

        :arg goid1, GO term ID (ie: GO:0043231, or whatever identifier is
            in your ontology).
        :arg goid2, GO term ID (ie: GO:0043229, or whatever identifier is
            in your ontology).
        :kwarg path1, the list path from the first GO term to the top of
            the tree, as returned by get_path().
        :kwarg path2, the list path from the second GO term to the top of
            the tree, as returned by get_path().
        :return the list of the distances found, first on the paths of
            the first term then on the paths of the second one. The list
            is empty if neither term is on a path of the other.
        """
        if path1 is None:
            path1 = self.pygo.get_path(self.goterms[goid1])
        # Paths of the first term on which the second term is present.
        scores = self._path_distances(path1, goid2, goid1, goid2)

        if path2 is None:
            path2 = self.pygo.get_path(self.goterms[goid2])
        # Paths of the second term on which the first term is present.
        scores.extend(self._path_distances(path2, goid1, goid1, goid2))
        return scores

    def scores(self, id1, id2):
        """Returns the score between two given GO terms.

        If one term is on a path of the other, the smallest distance
        between them is returned twice, as a tuple. Otherwise the result
        of the cousin scoring is returned: a tuple (minimal distance,
        difference of level), or None if they have no common ancestor.

        :arg id1, identifier of a GO term (ie: GO:0043231, or whatever
            identifier is in your ontology).
        :arg id2, identifier of a GO term (ie: GO:0043229, or whatever
            identifier is in your ontology).
        """
        golib = PyGoLib(self.goterms)
        goterm1 = self.goterms[id1]
        path1 = golib.get_path(goterm1, pred=goterm1['id'], paths=[])
        goterm2 = self.goterms[id2]
        path2 = golib.get_path(goterm2, pred=goterm2['id'], paths=[])

        # We use goterm['id'] instead of the id provided to take into
        # account alt_id which are in the list of goterms but not in the
        # paths. Via goterm['id'] we get the 'normal' GO term identifier.
        scores = self.__score_parents(goterm1['id'], goterm2['id'],
                                      path1, path2)
        if scores:
            score = min(scores)
            self.log.debug("%s and %s are parents" % (id1, id2))
            return (score, score)
        return self.__score_cousins(goterm1['id'], goterm2['id'],
                                    path1, path2)