def __init__(self, fin_godag, num_calcs, relationships, w_e, seed, prt):
     """Load the GO DAG twice (GOATOOLS and pygosemsim) and set up Wang similarity.

     Args:
         fin_godag: path to a GO OBO file; the extension is stripped for
             pygosemsim's resource loader.
         num_calcs: number of GO IDs to sample (passed to _init_goids).
         relationships: GO relationship types used by SsWang and reldepth.
         w_e: edge-weight parameter for the Wang semantic-similarity measure.
         seed: 32-bit seed for reproducible GO ID sampling.
         prt: output stream for GOATOOLS progress messages (or None).
     """
     tic = timeit.default_timer()
     self.godag = get_godag(fin_godag,
                            optional_attrs=['relationship'],
                            prt=prt)
     tic = prt_hms(tic, 'GOATOOLS read godag')
     # Strip the extension because pygosemsim resolves the resource path
     # itself (it does not understand Cygwin-style paths).
     self.graph = graph.from_resource(splitext(fin_godag)[0])
     tic = prt_hms(tic, 'pygosemsim read godag')
     self.seedobj = RandomSeed32(seed)
     self.goids = self._init_goids(num_calcs)
     tic = timeit.default_timer()
     # GOATOOLS Wang similarity calculator over the sampled GO IDs.
     self.wang = SsWang(self.goids, self.godag, relationships, w_e)
     # Depth of each GO term counting the given relationship edges.
     self.go2reldepth = get_go2reldepth(
         {self.godag[go]
          for go in self.godag}, relationships)
     tic = prt_hms(tic, 'GOATOOLS wang setup')
# ---- Beispiel #2 (Example 2) ----
## MAJOR DRAWBACKS OF CURRENT METHODS ARE:

from pygosemsim import download
from pygosemsim import graph
from pygosemsim import similarity
import numpy as np
import csv
import networkx as nx
import sys

# get go database
#download.download("go-basic", "http://current.geneontology.org/ontology/go.obo")

# load go graph from downloaded file
#G = graph.from_resource("/usr/local/lib/python3.9/site-packages/pygosemsim/_resources/go-basic")
# Load the GO graph from the local OBO resource (path given without extension).
G = graph.from_resource("/root/snpXplorer/AnnotateMe/INPUTS_OTHER/20220510_go")
term_list = list(G)  # all GO term identifiers present in the graph

# take precalculated lower bounds
similarity.precalc_lower_bounds(G)

# calculate semantic similarity -- this is an example
# similarity.lin(G, "GO:0004340", "GO:0019158")

# Read the file with all GO terms and corresponding p-values; keep only the
# significant ones.  NOTE(review): the loop body only splits each line --
# go_list / go_p are never filled here, so this fragment looks truncated.
go_list = []
go_p = {}
fname = sys.argv[1]  # input file path from the command line
with open(fname) as finp:
    for line in finp:
        line = line.rstrip().split()
def main():
    """Build a pairwise "common GO functions" feature matrix for yeast genes.

    Reads a pickled PPC graph (``gpath``), the Myers 2006 GO-term table and
    the SGD GO annotations; keeps biological-process terms with more than
    three S. cerevisiae annotations; and for every pair of annotated genes
    counts the eligible GO terms they share.  The symmetric count matrix,
    indexed by PPC-graph node index, is saved with numpy under
    ../generated-data/pairwise_features/.
    """
    #download.obo("go-basic")
    #download.gaf("sgd")

    G = nx.read_gpickle(gpath)
    nodes = sorted(G.nodes())
    node_ix = dict(zip(nodes, range(len(nodes))))

    G = graph.from_resource("go-basic")

    df = pd.read_csv(myers2006path, sep="\t")

    df['namespace'] = [G.nodes[n]['namespace'] if n in G else None for n in df['GO ID']]
    print(df)

    # Keep biological-process terms with more than 3 direct+indirect
    # S. cerevisiae annotations.
    ix = (df['namespace'] == 'biological_process') & (df['# of S. cerevisiae annotations (direct and indirect)'] > 3)
    df = df[ix]

    eligible_terms = set(df['GO ID'])
    print("%d eligible terms" % len(eligible_terms))

    sgd_to_locus = feature_preprocessing.pairwise_go_semsim.read_names("../data-sources/yeast/names.txt")

    annot = annotation.from_resource("sgd")
    keys = list(annot.keys())

    # Map each annotated SGD key to its node index in the PPC graph and to
    # its set of eligible GO terms.
    translated_keys = {}
    nodes_to_terms = {}
    for key in keys:
        if key not in sgd_to_locus:
            continue

        locus = sgd_to_locus[key]
        unified = get_unified_name(locus)
        if unified in node_ix:
            translated_keys[key] = node_ix[unified]
            node_terms = [t for t in annot[key]["annotation"].keys()]
            nodes_to_terms[key] = set(node_terms) & eligible_terms

    keys = list(translated_keys.keys())
    print(len(keys))

    # BUG FIX: F is indexed by node_ix values (0 .. len(nodes)-1), so it must
    # be sized by the number of graph nodes, not by the (smaller) number of
    # translated keys -- otherwise the assignments below can raise IndexError.
    n_nodes = len(node_ix)
    F = np.zeros((n_nodes, n_nodes))
    for i in range(len(keys)):
        a = keys[i]
        print("%6d %s" % (i, a))

        terms_a = nodes_to_terms[a]
        if len(terms_a) == 0:
            continue

        for j in range(i + 1, len(keys)):
            b = keys[j]
            terms_b = nodes_to_terms[b]
            if len(terms_b) == 0:
                continue

            # Number of GO terms shared by the pair; the matrix is symmetric.
            common = len(terms_a & terms_b)
            F[translated_keys[a], translated_keys[b]] = common
            F[translated_keys[b], translated_keys[a]] = common

        print(np.min(F), np.max(F))

    output_path = "../generated-data/pairwise_features/%s_common_functions" % (os.path.basename(gpath))

    np.save(output_path, F)
# ---- Beispiel #4 (Example 4) ----
import os
from pygosemsim import graph

# Work inside the addiction-ontology checkout so relative resources resolve.
os.chdir('/Users/hastingj/Work/Onto/addiction-ontology')

import networkx as nx

# Load the ADDICTO ontology graph (path given without the file extension).
G = graph.from_resource("/Users/hastingj/Work/Onto/addiction-ontology/addicto")

# All ancestors (more general terms) of E-Cigarette.
nx.ancestors(G, "ADDICTO:0000212")

"ADDICTO:0000239"  # Flavoured E-liquid
"ADDICTO:0000201"  # Cigarette
"ADDICTO:0000212"  # E-Cigarette
"ADDICTO:0000240"  # Fruit flavoured e-liquid

from pygosemsim import similarity

# Precompute lower bounds required by the similarity measures below.
similarity.precalc_lower_bounds(G)

# How similar are Cigarette and E-cigarette?

similarity.resnik(G, "ADDICTO:0000201", "ADDICTO:0000212")
# 2.87

similarity.wang(G, "ADDICTO:0000201", "ADDICTO:0000212")
#0.436
similarity.lin(G, "ADDICTO:0000201", "ADDICTO:0000212")
#0.45
similarity.pekar(G, "ADDICTO:0000201", "ADDICTO:0000212")
#0.455
# ---- Beispiel #5 (Example 5) ----
 def setUpClass(cls):
     """Load the go-basic graph, its descendants cache and human GO annotations once for all tests."""
     cls.G = graph.from_resource("go-basic")
     graph.precalc_descendants(cls.G)
     cls.annot = annotation.from_resource("goa_human")
# ---- Beispiel #6 (Example 6) ----
 def setUpClass(cls):
     """Load the ChEMBL GO-slim graph and precompute its descendants once for all tests."""
     cls.G = graph.from_resource("goslim_chembl")
     graph.precalc_descendants(cls.G)
 def __init__(self, fin_godag, prt):
     """Load the GO DAG with GOATOOLS and the matching pygosemsim graph.

     fin_godag: path to a GO OBO file; the extension is stripped because
         pygosemsim's resource loader resolves the path itself.
     prt: output stream for GOATOOLS progress messages (or None).
     """
     self.godag = GODag(fin_godag, optional_attrs=['relationship'], prt=prt)
     self.graph = graph.from_resource(splitext(fin_godag)[0])
def main(term_type):
    """Build a pairwise GO semantic-similarity feature matrix for yeast genes.

    For every pair of annotated genes with GO terms in the given namespace
    (term_type, e.g. 'biological_process'), computes the best-match-average
    Lin similarity between their term sets and saves the symmetric matrix,
    indexed by PPC-graph node index, with numpy under
    ../generated-data/pairwise_features/.
    """
    #download.obo("go-basic")
    #download.gaf("sgd")

    annot = annotation.from_resource("sgd")

    G = graph.from_resource("go-basic")

    # Precompute lower bounds, then build the pairwise term-set similarity
    # function: best-match-average over Lin term similarities.
    similarity.precalc_lower_bounds(G)
    sf = functools.partial(term_set.sim_func, G, similarity.lin)

    sgd_to_locus = read_names("../data-sources/yeast/names.txt")

    keys = list(annot.keys())

    ppcG = nx.read_gpickle(gpath)
    nodes = sorted(ppcG.nodes())
    node_ix = dict(zip(nodes, range(len(nodes))))

    # Map each annotated SGD key to its node index in the PPC graph and to
    # its GO terms restricted to the requested namespace.
    translated_keys = {}
    nodes_to_terms = {}
    for key in keys:
        if key not in sgd_to_locus:
            continue

        locus = sgd_to_locus[key]
        unified = get_unified_name(locus)
        if unified in node_ix:
            translated_keys[key] = node_ix[unified]
            node_terms = [t for t in annot[key]["annotation"].keys() if G.nodes[t]['namespace'] == term_type]
            nodes_to_terms[key] = set(node_terms)

    keys = list(translated_keys.keys())
    print(len(keys))

    # BUG FIX: F is indexed by node_ix values (0 .. len(nodes)-1), so size it
    # by the number of graph nodes, not by the (smaller) number of translated
    # keys -- otherwise the assignments below can raise IndexError.
    n_nodes = len(node_ix)
    F = np.zeros((n_nodes, n_nodes))
    for i in range(len(keys)):
        a = keys[i]
        print("%6d %s" % (i, a))

        terms_a = nodes_to_terms[a]
        if len(terms_a) == 0:
            continue

        for j in range(i + 1, len(keys)):
            b = keys[j]
            terms_b = nodes_to_terms[b]
            if len(terms_b) == 0:
                continue

            # Best-match-average similarity of the two term sets; symmetric.
            sim = term_set.sim_bma(terms_a, terms_b, sf)
            F[translated_keys[a], translated_keys[b]] = sim
            F[translated_keys[b], translated_keys[a]] = sim

        print(np.min(F), np.max(F))

    # BUG FIX: save once after the loop finishes (the original re-saved the
    # same file on every outer iteration), matching the sibling
    # common-functions script.
    output_path = "../generated-data/pairwise_features/%s_semsim_%s" % (os.path.basename(gpath), term_type)

    np.save(output_path, F)