Exemple #1
0
def test_prf(fn1, fn2, sth, L):
    """Print a classification report and an AUC score for top-``L`` predictions.

    Every edge read from ``fn1`` is one sample.  It is predicted positive (1)
    when its score in ``sth`` is strictly greater than the score found at
    index ``L`` of the descending ranking of ``sth``; its true label is 1
    when the edge is also among the edges read from ``fn2``.

    Args:
        fn1: path handed to ``prep.read_edges`` for the sample edges.
        fn2: path handed to ``prep.read_edges`` for the reference edges.
        sth: mapping edge -> numeric score; must contain every edge of ``fn1``.
        L: index into the descending ranking that supplies the threshold.
            When ``L`` is past the end of the ranking every scored edge is
            treated as being in the top ``L`` (previously an IndexError).

    Raises:
        KeyError: if an edge of ``fn1`` has no score in ``sth``.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)

    # ``items()`` works on both Python 2 and 3; no intermediate copy needed.
    ranked = sorted(sth.items(), key=lambda d: d[1], reverse=True)
    threshold = ranked[L][1] if L < len(ranked) else float("-inf")

    y_true = []
    y_score = []
    # One pass keeps labels and predictions aligned by construction.
    for edge in edges_1:
        y_score.append(1 if sth[edge] > threshold else 0)
        y_true.append(1 if edge in edges_2 else 0)

    # NOTE(review): classification_report / auc_score are presumably the
    # scikit-learn metrics imported elsewhere in the file -- confirm.
    print(classification_report(y_true, y_score))
    print(auc_score(y_true, y_score))
Exemple #2
0
def cal_clust(fn1, fn2):
    """Score each edge of ``fn2`` by the product of its endpoints' clustering.

    For a node with neighbour set ``N`` (taken from ``get_neig(fn1)``) the
    clustering value is ``2 * links / (|N| * (|N| - 1))`` where ``links`` is
    the number of edges of ``fn1`` whose two endpoints both lie in ``N``.
    The value is 0 when there are no such edges.

    Fixes over the previous version:
      * the result is returned after *all* edges are scored (the ``return``
        used to sit inside the loop, so only one edge was ever processed);
      * the second endpoint's zero-guard tests its own link count (it used
        to re-test the first endpoint's, allowing a division by zero);
      * per-node values are cached instead of rescanning all edges per edge.

    Args:
        fn1: path of the full network (neighbours and edge scan).
        fn2: path of the edges to score.

    Returns:
        dict mapping each edge of ``fn2`` to ``clust(u) * clust(v)``.
    """
    neig = get_neig(fn1)
    edges = prep.read_edges(fn2)
    all_edges = prep.read_edges(fn1)

    clust_cache = {}

    def _clust(node):
        # Clustering value of one node, computed once and then reused.
        if node not in clust_cache:
            nbrs = neig[node]
            links = sum(1 for e in all_edges
                        if e[0] in nbrs and e[1] in nbrs)
            degree = len(nbrs)
            if links == 0 or degree < 2:
                # degree < 2 would make the denominator zero.
                clust_cache[node] = 0
            else:
                clust_cache[node] = 2.0 * links / (degree * (degree - 1))
        return clust_cache[node]

    return {edge: _clust(edge[0]) * _clust(edge[1]) for edge in edges}
Exemple #3
0
def cal_auc(fn1, fn2, sth):
    """Return the area under the ROC-style curve of the scores in ``sth``.

    Positives are the edges read from ``fn1`` that are absent from ``fn2``
    (``edges_1 - edges_2``); ``neg`` is the remaining count of ``fn1`` edges.
    Edges are visited from lowest to highest score; each visit increments
    either the true-positive or the false-positive count, and the area is
    accumulated as a rectangle whenever the false-positive rate changes.

    Args:
        fn1: path of the full edge list.
        fn2: path of the observed edge list.
        sth: mapping edge -> numeric score.

    Returns:
        The accumulated area as a float (0.0 when ``sth`` is empty).

    Raises:
        ZeroDivisionError: if ``sth`` is non-empty and there are no positive
            or no negative edges.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)
    edges_miss = edges_1 - edges_2

    pos = len(edges_miss)
    neg = len(edges_1) - pos

    # ``items()`` is portable across Python 2 and 3; ascending by score.
    ranked = sorted(sth.items(), key=lambda d: d[1])

    tp, fp = 0., 0.
    auc = 0.
    prev_x = 0
    for edge, _score in ranked:
        if edge in edges_miss:
            tp += 1
        else:
            fp += 1
        x = fp / neg
        y = tp / pos
        # Only a step along the x axis contributes area.
        if x != prev_x:
            auc += (x - prev_x) * y
            prev_x = x

    return auc
Exemple #4
0
def test_prf(fn1, fn2, sth, L):
    """Print a classification report and an AUC score for top-``L`` predictions.

    Each edge read from ``fn1`` is a sample: predicted 1 when its score in
    ``sth`` is strictly above the score at index ``L`` of the descending
    ranking, and labelled 1 when it also occurs among the edges of ``fn2``.

    Args:
        fn1: path handed to ``prep.read_edges`` for the sample edges.
        fn2: path handed to ``prep.read_edges`` for the reference edges.
        sth: mapping edge -> numeric score covering every edge of ``fn1``.
        L: ranking index that supplies the threshold.  If it lies past the
            end of the ranking, all scored edges count as top-``L`` instead
            of raising IndexError.

    Raises:
        KeyError: if an edge of ``fn1`` is missing from ``sth``.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)

    # Portable replacement for the Python-2-only ``iteritems``.
    ranked = sorted(sth.items(), key=lambda d: d[1], reverse=True)
    if L < len(ranked):
        threshold = ranked[L][1]
    else:
        threshold = float("-inf")

    y_true = []
    y_score = []
    # Single pass so that predictions and labels stay in the same order.
    for edge in edges_1:
        y_score.append(1 if sth[edge] > threshold else 0)
        y_true.append(1 if edge in edges_2 else 0)

    # NOTE(review): classification_report / auc_score presumably come from
    # scikit-learn imports elsewhere in the file -- confirm.
    print(classification_report(y_true, y_score))
    print(auc_score(y_true, y_score))
Exemple #5
0
def cal_auc(fn1, fn2, sth):
    """Return the area under the ROC-style curve of the scores in ``sth``.

    Edges in ``fn1`` but not in ``fn2`` are the positives.  Scores are swept
    in ascending order; every edge bumps the true- or false-positive count
    and a rectangle ``(x - prev_x) * y`` is added each time the
    false-positive rate ``x`` moves.

    Args:
        fn1: path of the full edge list.
        fn2: path of the observed edge list.
        sth: mapping edge -> numeric score.

    Returns:
        The accumulated area as a float (0.0 when ``sth`` is empty).

    Raises:
        ZeroDivisionError: if ``sth`` is non-empty and either class is empty.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)
    edges_miss = edges_1 - edges_2

    pos = len(edges_miss)
    neg = len(edges_1) - pos

    # Ascending ranking; ``items()`` runs on both Python 2 and 3.
    ranked = sorted(sth.items(), key=lambda d: d[1])

    tp, fp = 0., 0.
    auc = 0.
    prev_x = 0
    for edge, _score in ranked:
        if edge in edges_miss:
            tp += 1
        else:
            fp += 1
        x, y = fp / neg, tp / pos
        # Area grows only when the curve advances horizontally.
        if x != prev_x:
            auc += (x - prev_x) * y
            prev_x = x

    return auc
Exemple #6
0
def cal_miss_ratio(fn1, fn2):
    """Print edge counts plus the missing and newly-added edge ratios.

    ``miss_ratio`` is the share of ``fn1`` edges absent from ``fn2``;
    ``new_ratio`` is the number of ``fn2`` edges absent from ``fn1`` divided
    by the size of ``fn1``.  Nothing is returned.

    Args:
        fn1: path of the reference edge list (the denominator of both ratios).
        fn2: path of the edge list compared against it.

    Raises:
        ZeroDivisionError: if ``fn1`` yields no edges.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print("edges_1 " + str(len(edges_1)))
    print("edges_2 " + str(len(edges_2)))

    miss_ratio = float(len(edges_1 - edges_2)) / len(edges_1)
    new_ratio = float(len(edges_2 - edges_1)) / len(edges_1)

    print("miss_ratio is " + str(miss_ratio))
    print("new added ratio is " + str(new_ratio))
Exemple #7
0
def cal_miss_ratio(fn1, fn2):
    """Print edge counts plus the missing and newly-added edge ratios.

    Both ratios are relative to the number of edges read from ``fn1``:
    the missing ratio counts edges only in ``fn1``, the new-added ratio
    counts edges only in ``fn2``.  The function prints and returns nothing.

    Args:
        fn1: path of the reference edge list.
        fn2: path of the edge list compared against it.

    Raises:
        ZeroDivisionError: if ``fn1`` yields no edges.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)
    total = len(edges_1)

    # Call-style print with one argument is valid on Python 2 and 3 alike.
    print("edges_1 " + str(total))
    print("edges_2 " + str(len(edges_2)))

    miss_ratio = float(len(edges_1 - edges_2)) / total
    new_ratio = float(len(edges_2 - edges_1)) / total

    print("miss_ratio is " + str(miss_ratio))
    print("new added ratio is " + str(new_ratio))
Exemple #8
0
def cal_PA(fn1, fn2):
    """Score each edge of ``fn2`` by the product of its endpoints' degrees.

    Degrees are the sizes of the neighbour sets returned by
    ``get_neig(fn1)``.

    Returns:
        dict mapping edge -> ``len(neig[u]) * len(neig[v])``.
    """
    neig = get_neig(fn1)
    edges = prep.read_edges(fn2)
    return {
        edge: len(neig[edge[0]]) * len(neig[edge[1]])
        for edge in edges
    }
Exemple #9
0
def cal_edge_current_flow_betweenness_centrality(fn1, fn2):
    """Collect edge current-flow betweenness for the graph built from ``fn1``.

    The result starts with every edge of ``fn2`` at 0.0.  The graph from
    ``fn1`` is split into connected components, the centrality is computed
    per component, and each returned (edge, value) pair is written into the
    result.

    NOTE(review): every edge reported by networkx is written, so the result
    can gain keys that are not in ``fn2``, and an ``fn2`` edge keeps 0.0 if
    networkx reports it under the opposite orientation -- confirm intent.
    NOTE(review): ``connected_component_subgraphs`` presumably requires an
    older networkx release -- verify against the pinned version.

    Returns:
        dict mapping edge -> centrality value.
    """
    edges = prep.read_edges(fn2)
    sth = dict.fromkeys(edges, 0.)

    G = nx.Graph()
    edges_all = prep.read_edges(fn1)
    G.add_edges_from(edges_all)

    components = list(nx.connected_component_subgraphs(G))
    for component in components:
        edge_flow = nx.edge_current_flow_betweenness_centrality(
            component, normalized=True, weight=None, dtype=np.float32)
        for edge, flow in edge_flow.items():
            if flow is not None:
                sth[edge] = flow
    return sth
Exemple #10
0
def main():
    """Split the network into train/test and run every similarity predictor.

    Reads the edges at ``path + fn``, builds the network, divides it with
    ``ratio`` and calls ``predict_link(train)`` on each predictor in turn.
    Each result overwrites the previous one in ``sim``; nothing is returned.
    """
    edges = prep.read_edges(path + fn)
    net = prep.build_net(edges)
    train, test = prep.divide_net(net, ratio)

    # Same order as before: CN, Jaccard, AA, RA, PA, Katz, SimRank.
    for predictor in (CN, Jaccard, AA, RA, PA, Katz, SimRank):
        sim = predictor.predict_link(train)
Exemple #11
0
def main():
    """Run each link-prediction module on the training part of the network.

    The network comes from the edges at ``path + fn`` and is divided by
    ``prep.divide_net`` using ``ratio``.  ``sim`` holds only the most recent
    predictor's output; the function returns nothing.
    """
    edges = prep.read_edges(path + fn)
    net = prep.build_net(edges)
    train, test = prep.divide_net(net, ratio)

    # Order preserved: CN, Jaccard, AA, RA, PA, Katz, SimRank.
    predictors = (CN, Jaccard, AA, RA, PA, Katz, SimRank)
    for module in predictors:
        sim = module.predict_link(train)
Exemple #12
0
def cal_edge_betweenness(fn1, fn2):
    """Score each edge of ``fn2`` by its betweenness in the graph of ``fn1``.

    The graph is undirected, so networkx may key an edge as ``(u, v)`` or
    ``(v, u)``.  Both orientations are tried; previously only ``(u, v)`` was
    looked up and an edge stored the other way round silently scored 0.

    Args:
        fn1: path of the full edge list used to build the graph.
        fn2: path of the edges to score.

    Returns:
        dict mapping each edge of ``fn2`` to its edge betweenness, or 0 when
        the edge is not in the graph under either orientation.
    """
    edges = prep.read_edges(fn2)

    G = nx.Graph()
    edges_all = prep.read_edges(fn1)
    G.add_edges_from(edges_all)
    edge_betweenness = nx.edge_betweenness_centrality(G)

    sth = {}
    for x in edges:
        u, v = x[0], x[1]
        score = edge_betweenness.get((u, v))
        if score is None:
            # Same undirected edge, opposite orientation.
            score = edge_betweenness.get((v, u))
        sth[x] = 0 if score is None else score

    return sth
Exemple #13
0
def cal_RA(fn1, fn2):
    """Score each edge of ``fn2`` with the resource-allocation index.

    For an edge ``(u, v)`` the score is the sum, over every common neighbour
    ``z`` of ``u`` and ``v``, of ``1 / degree(z)``, with neighbour sets taken
    from ``get_neig(fn1)``.

    The reciprocal uses ``1.0`` so the division is a true division; with the
    integer literal, Python 2 floored every term to 0.

    Returns:
        dict mapping edge -> float score (0.0 without common neighbours).
    """
    neig = get_neig(fn1)
    edges = prep.read_edges(fn2)
    sth = {}
    for edge in edges:
        shared = neig[edge[0]] & neig[edge[1]]
        sth[edge] = sum((1.0 / len(neig[z]) for z in shared), 0.)
    return sth
Exemple #14
0
def test():
    """Compare this project's SimRank matrix with a reference matrix.

    Builds the network from ``path + in_fn``, runs ``SimRank.predict_link``
    with 0.8 as its second argument, writes the result to ``my.txt``, reads
    ``oth.txt`` and prints whatever ``cmp_mat`` returns for the pair.
    """
    edges = prep.read_edges(path + in_fn)
    net = prep.build_net(edges)

    my = SimRank.predict_link(net, 0.8)
    output_mat(my, path + 'my.txt')
    oth = read_mat(path + 'oth.txt')

    # Call-style print: identical output on Python 2, valid on Python 3.
    print(cmp_mat(my, oth))
Exemple #15
0
def test():
    """Compare this project's SimRank matrix with a reference matrix.

    The network is built from the edges at ``path + in_fn``; the SimRank
    result (second argument 0.8) is written to ``my.txt`` and compared by
    ``cmp_mat`` against the matrix read from ``oth.txt``.  The comparison
    result is printed; nothing is returned.
    """
    edges = prep.read_edges(path + in_fn)
    net = prep.build_net(edges)

    my = SimRank.predict_link(net, 0.8)
    output_mat(my, path + 'my.txt')
    oth = read_mat(path + 'oth.txt')

    # Parenthesised print keeps Python 2 output and also runs on Python 3.
    print(cmp_mat(my, oth))
Exemple #16
0
def cal_PA_extend(fn1, fn2):
    """Score each edge of ``fn2`` by the inverse product of endpoint degrees.

    Degrees come from ``get_neig(fn1)``.  An edge with an endpoint of
    degree 1 scores 0.0; otherwise the score is
    ``1.0 / (degree(u) * degree(v))``.

    Returns:
        dict mapping edge -> float score.
    """
    neig = get_neig(fn1)
    edges = prep.read_edges(fn2)
    sth = dict.fromkeys(edges, 0.)
    for edge in sth:
        u, v = edge[0], edge[1]
        if len(neig[u]) == 1 or len(neig[v]) == 1:
            sth[edge] = 0.
            continue
        sth[edge] = 1.0 / (len(neig[u]) * len(neig[v]))
    return sth
Exemple #17
0
def cal_communicability_centrality(fn1, fn2):
    """Score each edge of ``fn2`` from its endpoints' communicability centrality.

    Centralities are computed on the undirected graph built from ``fn1``.
    With ``n1`` and ``n2`` the endpoint values, the score is
    ``min(n1, n2) / (max(n1, n2) + 1)``.  The two endpoint values are also
    printed for every edge.

    NOTE(review): ``nx.communicability_centrality`` presumably exists only
    in older networkx releases -- verify against the pinned version.

    Returns:
        dict mapping edge -> float score.
    """
    edges = prep.read_edges(fn2)
    sth = {edge: 0. for edge in edges}

    G = nx.Graph()
    edges_all = prep.read_edges(fn1)
    G.add_edges_from(edges_all)
    communicability = nx.communicability_centrality(G)

    for x in sth:
        n1 = communicability.get(x[0])
        n2 = communicability.get(x[1])
        # Same "n1 n2" output as the old print statement, but also valid
        # syntax on Python 3.
        print("%s %s" % (n1, n2))
        # Alternatives tried earlier: n1 * n2 and n1 + n2.
        sth[x] = float(min(n1, n2)) / (max(n1, n2) + 1)
    return sth
Exemple #18
0
def cal_auc(fn1, fn2, sth):
    """Return the curve-based AUC of ``sth`` and print a rank-based AUC.

    Positives are the edges of ``fn1`` absent from ``fn2``.  Scores are
    sorted ascending and two estimates are produced:

    * curve-based (returned): sweep the ranking, track true/false-positive
      rates and add ``(x - prev_x) * y`` whenever the false-positive rate
      ``x`` changes;
    * rank-based (printed): position ``i`` gets rank ``len - i``, tied
      scores share their mean rank, and the printed value is
      ``(sum of positive ranks - pos * (pos + 1) / 2) / neg / pos``.

    The print is written so the whole quotient is its argument; the former
    ``print(...) / neg / pos`` divided the return value of ``print`` on
    Python 3 and raised TypeError.

    Args:
        fn1: path of the full edge list.
        fn2: path of the observed edge list.
        sth: mapping edge -> numeric score.

    Returns:
        The curve-based area as a float.

    Raises:
        ZeroDivisionError: if there are no positive or no negative edges.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)
    edges_miss = edges_1 - edges_2

    pos = len(edges_miss)
    neg = len(edges_1) - pos

    # Ascending ranking; ``items()`` runs on both Python 2 and 3.
    ranked = sorted(sth.items(), key=lambda d: d[1])
    total = len(ranked)

    # Per distinct score: sum of the ranks it occupies and how many edges
    # share it, so that ties can be given their mean rank below.
    rank = {}
    num = {}
    for i, (_edge, score) in enumerate(ranked):
        rank[score] = rank.get(score, 0) + total - i
        num[score] = num.get(score, 0) + 1

    tp, fp = 0., 0.
    summ = 0.0
    auc = 0.
    prev_x = 0
    for edge, score in ranked:
        if edge in edges_miss:
            tp += 1
            summ += float(rank[score]) / num[score]
        else:
            fp += 1
        x = fp / neg
        y = tp / pos
        if x != prev_x:
            auc += (x - prev_x) * y
            prev_x = x

    print((summ - pos * (pos + 1) / 2.0) / neg / pos)
    return auc
Exemple #19
0
def cal_triangles(fn1, fn2):
    """Score each edge of ``fn2`` from its endpoints' triangle counts.

    Triangle counts come from ``nx.triangles`` on the undirected graph built
    from ``fn1``.  The score is ``smaller / (larger + 1)`` of the two counts.

    Returns:
        dict mapping edge -> float score.
    """
    edges = prep.read_edges(fn2)
    sth = dict.fromkeys(edges, 0.)

    G = nx.Graph()
    G.add_edges_from(prep.read_edges(fn1))

    for edge in sth:
        count_u = nx.triangles(G, edge[0])
        count_v = nx.triangles(G, edge[1])
        fewer, more = sorted((count_u, count_v))
        # Alternatives tried earlier: (n1 + 1) * (n2 + 1) and n1 + n2.
        sth[edge] = float(fewer) / (more + 1)

    return sth
Exemple #20
0
def cal_embed(fn1, fn2):
    """Score each edge of ``fn2`` by its share of common neighbours.

    With ``c`` the number of common neighbours of the endpoints (neighbour
    sets from ``get_neig(fn1)``), the score is 0.0 when ``c`` is 0 and
    otherwise ``c / ((degree(u) - 1) + (degree(v) - 1) - c)``.

    Returns:
        dict mapping edge -> float score.
    """
    neig = get_neig(fn1)
    edges = prep.read_edges(fn2)
    sth = dict.fromkeys(edges, 0)
    for edge in sth:
        nbrs_u = neig[edge[0]]
        nbrs_v = neig[edge[1]]
        shared = len(nbrs_u & nbrs_v)
        if shared == 0:
            sth[edge] = 0.0
            continue
        others = (len(nbrs_u) - 1) + (len(nbrs_v) - 1) - shared
        sth[edge] = float(shared) / others
    return sth
Exemple #21
0
def cal_precision_recall(fn1, fn2, sth, L):
    """Return precision, recall and F1 of the ``L`` weakest-scored edges.

    The ``L`` edges with the lowest scores in ``sth`` are the predictions;
    the edges of ``fn1`` absent from ``fn2`` are the targets.

    Degenerate cases yield 0.0 instead of raising ZeroDivisionError:
    precision when nothing is predicted, recall when there are no targets,
    and F1 when no prediction hits a target.  ``L`` larger than the number
    of scored edges selects all of them (previously an IndexError).

    Args:
        fn1: path of the full edge list.
        fn2: path of the observed edge list.
        sth: mapping edge -> numeric score.
        L: number of lowest-scored edges to predict as missing.

    Returns:
        Tuple ``(precision, recall, F1)`` of floats.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)
    edges_miss = edges_1 - edges_2

    # Ascending ranking; ``items()`` runs on both Python 2 and 3.
    ranked = sorted(sth.items(), key=lambda d: d[1])

    # max() keeps a negative L selecting nothing, as range(L) did.
    predict_L = set(edge for edge, _score in ranked[:max(L, 0)])

    hits = len(predict_L & edges_miss)
    precision = float(hits) / len(predict_L) if predict_L else 0.0
    recall = float(hits) / len(edges_miss) if edges_miss else 0.0
    if hits:
        F1 = 2 * (precision * recall) / (precision + recall)
    else:
        F1 = 0.0
    return precision, recall, F1
Exemple #22
0
def cal_JC(fn1, fn2):
    """Score each edge of ``fn2`` with a Jaccard-style neighbour overlap.

    Neighbour sets come from ``get_neig(fn1)``.  An edge with an endpoint of
    degree 1 scores 0; otherwise the score is
    ``|N(u) & N(v)| / (|N(u) | N(v)| - 2)``.

    Returns:
        dict mapping edge -> score.
    """
    neig = get_neig(fn1)
    edges = prep.read_edges(fn2)
    sth = dict.fromkeys(edges, 0.)
    for edge in sth:
        u, v = edge[0], edge[1]
        if len(neig[u]) == 1 or len(neig[v]) == 1:
            sth[edge] = 0
            continue
        shared = len(neig[u] & neig[v])
        combined = len(neig[u] | neig[v])
        sth[edge] = float(shared) / (combined - 2)
    return sth
Exemple #23
0
def get_neig(fn):
    """Build the neighbour sets of every node in the network stored at ``fn``.

    Every node from ``prep.read_nodes`` starts with an empty set; each edge
    from ``prep.read_edges`` then adds its two endpoints to each other's set.

    Returns:
        dict mapping node -> set of adjacent nodes.
    """
    all_nodes = prep.read_nodes(fn)
    all_edges = prep.read_edges(fn)
    neig = {node: set() for node in all_nodes}
    for edge in all_edges:
        u, v = edge[0], edge[1]
        neig[u].add(v)
        neig[v].add(u)
    return neig
Exemple #24
0
def cal_precision_recall(fn1, fn2, sth, L):
    """Return precision, recall and F1 of the ``L`` weakest-scored edges.

    Predictions are the ``L`` lowest-scored edges of ``sth``; targets are
    the edges read from ``fn1`` that do not appear in ``fn2``.

    Instead of raising ZeroDivisionError, precision is 0.0 when nothing is
    predicted, recall is 0.0 when there are no targets and F1 is 0.0 when no
    prediction hits a target.  An ``L`` beyond the number of scored edges
    selects all of them instead of raising IndexError.

    Args:
        fn1: path of the full edge list.
        fn2: path of the observed edge list.
        sth: mapping edge -> numeric score.
        L: number of lowest-scored edges to predict as missing.

    Returns:
        Tuple ``(precision, recall, F1)`` of floats.
    """
    edges_1 = prep.read_edges(fn1)
    edges_2 = prep.read_edges(fn2)
    edges_miss = edges_1 - edges_2

    # Portable replacement for ``iteritems``; weakest edges come first.
    ranked = sorted(sth.items(), key=lambda d: d[1])

    # A negative L selects nothing, matching the old range(L) loop.
    predict_L = set(edge for edge, _score in ranked[:max(L, 0)])

    hits = len(predict_L & edges_miss)
    precision = float(hits) / len(predict_L) if predict_L else 0.0
    recall = float(hits) / len(edges_miss) if edges_miss else 0.0
    if hits:
        F1 = 2 * (precision * recall) / (precision + recall)
    else:
        F1 = 0.0
    return precision, recall, F1
Exemple #25
0
def cal_degree_ratio(fn1, fn2):
    """Score each edge of ``fn2`` by the ratio of its endpoints' degrees.

    Neighbour sets are built from the nodes and edges of ``fn1``; the score
    is the smaller endpoint degree divided by the larger one.

    Returns:
        dict mapping edge -> float ratio.
    """
    edges = prep.read_edges(fn2)
    sth = dict.fromkeys(edges, 0)

    all_nodes = prep.read_nodes(fn1)
    all_edges = prep.read_edges(fn1)

    # Neighbour set of every node, filled symmetrically from the edges.
    neig = {node: set() for node in all_nodes}
    for pair in all_edges:
        u, v = pair[0], pair[1]
        neig[u].add(v)
        neig[v].add(u)

    for edge in sth:
        deg_u = len(neig[edge[0]])
        deg_v = len(neig[edge[1]])
        sth[edge] = float(min(deg_u, deg_v)) / max(deg_u, deg_v)
    return sth
Exemple #26
0
import construct_graph
import preprocess


# Build, save and draw the graph for the 1000-node subset under
# ../datasets/subset/.  The steps below run in order at import time.
graph = construct_graph.Graph(logfile='../datasets/subset/construct_graph.log')
# List of nodes passed to create_nodes_from_db as ``subnodes``.
subnodes = preprocess.read_list(path='../datasets/subset/1000_nodelist_url.txt')

# Create the nodes from the three subset databases.
graph.create_nodes_from_db(longabsdb_path='../datasets/subset/1000_long_abstracts.db',
                            labelsdb_path='../datasets/subset/1000_labels.db',
                            lookupdb_path='../datasets/subset/1000_nodes_lookup.db',
                            subnodes=subnodes)
print('nodes created..')

# Read the subset edge list, then add those edges to the graph.
edges = preprocess.read_edges(path='../datasets/subset/1000_edgelist.txt')
print('edges are read...')

graph.create_edges_from_list(edges=edges)
print('edges are created...')

# Write the finished graph to the path below before drawing it.
graph.write_graph(path='../datasets/subset/1000_graph_sub.gpickle')
print('graph is written...')

graph.draw()