Ejemplo n.º 1
0
def main():
    """Run every link-prediction scorer on the training split of the network.

    Reads the edge list from ``path + fn``, builds the network, splits it
    with ``prep.divide_net(net, ratio)`` and calls ``predict_link`` on the
    training part for CN, Jaccard, AA, RA, PA, Katz and SimRank in that
    order.  Each result replaces the previous one and nothing is returned.

    NOTE(review): CN/AA/RA/PA presumably stand for common neighbours,
    Adamic-Adar, resource allocation and preferential attachment -- confirm
    against the respective modules.
    """
    edge_list = prep.read_edges(path + fn)
    graph = prep.build_net(edge_list)
    # The held-out part of the split is not used anywhere in this function.
    train_part, _held_out = prep.divide_net(graph, ratio)

    # Neighbourhood-based scorers.
    scores = CN.predict_link(train_part)
    scores = Jaccard.predict_link(train_part)
    scores = AA.predict_link(train_part)
    scores = RA.predict_link(train_part)
    scores = PA.predict_link(train_part)

    # Remaining scorers: Katz, then SimRank.
    scores = Katz.predict_link(train_part)
    scores = SimRank.predict_link(train_part)
Ejemplo n.º 2
0
Archivo: main.py Proyecto: yinhongyu/LP
def main():
    """Score the training split with each link-prediction method in turn.

    The edge list is loaded from ``path + fn`` and turned into a network,
    which ``prep.divide_net(net, ratio)`` splits in two.  ``predict_link``
    is then invoked on the training part for CN, Jaccard, AA, RA, PA, Katz
    and SimRank, in that order.  Every call overwrites the previous result
    and the function returns nothing.

    NOTE(review): the abbreviations presumably mean common neighbours (CN),
    Adamic-Adar (AA), resource allocation (RA) and preferential attachment
    (PA) -- verify against the modules themselves.
    """
    raw_edges = prep.read_edges(path + fn)
    network = prep.build_net(raw_edges)
    # Only the training half is consumed below; the other half is unused.
    training, _unused_test = prep.divide_net(network, ratio)

    # CN, Jaccard, AA, RA and PA.
    similarity = CN.predict_link(training)
    similarity = Jaccard.predict_link(training)
    similarity = AA.predict_link(training)
    similarity = RA.predict_link(training)
    similarity = PA.predict_link(training)

    # Katz and SimRank.
    similarity = Katz.predict_link(training)
    similarity = SimRank.predict_link(training)
Ejemplo n.º 3
0
def test():
    """Compare this project's SimRank output against a stored matrix.

    Builds the network from the edge list at ``path + in_fn``, runs
    ``SimRank.predict_link`` on it, writes the result to ``my.txt`` under
    ``path``, loads ``oth.txt`` from the same directory and prints whatever
    ``cmp_mat`` returns for the two matrices.

    NOTE(review): ``oth.txt`` is presumably the output of another SimRank
    implementation used as a reference -- confirm.
    """
    edges = prep.read_edges(path + in_fn)
    net = prep.build_net(edges)

    # Second argument is presumably the SimRank decay factor -- confirm.
    my = SimRank.predict_link(net, 0.8)
    output_mat(my, path + 'my.txt')
    oth = read_mat(path + 'oth.txt')

    # Parenthesised single-argument print: identical output under the
    # Python 2 print statement, and valid syntax on Python 3.
    print(cmp_mat(my, oth))
Ejemplo n.º 4
0
def test():
    """Print a comparison of the local SimRank result with a stored matrix.

    The network is built from the edges read at ``path + in_fn``.  The
    matrix returned by ``SimRank.predict_link`` is saved as ``my.txt`` in
    ``path``; ``oth.txt`` is read back from the same directory, and the
    value returned by ``cmp_mat`` for the pair is printed.

    NOTE(review): ``oth.txt`` presumably holds a reference result produced
    elsewhere -- confirm.
    """
    edges = prep.read_edges(path + in_fn)
    net = prep.build_net(edges)

    # Second argument is presumably the SimRank decay factor -- confirm.
    my = SimRank.predict_link(net, 0.8)
    output_mat(my, path + 'my.txt')
    oth = read_mat(path + 'oth.txt')

    # Call form of print with one argument behaves the same on Python 2
    # and also parses on Python 3, unlike the bare print statement.
    print(cmp_mat(my, oth))
Ejemplo n.º 5
0
        # Build the comorbidity network (the message below announces this
        # step) and report the elapsed time.  start_time is set before this
        # excerpt begins.
        print "开始构建伴病网络"
        G, bad_names = build_network(medical_records, normal_disease,
                                     normal_surgeries, last_res)
        end_time = datetime.datetime.now()
        # NOTE(review): timedelta.seconds is only the seconds component
        # (0..86399); it drops whole days, unlike total_seconds().
        print "构建伴病网络时间为 %d秒" % (end_time - start_time).seconds
        # Node count is len(G); the edge count sums len(G[x]) over all x
        # (assumes G[x] is the collection of x's neighbours -- TODO confirm).
        cnt_node = len(G)
        cnt_edge = 0
        for x in G:
            cnt_edge += len(G[x])
        print "节点数:%d" % cnt_node
        print "边数:%d" % cnt_edge

        # `net` is not referenced again in the visible part of this excerpt.
        net = WLM("texts/out/graph.txt")

        # Time a SimRank run over the same graph file and write the
        # similarities out.
        start_time = datetime.datetime.now()
        s = sr.SimRank(graph_file="texts/out/graph.txt")
        s.sim_rank()
        res = s.get_result()  # SimRank result dictionary

        s.print_result("texts/out/similarity.txt")

        end_time = datetime.datetime.now()
        print '节点数: %d' % len(s.nodes)
        print 'sim_rank运行时间为%d' % (end_time - start_time).seconds
        # Fetch two columns from the labeleddata table (column names
        # translate to "non-standard name" and "standard disease name").
        d = db.DataBase()
        values = d.query('select 非标准名称, 标准疾病名 from labeleddata')
        # Counters initialised to zero; their use lies beyond this excerpt.
        cnt_before = 0
        cnt_after = 0
        cnt_weighted = 0
        cnt_noise = 0
        cnt_correct = 0
Ejemplo n.º 6
0
     # Branch body whose header lies above this excerpt: time a PageRank
     # run over `graph` and write the per-key scores to output.txt.
     damp = 0.15
     tStart = time.time()
     pageRank = PageRank.PageRank(graph, damp)
     tEnd = time.time()
     timeCost = tEnd - tStart
     with open("output.txt", 'w', encoding='UTF-8') as f:
         f.write("time cost: %f\n" % timeCost)
         f.write("PageRank\n")
         # One "key: score" line per entry of the PageRank result.
         for key in pageRank:
             f.write("%s: " % key)
             f.write("%f\n" % pageRank[key])
     # NOTE(review): redundant -- the with block above has already closed f.
     f.close()
 elif method == 'simrank':
     # Time a SimRank run over `graph` with decay factor c.
     c = 0.8  #decay factor
     tStart = time.time()
     simMatrix = SimRank.SimRank(graph, c)
     tEnd = time.time()
     timeCost = tEnd - tStart
     # Disabled heatmap plotting of the similarity matrix (kept as found).
     # df = pd.DataFrame.from_dict(simMatrix,orient='index').transpose()
     # # ax = sns.heatmap(df, cmap="YlGnBu", annot=True, fmt="f")
     # ax = sns.heatmap(df, cmap="YlGnBu")
     # label_y = ax.get_yticklabels()
     # plt.setp(label_y, rotation=45, horizontalalignment='right')
     # label_x = ax.get_xticklabels()
     # plt.setp(label_x, rotation=45, horizontalalignment='right')
     # plt.xlabel('vertex')
     # plt.ylabel('vertex')
     # plt.title('Heatmap')
     # plt.show()
     # Opens output.txt in 'w' mode (truncating any existing file) and
     # writes the timing line; the excerpt ends inside this with block.
     with open("output.txt", 'w', encoding='UTF-8') as f:
         f.write("time cost: %f\n" % timeCost)
Ejemplo n.º 7
0
    # For every key pair k in union_times with value v, add one directed
    # edge in each direction.  The weight is v divided by the single_times
    # entry of the edge's source (presumably pair count over individual
    # count -- confirm).  `* 1.0` forces float division on Python 2.
    for k, v in union_times.iteritems():
        v1 = v * 1.0 / single_times[k[0]]  # weight 1: k[0] -> k[1]
        G.add_edge(k[0], k[1], weight=v1)
        v2 = v * 1.0 / single_times[k[1]]  # weight 2: k[1] -> k[0]
        G.add_edge(k[1], k[0], weight=v2)

    # Hand back the graph together with bad_names, built above this excerpt.
    return G, bad_names


def disambiguate(labeled_data, sim_res, bad_names):
    """Look up candidate names for each labelled pair.

    For every ``(u_name, n_name)`` pair in ``labeled_data`` the candidates
    for ``u_name`` are fetched with ``get_cand`` and converted by
    ``dic2list``.  The first field of the first entry is then indexed, but
    the value is thrown away.

    ``sim_res``, ``bad_names`` and the second element of each pair are not
    read, and the function returns ``None``.

    NOTE(review): this looks unfinished -- the similarity results are never
    consulted and nothing is returned to the caller.
    """
    normal_names, icd4_index = get_cand_init()
    for u_name, _n_name in labeled_data:
        ranked = dic2list(get_cand(u_name, normal_names, icd4_index))
        # Result is discarded; the indexing is kept because it still raises
        # when there is no first entry to index.
        ranked[0][0]


# Driver script: build the graph, persist it, run SimRank on the saved file
# and pass the similarity result to disambiguate().
GRAPH_PATH = "iteration_texts/out/graph.txt"
BAD_NAMES_PATH = "iteration_texts/out/bad_names.txt"
SIMILARITY_PATH = "iteration_texts/out/similarity.txt"

normal_dis, normal_sur, records, labeled_data = init()
G, bad_names = get_graph(normal_dis, normal_sur, records, labeled_data)

# The graph is written first because SimRank below reads it back from disk.
write_G(G, GRAPH_PATH)
write_bad_names(bad_names, BAD_NAMES_PATH)

s = SimRank.SimRank(graph_file=GRAPH_PATH)
s.sim_rank()
res = s.get_result()
s.print_result(SIMILARITY_PATH)

ret = disambiguate(labeled_data, res, bad_names)