damp = 0.15 tStart = time.time() pageRank = PageRank.PageRank(graph, damp) tEnd = time.time() timeCost = tEnd - tStart with open("output.txt", 'w', encoding='UTF-8') as f: f.write("time cost: %f\n" % timeCost) f.write("PageRank\n") for key in pageRank: f.write("%s: " % key) f.write("%f\n" % pageRank[key]) f.close() elif method == 'simrank': c = 0.8 #decay factor tStart = time.time() simMatrix = SimRank.SimRank(graph, c) tEnd = time.time() timeCost = tEnd - tStart # df = pd.DataFrame.from_dict(simMatrix,orient='index').transpose() # # ax = sns.heatmap(df, cmap="YlGnBu", annot=True, fmt="f") # ax = sns.heatmap(df, cmap="YlGnBu") # label_y = ax.get_yticklabels() # plt.setp(label_y, rotation=45, horizontalalignment='right') # label_x = ax.get_xticklabels() # plt.setp(label_x, rotation=45, horizontalalignment='right') # plt.xlabel('vertex') # plt.ylabel('vertex') # plt.title('Heatmap') # plt.show() with open("output.txt", 'w', encoding='UTF-8') as f: f.write("time cost: %f\n" % timeCost)
# Build the comorbidity network, report its size, then run SimRank over the
# exported graph file and initialise the evaluation counters.
# NOTE(review): `start_time` for the first measurement is assigned before this
# chunk; confirm it is set immediately before the build step.
print("开始构建伴病网络")
G, bad_names = build_network(medical_records, normal_disease, normal_surgeries, last_res)
end_time = datetime.datetime.now()
# total_seconds() covers the whole interval; timedelta.seconds is only the
# sub-day component and silently drops whole days on long runs. `%d`
# truncates the float to whole seconds, matching the previous output format.
print("构建伴病网络时间为 %d秒" % (end_time - start_time).total_seconds())

# Node count is len(G); edge count is the sum of len(G[x]) over every x in G.
cnt_node = len(G)
cnt_edge = sum(len(G[x]) for x in G)
print("节点数:%d" % cnt_node)
print("边数:%d" % cnt_edge)

# NOTE(review): `net` is not used in the visible lines - presumably consumed
# further down the script; confirm before removing.
net = WLM("texts/out/graph.txt")

# Time the SimRank computation on the graph file and dump the similarities.
start_time = datetime.datetime.now()
s = sr.SimRank(graph_file="texts/out/graph.txt")
s.sim_rank()
res = s.get_result()  # SimRank result dictionary
s.print_result("texts/out/similarity.txt")
end_time = datetime.datetime.now()
print('节点数: %d' % len(s.nodes))
print('sim_rank运行时间为%d' % (end_time - start_time).total_seconds())

# Load the labelled (non-standard name, standard disease name) rows and reset
# the counters used by the code that follows.
d = db.DataBase()
values = d.query('select 非标准名称, 标准疾病名 from labeleddata')
cnt_before = 0
cnt_after = 0
cnt_weighted = 0
cnt_noise = 0
cnt_correct = 0
for k, v in union_times.iteritems(): v1 = v * 1.0 / single_times[k[0]] # 权值1 G.add_edge(k[0], k[1], weight=v1) v2 = v * 1.0 / single_times[k[1]] # 权值2 G.add_edge(k[1], k[0], weight=v2) return G, bad_names def disambiguate(labeled_data, sim_res, bad_names): normal, icd4_dic = get_cand_init() for u_name, n_name in labeled_data: name_dic = get_cand(u_name, normal, icd4_dic) name_list = dic2list(name_dic) name_list[0][0] normal_dis, normal_sur, records, labeled_data = init() G, bad_names = get_graph(normal_dis, normal_sur, records, labeled_data) write_G(G, "iteration_texts/out/graph.txt") write_bad_names(bad_names, "iteration_texts/out/bad_names.txt") s = SimRank.SimRank(graph_file="iteration_texts/out/graph.txt") s.sim_rank() res = s.get_result() s.print_result("iteration_texts/out/similarity.txt") ret = disambiguate(labeled_data, res, bad_names)