def runSetExpan(seedEidsWithConfidence, numToExpand):
    """Expand a seed set with ``set_expan.setExpan`` and echo the result.

    Every resource other than the two arguments (negative seeds, pattern /
    type / embedding lookups, ``eid2ename``) is read from the enclosing
    scope, not passed in.

    Args:
        seedEidsWithConfidence: Seeds forwarded unchanged to ``setExpan``.
        numToExpand: Forwarded to ``setExpan`` as ``max_expand_eids``.

    Returns:
        Whatever ``setExpan`` returned; each element is indexed as
        ``ele[0]`` (entity id) and ``ele[1]`` (confidence score).
    """
    # Collect the call arguments first so the invocation itself stays short.
    expansion_kwargs = {
        "seedEidsWithConfidence": seedEidsWithConfidence,
        "negativeSeedEids": negativeSeedEids,
        "eid2patterns": eid2patterns,
        "pattern2eids": pattern2eids,
        "eidAndPattern2strength": eidAndPattern2strength,
        "eid2types": eid2types,
        "type2eids": type2eids,
        "eidAndType2strength": eidAndType2strength,
        "eid2ename": eid2ename,
        "eid2embed": eid2embed,
        "source_weights": {"sg": 1.0, "tp": 5.0, "eb": 5.0},
        "max_expand_eids": numToExpand,
        "use_embed": True,
        "use_type": True,
        "FLAGS_VERBOSE": False,
        "FLAGS_DEBUG": False,
    }
    expanded = set_expan.setExpan(**expansion_kwargs)

    # Dump the raw result, then one human-readable line per expanded entity.
    print(expanded)
    for entry in expanded:
        eid = entry[0]
        ename = eid2ename[eid]
        score = entry[1]
        print("eid=", eid, "ename=", ename, "confidence_score=", score)

    return expanded
def runsetexpan(setname):
    """Run SetExpan on every query of one gold set and log the scores.

    For each ``(numseeds, queries)`` group in ``queries[setname]`` this
    writes three lines to
    ``../data/eval/results/<dataset>/setexpan/<setname>.log``: a
    ``[N seeds]`` header, the list of per-query scores returned by ``ap``
    (presumably average precision), and the mean wall-clock time per query.

    Relies on names from the enclosing scope: ``good_gold_set``, ``queries``,
    ``dataset``, ``ename2eid``, ``eid2ename``, ``set_expan``, ``ap`` and the
    pattern/type lookup tables.

    Args:
        setname: Key into ``good_gold_set`` / ``queries``. If it is not a key
            of ``good_gold_set`` the function does nothing.

    Returns:
        None.
    """
    # A direct membership test replaces the original linear scan over keys.
    if setname not in good_gold_set:
        return

    entities = good_gold_set[setname]
    log_path = '../data/eval/results/{}/setexpan/{}.log'.format(dataset, setname)

    # The context manager guarantees the log is flushed and closed even when
    # a query raises outside the guarded region below.
    with open(log_path, 'w+') as fin:
        for numseeds, qs in queries[setname].items():
            fin.write("[{} seeds]\n".format(numseeds))
            start = time.time()
            ap1 = []
            idx = 0  # stays 0 when qs is empty
            for idx, q in enumerate(qs, start=1):
                print("Running \"{}\" [{} seeds] exp {} ...".format(setname, numseeds, idx))
                samples = [ename2eid[x] for x in list(q)]
                # Set lookup avoids rescanning the seed list for every entity.
                sample_set = set(samples)
                entities_to_retrieve = [i for i in entities if i not in sample_set]
                truth = [eid2ename[int(x)] for x in entities_to_retrieve]  # names
                seedEidsWithConfidence = [(int(ele), 0.0) for ele in samples]
                negativeSeedEids = set()
                try:
                    expandedEidsWithConfidence = set_expan.setExpan(
                        seedEidsWithConfidence=seedEidsWithConfidence,
                        negativeSeedEids=negativeSeedEids,
                        eid2patterns=eid2patterns,
                        pattern2eids=pattern2eids,
                        eidAndPattern2strength=eidAndPattern2strength,
                        eid2types=eid2types,
                        type2eids=type2eids,
                        eidAndType2strength=eidAndType2strength,
                        eid2ename=eid2ename,
                        FLAGS_VERBOSE=False,
                        FLAGS_DEBUG=False,
                    )
                    answers = [eid2ename[ele[0]] for ele in expandedEidsWithConfidence]
                    ap1.append(ap(answers, truth))
                except Exception as exc:
                    # Best effort: a failed query scores 0.0 so the run goes
                    # on. Catching Exception (not a bare except) lets
                    # KeyboardInterrupt/SystemExit stop the worker, and the
                    # cause is reported instead of being silently dropped.
                    print(setname + " from " + str(current_process().name)
                          + ": " + repr(exc))
                    ap1.append(0.0)
            end = time.time()
            fin.write("{}\n".format(str(ap1)))
            # Guard the mean: an empty query list used to raise
            # ZeroDivisionError here.
            avg_time = (end - start) / idx if idx else 0.0
            fin.write("{}\n".format(avg_time))
def runsetexpan(setname):
    """Evaluate SetExpan on the 3-seed queries of one gold set and log results.

    For every query the seed names and the expanded entity names are written
    to ``../data/eval/results/<dataset>/<setname>.log``. After each seed-count
    group come a summary line (MAP, variance, standard deviation, mean time
    per query) and the raw per-query scores. The file ends with the list of
    per-group MAP values and the list of per-group mean times.

    Relies on names from the enclosing scope: ``good_gold_set``, ``queries``,
    ``dataset``, ``ename2eid``, ``eid2ename``, ``set_expan``, ``ap``, ``np``
    and the pattern/type lookup tables.

    Args:
        setname: Key into ``good_gold_set`` / ``queries``. If it is not a key
            of ``good_gold_set`` only the banner line is printed.

    Returns:
        None.
    """
    print("running eval for {}".format(setname))

    # A direct membership test replaces the original linear scan over keys.
    if setname not in good_gold_set:
        return

    entities = good_gold_set[setname]
    log_path = '../data/eval/results/{}/{}.log'.format(dataset, setname)

    # setExpan is not guarded here, so a failure propagates to the caller;
    # the context manager still closes the partially written log.
    with open(log_path, 'w+') as fin:
        ap2 = []    # mean score per evaluated seed-count group
        times = []  # mean seconds per query, per group
        for numseeds, qs in queries[setname].items():
            # Only the 3-seed configuration is evaluated.
            if numseeds != 3:
                continue
            start = time.time()
            ap1 = []
            idx = 0  # stays 0 when qs is empty
            for idx, q in enumerate(qs, start=1):
                print("Running \"{}\" [{} seeds] iteration {} ...".format(
                    setname, numseeds, idx))
                samples = [ename2eid[x] for x in list(q)]
                entities_to_retrieve = [
                    i for i in entities if i not in samples
                ]
                truth = [eid2ename[int(x)] for x in entities_to_retrieve]  # names
                seedEidsWithConfidence = [(int(ele), 0.0) for ele in samples]
                fin.write("input: {}\n".format(
                    str([eid2ename[int(x)] for x in samples])))
                negativeSeedEids = set()
                expandedEidsWithConfidence = set_expan.setExpan(
                    seedEidsWithConfidence=seedEidsWithConfidence,
                    negativeSeedEids=negativeSeedEids,
                    eid2patterns=eid2patterns,
                    pattern2eids=pattern2eids,
                    eidAndPattern2strength=eidAndPattern2strength,
                    eid2types=eid2types,
                    type2eids=type2eids,
                    eidAndType2strength=eidAndType2strength,
                    eid2ename=eid2ename,
                    FLAGS_VERBOSE=False,
                    FLAGS_DEBUG=False)
                answers = [eid2ename[ele[0]] for ele in expandedEidsWithConfidence]
                fin.write("output: {}\n".format(str(answers)))
                fin.write('----------------------------------------\n')
                ap1.append(ap(answers, truth))
            end = time.time()

            # With no queries there is nothing to average; the original code
            # raised ZeroDivisionError here.
            if not ap1:
                continue

            # Compute each statistic once and reuse it for the log line and
            # the per-group lists.
            mean_ap = sum(ap1) / len(ap1)
            variance = np.var(ap1)
            std_dev = np.std(ap1)
            avg_time = (end - start) / idx
            fin.write(
                "[{} seeds]: MAP: {}, variance: {}, std.variance: {}, avg. time: {:.2f} secs\n\n"
                .format(numseeds, mean_ap, variance, std_dev, avg_time))
            fin.write("{}\n".format(str(ap1)))
            ap2.append(mean_ap)
            times.append(avg_time)
        fin.write(str(ap2) + '\n')
        fin.write(str(times) + '\n')
print('SEED_SAMPLE #' + str(idx)) userInput = random.sample(seeds, 10) seedEidsWithConfidence = [(ename2eid[ele], 0.0) for ele in userInput] negativeSeedEids = set() for ele in eid2ename: if not eid2ename[ele].startswith('PHRASE_'): negativeSeedEids.add(ele) expandedEidsWithConfidence = set_expan.setExpan( seedEidsWithConfidence=seedEidsWithConfidence, negativeSeedEids=negativeSeedEids, eid2patterns=eid2patterns, pattern2eids=pattern2eids, eidAndPattern2strength=eidAndPattern2strength, eid2types=eid2types, type2eids=type2eids, eidAndType2strength=eidAndType2strength, eid2ename=eid2ename, FLAGS_VERBOSE=True, FLAGS_DEBUG=True) for idx, ele in enumerate(expandedEidsWithConfidence): if ele[0] not in MRR: MRR[ele[0]] = 0.0 MRR[ele[0]] += 1 / (idx + 1) MRR_sorted = sorted(MRR.items(), key=lambda kv: kv[1], reverse=True) with open(folder + enttype + '/seed' + str(seedSetNo) + '.txt',
if args.debug: print("[Width Expansion] Expand: {}, restrictions: {}".format( targetNode, negativeSeedEids)) # at least grow one node max_expand_eids = max( len(negativeSeedEids) + 1, level2max_expand_eids[targetNode.level]) newOrderedChildrenEidsWithConfidence = setExpan( seedEidsWithConfidence, negativeSeedEids, eid2patterns, pattern2eids, eidAndPattern2strength, eid2types, type2eids, eidAndType2strength, eid2ename, eid2embed, source_weights=level2source_weights[targetNode.level], max_expand_eids=max_expand_eids, use_embed=True, use_type=True) newOrderedChildren = [] for ele in newOrderedChildrenEidsWithConfidence: newChildEid = ele[0] confidence_score = ele[1] confidence_score += targetNode.confidence_score synonym_FLAG = False # Check synonmy for sibling in targetNode.children: if isSynonym(args, newChildEid, sibling.eid):
# seedEidsWithConfidence_dblp_method_class = [(9012, 0.0), (652, 0.0), (13169, 0.0), (15819, 0.0), (14650, 0.0), # (15554, 0.0), (3334, 0.0), (14542, 0.0), (3044, 0.0), (9847, 0.0), # (9826, 0.0), (9619, 0.0), (11985, 0.0), (6825, 0.0), (2767, 0.0), # (9152, 0.0), (12024, 0.0), (10105, 0.0), (3848, 0.0), (4810, 0.0), # (11944, 0.0), (6319, 0.0), (126, 0.0)] negativeSeedEids = set() expandedEidsWithConfidence = set_expan.setExpan( seedEidsWithConfidence=seedEidsWithConfidence, negativeSeedEids=negativeSeedEids, eid2patterns=eid2patterns, pattern2eids=pattern2eids, eidAndPattern2strength=eidAndPattern2strength, eid2types=eid2types, type2eids=type2eids, eidAndType2strength=eidAndType2strength, eid2ename=eid2ename, eid2embed=eid2embed, source_weights={ "sg": 1.0, "tp": 5.0, "eb": 5.0 }, use_embed=True, use_type=True, FLAGS_VERBOSE=True, FLAGS_DEBUG=True) print("=== In test case ===") for ele in expandedEidsWithConfidence: print("eid=", ele[0], "ename=", eid2ename[ele[0]], "confidence_score=", ele[1]) with open("./test_setexpan.txt", "w") as fout: