Ejemplo n.º 1
0
def runSetExpan(seedEidsWithConfidence, numToExpand):
    """Expand a seed set with ``set_expan.setExpan`` and print the result.

    Args:
        seedEidsWithConfidence: Seed entries, forwarded unchanged to
            ``setExpan``.
        numToExpand: Forwarded to ``setExpan`` as ``max_expand_eids``.

    Returns:
        The value returned by ``set_expan.setExpan``. Each item is read as
        ``item[0]`` (entity id) and ``item[1]`` (confidence score) when it
        is printed.
    """
    expand = set_expan.setExpan

    # Apart from the seeds and the expansion cap, every argument is taken
    # from module-level state.
    expansion_args = {
        "seedEidsWithConfidence": seedEidsWithConfidence,
        "negativeSeedEids": negativeSeedEids,
        "eid2patterns": eid2patterns,
        "pattern2eids": pattern2eids,
        "eidAndPattern2strength": eidAndPattern2strength,
        "eid2types": eid2types,
        "type2eids": type2eids,
        "eidAndType2strength": eidAndType2strength,
        "eid2ename": eid2ename,
        "eid2embed": eid2embed,
        "source_weights": {"sg": 1.0, "tp": 5.0, "eb": 5.0},
        "max_expand_eids": numToExpand,
        "use_embed": True,
        "use_type": True,
        "FLAGS_VERBOSE": False,
        "FLAGS_DEBUG": False,
    }
    ranked = expand(**expansion_args)

    # Dump the raw result first, then one readable line per expanded entity.
    print(ranked)
    for item in ranked:
        eid = item[0]
        print("eid=", eid, "ename=", eid2ename[eid], "confidence_score=",
              item[1])

    return ranked
Ejemplo n.º 2
0
def runsetexpan(setname):
    """Evaluate set expansion on one gold set and log per-query AP scores.

    For the key of ``good_gold_set`` equal to ``setname``, every seed query
    in ``queries[setname]`` is expanded with ``set_expan.setExpan`` and the
    expanded names are scored with ``ap`` against the names of the gold
    entities that were not used as seeds. For each seed count, the list of
    AP scores and the average time per query are written to
    ``../data/eval/results/<dataset>/setexpan/<setname>.log``.

    A query whose expansion or scoring raises is recorded with an AP of 0.0
    and reported on stdout; the run continues with the next query.

    Args:
        setname: Key of the gold set to evaluate. Nothing is written when it
            is not present in ``good_gold_set``.
    """
    for st in good_gold_set:
        if st != setname:
            continue
        log_path = '../data/eval/results/{}/setexpan/{}.log'.format(dataset, st)
        # The context manager closes the log even if a lookup below raises
        # outside the per-query try block (e.g. an unknown seed name).
        with open(log_path, 'w+') as fin:
            entities = good_gold_set[st]
            for numseeds, qs in queries[st].items():
                fin.write("[{} seeds]\n".format(numseeds))
                start = time.time()
                ap1 = []
                idx = 0  # stays 0 when there are no queries for this seed count
                for idx, q in enumerate(qs, start=1):
                    print("Running \"{}\" [{} seeds] exp {} ...".format(st, numseeds, idx))
                    samples = [ename2eid[x] for x in q]
                    entities_to_retrieve = [i for i in entities if i not in samples]
                    truth = [eid2ename[int(x)] for x in entities_to_retrieve]  # names
                    seedEidsWithConfidence = [(int(ele), 0.0) for ele in samples]
                    # A fresh set per query, so nothing carries over between runs.
                    negativeSeedEids = set()
                    try:
                        expandedEidsWithConfidence = set_expan.setExpan(
                            seedEidsWithConfidence=seedEidsWithConfidence,
                            negativeSeedEids=negativeSeedEids,
                            eid2patterns=eid2patterns,
                            pattern2eids=pattern2eids,
                            eidAndPattern2strength=eidAndPattern2strength,
                            eid2types=eid2types,
                            type2eids=type2eids,
                            eidAndType2strength=eidAndType2strength,
                            eid2ename=eid2ename,
                            FLAGS_VERBOSE=False,
                            FLAGS_DEBUG=False
                        )
                        answers = [eid2ename[ele[0]]
                                   for ele in expandedEidsWithConfidence]
                        ap1.append(ap(answers, truth))
                    except Exception:
                        # Best effort: a failed query scores 0.0. Catching
                        # Exception rather than everything lets Ctrl-C and
                        # SystemExit still stop the run.
                        print(setname + " from " + str(current_process().name))
                        ap1.append(0.0)
                end = time.time()
                fin.write("{}\n".format(str(ap1)))
                # Avoid dividing by zero when this seed count has no queries.
                avg_time = (end - start) / idx if idx else 0.0
                fin.write("{}\n".format(avg_time))
def runsetexpan(setname):
    """Evaluate 3-seed set expansion on one gold set and log the results.

    For the key of ``good_gold_set`` equal to ``setname``, each 3-seed query
    in ``queries[setname]`` (other seed counts are skipped) is expanded with
    ``set_expan.setExpan``. The seed names, the expanded names and a
    separator are written per query to
    ``../data/eval/results/<dataset>/<setname>.log``, followed by a summary
    line with the mean AP, its variance and standard deviation, and the
    average time per query. The file ends with the list of mean AP values
    and the list of average times.

    Args:
        setname: Key of the gold set to evaluate. No log is written when it
            is not present in ``good_gold_set``.
    """
    print("running eval for {}".format(setname))
    for st in good_gold_set:
        if st != setname:
            continue
        log_path = '../data/eval/results/{}/{}.log'.format(dataset, st)
        # The context manager closes the log even if a query raises midway.
        with open(log_path, 'w+') as fin:
            entities = good_gold_set[st]
            ap2 = []
            times = []
            for numseeds, qs in queries[st].items():
                if numseeds != 3:
                    continue
                start = time.time()
                ap1 = []
                idx = 0
                for idx, q in enumerate(qs, start=1):
                    print("Running \"{}\" [{} seeds] iteration {} ...".format(
                        st, numseeds, idx))
                    samples = [ename2eid[x] for x in q]
                    entities_to_retrieve = [
                        i for i in entities if i not in samples
                    ]
                    truth = [eid2ename[int(x)]
                             for x in entities_to_retrieve]  # names
                    seedEidsWithConfidence = [(int(ele), 0.0)
                                              for ele in samples]
                    fin.write("input: {}\n".format(
                        str([eid2ename[int(x)] for x in samples])))

                    # A fresh set per query, so nothing carries over between runs.
                    negativeSeedEids = set()
                    expandedEidsWithConfidence = set_expan.setExpan(
                        seedEidsWithConfidence=seedEidsWithConfidence,
                        negativeSeedEids=negativeSeedEids,
                        eid2patterns=eid2patterns,
                        pattern2eids=pattern2eids,
                        eidAndPattern2strength=eidAndPattern2strength,
                        eid2types=eid2types,
                        type2eids=type2eids,
                        eidAndType2strength=eidAndType2strength,
                        eid2ename=eid2ename,
                        FLAGS_VERBOSE=False,
                        FLAGS_DEBUG=False)
                    answers = [eid2ename[ele[0]]
                               for ele in expandedEidsWithConfidence]
                    fin.write("output: {}\n".format(str(answers)))
                    fin.write('----------------------------------------\n')
                    ap1.append(ap(answers, truth))
                end = time.time()
                if not ap1:
                    # No queries for this seed count: there is nothing to
                    # average, and the divisions below would fail.
                    continue
                mean_ap = sum(ap1) / len(ap1)
                avg_time = (end - start) / idx
                fin.write(
                    "[{} seeds]: MAP: {}, variance: {}, std.variance: {}, avg. time: {:.2f} secs\n\n"
                    .format(numseeds, mean_ap, np.var(ap1), np.std(ap1),
                            avg_time))
                fin.write("{}\n".format(str(ap1)))
                ap2.append(mean_ap)
                times.append(avg_time)
            fin.write(str(ap2) + '\n')
            fin.write(str(times) + '\n')
Ejemplo n.º 4
0
                # Announce which seed sample is about to be processed.
                print('SEED_SAMPLE #' + str(idx))
                # Pick 10 entries of `seeds` with random.sample and pair the
                # entity id of each with an initial confidence of 0.0.
                userInput = random.sample(seeds, 10)
                seedEidsWithConfidence = [(ename2eid[ele], 0.0)
                                          for ele in userInput]

                # Every entity whose name does not start with 'PHRASE_' is
                # passed to setExpan as a negative seed.
                negativeSeedEids = set()
                for ele in eid2ename:
                    if not eid2ename[ele].startswith('PHRASE_'):
                        negativeSeedEids.add(ele)

                expandedEidsWithConfidence = set_expan.setExpan(
                    seedEidsWithConfidence=seedEidsWithConfidence,
                    negativeSeedEids=negativeSeedEids,
                    eid2patterns=eid2patterns,
                    pattern2eids=pattern2eids,
                    eidAndPattern2strength=eidAndPattern2strength,
                    eid2types=eid2types,
                    type2eids=type2eids,
                    eidAndType2strength=eidAndType2strength,
                    eid2ename=eid2ename,
                    FLAGS_VERBOSE=True,
                    FLAGS_DEBUG=True)
                # Add 1 / (position + 1) to the running score of each
                # returned entity, so earlier positions contribute more.
                # NOTE(review): this loop rebinds `idx`, the name printed in
                # the SEED_SAMPLE line above — confirm the enclosing loop
                # does not rely on its value after this point.
                for idx, ele in enumerate(expandedEidsWithConfidence):
                    if ele[0] not in MRR:
                        MRR[ele[0]] = 0.0
                    MRR[ele[0]] += 1 / (idx + 1)

            # Order the (key, score) pairs of MRR by score, highest first.
            MRR_sorted = sorted(MRR.items(),
                                key=lambda kv: kv[1],
                                reverse=True)

            with open(folder + enttype + '/seed' + str(seedSetNo) + '.txt',
Ejemplo n.º 5
0
            # In debug mode, report the node being expanded and the entity
            # ids passed to setExpan as negative seeds.
            if args.debug:
                print("[Width Expansion] Expand: {}, restrictions: {}".format(
                    targetNode, negativeSeedEids))

            # at least grow one node
            # The cap is the configured limit for this node's level, but
            # never less than len(negativeSeedEids) + 1.
            # NOTE(review): presumably the negative seeds count against the
            # cap inside setExpan — confirm.
            max_expand_eids = max(
                len(negativeSeedEids) + 1,
                level2max_expand_eids[targetNode.level])
            # Source weights are looked up per level; the embedding and type
            # options are both switched on.
            newOrderedChildrenEidsWithConfidence = setExpan(
                seedEidsWithConfidence,
                negativeSeedEids,
                eid2patterns,
                pattern2eids,
                eidAndPattern2strength,
                eid2types,
                type2eids,
                eidAndType2strength,
                eid2ename,
                eid2embed,
                source_weights=level2source_weights[targetNode.level],
                max_expand_eids=max_expand_eids,
                use_embed=True,
                use_type=True)
            newOrderedChildren = []
            for ele in newOrderedChildrenEidsWithConfidence:
                newChildEid = ele[0]
                confidence_score = ele[1]
                confidence_score += targetNode.confidence_score
                synonym_FLAG = False  # Check synonmy
                for sibling in targetNode.children:
                    if isSynonym(args, newChildEid, sibling.eid):
Ejemplo n.º 6
0
# seedEidsWithConfidence_dblp_method_class = [(9012, 0.0), (652, 0.0), (13169, 0.0), (15819, 0.0), (14650, 0.0),
#                                             (15554, 0.0), (3334, 0.0), (14542, 0.0), (3044, 0.0), (9847, 0.0),
#                                             (9826, 0.0), (9619, 0.0), (11985, 0.0), (6825, 0.0), (2767, 0.0),
#                                             (9152, 0.0), (12024, 0.0), (10105, 0.0), (3848, 0.0), (4810, 0.0),
#                                             (11944, 0.0), (6319, 0.0), (126, 0.0)]
# Run one expansion with no negative seeds, with the embedding and type
# options enabled and verbose/debug output turned on.
# `seedEidsWithConfidence` and the lookup tables must already be defined
# earlier in the script.
negativeSeedEids = set()
expandedEidsWithConfidence = set_expan.setExpan(
    seedEidsWithConfidence=seedEidsWithConfidence,
    negativeSeedEids=negativeSeedEids,
    eid2patterns=eid2patterns,
    pattern2eids=pattern2eids,
    eidAndPattern2strength=eidAndPattern2strength,
    eid2types=eid2types,
    type2eids=type2eids,
    eidAndType2strength=eidAndType2strength,
    eid2ename=eid2ename,
    eid2embed=eid2embed,
    source_weights={
        "sg": 1.0,
        "tp": 5.0,
        "eb": 5.0
    },
    use_embed=True,
    use_type=True,
    FLAGS_VERBOSE=True,
    FLAGS_DEBUG=True)
# Print each returned entry as entity id, entity name, confidence score.
print("=== In test case ===")
for ele in expandedEidsWithConfidence:
    print("eid=", ele[0], "ename=", eid2ename[ele[0]], "confidence_score=",
          ele[1])

with open("./test_setexpan.txt", "w") as fout: