Example #1
0
def get_entity_linker():
    global _entity_linker
    if _entity_linker is not None:
        return _entity_linker
    from entity_linker.entity_linker import EntityLinker, WebSearchResultsExtenderEntityLinker
    _entity_linker = WebSearchResultsExtenderEntityLinker.init_from_config()
    return _entity_linker
Example #2
0
def get_number_of_external_entities():
    import scorer_globals
    globals.read_configuration('config_webentity.cfg')
    parser = CoreNLPParser.init_from_config()
    entity_linker = WebSearchResultsExtenderEntityLinker.init_from_config()
    entity_linker.topn_entities = 100000
    scorer_globals.init()

    parameters = translator.TranslatorParameters()
    parameters.require_relation_match = False
    parameters.restrict_answer_type = False

    datasets = ["webquestions_split_train", "webquestions_split_dev",]
    # datasets = ["webquestions_split_train_externalentities", "webquestions_split_dev_externalentities",]
    # datasets = ["webquestions_split_train_externalentities3", "webquestions_split_dev_externalentities3",]

    external_entities_count = []
    for dataset in datasets:
        queries = load_eval_queries(dataset)
        for index, query in enumerate(queries):
            entities = entity_linker.identify_entities_in_tokens(parser.parse(query.utterance).tokens, text=query.utterance, find_dates=False)
            print "-------------------------"
            print query.utterance
            print "\n".join(map(str, sorted(entities, key=lambda entity: entity.external_entity_count, reverse=True)))

            external_entities_count.append(0)
            for entity in entities:
                if entity.external_entity:
                    external_entities_count[-1] += 1
            if index % 100 == 0:
                print >> sys.stderr, "%s queries processed" % index
    print "========================================="
    print external_entities_count
    print sum(external_entities_count)
    print len(external_entities_count)