def get_entity_linker():
    """Return the shared entity linker, creating it on the first call.

    The instance is kept in the module global ``_entity_linker``. While that
    global is ``None`` a ``WebSearchResultsExtenderEntityLinker`` is built via
    ``init_from_config()`` and stored; afterwards the stored object is
    returned unchanged.
    """
    global _entity_linker
    if _entity_linker is None:
        # The import happens only when the linker actually has to be built.
        # NOTE(review): EntityLinker is imported but not used in this function.
        from entity_linker.entity_linker import EntityLinker, WebSearchResultsExtenderEntityLinker
        _entity_linker = WebSearchResultsExtenderEntityLinker.init_from_config()
    return _entity_linker
def get_number_of_external_entities():
    """Print, per query, how many identified entities are external.

    Reads ``config_webentity.cfg``, builds a parser and a
    ``WebSearchResultsExtenderEntityLinker`` from the configuration, and
    runs entity identification over every query of the train and dev
    WebQuestions splits. For each query the utterance and its entities
    (sorted by ``external_entity_count``, highest first) are printed to
    stdout, and the number of entities whose ``external_entity`` attribute
    is truthy is recorded. A progress line goes to stderr whenever a
    query's index within its dataset is a multiple of 100. At the end the
    list of per-query counts, their sum, and the number of queries are
    printed.

    Returns nothing; all results are emitted through stdout/stderr.
    """
    import scorer_globals

    # NOTE(review): ``globals`` must resolve to a project module exposing
    # read_configuration (the builtin has no such attribute) -- presumably
    # imported at the top of the file; confirm.
    globals.read_configuration('config_webentity.cfg')
    parser = CoreNLPParser.init_from_config()
    entity_linker = WebSearchResultsExtenderEntityLinker.init_from_config()
    entity_linker.topn_entities = 100000
    scorer_globals.init()

    # NOTE(review): ``parameters`` is configured here but not referenced
    # again in this function.
    parameters = translator.TranslatorParameters()
    parameters.require_relation_match = False
    parameters.restrict_answer_type = False

    datasets = [
        "webquestions_split_train",
        "webquestions_split_dev",
    ]
    # Alternative dataset pairs (kept commented out):
    # datasets = ["webquestions_split_train_externalentities", "webquestions_split_dev_externalentities",]
    # datasets = ["webquestions_split_train_externalentities3", "webquestions_split_dev_externalentities3",]

    external_entities_count = []
    for dataset in datasets:
        for index, query in enumerate(load_eval_queries(dataset)):
            utterance = query.utterance
            tokens = parser.parse(utterance).tokens
            entities = entity_linker.identify_entities_in_tokens(
                tokens, text=utterance, find_dates=False)

            ranked = sorted(entities,
                            key=lambda candidate: candidate.external_entity_count,
                            reverse=True)
            print("-------------------------")
            print(utterance)
            print("\n".join([str(candidate) for candidate in ranked]))

            # One entry per query: the number of its external entities.
            external_entities_count.append(
                sum(1 for candidate in entities if candidate.external_entity))

            if index % 100 == 0:
                progress = "%s queries processed" % index
                sys.stderr.write(progress + "\n")

    print("=========================================")
    print(external_entities_count)
    print(sum(external_entities_count))
    print(len(external_entities_count))