Beispiel #1
0
    def extract_element_from_source(self, key):
        """Run ``my_run`` on ``key`` and return its result.

        The models are loaded lazily: when ``self.models`` is still ``None``
        the arguments namespace is built from ``self.config`` and the models
        are loaded (without a logger) before the run is started.

        Args:
            key: passed to ``my_run`` as ``test_data``.

        Returns:
            Whatever ``self.my_run`` returns.
        """
        models_missing = self.models is None
        if models_missing:
            # First use: build the namespace and load the models once; both
            # are kept on the instance for subsequent calls.
            namespace = argparse.Namespace(**self.config)
            self.args = namespace
            self.models = main_dense.load_models(namespace, logger=None)

        # The second positional argument (None) is the same value the loader
        # received as its logger.
        return self.my_run(self.args, None, *self.models, test_data=key)
Beispiel #2
0
    def __init__(self, name, **config):
        """Set up the models, NER tagger, page cache and title-to-id mapping.

        Args:
            name: forwarded unchanged to the parent initialiser.
            **config: keyword options wrapped in an ``argparse.Namespace``
                and stored as ``self.args``. Must contain
                ``wikipedia_title2id``, the path of the pickle file read at
                the end of this method.
        """
        super().__init__(name)

        self.args = argparse.Namespace(**config)

        self.logger = logging.getLogger("KILT")

        self.models = main_dense.load_models(self.args, logger=self.logger)

        self.ner_model = SequenceTagger.load("ner")

        self.cache_pages = {}

        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        # NOTE(review): unpickling can execute arbitrary code; this path must
        # only ever point at a trusted file.
        with open(self.args.wikipedia_title2id, "rb") as title2id_file:
            self.Wikipedia_title2id = pickle.load(title2id_file)
    "interactive": False,
    "biencoder_model": "models/biencoder_wiki_large.bin",
    "biencoder_config": "models/biencoder_wiki_large.json",
    "entity_catalogue": "models/entity.jsonl",
    "entity_encoding": "models/all_entities_large.t7",
    "crossencoder_model": "models/crossencoder_wiki_large.bin",
    "crossencoder_config": "models/crossencoder_wiki_large.json",
    "output_path": "output",
    "fast": False,
    "top_k": 100,
}
# Expose the PARAMETERS dict as attribute-style arguments.
args = argparse.Namespace(**PARAMETERS)

# The logger is obtained from the configured output path.
logger = utils.get_logger(args.output_path)

# Load the models once, before iterating over the datasets.
models = main_dense.load_models(args, logger)

# Results table; the list below supplies its column headers.
table = prettytable.PrettyTable([
    "DATASET",
    "biencoder accuracy",
    "recall at 100",
    "crossencoder normalized accuracy",
    "overall unormalized accuracy",
    "support",
])

for dataset in DATASETS:
    logger.info(dataset["name"])
    # Point the shared parameter dict at this dataset's file; the mutation
    # persists across iterations.
    PARAMETERS["test_mentions"] = dataset["filename"]

    # Rebuild the namespace so it reflects the updated "test_mentions".
    # NOTE(review): the loop body appears to continue beyond this excerpt.
    args = argparse.Namespace(**PARAMETERS)
Beispiel #4
0
    "test_entities": None,
    "test_mentions": None,
    "interactive": False,
    "biencoder_model": models_path + "biencoder_wiki_large.bin",
    "biencoder_config": models_path + "biencoder_wiki_large.json",
    "entity_catalogue": models_path + "entity.jsonl",
    "entity_encoding": models_path + "all_entities_large.t7",
    "crossencoder_model": models_path + "crossencoder_wiki_large.bin",
    "crossencoder_config": models_path + "crossencoder_wiki_large.json",
    "fast": True,  # set this to be true if speed is a concern
    "output_path": "logs/",  # logging directory
    "top_k": 1,
}

# Wrap the config dict in a namespace and load the models from it
# (no logger is passed to the loader here).
args = argparse.Namespace(**config)
models = main_dense.load_models(args)

# 2. Create NER-tagger.
tagger_ner = load_flair_ner("ner-fast")

# 3. Init server.
# The server is bound to localhost on port 5555. The handler is produced by
# make_handler, which receives the loaded models, the NER tagger and the
# argument namespace.
# NOTE(review): base_url, wiki_version and logger are not defined in this
# excerpt; they must be defined earlier in the file.
server_address = ("localhost", 5555)
server = HTTPServer(
    server_address,
    make_handler(base_url, wiki_version, models, tagger_ner, args, logger),
)

try:
    print("Ready for listening.")
    server.serve_forever()
except KeyboardInterrupt:
Beispiel #5
0
 def load_models(self, models_path):
     """Build the BLINK configuration for ``models_path`` and load the models.

     The configuration returned by ``build_blink_config`` is stored on
     ``self.blink_config``; the result of ``main_dense.load_models`` (called
     with ``self.blink_logger``) is stored on ``self.models``.

     Args:
         models_path: forwarded to ``build_blink_config``.
     """
     self.blink_config = blink_config = self.build_blink_config(models_path)
     self.models = main_dense.load_models(
         blink_config,
         logger=self.blink_logger,
     )
Beispiel #6
0
        "biencoder_model": models_path + "biencoder_wiki_large.bin",
        "biencoder_config": models_path + "biencoder_wiki_large.json",
        "entity_catalogue": models_path + "entity.jsonl",
        "entity_encoding": models_path + "all_entities_large.t7",
        "crossencoder_model": models_path + "crossencoder_wiki_large.bin",
        "crossencoder_config": models_path + "crossencoder_wiki_large.json",
        "top_k": 10,
        "show_url": False,
        "fast": args.fast,  # set this to be true if speed is a concern
        "output_path": models_path + "logs/",  # logging directory
        "faiss_index": None,  #"flat",
        "index_path": models_path + "faiss_flat_index.pkl",
    }

    args_blink = argparse.Namespace(**config)
    models = main_dense.load_models(args_blink, logger=logger)
    _, _, _, _, _, predictions, scores, = main_dense.run(args_blink,
                                                         logger,
                                                         *models,
                                                         test_data=for_blink,
                                                         device=args.device)

    for s, pp in zip(for_blink, predictions):
        pp = [p for p in pp if not p.startswith('List of')]
        p = f'"{pp[0]}"' if pp else '-'
        p = p.replace(' ', '_')
        graph_n = s['graph_n']
        triple_n = s['triple_n']
        triples = [g for g in graphs[graph_n].triples]
        n, rel, w = triples[triple_n]
        triples[triple_n] = Triple(n, rel, p)