Esempio n. 1
0
def get_clusters(model_file,
                 word_vectors_file,
                 test_file,
                 get_gold_clusters=False):
    """Run the mention-pair model over a test file and cluster the mentions.

    Each record read from ``test_file`` is wrapped in a ``Document``; its
    candidate mention pairs are scored by the loaded model and the scores
    are handed to ``cluster_mentions`` to build that document's clusters.

    Args:
        model_file: Passed to ``load_model`` to obtain the scoring model.
        word_vectors_file: Passed to ``EmbedMap`` to build the embedding
            lookup used when constructing each ``Document``.
        test_file: Path opened with ``jsonlines.open``; one document per
            record.
        get_gold_clusters: When true, also return the gold clusters taken
            from each document.

    Returns:
        ``sys_clusters`` (dict keyed by ``doc.doc_id`` holding the result of
        ``cluster_mentions``), or the tuple ``(gold_clusters, sys_clusters)``
        when ``get_gold_clusters`` is true.
    """
    # load model and vector mappings.
    model = load_model(model_file)
    embed_map = EmbedMap(word_vectors_file)

    # calculate system clusters
    gold_clusters, sys_clusters = {}, {}

    # Use the reader as a context manager so the underlying file is closed
    # even if a document fails to process part-way through.
    with jsonlines.open(test_file) as test_file_reader:
        for doc_data in test_file_reader.iter():
            doc = Document(doc_data, embed_map,
                           remove_multi_word_mention=False)
            mention_pairs, ref_index = doc.generate_candidate_mention_pairs()
            mention_pairs_array, _ = convert_train_data(mention_pairs)

            # get predictions: keep only the first value of each output row
            # and key it by the corresponding entry of ref_index.
            predicts = [score[0]
                        for score in model.predict(mention_pairs_array)]
            predicts = dict(zip(ref_index, predicts))

            # generate clusters.
            doc_clusters = cluster_mentions(list(doc.candidates.keys()),
                                            predicts)

            gold_clusters[doc.doc_id] = doc.gold_clusters
            sys_clusters[doc.doc_id] = doc_clusters

    if get_gold_clusters:
        return gold_clusters, sys_clusters
    return sys_clusters