def set_test_file(self, test_file_location):
    """Point this object at a new test file.

    Builds a fresh ConllReader over *test_file_location*, configured from
    the instance's stored settings (prefix, element cap, disambiguation,
    score transform), and stores it as ``self.test_file_iterator``.
    """
    reader = ConllReader(
        test_file_location,
        self.kb_prefix,
        max_elements=self.max_elements_to_read,
        disambiguation=self.disambiguation,
        score_transform=self.score_transform,
    )
    self.test_file_iterator = reader
    def construct_evaluator(self, settings, file):
        """Build an Evaluator for the dataset named by *file*.

        Parameters
        ----------
        settings : dict
            Nested configuration; reads ``evaluation.type``,
            ``evaluation.method``, ``evaluation.cutoff``,
            ``endpoint.prefix`` (optional) and ``dataset[file]``.
        file : str
            Key into ``settings["dataset"]`` giving the conll file path.

        Raises
        ------
        ValueError
            If the configured evaluation type is not supported.
        """
        evaluation_type = settings["evaluation"]["type"]
        method = settings["evaluation"]["method"]
        # .get replaces the "prefix" in settings["endpoint"] membership check.
        prefix = settings["endpoint"].get("prefix", "")
        file_reader = ConllReader(settings["dataset"][file],
                                  entity_prefix=prefix)

        # Bug fix: the original fell through to `return evaluator` for any
        # type other than "cutoff", raising UnboundLocalError. Fail loudly
        # with a meaningful error instead.
        if evaluation_type != "cutoff":
            raise ValueError(
                "Unsupported evaluation type: " + str(evaluation_type))

        cutoff = float(settings["evaluation"]["cutoff"])
        return Evaluator(file_reader,
                         cutoff,
                         self.logger,
                         method=method)
# Wrap the raw database interface in a hypergraph view, expanded per the
# configured strategy and namespaced with the entity prefix.
database = HypergraphInterface(database_interface,
                               expansion_strategy,
                               prefix=prefix)

# Candidates come from the 1-hop neighborhood; literals are included.
candidate_generator = NeighborhoodCandidateGenerator(
    database, neighborhood_search_scope=1, extra_literals=True)

# Indexers map entities/relations to integer ids.
# NOTE(review): capacities (40000 entities, 6000 relations, 10) look like
# dataset-specific magic numbers — confirm against the indexer definitions.
e_indexer = LazyIndexer((40000, 1))
r_indexer = FreebaseRelationIndexer((6000, 1), 10)

# Rebind `database` so downstream code sees indexed (integer-id) results.
database = IndexedInterface(database, e_indexer, r_indexer)
# Cache candidate generation on disk to avoid repeated endpoint queries.
candidate_generator = CandidateGeneratorCache(candidate_generator,
                                              disk_cache=disk_cache)

# Iterator over the WebQuestions training split in conll format.
train_file_iterator = ConllReader("data/webquestions/train.split.conll",
                                  entity_prefix=prefix)


def project_from_name_wrapper(iterator, skip=True):
    """Map each example's gold entity names to graph indices.

    For every example, looks up the inverse name connections of the gold
    entities in the example's neighborhood graph and collects the resulting
    indices; names with no connection fall back to their own graph index
    when present.

    NOTE(review): this snippet appears truncated — ``gold_list`` is built
    but never yielded/returned, and the ``skip`` parameter is unused in the
    visible portion. Confirm against the original source before relying on
    this fragment.
    """
    for example in iterator:
        names = example["gold_entities"]
        graph = example["neighborhood"]
        # Inverse lookup: gold name -> list of connected graph elements.
        name_projection_dictionary = graph.get_inverse_name_connections(names)

        gold_list = []
        for name, l in name_projection_dictionary.items():
            if len(l) > 0:
                gold_list.extend(l)
            elif graph.has_index(name):
                # No connections, but the name itself is indexed in the graph.
                gold_list.append(graph.to_index(name))
# Example #4
from experiment_construction.candidate_generator_construction.candidate_generator_cache import CandidateGeneratorCache
from helpers.read_conll_files import ConllReader

# Freebase endpoint plus an expansion strategy restricted to Freebase nodes.
database_interface = FreebaseInterface()
expansion_strategy = OnlyFreebaseExpansionStrategy()

# Freebase RDF namespace used to qualify entity ids.
prefix = "http://rdf.freebase.com/ns/"
# On-disk cache for 1-hop neighborhood candidate lookups.
disk_cache = "/datastore/michael_cache/webquestions.1neighbors.cache"

database = HypergraphInterface(database_interface,
                               expansion_strategy,
                               prefix=prefix)

# 1-hop candidate generation (literals included), cached on disk.
candidate_generator = NeighborhoodCandidateGenerator(
    database, neighborhood_search_scope=1, extra_literals=True)
candidate_generator = CandidateGeneratorCache(candidate_generator,
                                              disk_cache=disk_cache)

# Stream training examples and attach their candidate neighborhoods.
train_file_iterator = ConllReader("data/webquestions/train.split.conll")
epoch_iterator = train_file_iterator.iterate()
epoch_iterator = candidate_generator.enrich(epoch_iterator)

# Dump the relation component (index 1) of every edge in each example's
# neighborhood graph: entity->event, event->entity, then entity->entity.
for example in epoch_iterator:
    graph = example["neighborhood"]
    edge_groups = (graph.entity_to_event_edges,
                   graph.event_to_entity_edges,
                   graph.entity_to_entity_edges)
    for edges in edge_groups:
        for edge in edges:
            print(edge[1])
# Example #5
from helpers.read_conll_files import ConllReader
import random

# Read the internal training split and flatten each conll sentence (one
# token per row, surface form in column 1) back into a plain string.
reader = ConllReader(
    "/home/michael/Projects/QuestionAnswering/GCNQA/data/webquestions/train.internal.conll"
)
# Idiomatic: build the list with a comprehension instead of a manual
# append loop (ruff PERF401).
sentences = [
    " ".join(token[1] for token in line["sentence"])
    for line in reader.iterate()
]

# Manually classify a random sample of 50 sentences: print each one, read
# a label from stdin, then report the per-label counts and percentages.
subset = random.sample(sentences, 50)

# label -> number of sampled sentences given that label
counter = {}

for example in subset:
    print(example)
    classification = input()
    # dict.get replaces the separate membership check + init-to-zero.
    counter[classification] = counter.get(classification, 0) + 1

# sum over the values view directly — no throwaway list, no unused keys
# (ruff PERF102 / C4xx).
total = sum(counter.values())
print("class\tcount\t%")
for k, v in counter.items():
    print(str(k) + ":   \t" + str(v) + "   \t" + str(v / total * 100))
 def __init__(self, validation_file_location, prefix):
     """Set up a validation-file reader and an evaluator over it.

     Parameters:
         validation_file_location: path to the validation conll file.
         prefix: entity prefix passed through to the ConllReader.
     """
     self.validation_file_iterator = ConllReader(validation_file_location,
                                                 entity_prefix=prefix)
     # NOTE(review): Evaluator is called with a single argument here but
     # with (reader, cutoff, logger, method=...) elsewhere in this file —
     # confirm which signature is current.
     self.evaluator = Evaluator(self.validation_file_iterator)