def set_test_file(self, test_file_location):
    """Swap in a new test-set iterator reading from *test_file_location*.

    The reader inherits this object's configured KB prefix, element cap,
    disambiguation setting and score transform.
    """
    reader = ConllReader(
        test_file_location,
        self.kb_prefix,
        max_elements=self.max_elements_to_read,
        disambiguation=self.disambiguation,
        score_transform=self.score_transform,
    )
    self.test_file_iterator = reader
def construct_evaluator(self, settings, file):
    """Build an Evaluator over the dataset file named by *file*.

    Args:
        settings: Parsed configuration dict with "evaluation", "endpoint"
            and "dataset" sections.
        file: Key into settings["dataset"] naming the conll file to read.

    Returns:
        An Evaluator wrapping a ConllReader over the requested file.

    Raises:
        ValueError: If the configured evaluation type is not supported.
            (Previously an unknown type fell through to the final
            ``return evaluator`` and raised UnboundLocalError instead.)
    """
    evaluation_type = settings["evaluation"]["type"]
    method = settings["evaluation"]["method"]
    # The endpoint prefix is optional; .get() replaces the manual "in" check.
    prefix = settings["endpoint"].get("prefix", "")
    file_reader = ConllReader(settings["dataset"][file], entity_prefix=prefix)
    if evaluation_type == "cutoff":
        cutoff = float(settings["evaluation"]["cutoff"])
        return Evaluator(file_reader, cutoff, self.logger, method=method)
    # Fail loudly on a misconfigured evaluation section rather than with
    # an accidental UnboundLocalError.
    raise ValueError("Unsupported evaluation type: " + repr(evaluation_type))
# Wire up the knowledge-graph access stack: raw interface -> hypergraph
# wrapper -> candidate generator, with indexing and a disk cache layered on.
database = HypergraphInterface(database_interface, expansion_strategy, prefix=prefix)
candidate_generator = NeighborhoodCandidateGenerator(
    database, neighborhood_search_scope=1, extra_literals=True)
# Entity and relation indexers map KB identifiers to integer indices.
# NOTE(review): the (40000, 1) / (6000, 1) capacities and the 10 passed to
# FreebaseRelationIndexer are magic numbers — meaning not visible here.
e_indexer = LazyIndexer((40000, 1))
r_indexer = FreebaseRelationIndexer((6000, 1), 10)
database = IndexedInterface(database, e_indexer, r_indexer)
# Cache candidate generation to disk so repeated runs skip endpoint queries.
candidate_generator = CandidateGeneratorCache(candidate_generator, disk_cache=disk_cache)
train_file_iterator = ConllReader("data/webquestions/train.split.conll", entity_prefix=prefix)


def project_from_name_wrapper(iterator, skip=True):
    # For each example, map gold entity names onto graph indices.
    # NOTE(review): this definition appears truncated in this chunk —
    # gold_list is built but never yielded/used here, and the *skip*
    # parameter is unread in the visible body; confirm against the full file.
    for example in iterator:
        names = example["gold_entities"]
        graph = example["neighborhood"]
        # Names with inverse connections project to those targets;
        # otherwise fall back to the name's own index if the graph has one.
        name_projection_dictionary = graph.get_inverse_name_connections(names)
        gold_list = []
        for name, l in name_projection_dictionary.items():
            if len(l) > 0:
                gold_list.extend(l)
            elif graph.has_index(name):
                gold_list.append(graph.to_index(name))
from experiment_construction.candidate_generator_construction.candidate_generator_cache import CandidateGeneratorCache
from helpers.read_conll_files import ConllReader

# Debug script: enrich WebQuestions training examples with their KB
# neighborhoods and dump every edge's relation label to stdout.
database_interface = FreebaseInterface()
expansion_strategy = OnlyFreebaseExpansionStrategy()
prefix = "http://rdf.freebase.com/ns/"
disk_cache = "/datastore/michael_cache/webquestions.1neighbors.cache"

# Graph access stack: hypergraph wrapper over the raw interface, then a
# neighborhood candidate generator with a disk cache in front of it.
database = HypergraphInterface(database_interface, expansion_strategy, prefix=prefix)
candidate_generator = NeighborhoodCandidateGenerator(
    database, neighborhood_search_scope=1, extra_literals=True)
candidate_generator = CandidateGeneratorCache(candidate_generator,
                                              disk_cache=disk_cache)

train_file_iterator = ConllReader("data/webquestions/train.split.conll")
examples = candidate_generator.enrich(train_file_iterator.iterate())

for example in examples:
    graph = example["neighborhood"]
    # Print the relation (middle element) of every edge, grouped by edge kind.
    for edge_list in (graph.entity_to_event_edges,
                      graph.event_to_entity_edges,
                      graph.entity_to_entity_edges):
        for edge in edge_list:
            print(edge[1])
from helpers.read_conll_files import ConllReader
import random

# Manual annotation helper: show 50 random training sentences, read a class
# label for each from stdin, then print the label distribution.
reader = ConllReader(
    "/home/michael/Projects/QuestionAnswering/GCNQA/data/webquestions/train.internal.conll"
)

# Each sentence is the space-joined surface forms (column 1 of each token).
sentences = [" ".join(token[1] for token in line["sentence"])
             for line in reader.iterate()]
subset = random.sample(sentences, 50)

counter = {}
for example in subset:
    print(example)
    label = input()
    counter[label] = counter.get(label, 0) + 1

total = sum(counter.values())
print("class\tcount\t%")
for label, count in counter.items():
    print(f"{label}: \t{count} \t{count / total * 100}")
def __init__(self, validation_file_location, prefix):
    """Prepare validation: a conll reader over the validation split,
    plus an Evaluator consuming that reader."""
    reader = ConllReader(validation_file_location, entity_prefix=prefix)
    self.validation_file_iterator = reader
    self.evaluator = Evaluator(reader)