# TODO CHECK SOMEWHERE ELSE if len(gold_list) == 0: # print("name " + str(names) + " does not match anything, discarding") if not skip: yield example continue gold_list = np.array(gold_list).astype(np.int32) # print(example["neighborhood"].entity_vertices.shape[0]) # print("projected " + str(example["gold_entities"]) + " to " + str(gold_list)) example["gold_entities"] = gold_list yield example epoch_iterator = train_file_iterator.iterate() epoch_iterator = candidate_generator.enrich(epoch_iterator) epoch_iterator = project_from_name_wrapper(epoch_iterator) all_seen_paths = defaultdict(int) for example in epoch_iterator: example_seen = {} for g in example["gold_entities"]: paths = example["neighborhood"].get_paths_to_neighboring_centroid(g) for path in paths: if len(path) == 4: label = path[1] + path[2] else: label = path[1] + path[2] + path[4] + path[5]
"""Hand-label a random sample of training sentences and print a tally.

The script reads sentences through ``ConllReader``, draws a random subset,
shows each sentence on stdout, reads one free-form label per sentence from
stdin, and finally prints how often each label was entered together with
its percentage share.
"""
import random
from collections import Counter

# Corpus the original script was written against.
TRAIN_FILE = (
    "/home/michael/Projects/QuestionAnswering/GCNQA/data/webquestions/train.internal.conll"
)
# Number of sentences to label per run.
SAMPLE_SIZE = 50


def load_sentences(path):
    """Return one space-joined string per item yielded by the reader.

    Each item from ``ConllReader(path).iterate()`` is indexed with
    ``"sentence"``; element 1 of every entry in that sequence is joined
    with single spaces (presumably the word form -- confirm against
    ``ConllReader``).
    """
    # Project-local import kept inside the loader so the pure helpers below
    # stay importable without the ``helpers`` package on the path.
    from helpers.read_conll_files import ConllReader

    reader = ConllReader(path)
    return [
        " ".join(token[1] for token in item["sentence"])
        for item in reader.iterate()
    ]


def draw_sample(sentences, sample_size=SAMPLE_SIZE):
    """Return up to ``sample_size`` sentences drawn without replacement.

    ``random.sample`` raises ``ValueError`` when asked for more items than
    the population holds, so the request is clamped to ``len(sentences)``.
    """
    return random.sample(sentences, min(sample_size, len(sentences)))


def classify(examples, ask=input, show=print):
    """Show each example, read a label for it, and count the labels.

    Args:
        examples: iterable of sentences to present.
        ask: zero-argument callable returning the label (defaults to
            ``input``).
        show: one-argument callable used to present the example (defaults
            to ``print``).

    Returns:
        A ``Counter`` mapping each entered label to its frequency, in
        first-seen order.
    """
    counts = Counter()
    for example in examples:
        show(example)
        counts[ask()] += 1
    return counts


def format_report(counts):
    """Return the report lines: a header followed by one row per label.

    Each row holds the label, its count, and its share of all labels in
    percent. With no labels only the header is returned, so no division
    by zero can occur.
    """
    total = sum(counts.values())
    rows = ["class\tcount\t%"]
    rows.extend(
        f"{label}: \t{count} \t{count / total * 100}"
        for label, count in counts.items()
    )
    return rows


def main(path=TRAIN_FILE, sample_size=SAMPLE_SIZE):
    """Run the interactive labelling session and print the tally."""
    sentences = load_sentences(path)
    counts = classify(draw_sample(sentences, sample_size))
    for row in format_report(counts):
        print(row)


if __name__ == "__main__":
    main()