# TODO CHECK SOMEWHERE ELSE
        if len(gold_list) == 0:
            # print("name " + str(names) + " does not match anything, discarding")
            if not skip:
                yield example

            continue

        gold_list = np.array(gold_list).astype(np.int32)
        # print(example["neighborhood"].entity_vertices.shape[0])
        # print("projected " + str(example["gold_entities"]) + " to " + str(gold_list))
        example["gold_entities"] = gold_list
        yield example


# Build the example stream in three chained stages: raw iteration over the
# training file, enrichment by the candidate generator, then the
# project-from-name wrapper.
epoch_iterator = train_file_iterator.iterate()
epoch_iterator = candidate_generator.enrich(epoch_iterator)
epoch_iterator = project_from_name_wrapper(epoch_iterator)

# Integer-valued defaultdict; presumably a per-label path counter, but it is
# not updated in the visible part of this script -- TODO confirm.
all_seen_paths = defaultdict(int)

for example in epoch_iterator:
    # NOTE(review): never read or written in the visible code; looks like
    # per-example bookkeeping whose use has been cut off -- confirm.
    example_seen = {}
    for g in example["gold_entities"]:
        # All paths the example's neighborhood reports for gold entity `g`.
        paths = example["neighborhood"].get_paths_to_neighboring_centroid(g)
        for path in paths:
            # The label concatenates path elements 1 and 2 for length-4 paths,
            # and elements 1, 2, 4 and 5 for every other length (so those
            # paths must have at least 6 elements).
            # NOTE(review): `label` is not consumed here; the snippet appears
            # truncated at this point.
            if len(path) == 4:
                label = path[1] + path[2]
            else:
                label = path[1] + path[2] + path[4] + path[5]
# Example #2
# 0
from helpers.read_conll_files import ConllReader
import random

# Manual labelling helper: draw a random sample of sentences from a CoNLL
# file, show each one, read a class label typed by the user, and finally print
# how often each label was used (count and percentage).

# CoNLL file the sentences are read from.
CONLL_PATH = "/home/michael/Projects/QuestionAnswering/GCNQA/data/webquestions/train.internal.conll"
# Upper bound on how many sentences are shown for labelling in one run.
SAMPLE_SIZE = 50

reader = ConllReader(CONLL_PATH)

# Rebuild each sentence as a space-joined string from element 1 of every
# token entry (presumably the word form -- verify against ConllReader).
sentences = [
    " ".join(w[1] for w in line["sentence"]) for line in reader.iterate()
]

# random.sample raises ValueError when asked for more items than the
# population holds, so cap the sample at the number of available sentences.
subset = random.sample(sentences, min(SAMPLE_SIZE, len(sentences)))

# Maps each label typed by the user to the number of times it was entered.
counter = {}

for example in subset:
    print(example)
    classification = input()
    counter[classification] = counter.get(classification, 0) + 1

total = sum(counter.values())
print("class\tcount\t%")
# When nothing was labelled, `counter` is empty and the loop body (and its
# division by `total`) is never reached.
for k, v in counter.items():
    print("{}:   \t{}   \t{}".format(k, v, v / total * 100))