コード例 #1
0
            yield t


def store(triples, full_path):
    """Write triples to a tab-separated text file, one triple per line.

    Each triple is written as ``subject<TAB>predicate<TAB>object`` followed
    by a newline. An existing file at ``full_path`` is overwritten.

    Args:
        triples: Iterable of 3-item sequences of ``str`` (s, p, o). It is
            consumed once, so a generator is fine.
        full_path: Path of the output file. Missing parent directories are
            created, so a fresh output directory does not have to be
            prepared by hand.

    Raises:
        ValueError: If an item of ``triples`` does not unpack into exactly
            three values.
        TypeError: If a component of a triple is not a string.
    """
    import os  # local import: keeps this function self-contained

    # open(..., 'w') does not create directories; do it up front. dirname is
    # '' for a bare file name, and os.makedirs('') would raise.
    parent_dir = os.path.dirname(full_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Text encoding is left at the platform default, as before.
    with open(full_path, 'w') as writer:
        writer.writelines(
            '\t'.join((s, p, o)) + '\n' for s, p, o in triples
        )


# For every benchmark, write a copy into a '*'-suffixed sibling directory:
# the train split is stored unchanged, while the valid and test splits are
# first passed through clean_dataset together with the entities and
# relations obtained from the train split.
kg = ['WN18RR', 'FB15k-237', 'YAGO3-10']
for i in kg:
    data_dir = f'KGs/{i}/'
    clean_data_dir = f'KGs/{i}*/'
    dataset = Dataset(data_dir=data_dir)
    print(data_dir)

    # Derive the train-split vocabulary once per dataset and share it between
    # the valid and test cleaning calls instead of recomputing it for each.
    # NOTE(review): assumes get_entities/get_relations are side-effect free
    # and that clean_dataset does not mutate these arguments - confirm.
    train_entities = dataset.get_entities(dataset.train_data)
    train_relations = dataset.get_relations(dataset.train_data)

    clean_valid_set = clean_dataset(
        dataset.valid_data,
        entities=train_entities,
        relations=train_relations)
    clean_test_set = clean_dataset(
        dataset.test_data,
        entities=train_entities,
        relations=train_relations)

    store(dataset.train_data, clean_data_dir + 'train.txt')  # Train set
    store(clean_valid_set, clean_data_dir + 'valid.txt')  # Cleaned valid set
    store(clean_test_set, clean_data_dir + 'test.txt')  # Clean test set
from util.data import Dataset

# Describe out-of-vocabulary entities for the original benchmark splits.
kg = ['WN18RR', 'FB15k-237', 'YAGO3-10']
for i in kg:
    kg_dir = f'KGs/{i}/'
    dataset = Dataset(data_dir=kg_dir)
    # Entities obtained from the train split, held as a set.
    train_triples = dataset.train_data
    entities = set(dataset.get_entities(train_triples))
    # The test split is described first, then the validation split.
    dataset.describe_oov(dataset.test_data, entities, info=f'{i}-Test set')
    dataset.describe_oov(dataset.valid_data, entities, info=f'{i}-Val set')

# Same out-of-vocabulary description, this time for the cleaned datasets
# (the '*'-suffixed directories).
kg = ['WN18RR*', 'FB15k-237*', 'YAGO3-10*']
for i in kg:
    cleaned_dir = f'KGs/{i}/'
    dataset = Dataset(data_dir=cleaned_dir)
    # Entities obtained from the train split, held as a set.
    train_split = dataset.train_data
    entities = set(dataset.get_entities(train_split))
    # The test split is described first, then the validation split.
    dataset.describe_oov(dataset.test_data, entities, info=f'{i}-Test set')
    dataset.describe_oov(dataset.valid_data, entities, info=f'{i}-Val set')