Exemple #1
0
from zincbase import KB
from zincbase.utils.data_science import calc_auc_roc

kb = KB()

# Load the held-out test triples. Each tab-separated row is split into the
# (head, relation) pair kept in Xs and the expected tail kept in Ys.
Xs = []
Ys = []
# Read inside a context manager so the file handle is closed as soon as the
# rows are consumed instead of being leaked for the rest of the script.
with open('./assets/countries_s3_test.csv', 'r') as test_file:
    csvfile = csv.reader(test_file, delimiter='\t')
    for row in csvfile:
        Xs.append([row[0], row[1]])
        Ys.append(row[2])

# The training triples go into the knowledge base itself.
kb.from_csv('./assets/countries_s3_train.csv', delimiter='\t')

kb.build_kg_model(cuda=True, embedding_size=1000, gamma=0.1)
kb.train_kg_model(steps=40000, batch_size=512, lr=0.000002, neg_to_pos=64)

# For every test (head, relation) pair, build one candidate triple per region:
# y_true holds 1 where the candidate equals the expected tail and 0 otherwise,
# sample holds the matching (head id, relation id, candidate id) triple.
y_true = []
sample = []
for ((head, relation), tail) in zip(Xs, Ys):
    for candidate_region in [
            'oceania', 'asia', 'europe', 'africa', 'americas'
    ]:
        y_true.append(1 if candidate_region == tail else 0)
        sample.append((kb._entity2id[head], kb._relation2id[relation],
                       kb._entity2id[candidate_region]))
# The candidate triples are moved to the GPU (the model was built with
# cuda=True) and scored without tracking gradients.
sample = torch.LongTensor(sample).cuda()
with torch.no_grad():
    # The model returns a pair; only the first element is used here.
    # NOTE(review): presumably one score per candidate triple -- confirm.
    y_score, _ = kb._kg_model(sample)
    y_score = y_score.squeeze(1).cpu().numpy()
Exemple #2
0
"""Test the combination of edge attributes and negative examples."""

import context

from zincbase import KB

# Build a seeded knowledge base from the countries S1 training triples.
kb = KB()
kb.seed(555)

kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# Store a negated fact ('~' prefix) that also carries a 'truthiness' edge
# attribute of -1.
# specifying both a ~ and a truthiness < 0 is probably unnecessary.
kb.store('~locatedin(canada, africa)', {'truthiness': -1.})

# Build a CPU model that is also given 'truthiness' as a predicate attribute.
kb.build_kg_model(cuda=False,
                  embedding_size=100,
                  pred_attributes=['truthiness'])

kb.train_kg_model(steps=1000, batch_size=4, neg_ratio=0.01)

# Estimated probabilities for three candidate locations of canada.
canada_in_africa = kb.estimate_triple_prob('canada', 'locatedin', 'africa')
canada_in_asia = kb.estimate_triple_prob('canada', 'locatedin', 'asia')
canada_in_america = kb.estimate_triple_prob('canada', 'locatedin',
                                            'northern_america')
# The explicitly negated triple must score below half of the asia estimate,
# and the northern_america estimate must be more than twice the asia estimate.
assert 2 * canada_in_africa < canada_in_asia
assert canada_in_america > 2 * canada_in_asia

# Same africa/asia triples, this time estimated together with the
# 'truthiness' attribute.
clafrica_truthiness = kb.estimate_triple_prob_with_attrs(
    'canada', 'locatedin', 'africa', 'truthiness')
clasia_truthiness = kb.estimate_triple_prob_with_attrs('canada', 'locatedin',
                                                       'asia', 'truthiness')
Exemple #3
0
from zincbase import KB

# Build a seeded knowledge base from the countries S1 training triples.
kb = KB()
kb.seed(555)

kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# Store a negated fact ('~' prefix) and keep its rule number for deletion.
rule_num = kb.store('~locatedin(canada, africa)')

# With the negated fact stored, the symbolic query must still return exactly
# one location for canada.
b = list(kb.query('locatedin(canada, X)'))
assert len(b) == 1
assert b[0]['X'] == 'northern_america'
# Delete the rule outside of the assert: assert statements are stripped under
# `python -O`, which would silently skip the deletion and leave the negated
# fact in the KB used for the "naive" model below.
rule_deleted = kb.delete_rule(rule_num)
assert rule_deleted

# Train a model on the KB after the negated fact has been removed.
kb.build_kg_model(cuda=False, embedding_size=100)

kb.train_kg_model(steps=500, batch_size=512, neg_ratio=0.01)

# Estimates from the model trained without the negated fact.
# NOTE(review): presumably a baseline for a later comparison -- confirm.
canada_in_africa_naive = kb.estimate_triple_prob('canada', 'locatedin',
                                                 'africa')
canada_in_asia_naive = kb.estimate_triple_prob('canada', 'locatedin', 'asia')

austria_neighbors_spain_naive = kb.estimate_triple_prob(
    'austria', 'neighbor', 'spain')
austria_neighbors_france_naive = kb.estimate_triple_prob(
    'austria', 'neighbor', 'france')

# Start over with a fresh, identically seeded KB built from the same data.
kb = KB()
kb.seed(555)
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')
Exemple #4
0
"""

from zincbase import KB

kb = KB()

kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# Symbolic queries over the loaded facts.
northern_europe_members = list(kb.query('locatedin(X, northern_europe)'))
print(northern_europe_members)
# expected output: [{'X': 'norway'}, {'X': 'iceland'}, {'X': 'faroe_islands'}, ...]

austria_neighbors = list(kb.query('neighbor(austria, X)'))
print(austria_neighbors)
# expected output: [{'X': 'italy'}, {'X': 'czechia'}, {'X': 'slovenia'}, ...]

# Build and train the embedding model on the GPU.
kb.build_kg_model(cuda=True, embedding_size=100)

kb.train_kg_model(steps=1000, batch_size=512)  # takes < 1 minute

# Probability estimate for a single triple.
mali_in_africa = kb.estimate_triple_prob('mali', 'locatedin', 'africa')
print(mali_in_africa)
# expected output: a number close to 1

# Top-k completions for the '?' slot of a triple.
singapore_locations = kb.get_most_likely('singapore', 'locatedin', '?', k=2)
print(singapore_locations)
# expected output: [{'prob': 0.9672, 'triple': ('singapore', 'locatedin', 'south_eastern_asia')}, ...]

austria_likely_neighbors = kb.get_most_likely('austria', 'neighbor', '?', k=8)
print(austria_likely_neighbors)
# expected output: [{'prob': 0.9749, 'triple': ('austria', 'neighbor', 'liechtenstein')} ...]

# Fit the nearest-neighbour index before querying it.
kb.fit_knn()

uganda_neighbors = kb.get_nearest_neighbors('uganda', k=4)
print(uganda_neighbors)
Exemple #5
0
# # # # # # # # # # # # # # # # # # # # # # # #
# First group of entities: tagged as not owning a raincoat.
# A fresh dict is built for every call, as in the unrolled original.
for _person in ('tom', 'todd', 'oleg', 'john', 'akshay', 'vedant'):
    kb.attr(_person, {'owns_a_raincoat': 0.0, 'doesnt_own_raincoat': 1.0})

# Second group of entities: tagged as owning a raincoat.
for _person in ('other1', 'other2', 'other3', 'other4', 'other5', 'other6'):
    kb.attr(_person, {'owns_a_raincoat': 1.0, 'doesnt_own_raincoat': 0.0})

# CPU model that takes both raincoat flags as node attributes.
kb.build_kg_model(
    cuda=False,
    embedding_size=30,
    node_attributes=['owns_a_raincoat', 'doesnt_own_raincoat'],
    attr_loss_to_graph_loss=0.9,
)
# A batch size of 1 would be ideal for overfitting this tiny dataset;
# 2 is used so that batches larger than one are exercised as well.
kb.train_kg_model(steps=12001, batch_size=2, neg_to_pos=4)

# Attribute prediction check: 'other1' was tagged as a raincoat owner, so its
# embedding must decode to owns_a_raincoat ~ 1 and doesnt_own_raincoat ~ 0.
x = kb._kg_model.run_embedding(kb.get_embedding('other1'), 'owns_a_raincoat')
y = kb._kg_model.run_embedding(
    kb.get_embedding('other1'), 'doesnt_own_raincoat')
assert round(x) == 1
assert round(y) == 0
# Same lookup for the next entity in the raincoat-owning group.
x = kb._kg_model.run_embedding(kb.get_embedding('other2'), 'owns_a_raincoat')