def test_predict():
    """Check that ``predict`` agrees for label triples and pre-indexed triples.

    A small DistMult model is fitted on five string-labelled triples.  The
    scores obtained by predicting on the raw labels must equal the scores
    obtained by predicting on the same triples converted with ``to_idx`` and
    passed with ``from_idx=True``.
    """
    triples = np.array([
        ['a', 'y', 'b'],
        ['b', 'y', 'a'],
        ['a', 'y', 'c'],
        ['c', 'z', 'a'],
        ['a', 'z', 'd'],
    ])

    hyperparams = {
        'batches_count': 2,
        'seed': 555,
        'epochs': 1,
        'k': 10,
        'loss': 'pairwise',
        'loss_params': {'margin': 5},
        'optimizer': 'adagrad',
        'optimizer_params': {'lr': 0.1},
    }
    model = DistMult(**hyperparams)
    model.fit(triples)

    # Scores computed from the raw string labels.
    scores_from_labels = model.predict(triples)

    # Scores computed from the same triples, mapped with the model's own
    # entity / relation dictionaries.
    indexed_triples = to_idx(triples, model.ent_to_idx, model.rel_to_idx)
    scores_from_indices = model.predict(indexed_triples, from_idx=True)

    np.testing.assert_array_equal(scores_from_labels, scores_from_indices)


def test_oneton_adapter():
    """Exercise ``OneToNDatasetAdapter`` on a tiny single-relation graph.

    The test walks through, in order:

    * loading a pre-mapped train set and an unmapped test set,
    * generating / setting output mappings and the ``ValueError`` raised when
      outputs are generated before a mapping is set,
    * one-hot batches for train and test, unfiltered and filtered,
    * ``clear_outputs`` and ``verify_outputs`` bookkeeping,
    * batch shapes with and without ``unique_pairs``,
    * subject-corruption batches for a fixed batch size and for
      ``batch_size=-1``.
    """
    from ampligraph.evaluation.protocol import create_mappings, to_idx

    # Train set
    X = np.array([['a', 'p', 'b'],
                  ['a', 'p', 'd'],
                  ['c', 'p', 'd'],
                  ['c', 'p', 'e'],
                  ['c', 'p', 'f']])

    # Expected train one-hot outputs.
    #              a, b, c, d, e, f
    O = np.array([[0, 1, 0, 1, 0, 0],   # (a, p)
                  [0, 0, 0, 1, 1, 1]])  # (c, p)

    # Test set
    T = np.array([['a', 'p', 'c'],
                  ['c', 'p', 'b']])

    # NOTE: these are plain arrays.  A trailing comma after the closing
    # parenthesis would silently wrap each array in a 1-tuple.

    # Test set one-hots when the output mapping comes from the train set.
    #                a, b, c, d, e, f
    OT1 = np.array([[0, 1, 0, 1, 0, 0],   # (a, p)
                    [0, 0, 0, 1, 1, 1]])  # (c, p)

    # Test set one-hots when the output mapping comes from the test set.
    #                a, b, c, d, e, f
    OT2 = np.array([[0, 0, 1, 0, 0, 0],   # (a, p)
                    [0, 1, 0, 0, 0, 0]])  # (c, p)

    # Filter: built from the string-labelled train and test triples (X is
    # only converted to indices further down).
    filter_triples = np.concatenate((X, T))

    # Train set one-hots when the output mapping comes from the filter.
    #               a, b, c, d, e, f
    OF = np.array([[0, 1, 1, 1, 0, 0],   # (a, p)
                   [0, 1, 0, 1, 1, 1]])  # (c, p)

    # Expected (subject idx, predicate idx) -> filtered one-hot output
    OF_map = {(0, 0): [0, 1, 1, 1, 0, 0],
              (2, 0): [0, 1, 0, 1, 1, 1]}

    rel_to_idx, ent_to_idx = create_mappings(X)
    X = to_idx(X, ent_to_idx, rel_to_idx)

    adapter = OneToNDatasetAdapter()
    adapter.use_mappings(rel_to_idx, ent_to_idx)
    adapter.set_data(X, 'train', mapped_status=True)
    adapter.set_data(T, 'test', mapped_status=False)

    # Adapter internally maps test set
    assert adapter.mapped_status['test']

    # Generate output map
    train_output_map = adapter.generate_output_mapping('train')

    # Every key of the output map must be an (s, p) pair that occurs in the
    # train set.
    unique_sp = {(s, p) for s, p in X[:, [0, 1]]}
    for sp in train_output_map:
        assert sp in unique_sp

    # ValueError if generating onehot outputs before output_mapping is set
    with pytest.raises(ValueError):
        adapter.generate_outputs('train')

    adapter.set_output_mapping(train_output_map)
    adapter.generate_outputs('train')

    # Train one-hot outputs, train output mapping, no filter
    train_iter = adapter.get_next_batch(batches_count=1, dataset_type='train', use_filter=False)
    triples, onehot = next(train_iter)
    assert np.all(np.unique(X[:, [0, 1]], axis=0) == triples[:, [0, 1]])
    assert np.all(O == onehot)

    # Test one-hot outputs while the train output mapping is still active
    test_iter = adapter.get_next_batch(batches_count=1, dataset_type='test', use_filter=False)
    triples, onehot = next(test_iter)
    assert np.all(np.unique(X[:, [0, 1]], axis=0) == triples[:, [0, 1]])
    assert np.all(OT1 == onehot)

    # Generate test output map
    test_output_map = adapter.generate_output_mapping('test')
    adapter.set_output_mapping(test_output_map)
    test_iter = adapter.get_next_batch(batches_count=1, dataset_type='test', use_filter=False)
    triples, onehot = next(test_iter)
    assert np.all(np.unique(X[:, [0, 1]], axis=0) == triples[:, [0, 1]])
    assert np.all(OT2 == onehot)

    # Train onehot outputs with filter=True
    adapter.set_filter(filter_triples=filter_triples)
    train_iter = adapter.get_next_batch(batches_count=1, dataset_type='train', use_filter=True)
    triples, onehot = next(train_iter)
    assert np.all(np.unique(X[:, [0, 1]], axis=0) == triples[:, [0, 1]])
    assert np.all(OF == onehot)

    # Test adapter clear_outputs
    assert len(adapter.filtered_status) > 0
    adapter.clear_outputs()
    assert len(adapter.filtered_status) == 0

    # Test verify_outputs: only the exact (use_filter, unique_pairs)
    # combination that was generated should verify.
    adapter.clear_outputs()
    adapter.generate_outputs('train', use_filter=False, unique_pairs=False)
    assert adapter.verify_outputs('train', use_filter=False, unique_pairs=False)
    assert not adapter.verify_outputs('train', use_filter=True, unique_pairs=True)
    assert not adapter.verify_outputs('train', use_filter=True, unique_pairs=False)
    assert not adapter.verify_outputs('train', use_filter=False, unique_pairs=True)

    adapter.clear_outputs()
    adapter.generate_outputs('train', use_filter=True, unique_pairs=True)
    assert not adapter.verify_outputs('train', use_filter=False, unique_pairs=False)
    assert adapter.verify_outputs('train', use_filter=True, unique_pairs=True)
    assert not adapter.verify_outputs('train', use_filter=True, unique_pairs=False)
    assert not adapter.verify_outputs('train', use_filter=False, unique_pairs=True)

    # Test batch output shapes
    adapter.clear_outputs()
    train_iter = adapter.get_next_batch(batches_count=1, dataset_type='train',
                                        use_filter=True, unique_pairs=True)
    triples, onehot = next(train_iter)
    # unique_pairs=True: one row per distinct (s, p) pair -> 2 rows
    assert triples.shape[0] == 2
    assert onehot.shape[0] == 2

    adapter.clear_outputs()
    train_iter = adapter.get_next_batch(batches_count=1, dataset_type='train',
                                        use_filter=True, unique_pairs=False)
    triples, onehot = next(train_iter)
    # unique_pairs=False: one row per train triple -> 5 rows
    assert triples.shape[0] == 5
    assert onehot.shape[0] == 5

    # Test batch subject corruptions
    batch_size = 3
    batch_iter = adapter.get_next_batch_subject_corruptions(batch_size=batch_size,
                                                            dataset_type='train',
                                                            use_filter=True)

    triples, out, out_onehot = next(batch_iter)

    # only 1 relation in X, so triples should be the same (ignores batch_size)
    assert np.all(X == triples)
    assert triples.shape[1] == 3  # triples should be triples!
    assert out.shape[1] == 3  # batch out should also be triples!
    assert out.shape[0] == batch_size  # batch dimension of out should equal batch_size
    assert out_onehot.shape[0] == batch_size  # Onehot batch dimension should equal batch size
    assert out_onehot.shape[0] == out.shape[0]
    # .. and should be same size as number of unique entities
    assert out_onehot.shape[1] == len(adapter.ent_to_idx)

    adapter.clear_outputs()
    batch_iter = adapter.get_next_batch_subject_corruptions(batch_size=-1,
                                                            dataset_type='train',
                                                            use_filter=True)
    triples, out, out_onehot = next(batch_iter)

    assert np.all(X == triples)  # only 1 relation in X, so triples should be the same
    assert triples.shape[1] == 3  # triples should be triples!
    assert out.shape[1] == 3  # batch out should also be triples!
    # batch_size=-1 (entire set), so the batch_dim should equal ents
    assert out.shape[0] == len(adapter.ent_to_idx)
    assert out_onehot.shape[0] == out_onehot.shape[1]  # Onehot should be a square matrix
    assert out_onehot.shape[0] == out.shape[0]
    # .. and should be same size as number of unique entities
    assert out_onehot.shape[1] == len(adapter.ent_to_idx)

    # Verify that onehot outputs are as expected in the out array
    # (or if not present in OF_map then is a zero vector)
    for idx, (s, p, _) in enumerate(out):
        if (s, p) in OF_map:
            expected = OF_map[(s, p)]
            assert np.all(expected == out_onehot[idx])
        else:
            assert np.all(out_onehot[idx] == 0)