def make_unit_network(
        df,
        charge_types,
        target_charge_type='charge_1_description_category_macro'):
    """Populate a Zincbase KB named 'cpd' with unit/beat facts taken from `df`.

    Args:
        df: DataFrame that must contain the columns
            'lead_charge_police_related', 'beat', 'unit', 'arrest_time',
            'lead_charge_code' and the column named by `target_charge_type`.
        charge_types: iterable of charge-type labels to apply. Recognised
            labels and the `lead_charge_code` ranges they cover:
              'Felony'          -> code > 7
              'Misdemeanor'     -> 4 < code <= 7
              'Petty or Other'  -> 0 < code <= 4
              'Not Specified'   -> code < 0
            Any other label is ignored. Rows matched by no label (including
            code == 0 and missing codes) keep the string "None".
        target_charge_type: name of an extra column copied into the working
            frame.

    Facts stored in the KB:
        isA(<value>,unit) / isA(<value>,beat) for every distinct value, in
        sorted order, and assignedTo(<beat>, <unit>) for every distinct
        (unit, beat) pair.

    NOTE(review): nothing is returned in the portion of the function visible
    here; confirm whether callers expect the KB back.
    """
    lead_charge_code = 'lead_charge_code'
    lead_charge_code_type = f'{lead_charge_code}_type'

    kb = KB()
    kb.name = 'cpd'

    # Work on a deep copy so the caller's frame is never mutated.
    data = df[[
        'lead_charge_police_related', 'beat', 'unit', 'arrest_time',
        target_charge_type, lead_charge_code
    ]].copy(deep=True)

    # Create the label column up front. Previously only the 'Felony' branch
    # created it, so any other label processed first raised KeyError, and a
    # late 'Felony' silently reset labels assigned earlier.
    data[lead_charge_code_type] = "None"
    codes = data[lead_charge_code]
    for charge_type in charge_types:
        if charge_type == 'Felony':
            mask = codes > 7
        elif charge_type == 'Misdemeanor':
            mask = (codes > 4) & (codes <= 7)
        elif charge_type == 'Petty or Other':
            mask = (codes > 0) & (codes <= 4)
        elif charge_type == 'Not Specified':
            mask = codes < 0
        else:
            # Unknown labels are ignored, as before.
            continue
        # The ranges are disjoint, so updating in place is order-independent.
        data[lead_charge_code_type] = np.where(
            mask, charge_type, data[lead_charge_code_type])

    # Placeholder written for missing values before casting to str.
    fill_values = {'unit': '000', 'beat': '0000'}
    for key, fill_val in fill_values.items():
        data[key] = data[key].fillna(fill_val).astype(str)
        for value in sorted(data[key].unique().tolist()):
            kb.store(f'isA({value},{key})')

    unit_nodes = data[['unit', 'beat']].drop_duplicates()
    for unit, beat in zip(unit_nodes['unit'], unit_nodes['beat']):
        kb.store(f'assignedTo({beat}, {unit})')
There's no pressing need to do this using Zincbase -- it's just a demo. """ import random import cv2 import numpy as np from PIL import Image import torch from zincbase import KB MAX_Y = 100 MAX_X = 100 kb = KB() for y in range(MAX_Y): for x in range(MAX_X): state = torch.bernoulli(torch.tensor([0.4])).int().item() kb.store(f'cell({(y * MAX_X) + x})', node_attributes=[{'x': x, 'y': y, 'state': state}]) node = kb.node((y * MAX_X) + x) for y in range(MAX_Y): for x in range(MAX_X): neighbors = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1)) for neighbor in neighbors: y_n = y + neighbor[0] x_n = x + neighbor[1]
import context from zincbase import KB kb = KB() kb.store('a(b,c)') kb.node('b')['is_letter'] = 1.0 assert kb.node('b').attrs == {'is_letter': 1.0} assert 'is_letter' in kb.node('b') kb.node('b')['is_letter'] = 2.0 del kb.node('b')['is_letter'] assert 'is_letter' not in kb.node('b').attrs kb.node('b')['is_letter'] = 2.0 assert kb.node('b').attrs == {'is_letter': 2.0} kb.edge('b', 'a', 'c').both_alpha = 1.0 assert kb.edge('b', 'a', 'c').attrs == {'both_alpha': 1.0} kb.edge('b', 'a', 'c')['both_alpha'] = 2.0 assert kb.edge('b', 'a', 'c').attrs == {'both_alpha': 2.0} assert kb.to_triples() == [('b', 'a', 'c')] triples = kb.to_triples(data=True) assert triples == [('b', 'a', 'c', { 'is_letter': 2.0 }, { 'both_alpha': 2.0 }, {}, False)] kb.node('c').is_letter = 0.9 triples = kb.to_triples(data=True) assert triples == [('b', 'a', 'c', { 'is_letter': 2.0 }, { 'both_alpha': 2.0 }, {
import context
from zincbase import KB

kb = KB()
kb.store('bought_ticket(tom)')
# store() hands back a handle for the rule; it is reused below with kb.rule().
rule_num = kb.store('winner(X) :- bought_ticket(X), had_correct_numbers(X)')
# The same rule is expected to be reachable by its head and by that handle.
assert str(kb.rule('winner(X)')) == 'winner(X)'
assert kb.rule(rule_num) == kb.rule('winner(X)')
# Only one of the two body goals is satisfied so far, so no winner is expected.
assert list(kb.query('winner(X)')) == []
fake_lottery_win = kb.store('had_correct_numbers(tom)')
assert list(kb.query('winner(X)')) == [{'X': 'tom'}]
# Removing the second fact is expected to empty the query result again.
kb.delete_rule(fake_lottery_win)
assert list(kb.query('winner(X)')) == []
tom = kb.node('tom')
# Module-level counter incremented by the callback below.
possible_winner_called = 0
def possible_winner(me, affected_nodes, node_that_changed, attr_changed,
                    cur_val, prev_val):
    """Callback assigned to the rule's `on_change` attribute.

    Increments `possible_winner_called` whenever `cur_val` is not 6.
    The other parameters are accepted but unused here.
    """
    global possible_winner_called
    if cur_val != 6:
        possible_winner_called += 1
    # NOTE(review): the source was whitespace-collapsed; this return is
    # assumed to sit at function level rather than inside the `if` -- confirm.
    return False
kb.rule(rule_num).on_change = possible_winner
"""Test negative examples using Countries.

The main idea here is that if we explicitly enter some false facts
(signalling to the KB that they are false), it should make less-wrong
predictions for them, versus just going by its own synthetic negative
examples.

It may have the side effect of pushing UP the probability of other wrong
triples, see e.g. "canada in asia" below.
"""
import context
from zincbase import KB

kb = KB()
kb.seed(555)
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# A negated fact must not show up as a query answer: canada is still
# expected to be located only in northern_america.
rule_num = kb.store('~locatedin(canada, africa)')
b = list(kb.query('locatedin(canada, X)'))
assert len(b) == 1
assert b[0]['X'] == 'northern_america'

# Perform the deletion outside the assert: under `python -O` asserts are
# stripped, which would otherwise leave the negative example in the KB and
# invalidate the "naive" baseline trained below.
deleted = kb.delete_rule(rule_num)
assert deleted

# Baseline model trained without the explicit negative example.
kb.build_kg_model(cuda=False, embedding_size=100)
kb.train_kg_model(steps=500, batch_size=512, neg_ratio=0.01)
canada_in_africa_naive = kb.estimate_triple_prob('canada', 'locatedin', 'africa')
size (MAX_Y & MAX_X in the code below), that's probably going to be too much. """ import random import sys import textwrap import time import cv2 import numpy as np from PIL import Image import torch from zincbase import KB kb = KB() MAX_Y = 100 MAX_X = 100 try: recursion_limit = int(sys.argv[1]) except IndexError: print( textwrap.dedent( """\nSetting recursion limit to 15. This is cautious and slow. \ Specify a first arg to change it, e.g. `python3 abelian_sandpile.py 10000 5000`""" )) recursion_limit = 15 try: propagation_limit = int(sys.argv[2])
"""Test the combination of edge attributes and negative examples."""
import context
from zincbase import KB

kb = KB()
# Fixed seed, presumably so the probabilistic assertions below are repeatable.
kb.seed(555)
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')
# specifying both a ~ and a truthiness < 0 is probably unnecessary.
kb.store('~locatedin(canada, africa)', {'truthiness': -1.})
kb.build_kg_model(cuda=False, embedding_size=100, pred_attributes=['truthiness'])
kb.train_kg_model(steps=1000, batch_size=4, neg_ratio=0.01)
canada_in_africa = kb.estimate_triple_prob('canada', 'locatedin', 'africa')
canada_in_asia = kb.estimate_triple_prob('canada', 'locatedin', 'asia')
canada_in_america = kb.estimate_triple_prob('canada', 'locatedin', 'northern_america')
# The explicitly negated triple must score less than half of an unrelated
# wrong triple, and the true triple more than twice that wrong triple.
assert 2 * canada_in_africa < canada_in_asia
assert canada_in_america > 2 * canada_in_asia
# Same two wrong triples again, this time through the attribute-aware
# estimator with the 'truthiness' attribute.
clafrica_truthiness = kb.estimate_triple_prob_with_attrs(
    'canada', 'locatedin', 'africa', 'truthiness')
clasia_truthiness = kb.estimate_triple_prob_with_attrs('canada', 'locatedin',
                                                       'asia', 'truthiness')
import types
import context
from zincbase import KB

kb = KB()
# The first stored fact is expected to get index 0.
b = kb.store('c(x)')
assert b == 0
# query() is expected to return a generator; materialise it before indexing.
b = kb.query('c(X)')
assert isinstance(b, types.GeneratorType)
b = list(b)
assert len(b) == 1
assert b[0]['X'] == 'x'
# A node that has had no attributes set is expected to compare equal to {}.
assert kb.node(b[0]['X']) == {}
b = kb.store('c(y)')
assert b == 1
# delete_rule() is expected to return something truthy, after which the
# deleted fact no longer matches.
b = kb.delete_rule(1)
assert b
assert not list(kb.query('c(y)'))
# Storing again after the deletion is expected to yield index 1 once more.
b = kb.store('c(y)')
assert b == 1
b = kb.query('c(X)')
b = list(b)
# Both facts are returned; the order between them is not asserted.
assert len(b) == 2
assert b[0]['X'] in ('x', 'y')
assert b[1]['X'] in ('x', 'y')
assert b[0]['X'] != b[1]['X']
assert kb.node(b[0]['X']) == {}
assert kb.node(b[1]['X']) == {}
# After storing a binary fact, its first argument is expected in kb.G.
b = kb.store('loves(tom, shamala)')
assert 'tom' in kb.G
"""Graph test: node attributes, edge attributes and the negation flag as
reported by `KB.to_triples(data=True)` for the single edge a(b, c)."""
import context
from zincbase import KB

SUBJ, PRED, OBJ = 'b', 'a', 'c'


def expected(edge_attrs, obj_attrs, negated):
    """Build the one-element list that to_triples(data=True) should return.

    The subject attributes are always {'is_letter': 1.0} in this test; a
    fresh dict is created on every call so nothing is shared with the KB.
    """
    return [(SUBJ, PRED, OBJ, {'is_letter': 1.0}, edge_attrs, obj_attrs,
             negated)]


kb = KB()
kb.store('a(b,c)')

# Attribute on the subject node.
kb.attr(SUBJ, {'is_letter': 1.0})
assert kb.node(SUBJ) == {'is_letter': 1.0}

# Attribute on the edge.
kb.edge_attr(SUBJ, PRED, OBJ, {'both_alpha': 1.0})
assert kb.edge(SUBJ, PRED, OBJ) == {'both_alpha': 1.0}

# Without data=True only the bare triple is returned.
assert kb.to_triples() == [(SUBJ, PRED, OBJ)]
assert kb.to_triples(data=True) == expected({'both_alpha': 1.0}, {}, False)

# Attribute on the object node.
kb.attr(OBJ, {'is_letter': 0.9})
assert kb.to_triples(data=True) == expected(
    {'both_alpha': 1.0}, {'is_letter': 0.9}, False)

# Storing the negated fact flips the trailing flag ...
neg_rule_idx = kb.store('~a(b,c)')
assert kb.to_triples(data=True) == expected(
    {'both_alpha': 1.0}, {'is_letter': 0.9}, True)

# ... and deleting it flips the flag back.
kb.delete_rule(neg_rule_idx)
assert kb.to_triples(data=True) == expected(
    {'both_alpha': 1.0}, {'is_letter': 0.9}, False)

# A negative 'truthiness' edge attribute also sets the flag ...
kb.edge_attr(SUBJ, PRED, OBJ, {'truthiness': -1})
assert kb.to_triples(data=True) == expected(
    {'both_alpha': 1.0, 'truthiness': -1}, {'is_letter': 0.9}, True)

# ... until that attribute is removed again.
kb.delete_edge_attr(SUBJ, PRED, OBJ, ['truthiness'])
assert kb.to_triples(data=True) == expected(
    {'both_alpha': 1.0}, {'is_letter': 0.9}, False)

print('All graph tests passed.')
import context
from zincbase import KB

kb = KB()
kb.store('connected(node1, node2)')
node1 = kb.node('node1')
# Flag flipped by hello_neighbor so the test can tell the callback ran.
was_called = False
def hello_neighbor(new_neighbor):
    """New-neighbor callback: record the call and check who the neighbor is."""
    global was_called
    was_called = True
    assert new_neighbor == 'node3'
node1.watch_for_new_neighbor(hello_neighbor)
# Connecting node1 to node3 is expected to trigger the callback above.
kb.store('connected(node1, node3)')
assert was_called
node1.grains = 0
def watch_fn(node, prev_val):
    """Attribute-watch callback: add one grain to every neighbor of `node`."""
    for n, predicate in node.neighbors:
        kb.node(n).grains += 1
node1_watch = node1.watch('grains', watch_fn)
# Re-fetch the node; the registered watch is expected to be visible on it.
node1 = kb.node('node1')
assert 'grains' in node1._watches
kb.store('connected(node3, node4)')
node2, node3, node4 = kb.node('node2'), kb.node('node3'), kb.node('node4')
import context
from zincbase import KB

kb = KB()
kb.seed(555)
kb.store('person(tom)')
kb.store('person(shamala)')
kb.store('knows(tom, shamala)')
# neighbors() is expected to list each neighbor with its connecting predicates.
assert kb.neighbors('tom') == [('shamala', [{'pred': 'knows'}])]
# Attributes can be set with item syntax and read back with attribute syntax.
kb.node('tom')['grains'] = 0
tom = kb.node('tom')
assert tom.grains == 0
# Attributes that were never set are expected to read as None, not raise.
assert tom.i_dont_exist is None
assert tom['i_dont_exist'] is None
kb.node('shamala').grains = 4
shamala = kb.node('shamala')
# Membership is checked both on the node itself and on its attrs mapping.
assert 'grains' in shamala
assert 'grains' in shamala.attrs
assert shamala.grains == 4
# In-place updates through either syntax are expected to be seen by the other.
shamala.grains += 1
assert shamala.grains == 5
assert shamala['grains'] == 5
shamala['grains'] += 1
assert shamala['grains'] == 6
kb.store('person(jeraca)')
"""This example shows a simple graph with updating attributes. Usage: run `python -m zincbase.web` & `python basic.py` Open your web browser to localhost:5000 """ import random import time import torch from zincbase import KB from zincbase.web import GraphCaster kb = KB() g = GraphCaster() g.reset() kb.store('node(1)', node_attributes=[{'color': 0x00ff00}]) kb.store('node(2)', node_attributes=[{'color': 0x0000ff}]) kb.store('node(3)', node_attributes=[{'color': 0xff00ff}]) kb.store('node(4)', node_attributes=[{'color': 0xffee11}]) kb.store('edge(1, 2)', edge_attributes={'edge_attr': 1}) kb.store('edge(2, 3)', edge_attributes={'edge_attr': 2}) kb.store('edge(2, 4)', edge_attributes={'edge_attr': 3}) g.from_kb(kb) g.render(node_color='node => node.color', arrow_size=2, node_opacity=1, node_label='color', label_node=True,
import context
from zincbase import KB

kb = KB()
kb.seed(555)
kb.store('person(tom)')
kb.store('person(shamala)')
kb.store('knows(tom, shamala)')
# neighbors() is expected to list each neighbor with its connecting predicates.
assert kb.neighbors('tom') == [('shamala', [{'pred': 'knows'}])]
# Attributes set through kb.attr() are read back from the node object.
kb.attr('tom', {'grains': 0})
tom = kb.node('tom')
assert tom.grains == 0
# Attributes that were never set are expected to read as None, not raise.
assert tom.i_dont_exist is None
assert tom['i_dont_exist'] is None
kb.attr('shamala', {'grains': 4})
shamala = kb.node('shamala')
assert shamala.grains == 4
# In-place updates through attribute or item syntax are expected to agree.
shamala.grains += 1
assert shamala.grains == 5
assert shamala['grains'] == 5
shamala['grains'] += 1
assert shamala['grains'] == 6
kb.store('person(jeraca)')
kb.attr('jeraca', {'grains': 3})
# Neural network tests.
# The NN is stochastic, so these tests could in principle flake, but the
# thresholds they use are fairly relaxed.
import context
from zincbase import KB

kb = KB()
kb.seed(555)

# Two employers, six employees each.
people = ['john', 'oleg', 'tom', 'vedant', 'akshay', 'todd']
for person in people:
    kb.store(f'works_at({person}, primer)')

other_people = [f'other{idx}' for idx in range(1, 7)]
for person in other_people:
    kb.store(f'works_at({person}, zillow)')

# Where each employer is based.
for fact in ('based_in(primer, bay_area)', 'based_in(zillow, seattle)'):
    kb.store(fact)

# Everybody lives where their employer is based.
for person in people:
    kb.store(f'lives_in({person}, bay_area)')
for person in other_people:
    kb.store(f'lives_in({person}, seattle)')

# Acquaintances among the first group, stored in this exact order.
for knower, known in (
        ('tom', 'john'),
        ('tom', 'oleg'),
        ('akshay', 'john'),
        ('tom', 'todd'),
        ('vedant', 'akshay'),
):
    kb.store(f'knows({knower}, {known})')
"""Demo operations on the `countries` dataset

TODO: It would be interesting to put lat, lng as attributes on the entities
"""
from zincbase import KB

kb = KB()
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# --- Symbolic queries against the loaded facts ---------------------------
in_northern_europe = list(kb.query('locatedin(X, northern_europe)'))
print(in_northern_europe)
# prints [{'X': 'norway'}, {'X': 'iceland'}, {'X': 'faroe_islands'}, ...]

austria_neighbors = list(kb.query('neighbor(austria, X)'))
print(austria_neighbors)
# prints [{'X': 'italy'}, {'X': 'czechia'}, {'X': 'slovenia'}, ...]

# --- Train the embedding model (takes < 1 minute) ------------------------
kb.build_kg_model(cuda=True, embedding_size=100)
kb.train_kg_model(steps=1000, batch_size=512)

# --- Ask the trained model -----------------------------------------------
mali_in_africa = kb.estimate_triple_prob('mali', 'locatedin', 'africa')
print(mali_in_africa)
# prints a number close to 1

singapore_regions = kb.get_most_likely('singapore', 'locatedin', '?', k=2)
print(singapore_regions)
# prints [{'prob': 0.9672, 'triple': ('singapore', 'locatedin', 'south_eastern_asia')}, ...]

likely_austria_neighbors = kb.get_most_likely('austria', 'neighbor', '?', k=8)
print(likely_austria_neighbors)
# prints [{'prob': 0.9749, 'triple': ('austria', 'neighbor', 'liechtenstein')} ...]
# Example: Using Zincbase to create a Rules Engine # Our clothing shop is going to run itself. from zincbase import KB kb = KB() # Our shop sells 2 SKUs, a tshirt, and jeans. Each has some stock. kb.store('sku(tshirt)', node_attributes=[{'inventory': 10}]) kb.store('sku(jeans)', node_attributes=[{'inventory': 3}]) # Customers can buy them individually or as an outfit. kb.store('top(tshirt)') kb.store('bottom(jeans)') rule_num = kb.store('outfit(X, Y) :- sku(X), sku(Y), top(X), bottom(Y)') # grab the stored nodes and rule for later use tshirt = kb.node('tshirt') jeans = kb.node('jeans') outfit = kb.rule(rule_num) # Set the initial stock level of outfits outfit.inventory = min(tshirt.inventory, jeans.inventory) # Print our initial stock levels def print_stock():
"""Train on the modified FB15k training split and print the MRR measured
on the modified test split."""
from zincbase import KB
from zincbase.utils.data_science import calc_mrr

TRAIN_PATH = './assets/fb15k_train_mod.txt'
TEST_PATH = './assets/fb15k_test_mod.txt'

model_options = {'cuda': True, 'embedding_size': 1000, 'gamma': 24}
training_options = {'steps': 20000, 'batch_size': 2048, 'neg_to_pos': 128}

kb = KB()
kb.from_csv(TRAIN_PATH, delimiter='\t')
kb.build_kg_model(**model_options)
kb.train_kg_model(**training_options)

# Evaluation is currently slow; calc_mrr takes an optional `size` kwarg.
mrr = calc_mrr(kb, TEST_PATH, delimiter='\t')
print(mrr)  # should be ~0.797 to match the paper.
"""List handling test: the classic two-clause `append/3` relation, queried
forwards, with the middle argument unknown, and with both inputs unknown."""
import context
from zincbase import KB

kb = KB()
kb.store('append([ ], List, List)')
kb.store(
    'append([Head | Tail], List, [Head | Result]) :- append(Tail, List, Result)'
)

# Forwards: concatenate two known lists.
b = list(kb.query('append([a, b], [c, d], X)'))
assert len(b) == 1
assert b[0]['X'] == '[a,b,c,d]'

# Middle argument unknown: what must be appended to [a, b]?
b = list(kb.query('append([a, b], X, [a, b, c, d])'))
assert len(b) == 1
assert b[0]['X'] == '[c,d]'

# Both inputs unknown: every split of the list, shortest prefix first.
expected_splits = [
    ('[]', '[a,b,c,d]'),
    ('[a]', '[b,c,d]'),
    ('[a,b]', '[c,d]'),
    ('[a,b,c]', '[d]'),
    ('[a,b,c,d]', '[]'),
]
b = list(kb.query('append(X, Y, [a, b, c, d])'))
assert len(b) == 5
for binding, (prefix, suffix) in zip(b, expected_splits):
    assert binding['X'] == prefix
    assert binding['Y'] == suffix
"""Runs evaluation on the Countries S3 dataset to reproduce the results
from the RotatE paper. Follows the paper's authors' methodology for
calculating the AUC PR.
"""
import csv

import numpy as np
from sklearn.metrics import average_precision_score
import torch

from zincbase import KB
from zincbase.utils.data_science import calc_auc_roc

kb = KB()

# Test split: the first two tab-separated columns of each row go to Xs and
# the third to Ys.
Xs = []
Ys = []
# Open the file in a context manager so the handle is closed as soon as the
# rows are consumed (it used to be leaked for the whole training run).
# newline='' is what the csv module asks for when it is given a file object.
with open('./assets/countries_s3_test.csv', 'r', newline='') as test_file:
    csvfile = csv.reader(test_file, delimiter='\t')
    for row in csvfile:
        Xs.append([row[0], row[1]])
        Ys.append(row[2])

# Train on the S3 training split.
kb.from_csv('./assets/countries_s3_train.csv', delimiter='\t')
kb.build_kg_model(cuda=True, embedding_size=1000, gamma=0.1)
kb.train_kg_model(steps=40000, batch_size=512, lr=0.000002, neg_to_pos=64)

y_true = []
# Neural network tests.
# The NN is stochastic, so these tests could in principle flake, but the
# thresholds they use are fairly relaxed.
import context
from zincbase import KB

kb = KB()
kb.seed(555)

# Two employers, six employees each.
people = ['john', 'oleg', 'tom', 'vedant', 'akshay', 'todd']
for person in people:
    kb.store(f'works_at({person}, primer)')

other_people = [f'other{idx}' for idx in range(1, 7)]
for person in other_people:
    kb.store(f'works_at({person}, zillow)')

# Remaining facts, stored one per statement and in this exact order.
kb.store('based_in(primer, bay_area)')
kb.store('based_in(zillow, seattle)')

# Everybody lives where their employer is based.
for person in people:
    kb.store(f'lives_in({person}, bay_area)')
for person in other_people:
    kb.store(f'lives_in({person}, seattle)')

# Acquaintances: first group, second group, then one cross-group link.
for knower, known in (
        ('tom', 'john'),
        ('tom', 'oleg'),
        ('akshay', 'john'),
        ('tom', 'todd'),
        ('vedant', 'akshay'),
        ('other2', 'other1'),
        ('other6', 'other5'),
        ('other1', 'other2'),
        ('other4', 'other3'),
        ('other3', 'other2'),
        ('tom', 'other4'),
):
    kb.store(f'knows({knower}, {known})')

# Two people with a home but no employer.
kb.store('lives_in(shamala, bay_area)')
kb.store('lives_in(mary, seattle)')

# Company-to-company associations.
kb.store('associated_with(zillow, amazon)')
kb.store('associated_with(primer, google)')