def make_unit_network(
        df,
        charge_types,
        target_charge_type='charge_1_description_category_macro'):
    """Store police unit/beat facts from arrest records in a knowledge base.

    Args:
        df: DataFrame providing the columns ``lead_charge_police_related``,
            ``beat``, ``unit``, ``arrest_time``, ``lead_charge_code`` and the
            column named by ``target_charge_type``.
        charge_types: Iterable of charge-type labels to assign to the derived
            ``lead_charge_code_type`` column. Recognised labels are
            ``'Felony'`` (code > 7), ``'Misdemeanor'`` (4 < code <= 7),
            ``'Petty or Other'`` (0 < code <= 4) and ``'Not Specified'``
            (code < 0). Any other label is ignored. Labels may be given in
            any order and any subset; rows matching no requested label keep
            the string ``"None"``.
        target_charge_type: Name of the charge-category column copied into
            the working frame.

    NOTE(review): the visible body builds a local ``KB`` but never returns
    it -- confirm whether a trailing ``return kb`` is missing.
    """
    lead_charge_code = 'lead_charge_code'
    lead_charge_code_type = f'{lead_charge_code}_type'

    kb = KB()
    kb.name = 'cpd'
    # Work on a deep copy so the caller's frame is never modified.
    data = df[[
        'lead_charge_police_related', 'beat', 'unit', 'arrest_time',
        target_charge_type, lead_charge_code
    ]].copy(deep=True)

    codes = data[lead_charge_code]
    charge_masks = {
        'Felony': codes > 7,
        'Misdemeanor': (codes > 4) & (codes <= 7),
        'Petty or Other': (codes > 0) & (codes <= 4),
        'Not Specified': codes < 0,
    }

    # Create the label column up front. Previously it only came into
    # existence in the 'Felony' branch, so any other first label raised a
    # KeyError, and a late 'Felony' wiped labels that were already assigned.
    data[lead_charge_code_type] = "None"
    for charge_type in charge_types:
        mask = charge_masks.get(charge_type)
        if mask is not None:
            data[lead_charge_code_type] = np.where(
                mask, charge_type, data[lead_charge_code_type])

    # Missing identifiers get a zero-padded placeholder before being stored.
    fill_values = {'unit': '000', 'beat': '0000'}
    for key, fill_val in fill_values.items():
        data[key] = data[key].fillna(fill_val).astype(str)
        for value in sorted(data[key].unique().tolist()):
            kb.store(f'isA({value},{key})')

    # One assignedTo fact per distinct (unit, beat) pairing.
    unit_nodes = data[['unit', 'beat']].drop_duplicates()
    for unit, beat in zip(unit_nodes['unit'], unit_nodes['beat']):
        kb.store(f'assignedTo({beat}, {unit})')
Ejemplo n.º 2
0
There's no pressing need to do this using Zincbase -- it's just a demo.
"""

import random

import cv2
import numpy as np
from PIL import Image
import torch

from zincbase import KB

# Grid dimensions: the board has MAX_Y rows and MAX_X columns.
MAX_Y = 100
MAX_X = 100

kb = KB()      # knowledge base that receives one `cell(...)` fact per grid position

# First pass: store a `cell(<id>)` fact for every position, where
# id = y * MAX_X + x, attaching its coordinates and a random initial state.
for y in range(MAX_Y):
    for x in range(MAX_X):
        # Bernoulli draw with probability 0.4, converted to a plain Python int.
        state = torch.bernoulli(torch.tensor([0.4])).int().item()
        kb.store(f'cell({(y * MAX_X) + x})', node_attributes=[{'x': x, 'y': y, 'state': state}])
        # NOTE(review): `node` is not used in the visible code.
        node = kb.node((y * MAX_X) + x)

# Second pass: for every cell, compute the coordinates of surrounding cells.
for y in range(MAX_Y):
    for x in range(MAX_X):
        # (dy, dx) offsets of the eight adjacent positions.
        neighbors = ((-1, -1), (-1, 0), (-1, 1),
                     (0, -1), (0, 1),
                     (1, -1), (1, 0), (1, 1))
        for neighbor in neighbors:
            # Neighbour coordinates; no bounds check is applied on these lines.
            y_n = y + neighbor[0]
            x_n = x + neighbor[1]
Ejemplo n.º 3
0
import context
from zincbase import KB

# Test of item-style and attribute-style access to node and edge attributes.
kb = KB()
kb.store('a(b,c)')

# Node attributes: set, check membership, overwrite, delete, then set again.
kb.node('b')['is_letter'] = 1.0
assert kb.node('b').attrs == {'is_letter': 1.0}
assert 'is_letter' in kb.node('b')
kb.node('b')['is_letter'] = 2.0
del kb.node('b')['is_letter']
assert 'is_letter' not in kb.node('b').attrs
kb.node('b')['is_letter'] = 2.0
assert kb.node('b').attrs == {'is_letter': 2.0}

# Edge attributes: both `edge.name = value` and `edge['name'] = value` are
# expected to end up in `.attrs`.
kb.edge('b', 'a', 'c').both_alpha = 1.0
assert kb.edge('b', 'a', 'c').attrs == {'both_alpha': 1.0}
kb.edge('b', 'a', 'c')['both_alpha'] = 2.0
assert kb.edge('b', 'a', 'c').attrs == {'both_alpha': 2.0}

# Without `data=True` only the three names of each triple are returned.
assert kb.to_triples() == [('b', 'a', 'c')]
# With `data=True` each tuple is expected to also carry three attribute dicts
# and a trailing boolean (False here).
triples = kb.to_triples(data=True)
assert triples == [('b', 'a', 'c', {
    'is_letter': 2.0
}, {
    'both_alpha': 2.0
}, {}, False)]
# Give node 'c' an attribute and export the triples again.
kb.node('c').is_letter = 0.9
triples = kb.to_triples(data=True)
assert triples == [('b', 'a', 'c', {
    'is_letter': 2.0
}, {
    'both_alpha': 2.0
}, {
Ejemplo n.º 4
0
import context

from zincbase import KB
# Test of rule lookup, rule deletion and attaching an on_change handler.
kb = KB()

kb.store('bought_ticket(tom)')

# `store` returns a value that `kb.rule` accepts in place of the rule's head.
rule_num = kb.store('winner(X) :- bought_ticket(X), had_correct_numbers(X)')
assert str(kb.rule('winner(X)')) == 'winner(X)'
assert kb.rule(rule_num) == kb.rule('winner(X)')

# No winner until the second premise is stored; none again once it is deleted.
assert list(kb.query('winner(X)')) == []
fake_lottery_win = kb.store('had_correct_numbers(tom)')
assert list(kb.query('winner(X)')) == [{'X': 'tom'}]
kb.delete_rule(fake_lottery_win)
assert list(kb.query('winner(X)')) == []

tom = kb.node('tom')

# Counts how many times the handler below saw a value other than 6.
possible_winner_called = 0


def possible_winner(me, affected_nodes, node_that_changed, attr_changed,
                    cur_val, prev_val):
    """Handler assigned to the rule's ``on_change`` attribute.

    Increments the module-level ``possible_winner_called`` counter and returns
    ``False`` when ``cur_val`` is not 6; otherwise falls through and returns
    ``None``. Only ``cur_val`` is read; the other parameters are unused here.
    """
    global possible_winner_called
    if cur_val != 6:
        possible_winner_called += 1
        return False


# Attach the handler to the `winner` rule.
kb.rule(rule_num).on_change = possible_winner
Ejemplo n.º 5
0
"""Test negative examples using Countries.

The main idea here is that if we explicitly enter some false facts (signalling
to the KB that they are false, it should make less-wrong predictions
for them, versus just going by its own synthetic negative examples.)

It may have the side effect of pushing UP the probability of other wrong
triples, see e.g. "canada in asia" below.
"""
import context

from zincbase import KB

kb = KB()
kb.seed(555)  # fixed seed passed to the KB

# Load the training triples from a tab-delimited file.
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# Store a fact prefixed with `~` and keep the returned id for deletion below.
rule_num = kb.store('~locatedin(canada, africa)')

# The `~` fact must not show up as a query answer: only one result is expected.
b = list(kb.query('locatedin(canada, X)'))
assert len(b) == 1
assert b[0]['X'] == 'northern_america'
# `delete_rule` is expected to return a truthy value.
assert kb.delete_rule(rule_num)

# Build and train on CPU with the `~` fact removed.
kb.build_kg_model(cuda=False, embedding_size=100)

kb.train_kg_model(steps=500, batch_size=512, neg_ratio=0.01)

# Estimate for the false triple from the model trained without the `~` fact.
canada_in_africa_naive = kb.estimate_triple_prob('canada', 'locatedin',
                                                 'africa')
Ejemplo n.º 6
0
size (MAX_Y & MAX_X in the code below), that's probably going to be too much.
"""

import random
import sys
import textwrap
import time

import cv2
import numpy as np
from PIL import Image
import torch

from zincbase import KB

kb = KB()

# Grid dimensions referred to by the module docstring.
MAX_Y = 100
MAX_X = 100

# The first command-line argument, when given, sets the recursion limit.
# Only a missing argument is handled: a non-integer argument raises
# ValueError from int(), which is not caught here.
try:
    recursion_limit = int(sys.argv[1])
except IndexError:
    print(
        textwrap.dedent(
            """\nSetting recursion limit to 15. This is cautious and slow. \
Specify a first arg to change it, e.g. `python3 abelian_sandpile.py 10000 5000`"""
        ))
    recursion_limit = 15
try:
    propagation_limit = int(sys.argv[2])
Ejemplo n.º 7
0
"""Test the combination of edge attributes and negative examples."""

import context

from zincbase import KB

kb = KB()
kb.seed(555)  # fixed seed passed to the KB

# Load the training triples from a tab-delimited file.
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# specifying both a ~ and a truthiness < 0 is probably unnecessary.
kb.store('~locatedin(canada, africa)', {'truthiness': -1.})

# Build on CPU and name `truthiness` as a predicate attribute for the model.
kb.build_kg_model(cuda=False,
                  embedding_size=100,
                  pred_attributes=['truthiness'])

kb.train_kg_model(steps=1000, batch_size=4, neg_ratio=0.01)

# Compare the estimates for the explicitly negated triple, an unstated false
# triple (asia) and the true triple (northern_america).
canada_in_africa = kb.estimate_triple_prob('canada', 'locatedin', 'africa')
canada_in_asia = kb.estimate_triple_prob('canada', 'locatedin', 'asia')
canada_in_america = kb.estimate_triple_prob('canada', 'locatedin',
                                            'northern_america')
# The thresholds are relative: each estimate must be more than double the
# one ranked below it.
assert 2 * canada_in_africa < canada_in_asia
assert canada_in_america > 2 * canada_in_asia

# The same two false triples, this time estimated with the `truthiness`
# attribute named.
clafrica_truthiness = kb.estimate_triple_prob_with_attrs(
    'canada', 'locatedin', 'africa', 'truthiness')
clasia_truthiness = kb.estimate_triple_prob_with_attrs('canada', 'locatedin',
                                                       'asia', 'truthiness')
Ejemplo n.º 8
0
import types

import context
from zincbase import KB

# Test of store / query / delete_rule; `b` is reused as a scratch variable.
kb = KB()
# The first stored fact is expected to get id 0.
b = kb.store('c(x)')
assert b == 0
# `query` is expected to be lazy: it returns a generator of binding dicts.
b = kb.query('c(X)')
assert isinstance(b, types.GeneratorType)
b = list(b)
assert len(b) == 1
assert b[0]['X'] == 'x'
# A node with no attributes is expected to compare equal to an empty dict.
assert kb.node(b[0]['X']) == {}
b = kb.store('c(y)')
assert b == 1
# Deleting rule 1 is expected to return a truthy value and remove the fact.
b = kb.delete_rule(1)
assert b
assert not list(kb.query('c(y)'))
# Storing the fact again is expected to yield id 1 once more.
b = kb.store('c(y)')
assert b == 1
b = kb.query('c(X)')
b = list(b)
# Both facts are returned; the checks below do not depend on their order.
assert len(b) == 2
assert b[0]['X'] in ('x', 'y')
assert b[1]['X'] in ('x', 'y')
assert b[0]['X'] != b[1]['X']
assert kb.node(b[0]['X']) == {}
assert kb.node(b[1]['X']) == {}
# After storing a binary fact, its first argument is expected to be in `kb.G`.
b = kb.store('loves(tom, shamala)')
assert 'tom' in kb.G
Ejemplo n.º 9
0
import context

from zincbase import KB

# Test of the graph view: node/edge attributes and the trailing boolean that
# `to_triples(data=True)` reports for each triple.
kb = KB()
kb.store('a(b,c)')
# Attributes set via `attr` / `edge_attr` are expected to compare equal to
# plain dicts.
kb.attr('b', {'is_letter': 1.0})
assert kb.node('b') == {'is_letter': 1.0}
kb.edge_attr('b', 'a', 'c', {'both_alpha': 1.0})
assert kb.edge('b', 'a', 'c') == {'both_alpha': 1.0}
assert kb.to_triples() == [('b', 'a', 'c')]
# With data=True each tuple is expected to carry three attribute dicts and a
# boolean, in addition to the three names.
triples = kb.to_triples(data=True)
assert triples == [('b', 'a', 'c', {'is_letter': 1.0}, {'both_alpha': 1.0}, {}, False)]
kb.attr('c', {'is_letter': 0.9})
triples = kb.to_triples(data=True)
assert triples == [('b', 'a', 'c', {'is_letter': 1.0}, {'both_alpha': 1.0}, {'is_letter': 0.9}, False)]
# Storing the `~`-prefixed form of the fact is expected to flip the boolean to True...
neg_rule_idx = kb.store('~a(b,c)')
triples = kb.to_triples(data=True)
assert triples == [('b', 'a', 'c', {'is_letter': 1.0}, {'both_alpha': 1.0}, {'is_letter': 0.9}, True)]
# ...and deleting that rule is expected to flip it back.
kb.delete_rule(neg_rule_idx)
triples = kb.to_triples(data=True)
assert triples == [('b', 'a', 'c', {'is_letter': 1.0}, {'both_alpha': 1.0}, {'is_letter': 0.9}, False)]
# A `truthiness` edge attribute of -1 is expected to set the boolean as well.
kb.edge_attr('b', 'a', 'c', {'truthiness':-1})
triples = kb.to_triples(data=True)
assert triples == [('b', 'a', 'c', {'is_letter': 1.0}, {'both_alpha': 1.0, 'truthiness': -1}, {'is_letter': 0.9}, True)]
# Removing the attribute is expected to restore the original state.
kb.delete_edge_attr('b', 'a', 'c', ['truthiness'])
triples = kb.to_triples(data=True)
assert triples == [('b', 'a', 'c', {'is_letter': 1.0}, {'both_alpha': 1.0}, {'is_letter': 0.9}, False)]
print('All graph tests passed.')
Ejemplo n.º 10
0
import context

from zincbase import KB
# Test of node watchers: a new-neighbour callback and an attribute watch.
kb = KB()

kb.store('connected(node1, node2)')

node1 = kb.node('node1')

# Set to True by the callback so the test can confirm that it ran.
was_called = False
def hello_neighbor(new_neighbor):
    """Record that the callback ran and check the reported neighbour is 'node3'."""
    global was_called
    was_called = True
    assert new_neighbor == 'node3'

node1.watch_for_new_neighbor(hello_neighbor)

# Storing a new fact about node1 is expected to trigger the callback.
kb.store('connected(node1, node3)')
assert was_called

node1.grains = 0
def watch_fn(node, prev_val):
    """Add one to ``grains`` on every neighbour of ``node``; ``prev_val`` is unused."""
    for n, predicate in node.neighbors:
        kb.node(n).grains += 1

# Register the watch on `grains`, fetch the node again and confirm the watch
# is listed in `_watches`.
node1_watch = node1.watch('grains', watch_fn)
node1 = kb.node('node1')
assert 'grains' in node1._watches

kb.store('connected(node3, node4)')
node2, node3, node4 = kb.node('node2'), kb.node('node3'), kb.node('node4')
Ejemplo n.º 11
0
import context

from zincbase import KB
# Test of node watchers: a new-neighbour callback and an attribute watch.
kb = KB()

kb.store('connected(node1, node2)')

node1 = kb.node('node1')

# Set to True by the callback so the test can confirm that it ran.
was_called = False
def hello_neighbor(new_neighbor):
    """Record that the callback ran and check the reported neighbour is 'node3'."""
    global was_called
    was_called = True
    assert new_neighbor == 'node3'

node1.watch_for_new_neighbor(hello_neighbor)

# Storing a new fact about node1 is expected to trigger the callback.
kb.store('connected(node1, node3)')
assert was_called

node1.grains = 0
def watch_fn(node, prev_val):
    """Add one to ``grains`` on every neighbour of ``node``; ``prev_val`` is unused."""
    for n, predicate in node.neighbors:
        kb.node(n).grains += 1

# Register the watch on `grains`, fetch the node again and confirm the watch
# is listed in `_watches`.
node1_watch = node1.watch('grains', watch_fn)
node1 = kb.node('node1')
assert 'grains' in node1._watches

kb.store('connected(node3, node4)')
node2, node3, node4 = kb.node('node2'), kb.node('node3'), kb.node('node4')
Ejemplo n.º 12
0
import context

from zincbase import KB
# Test of node watchers: a new-neighbour callback and an attribute watch.
kb = KB()

kb.store('connected(node1, node2)')

node1 = kb.node('node1')

# Set to True by the callback so the test can confirm that it ran.
was_called = False
def hello_neighbor(new_neighbor):
    """Record that the callback ran and check the reported neighbour is 'node3'."""
    global was_called
    was_called = True
    assert new_neighbor == 'node3'

node1.watch_for_new_neighbor(hello_neighbor)

# Storing a new fact about node1 is expected to trigger the callback.
kb.store('connected(node1, node3)')
assert was_called

node1.grains = 0
def watch_fn(node, prev_val):
    """Add one to ``grains`` on every neighbour of ``node``; ``prev_val`` is unused."""
    for n, predicate in node.neighbors:
        kb.node(n).grains += 1

# Register the watch on `grains`, fetch the node again and confirm the watch
# is listed in `_watches`.
node1_watch = node1.watch('grains', watch_fn)
node1 = kb.node('node1')
assert 'grains' in node1._watches

kb.store('connected(node3, node4)')
node2, node3, node4 = kb.node('node2'), kb.node('node3'), kb.node('node4')
Ejemplo n.º 13
0
import context

from zincbase import KB

# Test of reading and writing node attributes through the node object.
kb = KB()
kb.seed(555)

kb.store('person(tom)')
kb.store('person(shamala)')
kb.store('knows(tom, shamala)')
# `neighbors` is expected to list (neighbour, [edge info dicts]) pairs.
assert kb.neighbors('tom') == [('shamala', [{'pred': 'knows'}])]

kb.node('tom')['grains'] = 0

tom = kb.node('tom')
assert tom.grains == 0
# Attributes that were never set are expected to read as None with either
# access style, rather than raising.
assert tom.i_dont_exist is None
assert tom['i_dont_exist'] is None

kb.node('shamala').grains = 4
shamala = kb.node('shamala')
# Membership is expected to work on the node itself and on its `.attrs`.
assert 'grains' in shamala
assert 'grains' in shamala.attrs
assert shamala.grains == 4
# In-place arithmetic is expected to work with both access styles.
shamala.grains += 1
assert shamala.grains == 5
assert shamala['grains'] == 5
shamala['grains'] += 1
assert shamala['grains'] == 6

kb.store('person(jeraca)')
Ejemplo n.º 14
0
"""This example shows a simple graph with updating attributes.
Usage: run `python -m zincbase.web` & `python basic.py`
Open your web browser to localhost:5000
"""

import random
import time

import torch

from zincbase import KB
from zincbase.web import GraphCaster

kb = KB()
g = GraphCaster()
g.reset()

# Four nodes, numbered from 1, each carrying its own `color` attribute.
node_colors = (0x00ff00, 0x0000ff, 0xff00ff, 0xffee11)
for node_id, node_color in enumerate(node_colors, start=1):
    kb.store(f'node({node_id})', node_attributes=[{'color': node_color}])

# Three edges, each tagged with a distinct `edge_attr` value.
edge_specs = (
    ((1, 2), 1),
    ((2, 3), 2),
    ((2, 4), 3),
)
for (source, target), edge_value in edge_specs:
    kb.store(f'edge({source}, {target})',
             edge_attributes={'edge_attr': edge_value})

# Hand the populated knowledge base to the GraphCaster.
g.from_kb(kb)
g.render(node_color='node => node.color',
         arrow_size=2,
         node_opacity=1,
         node_label='color',
         label_node=True,
Ejemplo n.º 15
0
import context

from zincbase import KB

# Test of setting node attributes with `kb.attr` and reading them back
# through the node object.
kb = KB()
kb.seed(555)

kb.store('person(tom)')
kb.store('person(shamala)')
kb.store('knows(tom, shamala)')
# `neighbors` is expected to list (neighbour, [edge info dicts]) pairs.
assert kb.neighbors('tom') == [('shamala', [{'pred': 'knows'}])]

kb.attr('tom', {'grains': 0})

tom = kb.node('tom')
assert tom.grains == 0
# Attributes that were never set are expected to read as None with either
# access style, rather than raising.
assert tom.i_dont_exist is None
assert tom['i_dont_exist'] is None

kb.attr('shamala', {'grains': 4})
shamala = kb.node('shamala')
assert shamala.grains == 4
# In-place arithmetic is expected to work with both access styles.
shamala.grains += 1
assert shamala.grains == 5
assert shamala['grains'] == 5
shamala['grains'] += 1
assert shamala['grains'] == 6

kb.store('person(jeraca)')
kb.attr('jeraca', {'grains': 3})
Ejemplo n.º 16
0
# Neural Network tests

# Possible that tests could flake given stochasticity of NN
# but they are fairly relaxed.

import context
from zincbase import KB

kb = KB()
kb.seed(555)

# Two groups of people, one per employer.
people = ['john', 'oleg', 'tom', 'vedant', 'akshay', 'todd']
other_people = ['other1', 'other2', 'other3', 'other4', 'other5', 'other6']

# Employment facts, followed by where each employer is based.
for person in people:
    kb.store(f'works_at({person}, primer)')
for person in other_people:
    kb.store(f'works_at({person}, zillow)')
for company_fact in ('based_in(primer, bay_area)',
                     'based_in(zillow, seattle)'):
    kb.store(company_fact)

# Residence facts: each group lives where its employer is based.
for person in people:
    kb.store(f'lives_in({person}, bay_area)')
for person in other_people:
    kb.store(f'lives_in({person}, seattle)')

# Acquaintance facts, stored in the same order as before.
for acquaintance_fact in (
        'knows(tom, john)',
        'knows(tom, oleg)',
        'knows(akshay, john)',
        'knows(tom, todd)',
        'knows(vedant, akshay)',
):
    kb.store(acquaintance_fact)
Ejemplo n.º 17
0
"""Demo operations on the `countries` dataset

TODO: It would be interesting to put lat, lng as attributes
on the entities

"""

from zincbase import KB

kb = KB()

# Load the training triples from a tab-delimited file.
kb.from_csv('./assets/countries_s1_train.csv', delimiter='\t')

# Symbolic queries over the loaded facts; no model is needed for these.
print(list(kb.query('locatedin(X, northern_europe)')))
# prints [{'X': 'norway'}, {'X': 'iceland'}, {'X': 'faroe_islands'}, ...]

print(list(kb.query('neighbor(austria, X)')))
# prints [{'X': 'italy'}, {'X': 'czechia'}, {'X': 'slovenia'}, ...]

# NOTE(review): cuda=True is hard-coded; presumably this needs a CUDA-capable
# GPU -- confirm behaviour on CPU-only machines.
kb.build_kg_model(cuda=True, embedding_size=100)

kb.train_kg_model(steps=1000, batch_size=512)  # takes < 1 minute

# Queries against the trained model.
print(kb.estimate_triple_prob('mali', 'locatedin', 'africa'))
# prints a number close to 1

# '?' marks the slot to be ranked; k is the number of candidates requested.
print(kb.get_most_likely('singapore', 'locatedin', '?', k=2))
# prints [{'prob': 0.9672, 'triple': ('singapore', 'locatedin', 'south_eastern_asia')}, ...]

print(kb.get_most_likely('austria', 'neighbor', '?', k=8))
# prints [{'prob': 0.9749, 'triple': ('austria', 'neighbor', 'liechtenstein')} ...]
Ejemplo n.º 18
0
# Example: Using Zincbase to create a Rules Engine
# Our clothing shop is going to run itself.

from zincbase import KB

kb = KB()

# Our shop sells 2 SKUs, a tshirt, and jeans. Each has some stock, held in
# an `inventory` node attribute.

kb.store('sku(tshirt)', node_attributes=[{'inventory': 10}])
kb.store('sku(jeans)', node_attributes=[{'inventory': 3}])

# Customers can buy them individually or as an outfit. An outfit pairs any
# SKU that is a top with any SKU that is a bottom.

kb.store('top(tshirt)')
kb.store('bottom(jeans)')
rule_num = kb.store('outfit(X, Y) :- sku(X), sku(Y), top(X), bottom(Y)')

# Grab the stored nodes and rule for later use.

tshirt = kb.node('tshirt')
jeans = kb.node('jeans')
outfit = kb.rule(rule_num)

# Set the initial stock level of outfits: limited by whichever component
# has less stock.

outfit.inventory = min(tshirt.inventory, jeans.inventory)


# Print our initial stock levels
def print_stock():
Ejemplo n.º 19
0
from zincbase import KB
from zincbase.utils.data_science import calc_mrr

kb = KB()

# Load the training triples from a tab-delimited file.
kb.from_csv('./assets/fb15k_train_mod.txt', delimiter='\t')

# NOTE(review): cuda=True is hard-coded; presumably this needs a CUDA-capable
# GPU -- confirm behaviour on CPU-only machines.
kb.build_kg_model(cuda=True, embedding_size=1000, gamma=24)

kb.train_kg_model(steps=20000, batch_size=2048, neg_to_pos=128)

# Score the trained model against the held-out test file.
mrr = calc_mrr(kb, './assets/fb15k_test_mod.txt', delimiter='\t') # add optional `size` kwarg since eval is currently slow.

print(mrr) # should be ~0.797 to match the paper.
Ejemplo n.º 20
0
import context

from zincbase import KB

kb = KB()

# List concatenation written as two clauses: appending to the empty list
# yields the other list, and the recursive clause moves the head across.
kb.store('append([ ], List, List)')
kb.store('append([Head | Tail], List, [Head | Result]) :- append(Tail, List, Result)')

# Forward use: concatenate two known lists.
b = list(kb.query('append([a, b], [c, d], X)'))
assert len(b) == 1
assert b[0]['X'] == '[a,b,c,d]'

# Backward use: recover the suffix from a known prefix and result.
b = list(kb.query('append([a, b], X, [a, b, c, d])'))
assert len(b) == 1
assert b[0]['X'] == '[c,d]'

# Fully open use: enumerate every split of the result list, in order.
b = list(kb.query('append(X, Y, [a, b, c, d])'))
expected_splits = [
    ('[]', '[a,b,c,d]'),
    ('[a]', '[b,c,d]'),
    ('[a,b]', '[c,d]'),
    ('[a,b,c]', '[d]'),
    ('[a,b,c,d]', '[]'),
]
assert len(b) == 5
for binding, (prefix, suffix) in zip(b, expected_splits):
    assert binding['X'] == prefix
    assert binding['Y'] == suffix
Ejemplo n.º 21
0
"""Runs evaluation on the Countries S3 dataset
to reproduce the results from the RotatE paper.

Follows the paper's authors' methodology for calculating the AUC PR.
"""

import csv

import numpy as np
from sklearn.metrics import average_precision_score
import torch

from zincbase import KB
from zincbase.utils.data_science import calc_auc_roc

kb = KB()

# Held-out test rows: Xs collects the first two columns of each row,
# Ys the third column.
Xs = []
Ys = []
# Read the test file inside a context manager so the handle is closed as
# soon as the rows are consumed; the reader used to wrap a bare open() that
# was never closed. newline='' is what the csv module asks for on files
# handed to csv.reader.
with open('./assets/countries_s3_test.csv', 'r', newline='') as test_file:
    csvfile = csv.reader(test_file, delimiter='\t')
    for row in csvfile:
        Xs.append([row[0], row[1]])
        Ys.append(row[2])

# Load the training triples from a tab-delimited file.
kb.from_csv('./assets/countries_s3_train.csv', delimiter='\t')

# NOTE(review): cuda=True is hard-coded; presumably this needs a CUDA-capable
# GPU -- confirm behaviour on CPU-only machines.
kb.build_kg_model(cuda=True, embedding_size=1000, gamma=0.1)
kb.train_kg_model(steps=40000, batch_size=512, lr=0.000002, neg_to_pos=64)

y_true = []
Ejemplo n.º 22
0
# Neural Network tests

# Possible that tests could flake given stochasticity of NN
# but they are fairly relaxed.

import context
from zincbase import KB

kb = KB()
kb.seed(555)

# Two groups of people, one per employer.
people = ['john', 'oleg', 'tom', 'vedant', 'akshay', 'todd']
other_people = ['other1', 'other2', 'other3', 'other4', 'other5', 'other6']

# Employment facts, followed by where each employer is based.
for person in people:
    kb.store(f'works_at({person}, primer)')
for person in other_people:
    kb.store(f'works_at({person}, zillow)')
kb.store('based_in(primer, bay_area)')
kb.store('based_in(zillow, seattle)')

# Residence facts: each group lives where its employer is based.
for person in people:
    kb.store(f'lives_in({person}, bay_area)')
for person in other_people:
    kb.store(f'lives_in({person}, seattle)')

# Remaining facts, one per entry and stored in the original order.
extra_facts = (
    # acquaintances inside the first group
    'knows(tom, john)',
    'knows(tom, oleg)',
    'knows(akshay, john)',
    'knows(tom, todd)',
    'knows(vedant, akshay)',
    # acquaintances inside the second group
    'knows(other2, other1)',
    'knows(other6, other5)',
    'knows(other1, other2)',
    'knows(other4, other3)',
    'knows(other3, other2)',
    # the only acquaintance linking the two groups
    'knows(tom, other4)',
    # two people with a residence but no employer fact
    'lives_in(shamala, bay_area)',
    'lives_in(mary, seattle)',
    # company associations
    'associated_with(zillow, amazon)',
    'associated_with(primer, google)',
)
for fact in extra_facts:
    kb.store(fact)