Esempio n. 1
0
import sys
import magellan as mg
from magellan.gui.debug_gui_base import vis_debug_dt

# NOTE(review): this path is appended after `import magellan` has already run
# above, so it cannot influence that import -- confirm it is still needed.
sys.path.append('/Users/Pradap/Documents/Research/Python-Package/enrique/')
mg.init_jvm()

# Columns passed as exclude_attrs to fit / predict / the debugger below.
# Stored once as a tuple and copied with list() at every call, so each callee
# still receives its own fresh list, exactly as the separate literals did.
EXCLUDE_ATTRS = ('_id', 'ltable.ID', 'rtable.ID', 'gold')

A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')

# C is not read again in this script; the labelled pairs L come from the CSV.
ab = mg.AttrEquivalenceBlocker()
C = ab.block_tables(A, B, 'zipcode', 'zipcode', ['name'], ['name'])
L = mg.read_csv('label_demo.csv', ltable=A, rtable=B)

# Feature vectors for the labelled pairs, then the train/test split.
feat_table = mg.get_features_for_matching(A, B)
G = mg.extract_feature_vecs(L, feature_table=feat_table, attrs_after='gold')
S = mg.train_test_split(G, 8, 7)

# Fit on S['train']; predict on S['test'] under the column name 'predicted',
# which eval_matches then compares against 'gold'.
dt = mg.DTMatcher(name='DecisionTree')
dt.fit(table=S['train'], exclude_attrs=list(EXCLUDE_ATTRS), target_attr='gold')
dt.predict(table=S['test'], exclude_attrs=list(EXCLUDE_ATTRS),
           target_attr='predicted', append=True)
d = mg.eval_matches(S['test'], 'gold', 'predicted')

vis_debug_dt(dt, d, S['test'], exclude_attrs=list(EXCLUDE_ATTRS),
             feat_table=feat_table)
# Parenthesised form prints the same text on Python 2 and parses on Python 3.
print("Hi")
Esempio n. 2
0
import sys
#sys.path.append('/Users/pradap/Documents/Research/Python-Package/enrique')
#sys.path.append('/scratch/pradap/python-work/enrqiue')
import os
import magellan as mg
import jpype
# Build paths to the two sample CSVs under the directory magellan reports.
p = mg.get_install_path()
path_for_A = os.sep.join([p, 'datasets', 'table_A.csv'])
path_for_B = os.sep.join([p, 'datasets', 'table_B.csv'])

# Start the JVM. Candidates are tried in order: jpype's default library path,
# that same path with its 'client'/'server' component swapped, and finally a
# path typed in by the user. An explicit path looks like, e.g.:
# /Library/Java/JavaVirtualMachines/jdk1.8.0_45.jdk/Contents/Home/jre/lib/server/libjvm.dylib
jvm_path = jpype.get_default_jvm_path()
if os.path.isfile(jvm_path):
    mg.init_jvm(jvm_path)
else:
    # Swap in both directions. Previously the 'server' -> 'client' replacement
    # was assigned to an unused name, so that direction was never applied.
    flipped = {'client': 'server', 'server': 'client'}
    jp = os.sep.join(flipped.get(t, t) for t in jvm_path.split(os.sep))
    if os.path.isfile(jp):
        mg.init_jvm(jp)
    else:
        # raw_input: this script targets Python 2.
        jp = raw_input('Give path to jvm library (i.e libjvm.so in linux) : ')
        if os.path.isfile(jp):
            mg.init_jvm(jp)
        else:
            # NOTE(review): the message says "exiting" but nothing here stops
            # the process -- confirm whether later code handles this case.
            print('Invalid path; cannot run tests; exiting')
Esempio n. 3
0

# In[8]:

import jpype


# In[9]:

# Bare expression: the default JVM library path is computed but not stored,
# and it is not what init_jvm below receives. The `# In[n]:` markers suggest
# a notebook export, where the value would be displayed as the cell output.
jpype.getDefaultJVMPath()


# In[10]:

# Initialize JVM
# NOTE(review): hard-coded, machine-specific Windows path to jvm.dll.
# `mg` is not bound in this excerpt; it must come from an earlier cell.
mg.init_jvm('C:\\Program Files\\Java\\jre7\\bin\\server\\jvm.dll')


# In[11]:

# import toy datasets
A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')


# In[12]:

# Bare expression: has no effect as a script; presumably shows table A when
# run as a notebook cell.
A


# In[13]:
Esempio n. 4
0
import sys
import magellan as mg
from magellan.gui.debug_gui_base import vis_debug_dt

# NOTE(review): this path is appended after `import magellan` has already run
# above, so it cannot influence that import -- confirm it is still needed.
sys.path.append('/Users/Pradap/Documents/Research/Python-Package/enrique/')
mg.init_jvm()

# Columns handed to fit / predict / the debugger as exclude_attrs. Defined
# once as a tuple; list() makes a fresh list per call so no callee can see a
# list another call received (same as the separate literals it replaces).
EXCLUDE_ATTRS = ('_id', 'ltable.ID', 'rtable.ID', 'gold')

A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')

# C is not read again in this script; the labelled pairs L come from the CSV.
ab = mg.AttrEquivalenceBlocker()
C = ab.block_tables(A, B, 'zipcode', 'zipcode', ['name'], ['name'])
L = mg.read_csv('label_demo.csv', ltable=A, rtable=B)

# Feature vectors for the labelled pairs, then the train/test split.
feat_table = mg.get_features_for_matching(A, B)
G = mg.extract_feature_vecs(L, feature_table=feat_table, attrs_after='gold')
S = mg.train_test_split(G, 8, 7)

# Fit on S['train']; predict on S['test'] under the column name 'predicted',
# which eval_matches then compares against 'gold'.
dt = mg.DTMatcher(name='DecisionTree')
dt.fit(table=S['train'],
       exclude_attrs=list(EXCLUDE_ATTRS),
       target_attr='gold')
dt.predict(table=S['test'],
           exclude_attrs=list(EXCLUDE_ATTRS),
           target_attr='predicted',
           append=True)
d = mg.eval_matches(S['test'], 'gold', 'predicted')

vis_debug_dt(dt,
             d,
             S['test'],
             exclude_attrs=list(EXCLUDE_ATTRS),
             feat_table=feat_table)
# Parenthesised form prints the same text on Python 2 and parses on Python 3.
print("Hi")
Esempio n. 5
0
# In[7]:

# `A` is used here before the assignment further down in this excerpt, so it
# must already be bound by an earlier cell that is not visible here.
A.head(10)

# In[8]:

import jpype

# In[9]:

# Bare expression: the default JVM library path is computed but not stored,
# and it is not what init_jvm below receives.
jpype.getDefaultJVMPath()

# In[10]:

# Initialize JVM
# NOTE(review): hard-coded, machine-specific Windows path to jvm.dll.
mg.init_jvm('C:\\Program Files\\Java\\jre7\\bin\\server\\jvm.dll')

# In[11]:

# import toy datasets
# Rebinds A (already used by the first cell above) and binds B.
A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')

# In[12]:

# Bare expression: has no effect as a script; presumably shows table A when
# run as a notebook cell.
A

# In[13]:

# block using zipcode
# Only the blocker object is created here; the blocking call itself is not
# part of this excerpt.
ab = mg.AttrEquivalenceBlocker()
Esempio n. 6
0
import magellan as mg

# Load the two sample tables that the black-box blocker below operates on.
A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')

# Start the JVM from an explicit library path.
# NOTE(review): hard-coded, machine-specific macOS JDK path. Here the JVM is
# started after the datasets are loaded -- confirm that this order is fine.
mg.init_jvm('/Library/Java/JavaVirtualMachines/jdk1.8.0_45.jdk/Contents/Home/jre/lib/server/libjvm.dylib')
from magellan.feature.simfunctions import *
from magellan.feature.tokenizers import *
def block_fn_1(ltuple, rtuple):
    """Return True when the address 3-gram Jaccard score is below 0.4.

    ltuple, rtuple: records that can be indexed with the key 'address'.
    Both addresses are tokenized with tok_qgram(..., 3) and the two token
    collections are compared with jaccard(); the result is always a plain
    bool.
    """
    left_grams = tok_qgram(ltuple['address'], 3)
    right_grams = tok_qgram(rtuple['address'], 3)
    # bool() applies the same truth test the original `if` did.
    return bool(jaccard(left_grams, right_grams) < 0.4)

def block_fn_2(x, y):
    """Return True when lev() of the two 'name' values is below 0.5.

    x, y: records that can be indexed with the key 'name'.
    Returns False for any score of 0.5 or more.
    """
    name_score = lev(x['name'], y['name'])
    return True if name_score < 0.5 else False

# Two-stage blocking with one BlackBoxBlocker: block_fn_1 is applied to the
# table pair (A, B), then the function is replaced with block_fn_2 and applied
# to the resulting candidate set C.
bb = mg.BlackBoxBlocker()
bb.set_black_box_function(block_fn_1)
C = bb.block_tables(A, B, l_output_attrs='name', r_output_attrs='name')
# Parenthesised print: same output on Python 2, and valid syntax on Python 3.
print(C)
bb.set_black_box_function(block_fn_2)
D = bb.block_candset(C)
print(D)