def setup(self):
    """Load the bundled bikes tables and block them on ``city_posted``.

    Reads ``A.csv`` and ``B.csv`` from ``datasets/example_datasets/bikes``
    under ``mg.get_install_path()``, sets ``'id'`` as the key of both
    tables, and runs an ``AttrEquivalenceBlocker`` over them.

    Attributes set on the instance:

    * ``self.A`` / ``self.B`` -- the two tables read from disk.
    * ``self.ab`` -- the blocker instance.
    * ``self.C`` -- the result of ``block_tables`` on ``'city_posted'``.
    * ``self.l_block_attr`` / ``self.r_block_attr`` -- ``'model_year'``;
      only stored here, not used by this method.
    """
    # os.path.join (rather than os.sep.join) avoids doubled separators
    # when the install path already ends with one.
    bikes_dir = os.path.join(
        mg.get_install_path(), 'datasets', 'example_datasets', 'bikes')
    key_attr = 'id'

    self.A = mg.read_csv_metadata(os.path.join(bikes_dir, 'A.csv'))
    mg.set_key(self.A, key_attr)
    self.B = mg.read_csv_metadata(os.path.join(bikes_dir, 'B.csv'))
    mg.set_key(self.B, key_attr)

    # Both sides use the same blocking attribute and output columns.
    first_block_attr = 'city_posted'
    output_attrs = [
        'bike_name', 'city_posted', 'km_driven', 'price', 'color',
        'model_year',
    ]

    self.ab = mg.AttrEquivalenceBlocker()
    # Pass separate list objects per side, as the original code did.
    self.C = self.ab.block_tables(self.A,
                                  self.B,
                                  first_block_attr,
                                  first_block_attr,
                                  list(output_attrs),
                                  list(output_attrs),
                                  verbose=False)

    self.l_block_attr = 'model_year'
    self.r_block_attr = 'model_year'
# Example no. 2
# 0
import sys
import magellan as mg
from magellan.gui.debug_gui_base import vis_debug_dt

# NOTE(review): this path is appended after ``magellan`` has already been
# imported above, so it cannot influence that import -- confirm whether it
# is still needed (it is also specific to one developer's machine).
sys.path.append('/Users/Pradap/Documents/Research/Python-Package/enrique/')
mg.init_jvm()

# Load the two input tables and block them on the 'zipcode' attribute.
# NOTE(review): C is not used again in this script.
A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')
ab = mg.AttrEquivalenceBlocker()
C = ab.block_tables(A, B, 'zipcode', 'zipcode', ['name'], ['name'])

# Read the labeled pairs, build feature vectors for them, and split the
# result; S is indexed by 'train' and 'test' below.
L = mg.read_csv('label_demo.csv', ltable=A, rtable=B)
feat_table = mg.get_features_for_matching(A, B)
G = mg.extract_feature_vecs(L, feature_table=feat_table, attrs_after='gold')
S = mg.train_test_split(G, 8, 7)

# Fit a decision-tree matcher on the training part, then append its
# predictions to the test part as the 'predicted' column.
dt = mg.DTMatcher(name='DecisionTree')
dt.fit(table=S['train'],
       exclude_attrs=['_id', 'ltable.ID', 'rtable.ID', 'gold'],
       target_attr='gold')
dt.predict(table=S['test'],
           exclude_attrs=['_id', 'ltable.ID', 'rtable.ID', 'gold'],
           target_attr='predicted',
           append=True)
d = mg.eval_matches(S['test'], 'gold', 'predicted')

# Open the decision-tree debugging GUI on the evaluation result.
vis_debug_dt(dt,
             d,
             S['test'],
             exclude_attrs=['_id', 'ltable.ID', 'rtable.ID', 'gold'],
             feat_table=feat_table)
# Parenthesised form prints the same text on Python 2 and is valid Python 3;
# the bare ``print "Hi"`` statement is a SyntaxError on Python 3.
print("Hi")
# Example no. 3
# 0
 def setUp(self):
     """Load tables A and B, set their keys, and create the blocker.

     Stores the tables on ``self.A`` / ``self.B`` and an
     ``AttrEquivalenceBlocker`` instance on ``self.ab``.

     NOTE(review): ``path_for_A``, ``path_for_B``, ``l_key`` and ``r_key``
     are not defined in this method; presumably module-level names --
     confirm against the rest of the file.
     """
     # Each table is read first and then has its key set via mg.set_key.
     self.A = mg.read_csv_metadata(path_for_A)
     mg.set_key(self.A, l_key)
     self.B = mg.read_csv_metadata(path_for_B)
     mg.set_key(self.B, r_key)
     self.ab = mg.AttrEquivalenceBlocker()