# Debug script: block two tables on zipcode, label the candidate set, build
# feature vectors, and run select_matcher_test with a decision-tree matcher.
from magellan.matcherselection._mlmatcherselection import select_matcher_test
from magellan.matcherselection.mlmatcherselection import select_matcher
import magellan as mg

mg.init_jvm()

# Input tables.
A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')

# Candidate set: attribute-equivalence blocking on 'zipcode' in both tables.
# The two ['name'] lists are the 5th and 6th positional arguments
# (presumably the output attributes taken from A and B -- TODO confirm).
ab = mg.AttrEquivalenceBlocker()
C = ab.block_tables(A, B, 'zipcode', 'zipcode', ['name'], ['name'])

# Alternative kept for reference: read labels from an existing CSV instead.
# L = mg.read_csv('../../magellan/testcases/debug-tests/label_ab_correct_books.csv', ltable=A, rtable=B)
L = mg.label_table(C, 'gold')
L.to_csv('mur_labels')

# Feature vectors for the labeled pairs; 'gold' is passed as attrs_after.
F = mg.get_features_for_matching(A, B)
G = mg.extract_feature_vecs(L, feature_table=F, attrs_after='gold')

# Run the matcher-selection test helper on the feature-vector table.
# The id columns and the 'gold' column are passed as exclude_attrs;
# 'gold' is the target attribute.
dt = mg.DTMatcher()
select_matcher_test(
    dt,
    table=G,
    exclude_attrs=['_id', 'ltable.ID', 'rtable.ID', 'gold'],
    target_attr='gold',
    random_state=0,
)
# Notebook-export fragment (cells In[20]-In[26]).
# NOTE(review): mg, A, B, C and D are not defined in this fragment; they are
# expected to be defined before this point -- confirm.
F = mg.combine_block_outputs_via_union([C, D])


# In[20]:

# display F
# (a bare name only displays in a notebook cell; in a plain script it is a no-op)
F


# In[21]:

# sample candidate set F
S = mg.sample_table(F, 13)


# In[22]:

# label candidate set and name the label column as gold_label
L = mg.label_table(S, 'gold_label')


# In[ ]:


# In[24]:

# get features automatically (internally it computes types, attr_corres, sim functions, tokenizers)
feat_table = mg.get_features_for_blocking(A, B)


# In[25]:

# display feature table
feat_table


# In[26]:
# NOTE(review): this span is a single comment and therefore inert. Its text
# repeats notebook cells In[20]-In[25] that already appear on the preceding
# line (sample F, label the sample, get blocking features). It looks like a
# duplicated paste -- confirm and delete. It is kept commented out, one
# statement per line, so sampling and labeling are not run a second time.
#
# In[20]:
# display F
# F
#
# In[21]:
# sample candidate set F
# S = mg.sample_table(F, 13)
#
# In[22]:
# label candidate set and name the label column as gold_label
# L = mg.label_table(S, 'gold_label')
#
# In[ ]:
#
# In[24]:
# get features automatically (internally it computes types, attr_corres, sim functions, tokenizers)
# feat_table = mg.get_features_for_blocking(A, B)
#
# In[25]:
# Smoke script: read the bundled test tables A and B and the candidate set C,
# run mg.label_table on C, and print the result.
import magellan as mg
import pandas as pd
import os
from PyQt4 import QtCore  # referenced only by the commented-out timer code below

# The test CSVs live under <install path>/datasets/test_datasets.
datasets_path = os.path.join(mg.get_install_path(), 'datasets', 'test_datasets')
path_a = os.path.join(datasets_path, 'A.csv')
path_b = os.path.join(datasets_path, 'B.csv')
path_c = os.path.join(datasets_path, 'C.csv')

# NOTE(review): A is read without key= while B passes key='ID'. Presumably
# A's key comes from accompanying metadata -- confirm, or pass key='ID' here too.
A = mg.read_csv_metadata(path_a)
B = mg.read_csv_metadata(path_b, key='ID')
# C is read with A and B as its left and right tables.
C = mg.read_csv_metadata(path_c, ltable=A, rtable=B)

# Label C, passing 'label' as the second argument, and print the result.
D = mg.label_table(C, 'label')
print(D)

# Disabled: timer wiring that would call mg._viewapp.quit on a 2-second
# interval once loadFinished fires. Kept for reference.
# timer = QtCore.QTimer()
# timer.setInterval(2000) # 2 seconds
# mg._viewapp.loadFinished.connect(timer.start)
# timer.timeout.connect(mg._viewapp.quit)