from magellan.matcherselection._mlmatcherselection import select_matcher_test
from magellan.matcherselection.mlmatcherselection import select_matcher

import magellan as mg
# Must run before the other magellan calls below (presumably starts the
# JVM that magellan relies on -- confirm against the magellan docs).
mg.init_jvm()

# Name of the label column; it is reused for labelling, feature-vector
# extraction and as the target attribute of the matcher test.
label_attr = 'gold'

# Load the two input tables.
A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')

# Build the candidate set with an attribute-equivalence blocker on
# 'zipcode'; the two trailing lists are passed positionally (presumably the
# left/right output attributes -- verify against block_tables' signature).
ab = mg.AttrEquivalenceBlocker()
C = ab.block_tables(A, B, 'zipcode', 'zipcode', ['name'], ['name'])

# Alternative kept from the original script: read previously saved labels
# instead of calling mg.label_table.
# L = mg.read_csv('../../magellan/testcases/debug-tests/label_ab_correct_books.csv', ltable=A, rtable=B)
L = mg.label_table(C, label_attr)
# Persist the labelled table to the file 'mur_labels'.
L.to_csv('mur_labels')

# Generate the matching features for A/B and turn the labelled pairs into
# feature vectors, carrying the label column along via attrs_after.
F = mg.get_features_for_matching(A, B)
G = mg.extract_feature_vecs(L, feature_table=F, attrs_after=label_attr)

dt = mg.DTMatcher()

# Columns handed to exclude_attrs: the id columns plus the label itself.
non_feature_attrs = ['_id', 'ltable.ID', 'rtable.ID', label_attr]
select_matcher_test(
    dt,
    table=G,
    exclude_attrs=non_feature_attrs,
    target_attr=label_attr,
    random_state=0
)


Exemple #2
0
# Fragment of a notebook export: C, D, A and B are not defined in this
# excerpt and must come from earlier cells.
# Merge the two blocker outputs C and D into one candidate set F.
F = mg.combine_block_outputs_via_union([C, D])

# In[20]:

# display F
# (bare expression: it only renders in an interactive notebook/REPL and has
# no visible effect when this file is run as a plain script)
F

# In[21]:

# sample candidate set F
# NOTE(review): 13 is presumably the sample size -- confirm against
# mg.sample_table's signature.
S = mg.sample_table(F, 13)

# In[22]:

# label candidate set and name the label column as gold_label
L = mg.label_table(S, 'gold_label')

# In[ ]:
# (empty cell marker left over from the notebook export)

# In[24]:

# get features automatically (internally it computes types, attr_corres, sim functions, tokenizers )
# Note: these are the *blocking* features for A/B; the labelled table L
# above is not used by this call.
feat_table = mg.get_features_for_blocking(A, B)

# In[25]:

# display feature table
# (bare expression, same caveat as the display of F above)
feat_table

# In[26]:
Exemple #3
0
# Fragment of a notebook export: F, A and B are not defined in this excerpt
# and must come from earlier cells.
# In[20]:

# display F
# (bare expression: it only renders in an interactive notebook/REPL and has
# no visible effect when this file is run as a plain script)
F


# In[21]:

# sample candidate set F
# NOTE(review): 13 is presumably the sample size -- confirm against
# mg.sample_table's signature.
S = mg.sample_table(F, 13)


# In[22]:

# label candidate set and name the label column as gold_label
L = mg.label_table(S, 'gold_label')


# In[ ]:
# (empty cell marker left over from the notebook export)




# In[24]:

# get features automatically (internally it computes types, attr_corres, sim functions, tokenizers )
# Note: these are the *blocking* features for A/B; the labelled table L
# above is not used by this call.
feat_table = mg.get_features_for_blocking(A, B)


# In[25]:
Exemple #4
0
import magellan as mg
import pandas as pd
import os
from PyQt4 import QtCore
# Directory with the test CSVs, located under magellan's install path.
# os.path.join inserts separators only where needed, so an install path
# that already ends with a separator does not produce a doubled one.
datasets_path = os.path.join(mg.get_install_path(), 'datasets', 'test_datasets')


path_a = os.path.join(datasets_path, 'A.csv')
path_b = os.path.join(datasets_path, 'B.csv')
path_c = os.path.join(datasets_path, 'C.csv')

# Read the two base tables and the candidate set that refers to them.
# NOTE(review): A is read without an explicit key while B passes key='ID';
# presumably A's key comes from a metadata file next to A.csv -- confirm.
A = mg.read_csv_metadata(path_a)
B = mg.read_csv_metadata(path_b, key='ID')
C = mg.read_csv_metadata(path_c, ltable=A, rtable=B)

# Label the candidate pairs; the label column is named 'label'.
D = mg.label_table(C, 'label')

print(D)
# timer = QtCore.QTimer()
# timer.setInterval(2000) # 2 seconds
# mg._viewapp.loadFinished.connect(timer.start)
# timer.timeout.connect(mg._viewapp.quit)