예제 #1
0
# End-to-end example: block two tables, load labelled pairs, build feature
# vectors, then cross-validate a linear-regression matcher.

# Load the two input tables.
A = mg.load_dataset('table_A')
B = mg.load_dataset('table_B')

# Build the candidate set C from A and B with an attribute-equivalence
# blocker keyed on 'zipcode' for both tables.
# NOTE(review): the two ['name'] lists are presumably the left/right
# attributes to carry into C -- confirm against block_tables.
ab = mg.AttrEquivalenceBlocker()
C = ab.block_tables(A, B, 'zipcode', 'zipcode', ['name'], ['name'])

# NOTE(review): presumably required by the feature-generation steps below;
# confirm whether it must run before blocking as well.
mg.init_jvm()

# One-off labelling step, kept for reference: it appears to be what produced
# the 'label.csv' file read below.
#L = mg.label_table(C, 'gold')
#L.to_csv('label.csv')
L = mg.read_csv('label.csv', ltable=A, rtable=B)

# Turn the labelled pairs into feature vectors, with the 'gold' column passed
# as attrs_after.
feature_table = mg.get_features_for_matching(A, B)
G = mg.extract_feature_vecs(L, feature_table=feature_table, attrs_after='gold')

m = mg.LinRegMatcher()

# cv_matcher_and_trigger is not looked up on `mg`; it is assumed to be defined
# or imported elsewhere in this file. The second positional argument is None.
# The id columns and the label itself are listed in exclude_attrs.
t = cv_matcher_and_trigger(
    m,
    None,
    table=G,
    exclude_attrs=['_id', 'ltable.ID', 'rtable.ID', 'gold'],
    target_attr='gold',
    k=5,
    metric='precision',
    random_state=0)

# Same table and settings, this time through mg.select_matcher with the
# 'f1' metric and a single candidate matcher.
res = mg.select_matcher(
    [m],
    table=G,
    exclude_attrs=['_id', 'ltable.ID', 'rtable.ID', 'gold'],
    target_attr='gold',
    k=5,
    metric='f1',
    random_state=0)

# Call form of print: identical output on Python 2 for a single argument,
# and valid syntax on Python 3 (the statement form is not).
print(res['cv_stats'])
예제 #2
0
파일: workflow.py 프로젝트: Yashg19/enrique
# Bare expression left over from the notebook export (see the "In[..]"
# markers): it only shows something when run as the last line of a cell.
s_prime


# In[35]:

# Fitting/Predicting
# Candidate learners that will be compared against each other.
nb = mg.NBMatcher()  # naive Bayes
dt = mg.DTMatcher()  # decision tree
rf = mg.RFMatcher()  # random forest


# In[36]:

# Choose among the candidates with cross validation (k=5).
# Training columns are the ones named in feat_table['feature_name'];
# the label column is 'gold_label'.
feature_cols = list(feat_table['feature_name'])
m = mg.select_matcher(
    [nb, dt, rf],
    x=s_prime[feature_cols],
    y=s_prime['gold_label'],
    k=5)


# In[37]:

# Show the selection result and its statistics (notebook cell output).
m


# In[38]:

# Alternative to a single matcher: select an ensemble of the candidates.
# NOTE(review): 'majority' is presumably the name of a voting scheme --
# confirm against selector_matcher_combiner.
# The column list is rebuilt here so this cell does not rely on cell 36.
feature_cols = list(feat_table['feature_name'])
mc, stats = mg.selector_matcher_combiner(
    [nb, dt, rf],
    ['majority'],
    x=s_prime[feature_cols],
    y=s_prime['gold_label'],
    k=5)


# In[39]:
예제 #3
0
# Show the feature vector table. As a bare expression this only produces
# output when it is the last line of a notebook cell.
s_prime

# In[35]:

# Fitting/Predicting
# Instantiate one matcher of each kind to compare.
nb = mg.NBMatcher()  # naive Bayes
dt = mg.DTMatcher()  # decision tree
rf = mg.RFMatcher()  # random forest

# In[36]:

# Pick a matcher by cross validation with k=5.
# `features` holds the s_prime columns named in feat_table['feature_name'];
# `labels` holds the 'gold_label' column.
features = s_prime[list(feat_table['feature_name'])]
labels = s_prime['gold_label']
m = mg.select_matcher([nb, dt, rf], x=features, y=labels, k=5)

# In[37]:

# Inspect what was selected together with its stats.
m

# In[38]:

# Rather than one matcher, select an ensemble of matchers.
# The inputs are rebuilt so this call receives fresh objects, exactly as
# when the expressions were written inline.
# NOTE(review): 'majority' is presumably a combiner/voting name -- confirm.
features = s_prime[list(feat_table['feature_name'])]
labels = s_prime['gold_label']
mc, stats = mg.selector_matcher_combiner(
    [nb, dt, rf], ['majority'], x=features, y=labels, k=5)