def _debug_decisiontree_matcher(dt, t1, t2, feat_table, fv_columns, exclude_attrs,
                                ensemble_flag=False):
    """Run the generated debug function of a decision tree on one tuple pair.

    The tree is turned into source text by ``get_code``, wrapped by
    ``get_dbg_fn``, executed in a namespace seeded with the feature values
    computed for ``(t1, t2)``, and the resulting ``debug_fn`` is called.

    Args:
        dt: A ``DTMatcher`` (its ``clf`` attribute is used) or a classifier
            object used directly.
        t1: First tuple, forwarded to ``apply_feat_fns``.
        t2: Second tuple, forwarded to ``apply_feat_fns``.
        feat_table: Feature table, forwarded to ``apply_feat_fns``.
        fv_columns: Feature-vector column names. It is indexed with a boolean
            list when ``exclude_attrs`` is given, so it presumably is a
            pandas Index or numpy array -- confirm against the callers.
        exclude_attrs: Column names to leave out, or ``None`` to keep all.
        ensemble_flag: When exactly ``True``, print the class probabilities
            (indented by four spaces) instead of the match status.

    Returns:
        The probability pair from ``get_prob`` when ``ensemble_flag`` is
        ``True``; otherwise ``None`` (the match status is only printed).
    """
    classifier = dt.clf if isinstance(dt, DTMatcher) else dt

    feature_names = fv_columns
    if exclude_attrs is not None:
        keep_mask = [col not in exclude_attrs for col in fv_columns]
        feature_names = fv_columns[keep_mask]

    tree_code = get_code(classifier, feature_names, ['False', 'True'])
    # The feature values become the globals visible to the generated code.
    namespace = dict(apply_feat_fns(t1, t2, feat_table))
    six.exec_(get_dbg_fn(tree_code), namespace)
    match_status = namespace['debug_fn']()

    if ensemble_flag is not True:
        print("Match status : " + str(match_status))
        return None

    indent = "    "
    probs = get_prob(classifier, t1, t2, feat_table, feature_names)
    print(indent + "Prob. for non-match : " + str(probs[0]))
    print(indent + "Prob for match : " + str(probs[1]))
    return probs
def get_prob(clf, t1, t2, feat_table, feature_names):
    """Return the class probabilities ``clf`` predicts for one tuple pair.

    Args:
        clf: Classifier exposing ``predict_proba``.
        t1: First tuple, forwarded to ``apply_feat_fns``.
        t2: Second tuple, forwarded to ``apply_feat_fns``.
        feat_table: Feature table, forwarded to ``apply_feat_fns``.
        feature_names: Labels used to select (and order) the feature values
            handed to the classifier.

    Returns:
        The first row of ``clf.predict_proba`` for the single-sample input.
    """
    feature_series = pd.Series(apply_feat_fns(t1, t2, feat_table))
    # One sample: shape the selected values as a (1, n_features) matrix.
    sample = feature_series[feature_names].values.reshape(1, -1)
    return clf.predict_proba(sample)[0]
Esempio n. 3
0
def get_prob(clf, t1, t2, feat_table, feature_names):
    """Return the class probabilities ``clf`` predicts for one tuple pair.

    Args:
        clf: Classifier exposing ``predict_proba``.
        t1: First tuple, forwarded to ``apply_feat_fns``.
        t2: Second tuple, forwarded to ``apply_feat_fns``.
        feat_table: Feature table, forwarded to ``apply_feat_fns``.
        feature_names: Labels used to select (and order) the feature values
            handed to the classifier.

    Returns:
        The first row of ``clf.predict_proba`` for the single-sample input.
    """
    feat_values = apply_feat_fns(t1, t2, feat_table)
    feat_values = pd.Series(feat_values)
    feat_values = feat_values[feature_names]
    # scikit-learn estimators expect a 2-D (n_samples, n_features) array;
    # a bare 1-D vector is rejected by recent releases, so present the
    # single sample explicitly as one row.
    v = feat_values.values.reshape(1, -1)
    if mg._impute_flag == True:
        # NOTE(review): the imputer is fit on this single row only, so a NaN
        # feature has no other values to take a median from -- confirm the
        # transformed row still has the feature count clf expects.
        imp = Imputer(missing_values='NaN', strategy='median', axis=0)
        imp.fit(v)
        v = imp.transform(v)
    p = clf.predict_proba(v)
    return p[0]
def get_prob(clf, t1, t2, feat_table, feature_names):
    """Return the class probabilities ``clf`` predicts for one tuple pair.

    Args:
        clf: Classifier exposing ``predict_proba``.
        t1: First tuple, forwarded to ``apply_feat_fns``.
        t2: Second tuple, forwarded to ``apply_feat_fns``.
        feat_table: Feature table, forwarded to ``apply_feat_fns``.
        feature_names: Labels used to select (and order) the feature values
            handed to the classifier.

    Returns:
        The first row of ``clf.predict_proba`` for the single-sample input.
    """
    feat_values = apply_feat_fns(t1, t2, feat_table)
    feat_values = pd.Series(feat_values)
    feat_values = feat_values[feature_names]
    # scikit-learn estimators expect a 2-D (n_samples, n_features) array;
    # a bare 1-D vector is rejected by recent releases, so present the
    # single sample explicitly as one row.
    v = feat_values.values.reshape(1, -1)
    if mg._impute_flag == True:
        # NOTE(review): the imputer is fit on this single row only, so a NaN
        # feature has no other values to take a median from -- confirm the
        # transformed row still has the feature count clf expects.
        imp = Imputer(missing_values='NaN', strategy='median', axis=0)
        imp.fit(v)
        v = imp.transform(v)
    p = clf.predict_proba(v)
    return p[0]
Esempio n. 5
0
def debug_decisiontree_matcher(dt,
                               t1,
                               t2,
                               feat_table,
                               fv_columns,
                               exclude_attrs,
                               ensemble_flag=False):
    """Run the generated debug function of a decision tree on one tuple pair.

    The tree is turned into source text by ``get_code``, wrapped by
    ``get_dbg_fn``, executed in a namespace seeded with the feature values
    computed for ``(t1, t2)``, and the resulting ``debug_fn`` is called.

    Args:
        dt: A ``DTMatcher`` (its ``clf`` attribute is used) or a classifier
            object used directly.
        t1: First tuple, forwarded to ``apply_feat_fns``.
        t2: Second tuple, forwarded to ``apply_feat_fns``.
        feat_table: Feature table, forwarded to ``apply_feat_fns``.
        fv_columns: Feature-vector column names. It is indexed with a boolean
            list when ``exclude_attrs`` is given, so it presumably is a
            pandas Index or numpy array -- confirm against the callers.
        exclude_attrs: Column names to leave out, or ``None`` to keep all.
        ensemble_flag: When exactly ``True``, print the class probabilities
            (indented by four spaces) instead of the match status.

    Returns:
        The probability pair from ``get_prob`` when ``ensemble_flag`` is
        ``True``; otherwise ``None`` (the match status is only printed).
    """
    if isinstance(dt, DTMatcher):
        clf = dt.clf
    else:
        clf = dt

    if exclude_attrs is None:
        feature_names = fv_columns
    else:
        cols = [c not in exclude_attrs for c in fv_columns]
        feature_names = fv_columns[cols]

    code = get_code(clf, feature_names, ['False', 'True'])
    feat_vals = apply_feat_fns(t1, t2, feat_table)
    code = get_dbg_fn(code)
    # The feature values become the globals visible to the generated code.
    d = {}
    d.update(feat_vals)
    # Call form of exec: valid on Python 3 and treated by Python 2 exactly
    # like the statement form ``exec code in d``.
    exec(code, d)
    ret_val = d['debug_fn']()

    if ensemble_flag is True:
        spacer = "    "
        p = get_prob(clf, t1, t2, feat_table, feature_names)
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print(spacer + "Prob. for non-match : " + str(p[0]))
        print(spacer + "Prob for match : " + str(p[1]))
        return p

    print("Match status : " + str(ret_val))
def get_feature_vector(t1, t2, feat_table):
    """Return whatever ``apply_feat_fns`` computes for the pair ``(t1, t2)``.

    Args:
        t1: First tuple, forwarded unchanged.
        t2: Second tuple, forwarded unchanged.
        feat_table: Feature table, forwarded unchanged.
    """
    return apply_feat_fns(t1, t2, feat_table)
Esempio n. 7
0
def get_feature_vector(t1, t2, feat_table):
    """Return whatever ``apply_feat_fns`` computes for the pair ``(t1, t2)``.

    Args:
        t1: First tuple, forwarded unchanged.
        t2: Second tuple, forwarded unchanged.
        feat_table: Feature table, forwarded unchanged.
    """
    return apply_feat_fns(t1, t2, feat_table)