def _debug_decisiontree_matcher(dt, t1, t2, feat_table, fv_columns, exclude_attrs, ensemble_flag=False):
    """Run the generated debug function for a decision tree on one (t1, t2) pair.

    Args:
        dt: Either a ``DTMatcher`` (its ``clf`` attribute is used) or the
            classifier object itself.
        t1, t2: The two inputs forwarded to ``apply_feat_fns``.
        feat_table: Feature table forwarded to ``apply_feat_fns``.
        fv_columns: Feature-vector column names. When ``exclude_attrs`` is
            given, this is indexed with a boolean list, so it presumably is a
            pandas Index / numpy array -- TODO confirm against callers.
        exclude_attrs: Column names to leave out, or ``None`` to keep all.
        ensemble_flag: When exactly ``True``, print and return the class
            probabilities instead of the match status.

    Returns:
        The value returned by ``get_prob`` when ``ensemble_flag`` is ``True``;
        otherwise ``None`` (the match status is only printed).
    """
    classifier = dt.clf if isinstance(dt, DTMatcher) else dt

    feature_names = fv_columns
    if exclude_attrs is not None:
        keep_mask = [column not in exclude_attrs for column in fv_columns]
        feature_names = fv_columns[keep_mask]

    tree_source = get_code(classifier, feature_names, ['False', 'True'])
    feature_values = apply_feat_fns(t1, t2, feat_table)
    debug_source = get_dbg_fn(tree_source)

    # The feature values become the globals of the generated code, which is
    # expected to define a callable named ``debug_fn``.
    namespace = dict(feature_values)
    six.exec_(debug_source, namespace)
    match_status = namespace['debug_fn']()

    if ensemble_flag is True:
        indent = " "
        probabilities = get_prob(classifier, t1, t2, feat_table, feature_names)
        print(indent + "Prob. for non-match : " + str(probabilities[0]))
        print(indent + "Prob for match : " + str(probabilities[1]))
        return probabilities

    print("Match status : " + str(match_status))
def get_prob(clf, t1, t2, feat_table, feature_names):
    """Return the class probabilities ``clf`` predicts for the pair (t1, t2).

    The feature values from ``apply_feat_fns`` are wrapped in a pandas Series,
    restricted to ``feature_names`` and passed to ``clf.predict_proba`` as a
    single-row 2-D array. The first (only) row of the result is returned.
    """
    all_features = pd.Series(apply_feat_fns(t1, t2, feat_table))
    selected = all_features[feature_names]
    # One sample, as many columns as selected features.
    sample = selected.values.reshape(1, -1)
    return clf.predict_proba(sample)[0]
def get_prob(clf, t1, t2, feat_table, feature_names):
    """Return the class probabilities ``clf`` predicts for the pair (t1, t2).

    The feature values from ``apply_feat_fns`` are wrapped in a pandas Series
    and restricted to ``feature_names``. When ``mg._impute_flag`` equals
    ``True`` the values are first passed through a median ``Imputer``.

    Returns:
        The first (only) row of ``clf.predict_proba`` for this sample.
    """
    feat_values = pd.Series(apply_feat_fns(t1, t2, feat_table))
    feat_values = feat_values[feature_names]

    # scikit-learn estimators expect a 2-D (n_samples, n_features) array;
    # present the single feature vector as one row instead of a 1-D array.
    v = feat_values.values.reshape(1, -1)

    if mg._impute_flag == True:  # noqa: E712 -- keep the original equality test
        # NOTE(review): the imputer is fitted on this single row only, so a
        # NaN feature has no other values to take a median from -- confirm
        # this is the intended behaviour.
        imp = Imputer(missing_values='NaN', strategy='median', axis=0)
        imp.fit(v)
        v = imp.transform(v)

    p = clf.predict_proba(v)
    return p[0]
def get_prob(clf, t1, t2, feat_table, feature_names):
    """Return the class probabilities ``clf`` predicts for the pair (t1, t2).

    The feature values from ``apply_feat_fns`` are wrapped in a pandas Series
    and restricted to ``feature_names``. When ``mg._impute_flag`` equals
    ``True`` the values are first passed through a median ``Imputer``.

    Returns:
        The first (only) row of ``clf.predict_proba`` for this sample.
    """
    feat_values = pd.Series(apply_feat_fns(t1, t2, feat_table))
    feat_values = feat_values[feature_names]

    # scikit-learn estimators expect a 2-D (n_samples, n_features) array;
    # present the single feature vector as one row instead of a 1-D array.
    v = feat_values.values.reshape(1, -1)

    if mg._impute_flag == True:  # noqa: E712 -- keep the original equality test
        # NOTE(review): the imputer is fitted on this single row only, so a
        # NaN feature has no other values to take a median from -- confirm
        # this is the intended behaviour.
        imp = Imputer(missing_values='NaN', strategy='median', axis=0)
        imp.fit(v)
        v = imp.transform(v)

    p = clf.predict_proba(v)
    return p[0]
def debug_decisiontree_matcher(dt, t1, t2, feat_table, fv_columns, exclude_attrs, ensemble_flag=False):
    """Run the generated debug function for a decision tree on one (t1, t2) pair.

    Args:
        dt: Either a ``DTMatcher`` (its ``clf`` attribute is used) or the
            classifier object itself.
        t1, t2: The two inputs forwarded to ``apply_feat_fns``.
        feat_table: Feature table forwarded to ``apply_feat_fns``.
        fv_columns: Feature-vector column names. When ``exclude_attrs`` is
            given, this is indexed with a boolean list, so it presumably is a
            pandas Index / numpy array -- TODO confirm against callers.
        exclude_attrs: Column names to leave out, or ``None`` to keep all.
        ensemble_flag: When exactly ``True``, print and return the class
            probabilities instead of the match status.

    Returns:
        The value returned by ``get_prob`` when ``ensemble_flag`` is ``True``;
        otherwise ``None`` (the match status is only printed).
    """
    if isinstance(dt, DTMatcher):
        clf = dt.clf
    else:
        clf = dt

    if exclude_attrs is None:
        feature_names = fv_columns
    else:
        cols = [c not in exclude_attrs for c in fv_columns]
        feature_names = fv_columns[cols]

    code = get_code(clf, feature_names, ['False', 'True'])
    feat_vals = apply_feat_fns(t1, t2, feat_table)
    code = get_dbg_fn(code)

    # The feature values become the globals of the generated code, which is
    # expected to define a callable named ``debug_fn``.
    d = {}
    d.update(feat_vals)
    # ``exec(code, d)`` is valid on Python 2 (tuple form of the exec
    # statement) and Python 3, unlike ``exec code in d``.
    # NOTE(review): this executes generated source; it must only ever be
    # built from a trusted classifier.
    exec(code, d)
    ret_val = d['debug_fn']()

    if ensemble_flag is True:
        spacer = " "
        p = get_prob(clf, t1, t2, feat_table, feature_names)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(spacer + "Prob. for non-match : " + str(p[0]))
        print(spacer + "Prob for match : " + str(p[1]))
        return p

    print("Match status : " + str(ret_val))
def get_feature_vector(t1, t2, feat_table):
    """Return whatever ``apply_feat_fns`` computes for ``t1``, ``t2`` and ``feat_table``."""
    return apply_feat_fns(t1, t2, feat_table)
def get_feature_vector(t1, t2, feat_table):
    """Return whatever ``apply_feat_fns`` computes for ``t1``, ``t2`` and ``feat_table``."""
    return apply_feat_fns(t1, t2, feat_table)