def vis_debug_rf(matcher, train, test, exclude_attrs, target_attr):
    """
    Visual debugger for random forest matcher

    Fits ``matcher`` on ``train``, predicts on ``test``, evaluates the
    predictions against ``target_attr`` and passes the false positives
    and false negatives to a ``MainWindowManager`` that is then shown.

    Parameters
    ----------
    matcher : object, RFMatcher object
    train : MTable, containing training data with "True" labels
    test : MTable, containing test data with "True" labels. The "True"
        labels are used for evaluation.
    exclude_attrs : List, attributes to be excluded from train and test,
        for training and testing.
    target_attr : String, column name containing the "True" labels (used
        for fitting on train and for evaluating the predictions on test)

    Raises
    ------
    AssertionError
        If train and test do not have the same set of columns, or if
        some of exclude_attrs are missing from those columns.
    """
    # Validate with explicit raises instead of ``assert`` statements so the
    # checks are still performed when Python runs with -O.
    train_columns = set(train.columns)
    test_columns = set(test.columns)
    if test_columns != train_columns:
        raise AssertionError("The train and test columns are not same")

    excluded = set(exclude_attrs)
    if not excluded.issubset(train_columns):
        raise AssertionError(
            "Some of exclude attrs are not part of train columns")
    # Redundant once both column sets are known to be equal; kept as a guard.
    if not excluded.issubset(test_columns):
        raise AssertionError(
            "Some of exclude attrs are not part of test columns")

    # fit using training data
    matcher.fit(table=train, exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # predict on the test data into a column name derived from test.columns
    predict_attr_name = get_name_for_predict_column(test.columns)
    predicted = matcher.predict(table=test, exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name,
                                append=True, inplace=False)

    # compare predictions with the "True" labels
    eval_summary = mg.eval_matches(predicted, target_attr, predict_attr_name)
    metric = get_metric(eval_summary)
    fp_dataframe = get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = get_dataframe(predicted, eval_summary['false_neg_ls'])

    # show the debugger window
    app = mg._viewapp
    m = MainWindowManager(matcher, "rf", exclude_attrs, metric, predicted,
                          fp_dataframe, fn_dataframe)
    m.show()
    app.exec_()
def _vis_debug_dt(matcher, train, test, exclude_attrs, target_attr,
                  show_window=True):
    """
    Validate the inputs and run the visual debugger for a decision tree
    matcher.

    NOTE(review): an identical ``_vis_debug_dt`` is defined again later in
    this file and shadows this one; consider removing one of the two.

    Parameters
    ----------
    matcher : object, must be an instance of DTMatcher
    train : table used to fit the matcher
    test : table used for prediction and evaluation
    exclude_attrs : list (or a single value, which is wrapped in a list),
        attributes to be excluded from train and test. Duplicates are
        dropped and target_attr is added if it is not already present.
    target_attr : String, column name containing the "True" labels
    show_window : when truthy (default), the debugger window is shown and
        ``exec_()`` is called on ``mg._viewapp``; otherwise the window
        manager is only constructed.

    Raises
    ------
    AssertionError
        If matcher is not a DTMatcher, target_attr is not a string, or
        exclude_attrs / target_attr are not present in the given tables.
    """
    if not isinstance(matcher, DTMatcher):
        logger.error('Input matcher is not of type Decision Tree matcher')
        raise AssertionError(
            'Input matcher is not of type Decision Tree matcher')

    if not isinstance(target_attr, six.string_types):
        logger.error('Target attribute is not of type string')
        raise AssertionError('Target attribute is not of type string')

    if not check_attrs_present(train, exclude_attrs):
        logger.error('The exclude attrs are not in train table columns')
        raise AssertionError(
            'The exclude attrs are not in the train table columns')

    if not check_attrs_present(train, target_attr):
        logger.error('The target attr is not in train table columns')
        raise AssertionError(
            'The target attr is not in the train table columns')

    if not check_attrs_present(test, exclude_attrs):
        logger.error('The exclude attrs are not in test table columns')
        raise AssertionError(
            'The exclude attrs are not in the test table columns')

    # normalize exclude_attrs to a duplicate-free list containing target_attr
    if not isinstance(exclude_attrs, list):
        exclude_attrs = [exclude_attrs]
    exclude_attrs = list_drop_duplicates(exclude_attrs)
    if target_attr not in exclude_attrs:
        exclude_attrs.append(target_attr)

    # fit using training data
    matcher.fit(table=train, exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # predict using the test data
    predict_attr_name = get_name_for_predict_column(test.columns)
    predicted = matcher.predict(table=test, exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name,
                                append=True, inplace=False)

    # compare predictions with the "True" labels
    eval_summary = mg.eval_matches(predicted, target_attr, predict_attr_name)
    metric = get_metric(eval_summary)
    fp_dataframe = get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = get_dataframe(predicted, eval_summary['false_neg_ls'])

    app = mg._viewapp
    m = MainWindowManager(matcher, "dt", exclude_attrs, metric, predicted,
                          fp_dataframe, fn_dataframe)

    # Test truthiness rather than comparing against True with ``==``.
    if show_window:
        m.show()
        app.exec_()
def vis_debug_dt(matcher, train, test, exclude_attrs, target_attr):
    """
    Visual debugger for decision tree matcher

    Fits ``matcher`` on ``train``, predicts on ``test``, evaluates the
    predictions against ``target_attr`` and passes the false positives
    and false negatives to a ``MainWindowManager`` that is then shown.

    Parameters
    ----------
    matcher : object, DTMatcher object
    train : MTable, containing training data with "True" labels
    test : MTable, containing test data with "True" labels. The "True"
        labels are used for evaluation.
    exclude_attrs : List, attributes to be excluded from train and test,
        for training and testing.
    target_attr : String, column name containing the "True" labels (used
        for fitting on train and for evaluating the predictions on test)

    Raises
    ------
    AssertionError
        If train and test do not have the same set of columns, or if
        some of exclude_attrs are missing from those columns.
    """
    # Explicit raises (rather than ``assert``) keep the validation active
    # when Python runs with -O.
    train_columns = set(train.columns)
    test_columns = set(test.columns)
    if test_columns != train_columns:
        raise AssertionError("The train and test columns are not same")

    excluded = set(exclude_attrs)
    if not excluded.issubset(train_columns):
        raise AssertionError(
            "Some of exclude attrs are not part of train columns")
    # Redundant once both column sets are known to be equal; kept as a guard.
    if not excluded.issubset(test_columns):
        raise AssertionError(
            "Some of exclude attrs are not part of test columns")

    # fit using training data
    matcher.fit(table=train, exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # predict on the test data into a column name derived from test.columns
    predict_attr_name = get_name_for_predict_column(test.columns)
    predicted = matcher.predict(table=test, exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name,
                                append=True, inplace=False)

    # compare predictions with the "True" labels
    eval_summary = mg.eval_matches(predicted, target_attr, predict_attr_name)
    metric = get_metric(eval_summary)
    fp_dataframe = get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = get_dataframe(predicted, eval_summary['false_neg_ls'])

    # show the debugger window
    app = mg._viewapp
    m = MainWindowManager(matcher, "dt", exclude_attrs, metric, predicted,
                          fp_dataframe, fn_dataframe)
    m.show()
    app.exec_()
def vis_debug_rm(matcher, validation_set, target_attr, feature_table):
    """
    Visual debugger for boolean rule-based matcher

    Runs ``matcher.predict`` on ``validation_set``, evaluates the
    predictions against ``target_attr`` and passes the false positives
    and false negatives to a ``MainWindowManager`` that is then shown.

    Parameters
    ----------
    matcher : object, Boolean rule-based matcher object
    validation_set : MTable, used to debug
    target_attr : String, column name in validation_set containing 'True'
        labels
    feature_table : pandas dataframe, containing feature information
    """
    # name of the column that will hold the predictions
    pred_column = get_name_for_predict_column(validation_set.columns)

    # predictions are appended to a copy (append=True, inplace=False)
    labeled = matcher.predict(
        validation_set,
        pred_column,
        append=True,
        inplace=False,
    )

    # compare predictions with the labels in target_attr
    summary = mg.eval_matches(labeled, target_attr, pred_column)
    quality = get_metric(summary)
    false_positives = get_dataframe(labeled, summary['false_pos_ls'])
    false_negatives = get_dataframe(labeled, summary['false_neg_ls'])

    # build and display the debugger window
    view_app = mg._viewapp
    window = MainWindowManager(
        matcher,
        "rm",
        feature_table,
        quality,
        labeled,
        false_positives,
        false_negatives,
    )
    window.show()
    view_app.exec_()
def _vis_debug_dt(matcher, train, test, exclude_attrs, target_attr,
                  show_window=True):
    """
    Validate the inputs and run the visual debugger for a decision tree
    matcher.

    NOTE(review): an identical ``_vis_debug_dt`` is defined earlier in this
    file and is shadowed by this definition; consider removing one of the
    two.

    Parameters
    ----------
    matcher : object, must be an instance of DTMatcher
    train : table used to fit the matcher
    test : table used for prediction and evaluation
    exclude_attrs : list (or a single value, which is wrapped in a list),
        attributes to be excluded from train and test. Duplicates are
        dropped and target_attr is added if it is not already present.
    target_attr : String, column name containing the "True" labels
    show_window : when truthy (default), the debugger window is shown and
        ``exec_()`` is called on ``mg._viewapp``; otherwise the window
        manager is only constructed.

    Raises
    ------
    AssertionError
        If matcher is not a DTMatcher, target_attr is not a string, or
        exclude_attrs / target_attr are not present in the given tables.
    """
    if not isinstance(matcher, DTMatcher):
        logger.error('Input matcher is not of type Decision Tree matcher')
        raise AssertionError(
            'Input matcher is not of type Decision Tree matcher')

    if not isinstance(target_attr, six.string_types):
        logger.error('Target attribute is not of type string')
        raise AssertionError('Target attribute is not of type string')

    if not check_attrs_present(train, exclude_attrs):
        logger.error('The exclude attrs are not in train table columns')
        raise AssertionError(
            'The exclude attrs are not in the train table columns')

    if not check_attrs_present(train, target_attr):
        logger.error('The target attr is not in train table columns')
        raise AssertionError(
            'The target attr is not in the train table columns')

    if not check_attrs_present(test, exclude_attrs):
        logger.error('The exclude attrs are not in test table columns')
        raise AssertionError(
            'The exclude attrs are not in the test table columns')

    # normalize exclude_attrs to a duplicate-free list containing target_attr
    if not isinstance(exclude_attrs, list):
        exclude_attrs = [exclude_attrs]
    exclude_attrs = list_drop_duplicates(exclude_attrs)
    if target_attr not in exclude_attrs:
        exclude_attrs.append(target_attr)

    # fit using training data
    matcher.fit(table=train, exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # predict using the test data
    predict_attr_name = get_name_for_predict_column(test.columns)
    predicted = matcher.predict(table=test, exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name,
                                append=True, inplace=False)

    # compare predictions with the "True" labels
    eval_summary = mg.eval_matches(predicted, target_attr, predict_attr_name)
    metric = get_metric(eval_summary)
    fp_dataframe = get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = get_dataframe(predicted, eval_summary['false_neg_ls'])

    app = mg._viewapp
    m = MainWindowManager(matcher, "dt", exclude_attrs, metric, predicted,
                          fp_dataframe, fn_dataframe)

    # Test truthiness rather than comparing against True with ``==``.
    if show_window:
        m.show()
        app.exec_()