def print_eval_summary(eval_summary):
    """
    Prints a summary of evaluation results, one metric per line.

    Every line is written as ``<metric name> : <metric value>``.

    Args:
        eval_summary (dictionary): Dictionary containing evaluation results,
            typically from 'eval_matches' function.
    """
    # Turn the evaluation summary into displayable (name, text) pairs.
    metrics = _get_metric(eval_summary)
    for name, text in six.iteritems(metrics):
        line = name + " : " + text
        print(line)
def print_eval_summary(eval_summary):
    """
    Prints a summary of evaluation results, one metric per line.

    Every line is written as ``<metric name> : <metric value>``.

    Args:
        eval_summary (dictionary): Dictionary containing evaluation results,
            typically from 'eval_matches' function.

    Examples:
        >>> import py_entitymatching as em
        >>> # G is the labeled data used for development purposes, match_f is the feature table
        >>> H = em.extract_feat_vecs(G, feat_table=match_f, attrs_after='gold_labels')
        >>> dt = em.DTMatcher()
        >>> dt.fit(table=H, exclude_attrs=['_id', 'ltable_id', 'rtable_id', 'gold_labels'], target_attr='gold_labels')
        >>> pred_table = dt.predict(table=H, exclude_attrs=['_id', 'ltable_id', 'rtable_id', 'gold_labels'], append=True, target_attr='predicted_labels')
        >>> eval_summary = em.eval_matches(pred_table, 'gold_labels', 'predicted_labels')
        >>> em.print_eval_summary(eval_summary)
    """
    # Turn the evaluation summary into displayable (name, text) pairs.
    metrics = _get_metric(eval_summary)
    for name, text in six.iteritems(metrics):
        line = name + " : " + text
        print(line)
def _vis_debug_rf(matcher, train, test, exclude_attrs, target_attr,
                  show_window=True):
    """
    Visual-debugging wrapper for the Random Forest matcher.

    The inputs are validated, the matcher is fitted on ``train`` and used to
    predict on ``test``, the predictions are evaluated against
    ``target_attr``, and the outcome (metrics, false positives and false
    negatives) is handed to the debugging main window.

    Args:
        matcher (RFMatcher): The Random Forest matcher to debug.
        train: Table used for fitting; must contain the exclude attributes
            and the target attribute.
        test: Table used for prediction; must contain the exclude attributes.
        exclude_attrs: A list of attribute names (or a single attribute) to
            leave out of fitting and prediction.
        target_attr (string): Name of the attribute holding the labels.
        show_window (boolean): When True the window is shown and the Qt event
            loop is run; otherwise the window is only constructed.

    Raises:
        ImportError: If the GUI imports fail.
        AssertionError: If the matcher is not an RFMatcher, or a required
            attribute is missing from the train/test table.
    """
    try:
        from PyQt5 import QtWidgets
        from py_entitymatching.gui.debug_gui_base import MainWindowManager
    except ImportError:
        raise ImportError('PyQt5 is not installed. Please install PyQt5 to use '
                          'GUI related functions in py_entitymatching.')

    # ---- Input validation ----
    # The matcher has to be a Random Forest matcher.
    if not isinstance(matcher, RFMatcher):
        wrong_matcher = 'Input matcher is not of type Random Forest matcher'
        logger.error(wrong_matcher)
        raise AssertionError(wrong_matcher)

    # The target attribute has to be a string.
    validate_object_type(target_attr, six.string_types,
                         error_prefix='Target attribute')

    # Attribute-presence checks, run in order:
    # (table, attributes, message to log, message to raise).
    presence_checks = (
        (train, exclude_attrs,
         'The exclude attrs are not in train table columns',
         'The exclude attrs are not in the train table columns'),
        (train, target_attr,
         'The target attr is not in train table columns',
         'The target attr is not in the train table columns'),
        (test, exclude_attrs,
         'The exclude attrs are not in test table columns',
         'The exclude attrs are not in the test table columns'),
    )
    for table, attrs, log_message, error_message in presence_checks:
        if not check_attrs_present(table, attrs):
            logger.error(log_message)
            raise AssertionError(error_message)

    # ---- Normalise the exclude attributes ----
    # Wrap a non-list value in a list, remove duplicates, and make sure the
    # target attribute is among the excluded ones.
    excluded = (exclude_attrs if isinstance(exclude_attrs, list)
                else [exclude_attrs])
    excluded = list_drop_duplicates(excluded)
    if target_attr not in excluded:
        excluded.append(target_attr)

    # ---- Fit, predict and evaluate ----
    matcher.fit(table=train, exclude_attrs=excluded, target_attr=target_attr)

    # Column name under which the predictions are stored.
    predict_attr_name = get_name_for_predict_column(test.columns)
    predicted = matcher.predict(table=test,
                                exclude_attrs=excluded,
                                target_attr=predict_attr_name,
                                append=True,
                                inplace=False)
    eval_summary = em.eval_matches(predicted, target_attr, predict_attr_name)

    # ---- GUI ----
    # Reuse the running QApplication if there is one, else create it.
    em._viewapp = QtWidgets.QApplication.instance()
    if em._viewapp is None:
        em._viewapp = QtWidgets.QApplication([])

    # Displayable metrics plus the false positive / false negative tables.
    metric = _get_metric(eval_summary)
    fp_dataframe = _get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = _get_dataframe(predicted, eval_summary['false_neg_ls'])

    app = em._viewapp
    window = MainWindowManager(matcher, "rf", excluded, metric, predicted,
                               fp_dataframe, fn_dataframe)

    # Only display the window (and run the event loop) when asked to.
    if not show_window:
        return
    window.show()
    app.exec_()
def _vis_debug_dt(matcher, train, test, exclude_attrs, target_attr,
                  show_window=True):
    """
    Wrapper function for debugging the Decision Tree matcher visually.

    The inputs are validated, the matcher is fitted on ``train`` and used to
    predict on ``test``, the predictions are evaluated against
    ``target_attr``, and the outcome (metrics, false positives and false
    negatives) is handed to the debugging main window.

    Args:
        matcher (DTMatcher): The Decision Tree matcher to debug.
        train: Table used for fitting; must contain the exclude attributes
            and the target attribute.
        test: Table used for prediction; must contain the exclude attributes.
        exclude_attrs: A list of attribute names (or a single attribute) to
            leave out of fitting and prediction.
        target_attr (string): Name of the attribute holding the labels.
        show_window (boolean): When True the window is shown and the Qt event
            loop is run; otherwise the window is only constructed.

    Raises:
        ImportError: If the GUI imports fail.
        AssertionError: If the matcher is not a DTMatcher, or a required
            attribute is missing from the train/test table.
    """
    try:
        from PyQt5 import QtWidgets
        from py_entitymatching.gui.debug_gui_base import MainWindowManager
    except ImportError:
        raise ImportError(
            'PyQt5 is not installed. Please install PyQt5 to use '
            'GUI related functions in py_entitymatching.')

    # Validate the input parameters
    # # We expect the matcher to be of type DTMatcher
    if not isinstance(matcher, DTMatcher):
        logger.error('Input matcher is not of type Decision Tree matcher')
        raise AssertionError('Input matcher is not of type '
                             'Decision Tree matcher')

    # # We expect the target attribute to be of type string.
    validate_object_type(target_attr, six.string_types,
                         error_prefix='Target attribute')

    # # Check whether the exclude attributes are indeed present in the train
    #  DataFrame.
    if not ch.check_attrs_present(train, exclude_attrs):
        logger.error('The exclude attrs are not in train table columns')
        raise AssertionError('The exclude attrs are not in the '
                             'train table columns')

    # # Check whether the target attribute is indeed present in the train
    #  DataFrame.
    if not ch.check_attrs_present(train, target_attr):
        logger.error('The target attr is not in train table columns')
        raise AssertionError('The target attr is not in the '
                             'train table columns')

    # # Check whether the exclude attributes are indeed present in the test
    #  DataFrame.
    if not ch.check_attrs_present(test, exclude_attrs):
        logger.error('The exclude attrs are not in test table columns')
        raise AssertionError('The exclude attrs are not in the '
                             'test table columns')

    # The exclude attributes is expected to be of type list, if not
    # explicitly convert this into a list.
    if not isinstance(exclude_attrs, list):
        exclude_attrs = [exclude_attrs]

    # Drop the duplicates from the exclude attributes
    exclude_attrs = gh.list_drop_duplicates(exclude_attrs)

    # If the target attribute is not present in the exclude attributes,
    # then explicitly add it to the exclude attributes.
    if target_attr not in exclude_attrs:
        exclude_attrs.append(target_attr)

    # Now, fit using training data
    matcher.fit(table=train, exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # Get a column name to store the predictions.
    predict_attr_name = get_name_for_predict_column(test.columns)

    # Predict using the test data
    predicted = matcher.predict(table=test, exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name, append=True,
                                inplace=False)

    # Get the evaluation summary.
    eval_summary = eval_matches(predicted, target_attr, predict_attr_name)

    # Get metric in a form that can be displayed from the evaluation summary
    metric = _get_metric(eval_summary)

    # Get false negatives and false positives as a DataFrame
    fp_dataframe = _get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = _get_dataframe(predicted, eval_summary['false_neg_ls'])

    # Reuse the running QApplication if there is one, else create it.
    em._viewapp = QtWidgets.QApplication.instance()
    if em._viewapp is None:
        em._viewapp = QtWidgets.QApplication([])

    # Get the main window application
    app = em._viewapp
    m = MainWindowManager(matcher, "dt", exclude_attrs, metric, predicted,
                          fp_dataframe, fn_dataframe)

    # If the show window is true, then display the window.
    if show_window:
        m.show()
        app.exec_()