Ejemplo n.º 1
0
def print_eval_summary(eval_summary):
    """
    Prints a summary of evaluation results.

    Args:
        eval_summary (dictionary): Dictionary containing evaluation results,
            typically from 'eval_matches' function.
    """
    m = _get_metric(eval_summary)
    for key, value in six.iteritems(m):
        print(key + " : " + value)
Ejemplo n.º 2
0
def print_eval_summary(eval_summary):
    """
    Prints a summary of evaluation results.

    Args:
        eval_summary (dictionary): Dictionary containing evaluation results,
            typically from 'eval_matches' function.

    Examples:
        >>> import py_entitymatching as em
        >>> # G is the labeled data used for development purposes, match_f is the feature table
        >>> H = em.extract_feat_vecs(G, feat_table=match_f, attrs_after='gold_labels')
        >>> dt = em.DTMatcher()
        >>> dt.fit(table=H, exclude_attrs=['_id', 'ltable_id', 'rtable_id', 'gold_labels'], target_attr='gold_labels')
        >>> pred_table = dt.predict(table=H,  exclude_attrs=['_id', 'ltable_id', 'rtable_id', 'gold_labels'],  append=True, target_attr='predicted_labels')
        >>> eval_summary = em.eval_matches(pred_table, 'gold_labels', 'predicted_labels')
        >>> em.print_eval_summary(eval_summary)

    """
    m = _get_metric(eval_summary)
    for key, value in six.iteritems(m):
        print(key + " : " + value)
def _vis_debug_rf(matcher, train, test, exclude_attrs, target_attr,
                  show_window=True):
    """
    Wrapper function for debugging the Random Forest matcher visually.
    """
    try:
        from PyQt5 import QtWidgets
        from py_entitymatching.gui.debug_gui_base import MainWindowManager
    except ImportError:
        raise ImportError('PyQt5 is not installed. Please install PyQt5 to use '
                      'GUI related functions in py_entitymatching.')


    # Validate the input parameters
    # # We expect the matcher to be of type RfMatcher
    if not isinstance(matcher, RFMatcher):
        logger.error('Input matcher is not of type '
                     'Random Forest matcher')
        raise AssertionError('Input matcher is not of type '
                             'Random Forest matcher')

    # # We expect the target attribute to be of type string.
    validate_object_type(target_attr, six.string_types, error_prefix='Target attribute')

    # # Check whether the exclude attributes are indeed present in the train
    #  DataFrame.
    if not check_attrs_present(train, exclude_attrs):
        logger.error('The exclude attrs are not in train table columns')
        raise AssertionError('The exclude attrs are not in the train table columns')

    # # Check whether the target attribute is indeed present in the train
    #  DataFrame.
    if not check_attrs_present(train, target_attr):
        logger.error('The target attr is not in train table columns')
        raise AssertionError('The target attr is not in the train table columns')

    # # Check whether the exclude attributes are indeed present in the test
    #  DataFrame.
    if not check_attrs_present(test, exclude_attrs):
        logger.error('The exclude attrs are not in test table columns')
        raise AssertionError('The exclude attrs are not in the test table columns')


    # The exclude attributes is expected to be of type list, if not
    # explicitly convert this into a list.
    if not isinstance(exclude_attrs, list):
        exclude_attrs = [exclude_attrs]

    # Drop the duplicates from the exclude attributes
    exclude_attrs = list_drop_duplicates(exclude_attrs)

    # If the target attribute is not present in the exclude attributes,
    # then explicitly add it to the exclude attributes.
    if target_attr not in exclude_attrs:
        exclude_attrs.append(target_attr)

    # Now, fit using training data
    matcher.fit(table=train, exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # Get a column name to store the predictions.
    predict_attr_name = get_name_for_predict_column(test.columns)

    # Predict using the test data
    predicted = matcher.predict(table=test, exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name, append=True,
                                inplace=False)

    # Get the evaluation summary.
    eval_summary = em.eval_matches(predicted, target_attr, predict_attr_name)
    em._viewapp = QtWidgets.QApplication.instance()
    if em._viewapp is None:
        em._viewapp = QtWidgets.QApplication([])

    # Get metric in a form that can be displayed from the evaluation summary
    metric = _get_metric(eval_summary)

    # Get false negatives and false positives as a DataFrame
    fp_dataframe = _get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = _get_dataframe(predicted, eval_summary['false_neg_ls'])

    # Get the main window application

    app = em._viewapp

    m = MainWindowManager(matcher, "rf", exclude_attrs, metric, predicted, fp_dataframe,
                          fn_dataframe)

    # If the show window is true, then display the window.
    if show_window:
        m.show()
        app.exec_()
def _vis_debug_dt(matcher,
                  train,
                  test,
                  exclude_attrs,
                  target_attr,
                  show_window=True):
    """
    Wrapper function for debugging the Random Forest matcher visually.
    """

    try:
        from PyQt5 import QtWidgets
        from py_entitymatching.gui.debug_gui_base import MainWindowManager
    except ImportError:
        raise ImportError(
            'PyQt5 is not installed. Please install PyQt5 to use '
            'GUI related functions in py_entitymatching.')

    # Validate the input parameters
    # # We expect the matcher to be of type DTMatcher
    if not isinstance(matcher, DTMatcher):
        logger.error('Input matcher is not of type Decision Tree matcher')
        raise AssertionError('Input matcher is not of type '
                             'Decision Tree matcher')

    # # We expect the target attribute to be of type string.
    validate_object_type(target_attr,
                         six.string_types,
                         error_prefix='Target attribute')

    # # Check whether the exclude attributes are indeed present in the train
    #  DataFrame.
    if not ch.check_attrs_present(train, exclude_attrs):
        logger.error('The exclude attrs are not in train table columns')
        raise AssertionError('The exclude attrs are not in the '
                             'train table columns')

    # # Check whether the target attribute is indeed present in the train
    #  DataFrame.
    if not ch.check_attrs_present(train, target_attr):
        logger.error('The target attr is not in train table columns')
        raise AssertionError('The target attr is not in the '
                             'train table columns')

    # # Check whether the exclude attributes are indeed present in the test
    #  DataFrame.
    if not ch.check_attrs_present(test, exclude_attrs):
        logger.error('The exclude attrs are not in test table columns')
        raise AssertionError('The exclude attrs are not in the '
                             'test table columns')

    # The exclude attributes is expected to be of type list, if not
    # explicitly convert this into a list.
    if not isinstance(exclude_attrs, list):
        exclude_attrs = [exclude_attrs]

    # Drop the duplicates from the exclude attributes
    exclude_attrs = gh.list_drop_duplicates(exclude_attrs)

    # If the target attribute is not present in the exclude attributes,
    # then explicitly add it to the exclude attributes.
    if target_attr not in exclude_attrs:
        exclude_attrs.append(target_attr)

    # Now, fit using training data
    matcher.fit(table=train,
                exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # Get a column name to store the predictions.
    predict_attr_name = get_name_for_predict_column(test.columns)

    # Predict using the test data
    predicted = matcher.predict(table=test,
                                exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name,
                                append=True,
                                inplace=False)

    # Get the evaluation summary.
    eval_summary = eval_matches(predicted, target_attr, predict_attr_name)

    # Get metric in a form that can be displayed from the evaluation summary
    metric = _get_metric(eval_summary)

    # Get false negatives and false positives as a DataFrame
    fp_dataframe = _get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = _get_dataframe(predicted, eval_summary['false_neg_ls'])

    em._viewapp = QtWidgets.QApplication.instance()

    if em._viewapp is None:
        em._viewapp = QtWidgets.QApplication([])
    app = em._viewapp

    # Get the main window application
    app = em._viewapp
    m = MainWindowManager(matcher, "dt", exclude_attrs, metric, predicted,
                          fp_dataframe, fn_dataframe)
    # If the show window is true, then display the window.
    if show_window:
        m.show()
        app.exec_()