def vis_debug_rf(matcher, train, test, exclude_attrs, target_attr):
    """
    Visual debugger for random forest matcher

    Fits ``matcher`` on ``train``, predicts on ``test``, evaluates the
    predictions against ``target_attr`` with ``mg.eval_matches`` and shows
    the metric together with the false positives / false negatives in a
    ``MainWindowManager`` window.

    Parameters
    ----------
    matcher : object, RFMatcher object
    train : MTable, containing training data with "True" labels
    test : MTable, containing test data with "True" labels.
            The "True" labels are used for evaluation.
    exclude_attrs : List, attributes to be excluded from train and test,
        for training and testing.
    target_attr : String, column name in train and test containing
        "True" labels

    Raises
    ------
    AssertionError
        If train and test do not have the same set of columns, or if some
        of ``exclude_attrs`` are not columns of train / test.
    """
    # Validate with explicit raises rather than `assert` statements, which
    # are removed when Python runs with -O. The exception type is unchanged.
    train_columns = set(train.columns)
    test_columns = set(test.columns)
    if test_columns != train_columns:
        raise AssertionError("The train and test columns are not same")

    excluded = set(exclude_attrs)
    if not excluded <= train_columns:
        raise AssertionError(
            "Some of exclude attrs are not part of train columns")
    # Implied by the two checks above; kept as a defensive guard.
    if not excluded <= test_columns:
        raise AssertionError(
            "Some of exclude attrs are not part of test columns")

    # fit using training data
    matcher.fit(table=train, exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # predict on the test data; the predictions go into a newly named
    # column of a copy of the test table (append=True, inplace=False)
    predict_attr_name = get_name_for_predict_column(test.columns)
    predicted = matcher.predict(table=test, exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name, append=True,
                                inplace=False)

    # compare the predictions with the "True" labels
    eval_summary = mg.eval_matches(predicted, target_attr, predict_attr_name)
    metric = get_metric(eval_summary)
    fp_dataframe = get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = get_dataframe(predicted, eval_summary['false_neg_ls'])

    # show the debugger window and hand control to the application object
    app = mg._viewapp
    m = MainWindowManager(matcher, "rf", exclude_attrs, metric, predicted,
                          fp_dataframe, fn_dataframe)
    m.show()
    app.exec_()
def _vis_debug_dt(matcher, train, test, exclude_attrs, target_attr, show_window=True):
    """
    Validate the inputs, then fit the decision tree matcher on ``train``,
    predict on ``test``, evaluate the predictions and build the debugger
    window.

    The window is shown (and ``app.exec_()`` called) only when
    ``show_window`` compares equal to True.

    Raises
    ------
    AssertionError
        If ``matcher`` is not a DTMatcher, ``target_attr`` is not a string,
        or the exclude / target attributes are reported missing by
        ``check_attrs_present``.
    """

    def _reject(log_message, error_message):
        # Log the problem, then abort with an AssertionError.
        logger.error(log_message)
        raise AssertionError(error_message)

    # --- input validation -------------------------------------------------
    if not isinstance(matcher, DTMatcher):
        _reject('Input matcher is not of type Decision Tree matcher',
                'Input matcher is not of type Decision Tree matcher')

    if not isinstance(target_attr, six.string_types):
        _reject('Target attribute is not of type string',
                'Target attribute is not of type string')

    if not check_attrs_present(train, exclude_attrs):
        _reject('The exclude attrs are not in train table columns',
                'The exclude attrs are not in the train table columns')

    if not check_attrs_present(train, target_attr):
        _reject('The target attr is not in train table columns',
                'The target attr is not in the train table columns')

    if not check_attrs_present(test, exclude_attrs):
        _reject('The exclude attrs are not in test table columns',
                'The exclude attrs are not in the test table columns')

    # --- normalise the exclusion list -------------------------------------
    # A single attribute is wrapped in a list; duplicates are dropped and
    # the label column is always excluded from the features.
    excluded = exclude_attrs if isinstance(exclude_attrs, list) else [exclude_attrs]
    excluded = list_drop_duplicates(excluded)
    if target_attr not in excluded:
        excluded.append(target_attr)

    # --- train, predict, evaluate -----------------------------------------
    matcher.fit(table=train, exclude_attrs=excluded, target_attr=target_attr)

    prediction_column = get_name_for_predict_column(test.columns)
    labeled = matcher.predict(table=test, exclude_attrs=excluded,
                              target_attr=prediction_column, append=True,
                              inplace=False)

    summary = mg.eval_matches(labeled, target_attr, prediction_column)
    metric = get_metric(summary)
    false_positives = get_dataframe(labeled, summary['false_pos_ls'])
    false_negatives = get_dataframe(labeled, summary['false_neg_ls'])

    # --- build (and optionally show) the debugger window -------------------
    app = mg._viewapp
    window = MainWindowManager(matcher, "dt", excluded, metric, labeled,
                               false_positives, false_negatives)
    if show_window == True:
        window.show()
        app.exec_()
# Example no. 3
# 0
def vis_debug_dt(matcher, train, test, exclude_attrs, target_attr):
    """
    Visual debugger for decision tree matcher

    Fits ``matcher`` on ``train``, predicts on ``test``, evaluates the
    predictions against ``target_attr`` with ``mg.eval_matches`` and shows
    the metric together with the false positives / false negatives in a
    ``MainWindowManager`` window.

    Parameters
    ----------
    matcher : object, DTMatcher object
    train : MTable, containing training data with "True" labels
    test : MTable, containing test data with "True" labels.
            The "True" labels are used for evaluation.
    exclude_attrs : List, attributes to be excluded from train and test,
        for training and testing.
    target_attr : String, column name in train and test containing
        "True" labels

    Raises
    ------
    AssertionError
        If train and test do not have the same set of columns, or if some
        of ``exclude_attrs`` are not columns of train / test.
    """
    # Validate with explicit raises rather than `assert` statements, which
    # are removed when Python runs with -O. The exception type is unchanged.
    train_columns = set(train.columns)
    test_columns = set(test.columns)
    if test_columns != train_columns:
        raise AssertionError("The train and test columns are not same")

    excluded = set(exclude_attrs)
    if not excluded <= train_columns:
        raise AssertionError(
            "Some of exclude attrs are not part of train columns")
    # Implied by the two checks above; kept as a defensive guard.
    if not excluded <= test_columns:
        raise AssertionError(
            "Some of exclude attrs are not part of test columns")

    # fit using training data
    matcher.fit(table=train,
                exclude_attrs=exclude_attrs,
                target_attr=target_attr)

    # predict on the test data; the predictions go into a newly named
    # column of a copy of the test table (append=True, inplace=False)
    predict_attr_name = get_name_for_predict_column(test.columns)
    predicted = matcher.predict(table=test,
                                exclude_attrs=exclude_attrs,
                                target_attr=predict_attr_name,
                                append=True,
                                inplace=False)

    # compare the predictions with the "True" labels
    eval_summary = mg.eval_matches(predicted, target_attr, predict_attr_name)
    metric = get_metric(eval_summary)
    fp_dataframe = get_dataframe(predicted, eval_summary['false_pos_ls'])
    fn_dataframe = get_dataframe(predicted, eval_summary['false_neg_ls'])

    # show the debugger window and hand control to the application object
    app = mg._viewapp
    m = MainWindowManager(matcher, "dt", exclude_attrs, metric, predicted,
                          fp_dataframe, fn_dataframe)
    m.show()
    app.exec_()
def vis_debug_rm(matcher, validation_set, target_attr, feature_table):
    """
    Visual debugger for boolean rule-based matcher

    Runs ``matcher`` over ``validation_set``, evaluates the predictions
    against ``target_attr`` and displays the metric and the false
    positives / false negatives in a ``MainWindowManager`` window.

    Parameters
    ----------
    matcher : object, Boolean rule-based matcher object
    validation_set : MTable, used to debug
    target_attr : String, column name in validation_set containing 'True' labels
    feature_table : pandas dataframe, containing feature information

    """
    # predictions are written to a newly named column of a copy of the
    # validation set (append=True, inplace=False)
    prediction_column = get_name_for_predict_column(validation_set.columns)
    labeled = matcher.predict(validation_set, prediction_column,
                              append=True, inplace=False)

    # evaluate and pull out the misclassified rows
    summary = mg.eval_matches(labeled, target_attr, prediction_column)
    metric = get_metric(summary)
    false_positives, false_negatives = (
        get_dataframe(labeled, summary[key])
        for key in ('false_pos_ls', 'false_neg_ls')
    )

    # show the debugger window and hand control to the application object
    app = mg._viewapp
    window = MainWindowManager(matcher, "rm", feature_table, metric, labeled,
                               false_positives, false_negatives)
    window.show()
    app.exec_()
# Example no. 5
# 0
def _vis_debug_dt(matcher,
                  train,
                  test,
                  exclude_attrs,
                  target_attr,
                  show_window=True):
    """
    Validate the inputs, then fit the decision tree matcher on ``train``,
    predict on ``test``, evaluate the predictions and build the debugger
    window.

    The window is shown (and ``app.exec_()`` called) only when
    ``show_window`` compares equal to True.

    Raises
    ------
    AssertionError
        If ``matcher`` is not a DTMatcher, ``target_attr`` is not a string,
        or the exclude / target attributes are reported missing by
        ``check_attrs_present``.
    """
    # Each entry is (check, message to log, message to raise). The checks
    # are wrapped in lambdas so they run lazily, in order, and stop at the
    # first failure.
    validations = (
        (lambda: isinstance(matcher, DTMatcher),
         'Input matcher is not of type Decision Tree matcher',
         'Input matcher is not of type Decision Tree matcher'),
        (lambda: isinstance(target_attr, six.string_types),
         'Target attribute is not of type string',
         'Target attribute is not of type string'),
        (lambda: check_attrs_present(train, exclude_attrs),
         'The exclude attrs are not in train table columns',
         'The exclude attrs are not in the train table columns'),
        (lambda: check_attrs_present(train, target_attr),
         'The target attr is not in train table columns',
         'The target attr is not in the train table columns'),
        (lambda: check_attrs_present(test, exclude_attrs),
         'The exclude attrs are not in test table columns',
         'The exclude attrs are not in the test table columns'),
    )
    for passes, log_message, error_message in validations:
        if not passes():
            logger.error(log_message)
            raise AssertionError(error_message)

    # Normalise the exclusion list: wrap a single attribute, drop
    # duplicates, and make sure the label column is excluded too.
    if isinstance(exclude_attrs, list):
        attrs_to_skip = exclude_attrs
    else:
        attrs_to_skip = [exclude_attrs]
    attrs_to_skip = list_drop_duplicates(attrs_to_skip)
    if target_attr not in attrs_to_skip:
        attrs_to_skip.append(target_attr)

    # fit using training data
    matcher.fit(table=train,
                exclude_attrs=attrs_to_skip,
                target_attr=target_attr)

    # predict using the test data; predictions land in a newly named column
    output_column = get_name_for_predict_column(test.columns)
    scored = matcher.predict(table=test,
                             exclude_attrs=attrs_to_skip,
                             target_attr=output_column,
                             append=True,
                             inplace=False)

    # evaluate and pull out the misclassified rows
    evaluation = mg.eval_matches(scored, target_attr, output_column)
    metric = get_metric(evaluation)
    fp_rows = get_dataframe(scored, evaluation['false_pos_ls'])
    fn_rows = get_dataframe(scored, evaluation['false_neg_ls'])

    # build the debugger window; display it only on request
    app = mg._viewapp
    manager = MainWindowManager(matcher, "dt", attrs_to_skip, metric, scored,
                                fp_rows, fn_rows)
    if show_window == True:
        manager.show()
        app.exec_()