from tqdm import tqdm

# Note: RMSE, MSE and PredictiveMetricABC are assumed to be importable from the
# library's predictive metrics module; the original listing omits its imports.
def predictive_evaluation(model, ds_test=None, count_none_predictions=False, n_test_predictions=None, skip_errors=True, **kwds):
    """Executes a predictive evaluation process, where the given model will be evaluated under the provided settings.

    Args:
        model: An instance of a Recommender to be evaluated.
        ds_test: An optional test InteractionDataset. If none is provided, the model's training data is used as
            the test data. Note that evaluating on training data does not give a reliable estimate of the model's
            real performance.
        count_none_predictions: An optional boolean indicating whether to count none predictions (i.e. when the model
            predicts None, count it as being a 0) or not (i.e. skip that user-item pair). Default: False.
        n_test_predictions: An optional integer representing the number of predictions to evaluate.
            Default: predict for every (user, item) pair on the test dataset.
        skip_errors: A boolean indicating whether to ignore errors produced during the predict calls, or not.
            Default: True.
        metrics: An optional list containing instances of PredictiveMetricABC. Default: [RMSE(), MSE()].
        verbose: A boolean indicating whether state logs should be produced or not. Default: True.

    Returns:
        A dict containing each metric name mapping to the corresponding metric value.
    """
    if ds_test is None: ds_test = model.interaction_dataset
    if n_test_predictions is None: n_test_predictions = len(ds_test)

    assert n_test_predictions > 0, f'The number of test predictions ({n_test_predictions}) should be > 0.'

    metrics = kwds.get('metrics', [RMSE(), MSE()])

    assert isinstance(metrics, list), f'Expected "metrics" argument to be a list and found {type(metrics)}. ' \
        f'Should contain instances of PredictiveMetricABC.'

    for m in metrics:
        assert isinstance(m, PredictiveMetricABC), f'Expected metric {m} to be an instance of type PredictiveMetricABC.'

    n_test_predictions = min(n_test_predictions, len(ds_test))
    if kwds.get('verbose', True):
        _iter = tqdm(ds_test.values(['user', 'item', 'interaction'], to_list=True),
                     total=n_test_predictions, desc='Evaluating model predictive performance', position=0, leave=True)
    else:
        _iter = ds_test.values(['user', 'item', 'interaction'], to_list=True)

    num_predictions_made = 0
    y_pred, y_true = [], []
    for user, item, interaction in _iter:
        if num_predictions_made >= n_test_predictions: break  # reached the max number of predictions

        pred = model.predict(user, item, skip_errors=skip_errors)
        if pred is None:
            if count_none_predictions:
                num_predictions_made += 1
                y_pred.append(0)
                y_true.append(interaction)
            continue
        y_pred.append(pred)
        y_true.append(interaction)
        num_predictions_made += 1

    # evaluate performance
    metric_values = {m.name: round(m(y_true, y_pred), 4) for m in metrics}

    return metric_values
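A minimal usage sketch follows; `my_model` and `my_test_ds` are hypothetical placeholders for an already fitted Recommender and a held-out test InteractionDataset, and the argument values are only illustrative:

# Usage sketch (hypothetical names; not part of the original listing).
eval_results = predictive_evaluation(my_model, my_test_ds,
                                     n_test_predictions=1000,  # evaluate at most 1000 (user, item) pairs
                                     skip_errors=True,
                                     metrics=[RMSE(), MSE()],  # the default metrics, listed explicitly here
                                     verbose=False)
print(eval_results)  # e.g. {'RMSE': ..., 'MSE': ...}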
Example #2
def test_predictive_evaluation_5(model, test_interactions_ds):
    """Evaluation using the MSE metric only."""
    assert predictive_evaluation(model,
                                 test_interactions_ds,
                                 count_none_predictions=False,
                                 n_test_predictions=None,
                                 skip_errors=True,
                                 metrics=[MSE()]) == {
                                     'MSE': 0.6667
                                 }
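The `metrics` list accepts any PredictiveMetricABC instance. Judging only from how metrics are used inside `predictive_evaluation` (each one exposes a `name` and is called as `metric(y_true, y_pred)`), a custom metric might look like the sketch below; the actual PredictiveMetricABC interface and constructor should be checked against the library, so treat this purely as an illustration:

# Hypothetical custom metric; assumes PredictiveMetricABC requires nothing beyond
# a `name` attribute and a __call__(y_true, y_pred) method, which is all that
# predictive_evaluation relies on above.
class MAE(PredictiveMetricABC):
    def __init__(self):
        self.name = 'MAE'

    def __call__(self, y_true, y_pred):
        # Mean absolute error over the evaluated (user, item) pairs.
        return sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)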
Example #3
def test_mse_1():
    assert MSE()([4, 3, 2, 1], [1, 2, 3, 4]) == 5
Example #4
def test_mse_0():
    assert MSE()([1, 2, 3, 4], [1, 2, 3, 4]) == 0
Example #5
def test_mse_4():
    assert MSE()([0], [0]) == 0
Example #6
def test_mse_3():
    assert MSE()([0, 0], [1, 1]) == 1
Example #7
def test_mse_2():
    assert round(MSE()([5, 10, 2, -1, -20, 6], [9, 90, 24, 2, 0, -15]), 2) == 1291.67
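The expected values in these tests match the standard mean squared error. The reference-only sketch below (an illustration, not the library's actual MSE class) reproduces them and makes the arithmetic easy to verify by hand:

# Reference-only mean squared error; the library's MSE class is assumed to
# compute the same quantity.
def reference_mse(y_true, y_pred):
    return sum((t - p) ** 2 for t, p in zip(y_true, y_pred)) / len(y_true)

assert reference_mse([4, 3, 2, 1], [1, 2, 3, 4]) == 5  # (9 + 1 + 1 + 9) / 4
assert round(reference_mse([5, 10, 2, -1, -20, 6], [9, 90, 24, 2, 0, -15]), 2) == 1291.67  # 7750 / 6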