Example #1
0
def cross_validate(model_def, data, folds, repeat=1):
    """Fit and evaluate ``model_def`` on every fold and collect the results.

    Args:
        model_def: Model definition handed to ``fit_model`` and
            ``generate_test``. When its ``evaluation_target`` attribute is not
            ``None``, the test target is rebuilt from it via
            ``build_target_safe`` before the result is recorded.
        data: Data set that supports ``data.loc[test_index]`` (presumably a
            pandas DataFrame -- confirm against callers).
        folds: Either an int, in which case ``make_default_folds`` builds the
            folds from ``data``, or an iterable of folds. Each fold is
            ``(train_index, test_index)`` or
            ``(train_index, test_index, prep_index)``.
        repeat: Number of passes made over ``folds``.

    Returns:
        list: One ``Result`` per fold per pass, in evaluation order.

    Raises:
        ValueError: If a fold does not have exactly 2 or 3 elements.
        AssertionError: If the train and test labels of a fold overlap, or if
            the fitted train and test frames share index labels.
    """
    if isinstance(folds, int):
        folds = make_default_folds(num_folds=folds, data=data)

    # A one-shot iterator (e.g. a generator) would be exhausted after the
    # first pass, silently turning every later repeat into a no-op.
    # Re-iterable containers are deliberately left untouched.
    if repeat > 1 and iter(folds) is folds:
        folds = list(folds)

    results = []
    for _ in range(repeat):
        for fold in folds:
            if len(fold) == 2:
                train_index, test_index = fold
                prep_index = None
            elif len(fold) == 3:
                train_index, test_index, prep_index = fold
            else:
                raise ValueError("Fold is not of right dimension (%d, not 2 or 3)" % len(fold))

            # Compare labels as plain sets: ``&`` on pandas Index objects is
            # no longer a set intersection (pandas >= 2.0) and is not defined
            # for lists. Raised explicitly (rather than ``assert``) so the
            # leakage guard survives ``python -O``.
            if set(train_index) & set(test_index):
                raise AssertionError(
                    "train and test overlap!!! %s, %s" % (train_index, test_index))

            x_train, y_train, fitted_model = fit_model(model_def, data, prep_index, train_index)
            test_data = data.loc[test_index]
            x_test, y_test = generate_test(model_def, test_data, fitted_model)

            # Second guard on the frames actually used for fit / predict.
            shared_labels = x_train.index.intersection(x_test.index)
            if len(shared_labels) > 0:
                raise AssertionError("train and test overlap!!! %s" % (shared_labels,))

            y_preds = predict(fitted_model, x_test)
            if model_def.evaluation_target is not None:
                # Score against the dedicated evaluation target; the second
                # return value of build_target_safe is not needed here.
                y_test, _ = build_target_safe(model_def.evaluation_target, test_data)

            results.append(
                Result(x_train, x_test, y_train, y_test, y_preds, model_def, fitted_model, data))
    return results
Example #2
0
def cross_validate(model_def, data, folds, reporters=None, repeat=1):
    """Fit and evaluate ``model_def`` on every fold, notifying reporters.

    Args:
        model_def: Model definition handed to ``fit_model`` and
            ``generate_test``.
        data: Data set that supports ``data.loc[test_index]`` (presumably a
            pandas DataFrame -- confirm against callers).
        folds: Either an int, in which case ``make_default_folds`` builds the
            folds from ``data``, or an iterable of folds. Each fold is
            ``(train_index, test_index)`` or
            ``(train_index, test_index, prep_index)``.
        reporters: Optional iterable of objects exposing ``update(result)``;
            each is called once per fold result. Defaults to no reporters.
        repeat: Number of passes made over ``folds``.

    Returns:
        tuple: ``(results, reporters)`` where ``results`` is the list of
        ``Result`` objects in evaluation order and ``reporters`` is the object
        that was passed in (or a fresh empty list when none was given).

    Raises:
        ValueError: If a fold does not have exactly 2 or 3 elements.
    """
    # A fresh list per call: the previous ``reporters=[]`` default was
    # returned to the caller, so mutating it leaked into later calls.
    if reporters is None:
        reporters = []

    if isinstance(folds, int):
        folds = make_default_folds(num_folds=folds, data=data)

    # A one-shot iterator (e.g. a generator) would be exhausted after the
    # first pass, silently turning every later repeat into a no-op.
    if repeat > 1 and iter(folds) is folds:
        folds = list(folds)

    results = []
    for _ in range(repeat):
        for fold in folds:
            if len(fold) == 2:
                train_index, test_index = fold
                prep_index = None
            elif len(fold) == 3:
                train_index, test_index, prep_index = fold
            else:
                raise ValueError("Fold is not of right dimension (%d, not 2 or 3)" % len(fold))

            x_train, y_train, fitted_model = fit_model(model_def, data, prep_index, train_index)
            # Evaluate on the held-out rows only; passing the full data set
            # here would score the model on its own training rows.
            test_data = data.loc[test_index]
            x_test, y_test = generate_test(model_def, test_data, fitted_model)
            y_preds = fitted_model.fitted_estimator.predict(x_test)
            result = Result(x_train, x_test, y_train, y_test, y_preds, model_def, fitted_model, data)
            results.append(result)

            for reporter in reporters:
                reporter.update(result)
    return results, reporters
Example #3
0
def cross_validate(model_def, data, folds, repeat=1):
    """Run cross-validation of ``model_def`` over ``folds``.

    Args:
        model_def: Model definition handed to ``fit_model`` and
            ``generate_test``. When its ``evaluation_target`` attribute is not
            ``None``, the test target is rebuilt from it via
            ``build_target_safe`` before the result is recorded.
        data: Data set that supports ``data.loc[test_index]`` (presumably a
            pandas DataFrame -- confirm against callers).
        folds: Either an int, in which case ``make_default_folds`` builds the
            folds from ``data``, or an iterable of folds. Each fold is
            ``(train_index, test_index)`` or
            ``(train_index, test_index, prep_index)``.
        repeat: Number of passes made over ``folds``.

    Returns:
        list: One ``Result`` per fold per pass, in evaluation order.

    Raises:
        ValueError: If a fold does not have exactly 2 or 3 elements.
        AssertionError: If the train and test labels of a fold overlap, or if
            the fitted train and test frames share index labels.
    """
    if isinstance(folds, int):
        folds = make_default_folds(num_folds=folds, data=data)

    # Materialise one-shot iterators (e.g. generators) so that every repeat
    # sees the folds; otherwise only the first pass would do any work.
    # Re-iterable containers are deliberately left untouched.
    if repeat > 1 and iter(folds) is folds:
        folds = list(folds)

    results = []
    for _ in range(repeat):
        for fold in folds:
            fold_size = len(fold)
            if fold_size == 2:
                train_index, test_index = fold
                prep_index = None
            elif fold_size == 3:
                train_index, test_index, prep_index = fold
            else:
                raise ValueError(
                    "Fold is not of right dimension (%d, not 2 or 3)" %
                    fold_size)

            # Label-level leakage guard. A plain set intersection is used
            # because ``&`` on pandas Index objects stopped meaning "set
            # intersection" in pandas 2.0 and is undefined for lists. It is
            # raised explicitly so it is not stripped by ``python -O``.
            if set(train_index) & set(test_index):
                raise AssertionError(
                    "train and test overlap!!! %s, %s" % (
                        train_index, test_index))

            x_train, y_train, fitted_model = fit_model(model_def, data,
                                                       prep_index, train_index)
            test_data = data.loc[test_index]
            x_test, y_test = generate_test(model_def, test_data, fitted_model)

            # Second guard on the frames actually used for fit / predict.
            shared_labels = x_train.index.intersection(x_test.index)
            if len(shared_labels) > 0:
                raise AssertionError(
                    "train and test overlap!!! %s" % (shared_labels,))

            y_preds = predict(fitted_model, x_test)
            if model_def.evaluation_target is not None:
                # Score against the dedicated evaluation target; the second
                # return value of build_target_safe is not needed here.
                y_test, _ = build_target_safe(model_def.evaluation_target,
                                              test_data)

            results.append(
                Result(x_train, x_test, y_train, y_test, y_preds,
                       model_def, fitted_model, data))
    return results