def cross_validate(model_def, data, folds, repeat=1):
    """Fit and score ``model_def`` on each fold, ``repeat`` times over.

    Args:
        model_def: Model definition forwarded to ``fit_model``,
            ``generate_test`` and ``Result``. When its ``evaluation_target``
            attribute is not ``None``, the test target is rebuilt from it
            with ``build_target_safe``.
        data: Full dataset; test rows are selected with
            ``data.loc[test_index]`` (presumably a pandas DataFrame --
            confirm against callers).
        folds: Either an ``int`` (expanded with ``make_default_folds``) or an
            iterable of ``(train_index, test_index)`` or
            ``(train_index, test_index, prep_index)`` tuples. The index
            objects must provide ``.intersection`` (pandas ``Index`` and
            ``set`` both do).
        repeat: Number of passes made over ``folds``.

    Returns:
        A list with one ``Result`` per fold per repetition.

    Raises:
        ValueError: If a fold does not have exactly 2 or 3 elements.
        AssertionError: If the train and test indexes share any label.
    """
    results = []

    if isinstance(folds, int):
        folds = make_default_folds(num_folds=folds, data=data)

    for _ in range(repeat):
        for fold in folds:
            if len(fold) == 2:
                train_index, test_index = fold
                prep_index = None
            elif len(fold) == 3:
                train_index, test_index, prep_index = fold
            else:
                raise ValueError("Fold is not of right dimension (%d, not 2 or 3)" % len(fold))

            # Use .intersection() rather than `&`: pandas removed the
            # set-operation meaning of `Index & Index`, so the operator form
            # no longer detects overlap on current pandas versions.
            assert len(train_index.intersection(test_index)) == 0, \
                "train and test overlap!!! %s, %s" % (train_index, test_index)

            x_train, y_train, fitted_model = fit_model(model_def, data, prep_index, train_index)
            test_data = data.loc[test_index]
            x_test, y_test = generate_test(model_def, test_data, fitted_model)

            # Second guard, on the rows that actually came out of fitting
            # and test generation.
            assert len(x_train.index.intersection(x_test.index)) == 0, \
                "train and test overlap!!! %s" % (x_train.index.intersection(x_test.index))

            y_preds = predict(fitted_model, x_test)

            # An explicit evaluation target overrides the target returned by
            # generate_test.
            if model_def.evaluation_target is not None:
                y_test, _ = build_target_safe(model_def.evaluation_target, test_data)

            results.append(
                Result(x_train, x_test, y_train, y_test, y_preds, model_def, fitted_model, data)
            )
    return results
def cross_validate(model_def, data, folds, reporters=None, repeat=1):
    """Fit and score ``model_def`` on each fold, updating any reporters.

    Args:
        model_def: Model definition forwarded to ``fit_model``,
            ``generate_test`` and ``Result``.
        data: Full dataset; test rows are selected with
            ``data.loc[test_index]`` (presumably a pandas DataFrame --
            confirm against callers).
        folds: Either an ``int`` (expanded with ``make_default_folds``) or an
            iterable of ``(train_index, test_index)`` or
            ``(train_index, test_index, prep_index)`` tuples.
        reporters: Optional iterable of objects exposing ``update(result)``;
            each is called once per fold result. Defaults to no reporters.
        repeat: Number of passes made over ``folds``.

    Returns:
        A ``(results, reporters)`` tuple, where ``results`` holds one
        ``Result`` per fold per repetition and ``reporters`` is the
        collection that was updated (a fresh empty list when none was given).

    Raises:
        ValueError: If a fold does not have exactly 2 or 3 elements.
    """
    # A fresh list per call: the value is returned to the caller, so a shared
    # mutable default could be modified and leak into later calls.
    if reporters is None:
        reporters = []

    results = []

    if isinstance(folds, int):
        folds = make_default_folds(num_folds=folds, data=data)

    for _ in range(repeat):
        for fold in folds:
            if len(fold) == 2:
                train_index, test_index = fold
                prep_index = None
            elif len(fold) == 3:
                train_index, test_index, prep_index = fold
            else:
                raise ValueError("Fold is not of right dimension (%d, not 2 or 3)" % len(fold))

            x_train, y_train, fitted_model = fit_model(model_def, data, prep_index, train_index)

            # Score only on the held-out rows of this fold. Passing the full
            # dataset here would evaluate the model on its own training rows.
            test_data = data.loc[test_index]
            x_test, y_test = generate_test(model_def, test_data, fitted_model)
            y_preds = fitted_model.fitted_estimator.predict(x_test)

            result = Result(x_train, x_test, y_train, y_test, y_preds, model_def, fitted_model, data)
            results.append(result)

            for reporter in reporters:
                reporter.update(result)
    return results, reporters
def cross_validate(model_def, data, folds, repeat=1):
    """Run cross-validation of ``model_def`` over ``folds``.

    For every fold the model is fitted with ``fit_model``, test features are
    built from ``data.loc[test_index]`` with ``generate_test``, predictions
    are obtained with ``predict``, and everything is bundled into a
    ``Result``.

    Args:
        model_def: Model definition. If ``model_def.evaluation_target`` is
            not ``None``, the test target is rebuilt from it with
            ``build_target_safe`` instead of using the one returned by
            ``generate_test``.
        data: Full dataset supporting ``.loc`` row selection (presumably a
            pandas DataFrame -- confirm against callers).
        folds: An ``int`` number of folds (expanded with
            ``make_default_folds``), or an iterable of 2-tuples
            ``(train_index, test_index)`` / 3-tuples
            ``(train_index, test_index, prep_index)``. Index objects must
            provide ``.intersection`` (pandas ``Index`` and ``set`` do).
        repeat: How many times to iterate over ``folds``.

    Returns:
        List of ``Result`` objects, one per fold per repetition.

    Raises:
        ValueError: If a fold has a length other than 2 or 3.
        AssertionError: If train and test indexes overlap.
    """
    if isinstance(folds, int):
        folds = make_default_folds(num_folds=folds, data=data)

    results = []
    for _ in range(repeat):
        for fold in folds:
            fold_size = len(fold)
            if fold_size == 2:
                train_index, test_index = fold
                prep_index = None
            elif fold_size == 3:
                train_index, test_index, prep_index = fold
            else:
                raise ValueError(
                    "Fold is not of right dimension (%d, not 2 or 3)" % fold_size)

            # `Index & Index` stopped meaning set intersection in current
            # pandas; .intersection() keeps the overlap check correct there
            # and still works for plain sets.
            requested_overlap = train_index.intersection(test_index)
            assert len(requested_overlap) == 0, "train and test overlap!!! %s, %s" % (
                train_index, test_index)

            x_train, y_train, fitted_model = fit_model(
                model_def, data, prep_index, train_index)
            test_data = data.loc[test_index]
            x_test, y_test = generate_test(model_def, test_data, fitted_model)

            # Re-check on the rows that were actually produced.
            produced_overlap = x_train.index.intersection(x_test.index)
            assert len(produced_overlap) == 0, "train and test overlap!!! %s" % (
                produced_overlap)

            y_preds = predict(fitted_model, x_test)

            # An explicit evaluation target takes precedence over the target
            # returned by generate_test.
            if model_def.evaluation_target is not None:
                y_test, _ = build_target_safe(model_def.evaluation_target,
                                              test_data)

            result = Result(x_train, x_test, y_train, y_test, y_preds,
                            model_def, fitted_model, data)
            results.append(result)
    return results