Ejemplo n.º 1
0
def test_gridsearchcv_refit(u1_ml100k):
    """Exercise the ``refit`` option of GridSearchCV.

    Four scenarios are covered:

    * ``refit=True``: ``test()`` must give the same predictions as
      ``best_estimator['mae']`` ('mae' is listed first in ``measures``).
    * ``refit='rmse'``: ``test()`` must match ``best_estimator['rmse']`` and
      ``predict()`` must be callable.
    * ``refit=False``: both ``test()`` and ``predict()`` must raise
      ``ValueError``.
    * ``refit=True`` fitted on the ``u1_ml100k`` fixture: ``fit()`` must
      raise ``ValueError`` (the fixture is presumably built with
      load_from_folds -- confirm against the fixture definition).
    """

    tests_dir = os.path.dirname(__file__)
    ratings_path = os.path.join(tests_dir, './u1_ml100k_test')
    data = Dataset.load_from_file(ratings_path, Reader('ml-100k'))

    param_grid = dict(n_epochs=[5],
                      lr_all=[0.002, 0.005],
                      reg_all=[0.4, 0.6],
                      n_factors=[2])

    def new_search(refit):
        # Every scenario shares the same grid, measures and fold count; only
        # the refit setting varies.
        return GridSearchCV(SVD,
                            param_grid,
                            measures=['mae', 'rmse'],
                            cv=2,
                            refit=refit)

    def all_ratings_testset():
        # Testset rebuilt from every raw rating of the dataset.
        return data.construct_testset(data.raw_ratings)

    # test() on the search object must agree with the best estimator of the
    # measure selected by refit (True selects the first measure, i.e. mae).
    for refit, measure in ((True, 'mae'), ('rmse', 'rmse')):
        search = new_search(refit)
        search.fit(data)
        search_preds = search.test(all_ratings_testset())
        best_preds = search.best_estimator[measure].test(
            all_ratings_testset())
        assert search_preds == best_preds

    # the last search was refit (on rmse), so predict() can be called
    search.predict(2, 4)

    # without refit, neither test() nor predict() may be used
    search = new_search(False)
    search.fit(data)
    with pytest.raises(ValueError):
        search.test(all_ratings_testset())
    with pytest.raises(ValueError):
        search.predict('1', '2')

    # refit cannot be combined with the fixture dataset: fit() must fail
    search = new_search(True)
    with pytest.raises(ValueError):
        search.fit(u1_ml100k)
Ejemplo n.º 2
0
def test_gridsearchcv_refit(u1_ml100k):
    """Verify that GridSearchCV honours its ``refit`` argument.

    With ``refit=True`` the search object must predict like the best 'mae'
    estimator ('mae' comes first in ``measures``); with ``refit='rmse'`` it
    must predict like the best 'rmse' estimator and expose ``predict()``.
    With ``refit=False`` both ``test()`` and ``predict()`` must raise
    ``ValueError``, and fitting a refitting search on the ``u1_ml100k``
    fixture must raise ``ValueError`` as well (the fixture is presumably
    loaded with load_from_folds -- confirm against the fixture definition).
    """

    here = os.path.dirname(__file__)
    data = Dataset.load_from_file(
        os.path.join(here, './u1_ml100k_test'),
        Reader('ml-100k'),
        rating_scale=(1, 5),
    )

    param_grid = {
        'n_epochs': [5],
        'lr_all': [0.002, 0.005],
        'reg_all': [0.4, 0.6],
        'n_factors': [2],
    }

    def build_search(refit):
        # Only the refit setting differs between the scenarios below.
        return GridSearchCV(SVD, param_grid, measures=['mae', 'rmse'],
                            cv=2, refit=refit)

    def whole_testset():
        # Testset made of every raw rating contained in the dataset.
        return data.construct_testset(data.raw_ratings)

    # refit=True: the search must behave like the best estimator for mae
    # (the first measure listed)
    mae_search = build_search(True)
    mae_search.fit(data)
    from_search = mae_search.test(whole_testset())
    from_best_mae = mae_search.best_estimator['mae'].test(whole_testset())
    assert from_search == from_best_mae

    # refit='rmse': the search must behave like the best estimator for rmse
    rmse_search = build_search('rmse')
    rmse_search.fit(data)
    from_search = rmse_search.test(whole_testset())
    from_best_rmse = rmse_search.best_estimator['rmse'].test(whole_testset())
    assert from_search == from_best_rmse
    # predict() must be usable on a refitted search
    rmse_search.predict(2, 4)

    # refit=False: test() and predict() are both rejected
    plain_search = build_search(False)
    plain_search.fit(data)
    with pytest.raises(ValueError):
        plain_search.test(whole_testset())
    with pytest.raises(ValueError):
        plain_search.predict('1', '2')

    # a refitting search must refuse the fixture dataset at fit() time
    folds_search = build_search(True)
    with pytest.raises(ValueError):
        folds_search.fit(u1_ml100k)
Ejemplo n.º 3
0
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer

# Build the regression targets: the first field of every release-year record.
year_data = np.array(release_year)
labels_year = [record[0] for record in year_data]

# Split into training and testing sets, holding out 25% for evaluation.
xtrain_y, xtest_y, ytrain_y, ytest_y = train_test_split(v,
                                                        labels_year,
                                                        test_size=0.25)

# Fit a linear regression on the training split and score it on the held-out
# split with the mean squared error.
clf = LinearRegression().fit(xtrain_y, ytrain_y)
y_pred = clf.predict(xtest_y)
regression = mean_squared_error(ytest_y, y_pred)
print('regression mse: ', regression)  # regression mse

# In[51]:

# Naive baseline MSE: predict the same constant -- the training-set mean,
# truncated to an integer -- for every test sample.
# NOTE(review): int() truncates the mean; confirm that the truncation is
# intended rather than using the exact mean as the baseline prediction.
naive_y = np.mean(ytrain_y)
naive_y_test = [int(naive_y)] * len(ytest_y)

# Arguments are passed as (y_true, y_pred), consistent with the regression
# score above; the MSE value itself does not depend on the order.
naive = mean_squared_error(ytest_y, naive_y_test)
print('naive mse: ', naive)  # naive mse