Code example #1
def svd(data, training, testing):
    '''
        Tune SVD parameters, then calculate the RMSE, coverage and running time of SVD

        Args:
            data(Dataset): the whole dataset divided into 5 folds
            training(Dataset): training dataset
            testing(Dataset): test dataset

        Returns:
            rmse: RMSE of SVD with optimized parameters
            top_n: number of unique predictions for top n items
    '''

    # candidate parameters
    param_grid = {'n_factors': [25, 50, 100, 250], 'n_epochs': [10, 20, 30, 40, 50]}

    # optimize parameters
    grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data)
    param = grid_search.best_params['RMSE']
    print('SVD:', param)

    # fit model using the optimized parameters
    svd = SVD(n_factors=param['n_factors'], n_epochs=param['n_epochs'])
    svd.train(training)

    # evaluate the model using test data
    predictions = svd.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)
    return rmse, top_n
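Several of these examples call a get_top_n helper that is never shown. A minimal sketch, assuming it follows the top-N recommendation helper from the Surprise FAQ (the name matches the snippets, but the exact return shape is an assumption; some docstrings describe it as a count of unique predictions instead):

from collections import defaultdict

def get_top_n(predictions, n=5):
    # group the predictions by user
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # keep only the n items with the highest estimated rating for each user
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n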
Code example #2
def nmf(data, training, testing):
    '''
    Tune NMF parameters, then calculate the RMSE, coverage and running time of NMF

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of NMF with optimized parameters
        top_n: number of unique predictions for top n items
    '''

    # candidate parameters
    nmf_param_grid = {'n_factors': [45, 50, 55, 60], 'n_epochs': [45, 50, 55]}

    # optimize parameters
    grid_search = GridSearch(NMF, nmf_param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data)
    param = grid_search.best_params['RMSE']
    print('NMF:', param)

    # fit model using the optimized parameters
    nmf = NMF(n_factors=param['n_factors'], n_epochs=param['n_epochs'])
    nmf.train(training)

    # evaluate the model using test data
    predictions = nmf.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)
    return rmse, top_n
Code example #3
def knn_m(data, training, testing):
    '''
        Tune KNN with Means parameters, then calculate the RMSE, coverage and running time of KNN with Means

        Args:
            data(Dataset): the whole dataset divided into 5 folds
            training(Dataset): training dataset
            testing(Dataset): test dataset

        Returns:
            rmse: RMSE of KNN with Means with optimized parameters
            top_n: number of unique predictions for top n items
    '''

    # candidate parameters
    knn_param_grid = {'k': [5, 10, 20], 'sim_options': {'name': ['msd', 'cosine', 'pearson'],
                                                        'min_support': [1, 5], 'user_based': [False]}}

    # optimize parameters
    knnm_grid_search = GridSearch(KNNWithMeans, knn_param_grid, measures=['RMSE'], verbose=False)
    knnm_grid_search.evaluate(data)
    param = knnm_grid_search.best_params['RMSE']
    print('KNNWithMeans:', param)

    # fit model using the optimized parameters
    # pass the tuned similarity options via the sim_options dict so they actually take effect
    knnm = KNNWithMeans(k=param['k'], sim_options=param['sim_options'])
    knnm.train(training)

    # evaluate the model using test data
    predictions = knnm.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)

    return rmse, top_n
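For dict-valued parameters, GridSearch reports best_params with the nested structure preserved, so the tuned sim_options sub-dict can be passed straight to the constructor, as done above. Illustratively (the concrete values below are made up, not results):

# hypothetical shape of grid_search.best_params['RMSE'] for a KNN grid
param = {'k': 10,
         'sim_options': {'name': 'pearson', 'min_support': 1, 'user_based': False}}
knnm = KNNWithMeans(k=param['k'], sim_options=param['sim_options'])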
Code example #4
def grid_search_knn_surprise(data_train, n_epochs, reg_u, reg_i):
    print('KNN Surprise grid search')

    #Construct KNN algo with params
    param_grid = {
        'bsl_options': {
            'method': ['als'],
            'n_epochs': n_epochs,
            'reg_u': reg_u,
            'reg_i': reg_i
        },
        'k': [3],
        'sim_options': {
            'name': ['msd'],
            'min_support': [5],
            'user_based': [False]
        }
    }

    #Create the Grid search algo with the params
    grid_search = GridSearch(KNNBaseline, param_grid, measures=['RMSE'])
    #Evaluate the Grid search and print the best params
    grid_search.evaluate(data_train)
    print(grid_search.best_score['RMSE'])
    print(grid_search.best_params['RMSE'])
    # Return the best params for the ALS algo
    return grid_search.best_params['RMSE']["bsl_options"]
Code example #5
File: test_grid_search.py  Project: zwcdp/Surprise
def test_dict_parameters(small_ml):
    """Dict parameters like bsl_options and sim_options require special
    treatment in the param_grid argument. We here test both in one shot with
    KNNBaseline."""

    param_grid = {
        'bsl_options': {
            'method': ['als', 'sgd'],
            'reg': [1, 2]
        },
        'k': [2, 3],
        'sim_options': {
            'name': ['msd', 'cosine'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }

    small_ml.split(2)

    with pytest.warns(UserWarning):
        grid_search = GridSearch(KNNBaseline,
                                 param_grid,
                                 measures=['FCP', 'mae', 'rMSE'],
                                 n_jobs=1)
    with pytest.warns(UserWarning):
        grid_search.evaluate(small_ml)
    assert len(grid_search.cv_results['params']) == 32
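The expected count of 32 follows from expanding the nested dicts: bsl_options contributes 2 methods x 2 regularisation values, k contributes 2 values, and sim_options contributes 2 names x 2 min_support values x 1 user_based value. A standalone sanity check of that arithmetic (not part of the test above):

from itertools import product

bsl_combos = list(product(['als', 'sgd'], [1, 2]))               # 4
k_values = [2, 3]                                                # 2
sim_combos = list(product(['msd', 'cosine'], [1, 5], [False]))   # 4

print(len(bsl_combos) * len(k_values) * len(sim_combos))  # 4 * 2 * 4 = 32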
Code example #6
File: test_grid_search.py  Project: nathania/Surprise
def test_measure_is_not_case_sensitive():
    param_grid = {'n_epochs': [1], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    grid_search = GridSearch(SVD, param_grid, measures=['FCP', 'mae', 'rMSE'])
    grid_search.evaluate(data)
    assert grid_search.best_index['fcp'] == grid_search.best_index['FCP']
    assert grid_search.best_params['mAe'] == grid_search.best_params['MaE']
    assert grid_search.best_score['RmSE'] == grid_search.best_score['RMSE']
Code example #7
File: test_grid_search.py  Project: nathania/Surprise
def test_best_estimator():
    param_grid = {'n_epochs': [5], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    grid_search = GridSearch(SVD, param_grid, measures=['FCP', 'mae', 'rMSE'])
    grid_search.evaluate(data)
    best_estimator = grid_search.best_estimator['MAE']
    assert evaluate(
        best_estimator, data)['MAE'] == grid_search.best_score['MAE']
Code example #8
File: test_grid_search.py  Project: zxshinxz/Surprise
def test_grid_search_cv_results():
    """Ensure that the number of parameter combinations is correct."""
    param_grid = {'n_epochs': [1, 2], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    with pytest.warns(UserWarning):
        grid_search = GridSearch(SVD, param_grid, n_jobs=1)
    with pytest.warns(UserWarning):
        grid_search.evaluate(data)
    assert len(grid_search.cv_results['params']) == 8
Code example #9
def test_grid_search_cv_results():
    param_grid = {
        'n_epochs': [1, 2],
        'lr_all': [0.002, 0.005],
        'reg_all': [0.4, 0.6],
        'n_factors': [1]
    }
    grid_search = GridSearch(SVD, param_grid)
    grid_search.evaluate(data)
    assert len(grid_search.cv_results['params']) == 8
Code example #10
File: test_grid_search.py  Project: zxshinxz/Surprise
def test_measure_is_not_case_sensitive():
    """Ensure that all best_* dictionaries are case insensitive."""
    param_grid = {'n_epochs': [1], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    with pytest.warns(UserWarning):
        grid_search = GridSearch(SVD, param_grid, measures=['FCP', 'mae',
                                                            'rMSE'], n_jobs=1)
    with pytest.warns(UserWarning):
        grid_search.evaluate(data)
    assert grid_search.best_index['fcp'] == grid_search.best_index['FCP']
    assert grid_search.best_params['mAe'] == grid_search.best_params['MaE']
    assert grid_search.best_score['RmSE'] == grid_search.best_score['RMSE']
Code example #11
def knnz_running_time(data):
    '''
        Calculates the running times for training and predictions for KNN with Z-score

        Args:
            data(Dataset): a list of datasets with different numbers of users

        Returns:
            elapsedtime_KnnZtrain: running time for training
            elapsedtime_KnnZtest: running time for predictions on testset
    '''
    elapsedtime_KnnZtrain = []
    elapsedtime_KnnZtest = []

    # tune the parameters on the entire data
    param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }
    grid_search = GridSearch(KNNWithZScore,
                             param_grid,
                             measures=['RMSE'],
                             verbose=False)
    grid_search.evaluate(data[3])
    param = grid_search.best_params['RMSE']
    k = param['k']
    sim = param['sim_options']['name']
    min_support = param['sim_options']['min_support']
    user_based = param['sim_options']['user_based']

    # using the tuned parameters calculate running times
    for i in range(len(data)):
        # training running time
        training_start = time.time()
        training = data[i].build_full_trainset()
        testing = training.build_anti_testset()
        # pass the tuned similarity options via the sim_options dict so they actually take effect
        knnz = KNNWithZScore(k=k,
                             sim_options={'name': sim,
                                          'min_support': min_support,
                                          'user_based': user_based})
        knnz.train(training)
        elapsedtime_KnnZtrain.append(time.time() - training_start)

        # prediction running time
        test_start = time.time()
        knnz.test(testing)
        elapsedtime_KnnZtest.append(time.time() - test_start)
    return elapsedtime_KnnZtrain, elapsedtime_KnnZtest
Code example #12
File: test_grid_search.py  Project: nathania/Surprise
def test_best_rmse_fcp():
    param_grid = {'n_epochs': [1, 2], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    grid_search = GridSearch(SVD, param_grid, measures=['FCP', 'rmse'])
    grid_search.evaluate(data)

    assert grid_search.best_params['RMSE'] == {
        'lr_all': 0.005, 'n_factors': 1, 'reg_all': 0.4, 'n_epochs': 2,
        'init_std_dev': 0}

    assert grid_search.best_params['FCP'] == {
        'reg_all': 0.6, 'n_epochs': 2, 'lr_all': 0.002, 'n_factors': 1,
        'init_std_dev': 0}
Code example #13
    def parameter_tuning(self):
        param_grid = {'n_epochs': [10, 20, 40], 'lr_all': [0.002, 0.005, 0.01],
                      'reg_all': [0.05, 0.1, 0.2]}

        print("Starting grid search...")
        start_time = time.perf_counter()
        self.grid_search = GridSearch(SVD, param_grid, measures=['RMSE'])
        self.grid_search.evaluate(self.data)
        print('Grid search took {}s'.format(time.perf_counter() - start_time))

        self.svd = self.grid_search.best_estimator['RMSE']

        print(self.grid_search.best_score['RMSE'])
        print(self.grid_search.best_params['RMSE'])
Code example #14
File: test_grid_search.py  Project: zxshinxz/Surprise
def test_best_estimator():
    """Ensure that the best estimator is the one giving the best score (by
    re-running it)"""
    param_grid = {'n_epochs': [5], 'lr_all': [0.002, 0.005],
                  'reg_all': [0.4, 0.6], 'n_factors': [1], 'init_std_dev': [0]}
    with pytest.warns(UserWarning):
        grid_search = GridSearch(SVD, param_grid, measures=['FCP', 'mae',
                                                            'rMSE'], n_jobs=1)
    with pytest.warns(UserWarning):
        grid_search.evaluate(data)
    best_estimator = grid_search.best_estimator['MAE']
    with pytest.warns(UserWarning):
        assert evaluate(
            best_estimator, data)['MAE'] == grid_search.best_score['MAE']
Code example #15
def surprise_gridsearch(param_grid, model, data):
    """
    Gridsearch on a surprise recommender model to extract the best parameters

    Args:
        param_grid: dictionary of model parameters and potential parameter values
        model: surprise algorithm class to tune
        data: surprise Dataset of ratings

    Returns:
        The best score from the gridsearch as well as the accompanying best parameters
    """
    grid_search = GridSearch(model,
                             param_grid,
                             measures=['RMSE'],
                             verbose=False)
    grid_search.evaluate(data)
    return grid_search.best_score['RMSE'], grid_search.best_params['RMSE']
Code example #16
def grid_search_rec(data, algo, param_grid):
    """
	Grid search a RS algorithm using surprise lib.

	Inputs
	---------
		data: surprise Dataset trainset format for ratings
		algo: surprise algorithm for grid searching 
		param_grid: parameter map to grid search

	Returns
	--------
		best_estimator: surprise algorithm with best hyperparameters

	"""

    grid_search = GridSearch(algo, param_grid, measures=['RMSE'], verbose=True)
    grid_search.evaluate(data)

    return grid_search.best_estimator["RMSE"]
Code example #17
def grid_search_svd_surprise(data_train, n_epochs, lr_all, reg_all, init_mean,
                             n_factors):

    print('SVD Surprise grid search')

    # Construct SVD algo with params
    param_grid = {
        'n_epochs': n_epochs,
        'lr_all': lr_all,
        'reg_all': reg_all,
        'init_mean': init_mean,
        'n_factors': n_factors
    }
    # Create the Grid search algo with the params
    grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], verbose=False)
    # Evaluate the Grid search and print the best params
    grid_search.evaluate(data_train)
    print(grid_search.best_score['RMSE'])
    print(grid_search.best_params['RMSE'])
    # Return the best params for the SVD algo
    return grid_search.best_params['RMSE']
Code example #18
def svdpp_running_time(data):
    '''
        Calculates the running times for training and predictions for SVD++

        Args:
            data(Dataset): a list of datasets with different numbers of users

        Returns:
            elapsedtime_SVDpptrain: running time for training
            elapsedtime_SVDpptest: running time for predictions on testset
    '''
    elapsedtime_SVDpptrain = []
    elapsedtime_SVDpptest = []

    # tune the parameters on the entire data
    param_grid = {
        'n_factors': [25, 50, 100, 250],
        'n_epochs': [10, 20, 30, 40, 50]
    }
    grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data[3])
    param = grid_search.best_params['RMSE']
    n_factors = param['n_factors']
    n_epochs = param['n_epochs']

    # using the tuned parameters calculate running times
    for i in range(len(data)):
        # training running time
        training_start = time.time()
        training = data[i].build_full_trainset()
        testing = training.build_anti_testset()
        svdpp = SVDpp(n_factors=n_factors, n_epochs=n_epochs)
        svdpp.train(training)
        elapsedtime_SVDpptrain.append(time.time() - training_start)

        # prediction running time
        test_start = time.time()
        svdpp.test(testing)
        elapsedtime_SVDpptest.append(time.time() - test_start)
    return elapsedtime_SVDpptrain, elapsedtime_SVDpptest
Code example #19
def test_dict_parameters():
    """Dict parameters like bsl_options and sim_options require special
    treatment. We here test both in one shot with KNNBaseline."""

    param_grid = {
        'bsl_options': {
            'method': ['als', 'sgd'],
            'reg': [1, 2]
        },
        'k': [2, 3],
        'sim_options': {
            'name': ['msd', 'cosine'],
            'min_support': [1, 5],
            'user_based': [False]
        }
    }

    grid_search = GridSearch(KNNBaseline,
                             param_grid,
                             measures=['FCP', 'mae', 'rMSE'])
    grid_search.evaluate(data)
    assert len(grid_search.cv_results['params']) == 32
Code example #20
def knn(data, training, testing):
    '''
        Tune Basic KNN parameters, then calculate the RMSE, coverage and running time of Basic KNN

        Args:
            data(Dataset): the whole dataset divided into 5 folds
            training(Dataset): training dataset
            testing(Dataset): test dataset

        Returns:
            rmse: RMSE of Basic KNN with optimized parameters
            top_n: number of unique predictions for top n items
    '''

    # candidate parameters
    knn_param_grid = {'k': [5, 10, 20], 'sim_options': {'name': ['msd', 'cosine', 'pearson'],
                                                        'min_support': [1, 5], 'user_based': [False]}}

    # optimize parameters
    knn_grid_search = GridSearch(KNNBasic, knn_param_grid, measures=['RMSE'], verbose=False)
    knn_grid_search.evaluate(data)
    param = knn_grid_search.best_params['RMSE']
    print('KNNBasic:', param)
    # RMSE against parameters
    result_df = pd.DataFrame.from_dict(knn_grid_search.cv_results)
    result_df.to_csv('data/knn_rmse_against_param.csv')


    # fit model using the optimized parameters
    # pass the tuned similarity options via the sim_options dict so they actually take effect
    knn = KNNBasic(k=param['k'], sim_options=param['sim_options'])
    knn.train(training)

    # evaluate the model using test data
    predictions = knn.test(testing)
    top_n = get_top_n(predictions, n=5)

    rmse = accuracy.rmse(predictions, verbose=True)
    return rmse, top_n
Code example #21
def svdpp(data, training, testing):
    '''
    Tune SVD++ parameters, then calculate the RMSE, coverage and running time of SVD++

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of SVD++ with optimized parameters
        top_n: number of unique predictions for top n items
    '''
    # candidate parameters
    param_grid = {'n_factors': [25, 50, 100, 250], 'n_epochs': [10, 20, 30, 40, 50]}

    # optimize parameters
    grid_search = GridSearch(SVDpp, param_grid, measures=['RMSE'], verbose=False)
    grid_search.evaluate(data)
    param = grid_search.best_params['RMSE']
    print('SVDpp:', param)
    # RMSE against parameters
    result_df = pd.DataFrame.from_dict(grid_search.cv_results)
    result_df.to_csv('data/svdpp_rmse_against_param.csv')


    # fit model using the optimized parameters
    svdpp = SVDpp(n_factors=param['n_factors'], n_epochs=param['n_epochs'])
    svdpp.train(training)

    # evaluate the model using test data
    predictions = svdpp.test(testing)
    top_n = get_top_n(predictions, n=5)
    rmse = accuracy.rmse(predictions, verbose=True)

    return rmse, top_n
Code example #22
File: test_grid_search.py  Project: zwcdp/Surprise
def test_same_splits(small_ml):
    """Ensure that all parameter combinations are tested on the same splits (we
    check that average RMSE scores are the same, which should be enough)."""

    small_ml.split(3)

    # all RMSE should be the same (as param combinations are the same)
    param_grid = {'n_epochs': [1, 1], 'lr_all': [.5, .5]}
    with pytest.warns(UserWarning):
        grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], n_jobs=-1)
    grid_search.evaluate(small_ml)

    rmse_scores = [s['RMSE'] for s in grid_search.cv_results['scores']]
    assert len(set(rmse_scores)) == 1  # assert rmse_scores are all equal

    # evaluate grid search again, to make sure that splits are still the same.
    grid_search.evaluate(small_ml)
    rmse_scores += [s['RMSE'] for s in grid_search.cv_results['scores']]
    assert len(set(rmse_scores)) == 1
Code example #23
def test_same_splits():
    """Ensure that all parameter combinations are tested on the same splits (we
    check that average RMSE scores are the same, which should be enough)."""

    data_file = os.path.join(os.path.dirname(__file__), './u1_ml100k_train')
    data = Dataset.load_from_file(data_file, reader=Reader('ml-100k'))
    data.split(3)

    # all RMSE should be the same (as param combinations are the same)
    param_grid = {'n_epochs': [1, 1], 'lr_all': [.5, .5]}
    grid_search = GridSearch(SVD, param_grid, measures=['RMSE'], n_jobs=-1)
    grid_search.evaluate(data)

    rmse_scores = [s['RMSE'] for s in grid_search.cv_results['scores']]
    assert len(set(rmse_scores)) == 1  # assert rmse_scores are all equal

    # evaluate grid search again, to make sure that splits are still the same.
    grid_search.evaluate(data)
    rmse_scores += [s['RMSE'] for s in grid_search.cv_results['scores']]
    assert len(set(rmse_scores)) == 1
Code example #24
df = df[cols]

reader = Reader(rating_scale=(1, 5))
trainset = Dataset.load_from_df(df, reader)
trainset.split(n_folds=10)

# http://surprise.readthedocs.io/en/stable/getting_started.html#tune-algorithm-parameters-with-gridsearch
# best params: MEAN RMSE: 1.2525329258
param_grid = {
    'n_factors': [1],
    'n_epochs': [45],
    'lr_bu': [0.004],
    'lr_bi': [0.008],
    'lr_pu': [0.0015],
    'lr_qi': [0.000025],
    'reg_bu': [0.24],
    'reg_bi': [0.24],
    'reg_pu': [0.055],
    'reg_qi': [0.0085],
    'init_mean': [0],
    'init_std_dev': [0]
}

grid_search = GridSearch(SVD, param_grid, measures=['RMSE'])

grid_search.evaluate(trainset)

print(grid_search.best_score['RMSE'])

print(grid_search.best_params['RMSE'])
Code example #25
reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)
dataset.split(n_folds=5)
"""
#Sample Run
algo = pa.KNNBasic(k=10, min_k=5)
perf = evaluate(algo, dataset, measures=['MAE', 'RMSE', 'FCP'])
print_perf(perf)
"""

similarities = ['cosine', 'msd', 'pearson', 'pearson_baseline']
user_based = [True, False]

start_time = ('Timestamp: {:%Y-%b-%d %H:%M:%S}'.format(
    datetime.datetime.now()))
sim_options = {'name': similarities, 'user_based': user_based}
param_grid = {
    'k': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'min_k': [5],
    'sim_options': sim_options
}
grid_search = GridSearch(pa.KNNBasic,
                         param_grid=param_grid,
                         measures=['MAE', 'RMSE', 'FCP'])
grid_search.evaluate(dataset)
results_df = pd.DataFrame.from_dict(grid_search.cv_results)
results_df.to_csv("KNNBasic_Results.csv")
end_time = ('Timestamp: {:%Y-%b-%d %H:%M:%S}'.format(datetime.datetime.now()))
print "Start Time: ", start_time
print "End Time: ", end_time
Code example #26
"""
This module describes how to tune algorithm parameters with GridSearch.
"""

from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

from surprise import GridSearch
from surprise import SVD
from surprise import Dataset

param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],
              'reg_all': [0.4, 0.6]}

grid_search = GridSearch(SVD, param_grid, measures={'RMSE', 'FCP'},
                         verbose=False)

# Prepare Data
data = Dataset.load_builtin('ml-100k')
data.split(n_folds=3)

grid_search.evaluate(data)

# best RMSE score
print(grid_search.best_score['RMSE'])
# >>> 0.96117566386

# combination of parameters that gave the best RMSE score
print(grid_search.best_params['RMSE'])
# >>> {'reg_all': 0.4, 'lr_all': 0.005, 'n_epochs': 10}
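All of these snippets use the legacy GridSearch class together with data.split(); both were deprecated and later removed in favour of surprise.model_selection.GridSearchCV. A rough equivalent of the example above with the newer API might look like this (a sketch, assuming Surprise 1.0.5 or later):

from surprise import SVD, Dataset
from surprise.model_selection import GridSearchCV

param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],
              'reg_all': [0.4, 0.6]}

# GridSearchCV manages the cross-validation folds itself (cv=3 replaces data.split(n_folds=3))
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'fcp'], cv=3)

data = Dataset.load_builtin('ml-100k')
gs.fit(data)

# best RMSE score and the parameter combination that produced it
print(gs.best_score['rmse'])
print(gs.best_params['rmse'])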
Code example #27
print(ratings_explicit.shape)
#
#### split the ratings table into training and testing datasets

ratings_train, ratings_test = train_test_split(
    ratings_explicit,
    stratify=ratings_explicit['UserID'],
    test_size=0.30,
    random_state=0)
#

#
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_train[['UserID', 'ISBN', 'Rating']],
                            reader)

parameter_grid = {'n_factors': [50, 100, 150, 200, 250, 300]}

grid_search = GridSearch(SVD, parameter_grid, measures=['RMSE', 'MAE'])

grid_search.evaluate(data)

best_parameters = grid_search.best_params
print(best_parameters)

# best RMSE and MAE score
best_result = grid_search.best_score
print(best_result)

Code example #28
# shuffle ratings if you want
random.shuffle(raw_ratings)

# A = 90% of the data, B = 10% of the data
threshold = int(.9 * len(raw_ratings))
A_raw_ratings = raw_ratings[:threshold]
B_raw_ratings = raw_ratings[threshold:]

data.raw_ratings = A_raw_ratings  # data is now the set A
data.split(n_folds=3)

# Select your best algo with grid search.
print('Grid Search...')
param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005]}
grid_search = GridSearch(SVD, param_grid, measures={'RMSE'}, verbose=True)
grid_search.evaluate(data)

algo = grid_search.best_estimator['RMSE']

# retrain on the whole set A
trainset = data.build_full_trainset()
algo.train(trainset)

# Compute biased accuracy on A
predictions = algo.test(trainset.build_testset())
print('Biased accuracy on A,', end='   ')
accuracy.rmse(predictions)

# Compute unbiased accuracy on B
testset = data._construct_testset(B_raw_ratings)  # testset is now the set B
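The snippet stops right after building the held-out set B. In the Surprise documentation example this mirrors, the unbiased accuracy on B is computed next; the continuation would presumably be along these lines:

predictions = algo.test(testset)
print('Unbiased accuracy on B,', end='   ')
accuracy.rmse(predictions)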
Code example #29
    def recommend(self, uids, n_items=10, verbose=False):
        if verbose:
            print('■ ■ ■ {} ■ ■ ■'.format(self.algorithm.__name__))

        data = self.data
        trained_model = os.path.expanduser(self.dump_file_name)

        try:
            _, algo = dump.load(trained_model)
        except FileNotFoundError:
            if verbose:
                print('■ Performing random sampling on the dataset')
            raw_ratings = data.raw_ratings
            np.random.shuffle(raw_ratings)
            threshold = int(self.trainset_size * len(raw_ratings))
            trainset_raw_ratings = raw_ratings[:threshold]
            testset_raw_ratings = raw_ratings[threshold:]

            data.raw_ratings = trainset_raw_ratings

            if any(self.param_grid):
                if self.perf_measure not in ['rmse', 'mae', 'fcp']:
                    raise ValueError('■ Invalid accuracy measurement provided')

                if verbose:
                    print('■ Performing Grid Search')
                data.split(n_folds=self.n_folds)
                grid_search = GridSearch(self.algorithm,
                                         param_grid=self.param_grid,
                                         measures=[self.perf_measure],
                                         verbose=verbose)
                grid_search.evaluate(data)
                algo = grid_search.best_estimator[self.perf_measure]
                if self.sim_options is not None:
                    algo.sim_options = self.sim_options
                if self.bsl_options is not None:
                    algo.bsl_options = self.bsl_options
                if verbose:
                    print('■ Grid Search summary')
                    cv_results = grid_search.cv_results
                    del cv_results['scores']
                    df = pd.DataFrame.from_dict(cv_results)
                    sort_column = self.perf_measure.upper()
                    if sort_column in df.columns:
                        df = df.sort_values([sort_column], ascending=True)
                    pretty_print(df)

                    print('■ Algorithm properties')
                    print_object(algo)
            else:
                algo = self.algorithm()

            algo.verbose = verbose

            if verbose:
                print('■ Training using trainset')
                trainset = data.build_full_trainset()
                algo.train(trainset)

                print('■ Evaluating using testset')
                testset = data.construct_testset(testset_raw_ratings)
                predictions = algo.test(testset)
                accuracy.rmse(predictions)

        if verbose:
            print('■ Using the best estimator on the full dataset')
        data = self.data
        trainset = data.build_full_trainset()
        if self.anti_testset:
            testset = trainset.build_anti_testset()
        else:
            testset = trainset.build_testset()

        start = default_timer()

        algo.train(trainset)
        predictions = algo.test(testset)

        if self.dump_model:
            if verbose:
                print('■ Saving the trained model')
            dump.dump(trained_model, predictions, algo, verbose)

        print('■ Accuracy scores')
        accuracy.mae(predictions)
        accuracy.rmse(predictions)

        self.print_precision_call(predictions, uids, n_items)
        recommendations = self.get_recommendations_for_users(
            uids, predictions, n_items)

        duration = default_timer() - start
        duration = datetime.timedelta(seconds=math.ceil(duration))
        print('■ Time elapsed:', duration)

        if verbose:
            print('■ Recommendations:')
            pretty_print(recommendations)

        return recommendations
Code example #30
axarr[1].plot(best_item_est_oma, marker='o')
axarr[1].plot(best_item_est, marker='<')
axarr[1].plot(best_item_est_svd, marker='>')
f.show()

raise SystemExit('ddd')  # halt the script here (raising a bare string is not valid)

#%% SVD
param_grid_SVD = {
    'n_factors': [5, 10, 15, 20, 40, 80],
    'n_epochs': [35],
    'lr_all': [0.007, 0.005, 0.003],
    'reg_all': [0.005, 0.01, 0.02, 0.05]
}

grid_search = GridSearch(SVD, param_grid_SVD, measures=['MAE', 'RMSE'])

grid_search.evaluate(data)

# best MAE
print('best params (MAE): ' + str(grid_search.best_params['MAE']))

# combination of parameters that gave the best RMSE score
print('best params (RMSE): ' + str(grid_search.best_params['RMSE']))

params = grid_search.best_params['MAE']
algo_SVD = SVD(verbose=True,
               n_factors=params['n_factors'],
               n_epochs=params['n_epochs'],
               lr_all=params['lr_all'],
               reg_all=params['reg_all'])