Ejemplo n.º 1
0
def knn_z(data, training, testing):
    '''
    Tune KNN with Z-score by grid search, then fit it with the best parameters
    and evaluate it on the test data.

    Args:
        data(Dataset): the whole dataset divided into 5 folds
        training(Dataset): training dataset
        testing(Dataset): test dataset

    Returns:
        rmse: RMSE of KNN with Z-score with optimized parameters
        top_n: result of get_top_n(predictions, n=5) for the test predictions
    '''

    # candidate parameters
    knn_param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False],
        },
    }

    # optimize parameters (best combination according to RMSE)
    knnz_grid_search = GridSearch(KNNWithZScore, knn_param_grid, measures=['RMSE'], verbose=False)
    knnz_grid_search.evaluate(data)
    param = knnz_grid_search.best_params['RMSE']
    print('KNNWithZScore:', param)

    # fit model using the optimized parameters.
    # The similarity settings must be handed over as the `sim_options` dict:
    # passing name/min_support/user_based as loose keyword arguments is
    # silently ignored and the model falls back to the default similarity.
    knnz = KNNWithZScore(k=param['k'], sim_options=param['sim_options'])
    knnz.train(training)

    # evaluate the model using test data
    predictions = knnz.test(testing)
    rmse = accuracy.rmse(predictions, verbose=True)
    top_n = get_top_n(predictions, n=5)

    return rmse, top_n
Ejemplo n.º 2
0
def knnz_running_time(data):
    '''
    Calculates the running times for training and predictions for KNN with Z-score

    Parameters are tuned once by grid search on data[3] and the best
    combination (by RMSE) is reused for every dataset in the list.

    Args:
        data(Dataset): a list of datasets with different numbers of users
            (must contain at least 4 entries, since data[3] is used for tuning)

    Returns:
        elapsedtime_KnnZtrain: running time for training, one entry per dataset
        elapsedtime_KnnZtest: running time for predictions on the anti-testset,
            one entry per dataset
    '''
    elapsedtime_KnnZtrain = []
    elapsedtime_KnnZtest = []

    # tune the parameters on data[3]
    # NOTE(review): presumably data[3] is the largest/entire dataset - confirm with caller
    param_grid = {
        'k': [5, 10, 20],
        'sim_options': {
            'name': ['msd', 'cosine', 'pearson'],
            'min_support': [1, 5],
            'user_based': [False],
        },
    }
    grid_search = GridSearch(KNNWithZScore,
                             param_grid,
                             measures=['RMSE'],
                             verbose=False)
    grid_search.evaluate(data[3])
    param = grid_search.best_params['RMSE']
    k = param['k']
    # Similarity settings have to be passed as one `sim_options` dict; loose
    # name/min_support/user_based keyword arguments are silently ignored.
    sim_options = param['sim_options']

    # using the tuned parameters calculate running times
    for dataset in data:
        # Data preparation is kept outside the timed sections so that the
        # measurements cover only model training and prediction.
        training = dataset.build_full_trainset()
        testing = training.build_anti_testset()
        knnz = KNNWithZScore(k=k, sim_options=sim_options)

        # training running time
        training_start = time.time()
        knnz.train(training)
        elapsedtime_KnnZtrain.append(time.time() - training_start)

        # prediction running time
        test_start = time.time()
        knnz.test(testing)
        elapsedtime_KnnZtest.append(time.time() - test_start)
    return elapsedtime_KnnZtrain, elapsedtime_KnnZtest
Ejemplo n.º 3
0
# 1-based running index of the parameter combination being evaluated.
count = 1

# Lazily enumerate every (name, user_based, shrinkage, k, min_k) combination
# in the same order as five nested loops would visit them.
param_combinations = (
    (sim_name, by_user, shrink, n_neighbors, min_neighbors)
    for sim_name in name
    for by_user in user_based
    for shrink in shrinkage
    for n_neighbors in k
    for min_neighbors in min_k
)

for sim_name, by_user, shrink, n_neighbors, min_neighbors in param_combinations:
    print("================================================")

    # Build and train the model for this combination.
    sim_options = {'name': sim_name, 'user_based': by_user, 'shrinkage': shrink}
    algo = KNNWithZScore(k=n_neighbors, min_k=min_neighbors, sim_options=sim_options)
    algo.train(trainset)

    print(f"This is the #{count} parameter combination")

    predictions = algo.test(testset)

    print(f"name={sim_name}, user_based={by_user}, shrinkage={shrink}, "
          f"k={n_neighbors}, min_k={min_neighbors}")

    # Report the accuracy metrics for this combination.
    accuracy.rmse(predictions, verbose=True)
    accuracy.fcp(predictions, verbose=True)
    accuracy.mae(predictions, verbose=True)

    count += 1