예제 #1
0
def test_user_knn():
    """Check a user-based KNN model against the stored reference values.

    The model is built from ``dataset/u1.base`` and trained with ``k=100``
    and Pearson similarity. Three things are then verified:

    * the single prediction for user '457' / item '443',
    * the top-5 recommendation list for user '457',
    * the MAE and RMSE reported for ``dataset/u1.test``.

    The ``expected_*`` values and ``*_epsilon`` tolerances are read from
    the enclosing scope; they are not defined in this function.
    """
    # Both input files are described with the same column layout, so the
    # layout keywords are spelled out once and reused for each call.
    file_layout = {
        'dlmchar': b'\t',
        'header': False,
        'usercol': 0,
        'itemcol': 1,
        'ratingcol': 2,
    }

    model = pyreclab.UserKnn(dataset='dataset/u1.base', **file_layout)
    model.train(k=100, similarity='pearson')

    # Single rating prediction, compared within a tolerance.
    single_prediction = model.predict('457', '443')
    prediction_error = abs(single_prediction - expected_prediction)
    assert prediction_error < prediction_epsilon

    # The recommendation list must match the reference exactly.
    top_items = model.recommend('457', 5, includeRated=False)
    assert top_items == expected_ranking

    # Only the aggregate error metrics are checked; the first element of
    # the returned triple is not inspected.
    _predictions, mae, rmse = model.test(input_file='dataset/u1.test',
                                         output_file='predictions.csv',
                                         **file_layout)

    mae_error = abs(mae - expected_mae)
    assert mae_error < mae_epsilon
    rmse_error = abs(rmse - expected_rmse)
    assert rmse_error < rmse_epsilon
예제 #2
0
def main(k=100):
    """Train, exercise and time a user-based KNN recommender.

    Builds ``pyreclab.UserKnn`` from ``dataset/u1.base``, trains it with
    Pearson similarity, prints one prediction and one top-5 recommendation
    for user '457', then runs the prediction test and the recommendation
    test against ``dataset/u1.test``. The elapsed time of the training,
    prediction-test and recommendation-test stages is printed.

    The prediction test is called with ``output_file='predictions.csv'``
    and the recommendation test with ``output_file='ranking.json'``.

    Args:
        k: Forwarded to ``train`` as its ``k`` argument (default 100).

    Returns:
        tuple: The ``(mae, rmse)`` values returned by the prediction test.
    """
    # time.clock() was removed in Python 3.8, so calling it there raises
    # AttributeError. Prefer time.perf_counter (available since 3.3) and
    # only fall back to time.clock on interpreters that predate it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    ubknn = pyreclab.UserKnn(dataset='dataset/u1.base',
                             dlmchar=b'\t',
                             header=False,
                             usercol=0,
                             itemcol=1,
                             ratingcol=2)

    print('-> training model')
    start = timer()
    ubknn.train(k=k, similarity='pearson')
    end = timer()
    print('training time: ' + str(end - start))

    print('-> individual test')
    pred = ubknn.predict('457', '443')
    print('user 457, item 443, prediction ' + str(pred))

    ranking = ubknn.recommend('457', 5, includeRated=False)
    print('recommendation for user 457: ' + str(ranking))

    print('-> prediction test')
    start = timer()
    # The first element of the returned triple is not used here; only the
    # aggregate error metrics are reported and returned.
    _, mae, rmse = ubknn.test(input_file='dataset/u1.test',
                              dlmchar=b'\t',
                              header=False,
                              usercol=0,
                              itemcol=1,
                              ratingcol=2,
                              output_file='predictions.csv')
    end = timer()
    print('prediction time: ' + str(end - start))

    print('MAE: ' + str(mae))
    print('RMSE: ' + str(rmse))

    print('-> recommendation test')
    start = timer()
    # The return value is intentionally discarded; the call is kept for
    # its timing and for the output file it is asked to produce.
    ubknn.testrec(input_file='dataset/u1.test',
                  dlmchar=b'\t',
                  header=False,
                  usercol=0,
                  itemcol=1,
                  ratingcol=2,
                  topn=10,
                  output_file='ranking.json',
                  includeRated=False)
    end = timer()
    print('recommendation time: ' + str(end - start))

    return mae, rmse
# Filename prefixes for the per-chunk output files. They are prefixes only:
# the loop below appends the chunk number and a ".csv" extension to each.
prediction_filename = data_url+'predictionsUserKnn' # completed as <prefix><chunk>.csv
# NOTE(review): "ordenadas" presumably refers to ranked output - confirm.
ordenada_filename = data_url+'ordenadasUserKnn' # completed as <prefix><chunk>.csv

print 'UserKnn'
for i in range(1, data_chunks + 1):
    f_t = training_filename + str(i) + ".txt"
    f_p = probe_filename + str(i) + ".txt"
    f_pred = prediction_filename + str(i) + ".csv"
    f_ord = ordenada_filename + str(i) + ".csv"

    print "Corriendo experimento ", i, "..."
    print 'Entrenando...'
    obj = pyreclab.UserKnn( dataset = f_t,
                               dlmchar = b'\t',
                               header = True,
                               usercol = 0,
                               itemcol = 1,
                               ratingcol = 2 )
    obj.train(10, 'pearson')
    print 'Prediciendo...'
    #prediction = obj.predict( "630685", "1")
    #ranking = obj.recommend( "630685", 10, True)
    #print prediction
    #print ranking

    predictionList, mae, rmse = obj.test( input_file = f_p,
                                              dlmchar = b'\t',
                                              header = False,
                                              usercol = 0,
                                              itemcol = 1,
                                              ratingcol = 2,
예제 #4
0
import time
import pyreclab

if __name__ == '__main__':

    ubknn = pyreclab.UserKnn(dataset='dataset/u1.base',
                             dlmchar=b'\t',
                             header=False,
                             usercol=0,
                             itemcol=1,
                             ratingcol=2)

    print('-> training model')
    start = time.clock()
    ubknn.train(k=100, similarity='pearson')
    end = time.clock()
    print('training time: ' + str(end - start))

    print('-> individual test')
    pred = ubknn.predict('457', '443')
    print('user 457, item 443, prediction ' + str(pred))

    ranking = ubknn.recommend('457', 5, includeRated=False)
    print('recommendation for user 457: ' + str(ranking))

    print('-> prediction test')
    start = time.clock()
    predlist, mae, rmse = ubknn.test(input_file='dataset/u1.test',
                                     dlmchar=b'\t',
                                     header=False,
                                     usercol=0,
예제 #5
0
파일: KNN.py 프로젝트: clee7/KNN
                        help='input qual data')
    parser.add_argument('--output',
                        '-o',
                        required=True,
                        type=str,
                        help='output file path')
    parser.add_argument('--code_test',
                        default=0,
                        type=int,
                        help='test code using UserAvg')
    args = parser.parse_args()

    if (args.code_test == 0):
        obj = pyreclab.UserKnn(dataset=args.train,
                               dlmchar=b' ',
                               header=False,
                               usercol=0,
                               itemcol=1,
                               ratingcol=3)
    else:
        obj = pyreclab.UserAvg(dataset=args.train,
                               dlmchar=b' ',
                               header=False,
                               usercol=0,
                               itemcol=1,
                               ratingcol=3)

    # Train
    print("Training starting")
    knn = 100
    similarity = 'pearson'