# Optional command-line overrides: argv[1] -> leave_k_out, argv[2] -> lafactor.
# Both names are expected to already hold their defaults at this point (the
# assignments are not visible in this excerpt).
if len(sys.argv) > 1:
    leave_k_out = int(sys.argv[1])
    print('Use leave k out from command line: k=' + str(leave_k_out))
else:
    # Only claim "default" when no override was actually supplied.
    print('Use default leave k out: k=' + str(leave_k_out))

if len(sys.argv) > 2:
    lafactor = int(sys.argv[2])
    print('Use latent factor from command line: ' + str(lafactor))
else:
    print('Use default latent factor: ' + str(lafactor))

# Forwarded unchanged to experiment_leave_k_out_map below.
max_rank = 2000

# Number of repetitions.
total_iteration = 2

# Recommendation algorithms to compare. RandUV(latent_factor=lafactor) is
# intentionally left out of this run.
method_list = [
    LMaFit(latent_factor=lafactor),
    item_item_sim(N=lafactor),
    HierLat(latent_factor=lafactor),
    NMF(latent_factor=lafactor),
    PMF(latent_factor=lafactor),
    TriUHV(latent_factor=lafactor),
]

# Main experiment call, run with binary=True.
result = experiment_leave_k_out_map(
    exp_name, daily_data_file,
    min_occ_user, min_occ_prog, num_user, num_prog,
    method_list, leave_k_out, total_iteration, max_rank, binary=True)

# As used here, result maps a method name to a sequence of per-run dicts
# that carry at least the keys 'RMSE' and 'recall'.
matlab_output = {}  # not filled within the visible part of the script
for method_name, method_iter_perf in result.items():
    print('Method: ' + method_name)
    rmse = sum(x['RMSE'] for x in method_iter_perf) / len(method_iter_perf)
    print('>>Average RMSE      : %.5f' % rmse)

    # Zero vector with one slot per entry of the first run's 'recall' value;
    # the code that consumes it is outside this excerpt.
    perf_recall = np.zeros(len(method_iter_perf[0]['recall']))
 
 # Optional command-line overrides: argv[1] -> leave_k_out, argv[2] -> lafactor.
 # Both names are expected to already hold their defaults at this point (the
 # assignments are not visible in this excerpt).
 if len(sys.argv) > 1:
     leave_k_out = int(sys.argv[1])
     print('Use leave k out from command line: k=' + str(leave_k_out))
 else:
     # Only claim "default" when no override was actually supplied.
     print('Use default leave k out: k=' + str(leave_k_out))

 if len(sys.argv) > 2:
     lafactor = int(sys.argv[2])
     print('Use latent factor from command line: ' + str(lafactor))
 else:
     print('Use default latent factor: ' + str(lafactor))

 # Forwarded unchanged to experiment_leave_k_out_map below.
 max_rank = 2000

 # Number of repetitions.
 total_iteration = 2

 # Recommendation algorithms to compare. RandUV(latent_factor=lafactor) is
 # intentionally left out of this run.
 method_list = [
     LMaFit(latent_factor=lafactor),
     item_item_sim(N=lafactor),
     HierLat(latent_factor=lafactor),
     NMF(latent_factor=lafactor),
     PMF(latent_factor=lafactor),
     TriUHV(latent_factor=lafactor),
 ]

 # Main experiment call, run with binary=False.
 result = experiment_leave_k_out_map(
     exp_name, daily_data_file,
     min_occ_user, min_occ_prog, num_user, num_prog,
     method_list, leave_k_out, total_iteration, max_rank, binary=False)

 # As used here, result maps a method name to a sequence of per-run dicts
 # that carry at least the keys 'RMSE' and 'recall'.
 matlab_output = {}  # not filled within the visible part of the script
 for method_name, method_iter_perf in result.items():
     print('Method: ' + method_name)
     rmse = sum(x['RMSE'] for x in method_iter_perf) / len(method_iter_perf)
     print('>>Average RMSE      : %.5f' % rmse)

     # Zero vector with one slot per entry of the first run's 'recall' value;
     # the code that consumes it is outside this excerpt.
     perf_recall = np.zeros(len(method_iter_perf[0]['recall']))
Esempio n. 3
0
        raise ValueError('Cannot find data file. ')

    # Label for this experiment run; handed to experiment_leave_k_out.
    exp_name = 'bin_exp_mid_prec_rec'

    # k of the leave-k-out evaluation and the number of repetitions.
    leave_k_out = 10
    total_iteration = 3

    # Recommendation algorithms under comparison. Each one is configured
    # with lafactor (item_item_sim receives it as N).
    method_list = [
        LMaFit(latent_factor=lafactor),
        RandUV(latent_factor=lafactor),
        HierLat(latent_factor=lafactor),
        NMF(latent_factor=lafactor),
        PMF(latent_factor=lafactor),
        TriUHV(latent_factor=lafactor),
        item_item_sim(N=lafactor),
    ]

    # Alternative selections kept for reference:
    #   method_list = [item_item_sim(N=lafactor)]
    #   method_list = [PMF(latent_factor=lafactor), TriUHV(latent_factor=lafactor)]

    # Main experiment call.
    # NOTE(review): the trailing True is positional; presumably a "binary"
    # switch -- confirm against experiment_leave_k_out's signature.
    result = experiment_leave_k_out(
        exp_name, daily_data_file, min_occ_user, min_occ_prog,
        method_list, leave_k_out, total_iteration, top_n, True)

    # Report, per method, the 'prec' value averaged over the entries of
    # method_iter_perf.
    for method_name, method_iter_perf in result.items():
        print('Method: ' + method_name)
        prec_total = sum(perf['prec'] for perf in method_iter_perf)
        print('>>Average precision : %.5f' % (prec_total / len(method_iter_perf)))
 # Announce the input file, then refuse to continue when it is missing.
 print('processing file' + ' ' + str(daily_data_file))
 if not os.path.isfile(daily_data_file):
     raise ValueError('Cannot find data file. ')

 # Label for this experiment run; handed to experiment_leave_k_out.
 exp_name = 'bin_exp_mid_prec_rec'

 # k of the leave-k-out evaluation and the number of repetitions.
 leave_k_out = 10
 total_iteration = 3

 # Recommendation algorithms under comparison. Each one is configured
 # with lafactor (item_item_sim receives it as N).
 method_list = [
     LMaFit(latent_factor=lafactor),
     RandUV(latent_factor=lafactor),
     HierLat(latent_factor=lafactor),
     NMF(latent_factor=lafactor),
     PMF(latent_factor=lafactor),
     TriUHV(latent_factor=lafactor),
     item_item_sim(N=lafactor),
 ]

 # Alternative selections kept for reference:
 #   method_list = [item_item_sim(N=lafactor)]
 #   method_list = [PMF(latent_factor=lafactor), TriUHV(latent_factor=lafactor)]

 # Main experiment call.
 # NOTE(review): the trailing True is positional; presumably a "binary"
 # switch -- confirm against experiment_leave_k_out's signature.
 result = experiment_leave_k_out(
     exp_name, daily_data_file, min_occ_user, min_occ_prog,
     method_list, leave_k_out, total_iteration, top_n, True)

 # Report, per method, the 'prec', 'recall' and 'rmse' values averaged over
 # the entries of method_iter_perf, in that order.
 for method_name, method_iter_perf in result.items():
     print('Method: ' + method_name)
     for line_format, metric_key in (('>>Average precision : %.5f', 'prec'),
                                     ('>>Average recall    : %.5f', 'recall'),
                                     ('>>Average rmse      : %.5f', 'rmse')):
         metric_total = sum(perf[metric_key] for perf in method_iter_perf)
         print(line_format % (metric_total / len(method_iter_perf)))
 
 # Label for this experiment run; handed to experiment_leave_k_out.
 exp_name = 'test_exp_mid_prec_rec'

 # Filtering criteria. Per the original author's note, min_occ_user should
 # be more than leave_k_out.
 min_occ_user = 2
 min_occ_prog = 2

 # Performance is computed on the top N entries.
 top_n = 50

 # k of the leave-k-out evaluation and the number of repetitions.
 leave_k_out = 1
 total_iteration = 3

 # Recommendation algorithms under comparison. Each one is configured
 # with lafactor (item_item_sim receives it as N).
 method_list = [
     LMaFit(latent_factor=lafactor),
     RandUV(latent_factor=lafactor),
     NMF(latent_factor=lafactor),
     PMF(latent_factor=lafactor),
     item_item_sim(N=lafactor),
 ]

 # Main experiment call.
 result = experiment_leave_k_out(
     exp_name, daily_data_file, min_occ_user, min_occ_prog,
     method_list, leave_k_out, total_iteration, top_n)

 # Report, per method, the 'prec', 'recall' and 'rmse' values averaged over
 # the entries of method_iter_perf, in that order.
 for method_name, method_iter_perf in result.items():
     print('Method: ' + method_name)
     for line_format, metric_key in (('>>Average precision : %.5f', 'prec'),
                                     ('>>Average recall    : %.5f', 'recall'),
                                     ('>>Average rmse      : %.5f', 'rmse')):
         metric_total = sum(perf[metric_key] for perf in method_iter_perf)
         print(line_format % (metric_total / len(method_iter_perf)))

 # Debugging aid: print method_iter_perf inside the loop, or result here,
 # to inspect the raw values.
'''
This is a testing pipeline for KDD_2014 algorithm.
 
Created on Feb 17, 2014

@author: Shiyu C. ([email protected])
'''

from rs.data.daily_watchtime import DailyWatchTimeReader
from rs.algorithms.recommendation.item_item_sim import item_item_sim

if __name__ == '__main__':
    # Smoke test: train item_item_sim on one sample file and print its
    # predictions for three (row, column) positions.
    filename = "../../../datasample/agg_duid_pid_watchtime_genre/20131209_100000"

    # Load the sample and normalize its rows.
    # NOTE(review): the two 1 arguments are presumably minimum-value
    # thresholds (going by the method name) -- confirm against the reader.
    reader = DailyWatchTimeReader()
    feedback_data = reader.read_file_with_minval(filename, 1, 1)
    feedback_data.normalize_row()

    # N is the only argument given to the item_item_sim constructor.
    N = 3
    item_item_sim_model = item_item_sim(N)
    item_item_sim_model.train(feedback_data)

    # Query positions: loc_row[i] is paired with loc_col[i].
    loc_row = [200, 4, 105]
    loc_col = [10, 22, 4]

    print('Prediction:')
    predicted = item_item_sim_model.predict(loc_row, loc_col)
    print(predicted)

    
Esempio n. 7
0
'''
This is a testing pipeline for KDD_2014 algorithm.
 
Created on Feb 17, 2014

@author: Shiyu C. ([email protected])
'''

from rs.data.daily_watchtime import DailyWatchTimeReader
from rs.algorithms.recommendation.item_item_sim import item_item_sim

if __name__ == '__main__':
    # Smoke test: train item_item_sim on one sample file and print its
    # predictions for three (row, column) positions.
    filename = "../../../datasample/agg_duid_pid_watchtime_genre/20131209_100000"

    # Load the sample and normalize its rows.
    # NOTE(review): the two 1 arguments are presumably minimum-value
    # thresholds (going by the method name) -- confirm against the reader.
    reader = DailyWatchTimeReader()
    feedback_data = reader.read_file_with_minval(filename, 1, 1)
    feedback_data.normalize_row()

    # N is the only argument given to the item_item_sim constructor.
    N = 3
    item_item_sim_model = item_item_sim(N)
    item_item_sim_model.train(feedback_data)

    # Query positions: loc_row[i] is paired with loc_col[i].
    loc_row = [200, 4, 105]
    loc_col = [10, 22, 4]

    print('Prediction:')
    predicted = item_item_sim_model.predict(loc_row, loc_col)
    print(predicted)