# Optional command-line overrides: argv[1] replaces leave_k_out and argv[2]
# replaces lafactor; both are parsed as integers.
# NOTE(review): each message below appears to be printed whether or not an
# override was given, so the "Use default" wording may be misleading -- confirm.
if not len(sys.argv) == 1:
        leave_k_out = int(sys.argv[1])
    print 'Use default leave k out: k=' + str(leave_k_out)

    if not len(sys.argv) <= 2:
        lafactor = int(sys.argv[2])
    print 'Use default latent factor: ' + str(lafactor)

    # forwarded unchanged to experiment_coldstart_map below.
    max_rank = 2000

    # number of repetitions.
    total_iteration = 2

    # recommendation algorithms under comparison, all built with the same
    # latent factor; RandUV(latent_factor=lafactor) is not part of this list.
    method_list = [ HierLat(latent_factor=lafactor, cold_start = HierLat.CS_EQUAL_PROB),  \
                    LMaFit(latent_factor=lafactor),  NMF(latent_factor=lafactor),
                    PMF(latent_factor=lafactor),     TriUHV(latent_factor=lafactor)  ]

    # main method: run the cold-start experiment for every algorithm.
    # exp_name, daily_data_file, min_occ_user, min_occ_prog, num_user and
    # num_prog are defined outside the visible lines.
    result = experiment_coldstart_map(exp_name, daily_data_file, \
                min_occ_user, min_occ_prog, num_user, num_prog,\
                method_list,  leave_k_out, total_iteration, max_rank, binary = False)

    # NOTE(review): matlab_output is not used within the visible lines.
    matlab_output = {}
    # result maps a method name to a sequence of per-iteration dicts; report
    # the 'RMSE' entry averaged over those iterations.
    for method_name, method_iter_perf in result.items():
        print 'Method: ' + method_name
        rmse = sum(x['RMSE'] for x in method_iter_perf) / len(method_iter_perf)
        print '>>Average RMSE      : %.5f' % rmse

        # zero vector with one slot per element of the first iteration's
        # 'recall' entry.
        perf_recall = np.zeros(len(method_iter_perf[0]['recall']))
 exp_name = 'test_exp_mid_prec_rec'; # something meaningful. 
 
 # filtering criteria
 min_occ_user = 35;
 min_occ_prog = 300;
 
 top_n = 50; # performance computed on top N; 
 
 leave_k_out = 10; # perform leave k out. 
 
 # number of repetitions. 
 total_iteration = 3;
 
 # recommendation algorithms 
 method_list = [ LMaFit(latent_factor=lafactor), RandUV(latent_factor=lafactor), \
                HierLat(latent_factor=lafactor), NMF(latent_factor=lafactor),
                PMF(latent_factor=lafactor),     TriUHV(latent_factor=lafactor)  ];
 
 # main method. 
 result = experiment_leave_k_out(exp_name, daily_data_file, min_occ_user, min_occ_prog, \
             method_list,  leave_k_out, total_iteration, top_n);
 
 # display results (average RMSE). 
 for method_name, method_iter_perf in result.items():
     print 'Method: '+ method_name;
     print  '>>Average precision : %.5f' % (sum( x['prec']   for x in method_iter_perf)/len(method_iter_perf));
     print  '>>Average recall    : %.5f' % (sum( x['recall'] for x in method_iter_perf)/len(method_iter_perf));
     print  '>>Average rmse      : %.5f' % (sum( x['rmse']   for x in method_iter_perf)/len(method_iter_perf));
     #print method_iter_perf;
 
 #print result;
    # Load the feedback data with thresholds 35 and 300 (an earlier variant
    # used 25 and 300). reader and filename are defined outside this excerpt.
    # NOTE(review): the two numbers are presumably the minimum user / program
    # occurrence counts -- confirm against read_file_with_minval.
    #feedback_data = reader.read_file_with_minval(filename, 25, 300);
    feedback_data = reader.read_file_with_minval(filename, 35, 300);
    
    print feedback_data;
    
    # Largest value stored under meta['pggr_gr'], plus one (presumably the
    # number of genres if genre ids are zero-based -- TODO confirm).
    print 'Maximum Genre.'
    print np.max(feedback_data.meta['pggr_gr']) + 1;
    
    print 'Normalizing data.'
    feedback_data.normalize_row();
    
    # first positional argument of HierLat; presumably the number of latent
    # factors (the value used here is 5).
    r = 5;
    # the L_2 norm regularizer 
    lamb = 0.001; 
    # the stopping delta value 
    delta = 0.01;
    # the maximum iteration number
    maxiter = 500;
     
    # Build the model and train it with simplex projection enabled; the
    # variant without simplex projection is kept disabled below.
    HierLat_model = HierLat(r,lamb,delta,maxiter, verbose = True); 
    #HierLat_model.train(feedback_data, simplex_projection = False);
    HierLat_model.train(feedback_data, simplex_projection = True);
'''    
    # test. 
    loc_row = [200,   4, 105];
    loc_col = [ 10,  22,   4];
    print 'Prediction:'
    print HierLat_model.predict(loc_row, loc_col);
'''    
    
    # Thresholds and size limits forwarded to the data reader.
    min_occ_user = 50
    min_occ_prog = 1000

    num_user = 10000
    num_prog = 3000

    # `iteration` is the current run out of `total_iteration` runs.
    total_iteration = 2
    iteration = 1

    leave_k_out = 20
    lafactor = 5

    method = HierLat(latent_factor=lafactor)
    # Fingerprint of the input file list, used inside the experiment id.
    hash_file_str = str(hash(tuple(daily_data_file)))

    reader = DailyWatchTimeReader()
    feedback_data = reader.read_file_with_minval(
        daily_data_file, min_occ_user, min_occ_prog, num_user, num_prog)

    # The experiment id spells out every setting chosen above.
    data_tag = '_data' + hash_file_str
    filter_tag = '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog)
    size_tag = '_nu' + str(num_user) + '_np' + str(num_prog)
    split_tag = '_k' + str(leave_k_out) + '_toiter' + str(total_iteration)
    exp_id = 'lko_bi_' + exp_name + data_tag + filter_tag + size_tag + split_tag

    # Resource name for one (experiment, method, iteration) result.
    result_resource_str = ('exp' + exp_id
                           + '_method' + method.unique_str()
                           + '_iter' + str(iteration))
 
 # Thresholds and size limits forwarded to the data reader.
 min_occ_user = 50
 min_occ_prog = 1000

 num_user = 10000
 num_prog = 3000

 # `iteration` is the current run out of `total_iteration` runs.
 total_iteration = 2
 iteration = 1

 leave_k_out = 20
 lafactor = 5

 method = HierLat(latent_factor=lafactor)
 # Fingerprint of the input file list, used inside the experiment id.
 hash_file_str = str(hash(tuple(daily_data_file)))

 reader = DailyWatchTimeReader()
 feedback_data = reader.read_file_with_minval(
     daily_data_file, min_occ_user, min_occ_prog, num_user, num_prog)

 # The experiment id spells out every setting chosen above.
 exp_id = ''.join([
     'lko_bi_', exp_name,
     '_data', hash_file_str,
     '_mu', str(min_occ_user),
     '_mp', str(min_occ_prog),
     '_nu', str(num_user),
     '_np', str(num_prog),
     '_k', str(leave_k_out),
     '_toiter', str(total_iteration),
 ])

 # Resource name for one (experiment, method, iteration) result.
 result_resource_str = ''.join([
     'exp', exp_id,
     '_method', method.unique_str(),
     '_iter', str(iteration),
 ])
 # Sub folder under which this experiment's resources are stored.
 sub_folder = exp_id + '/models/' + method.unique_str()
                       
Example #6
0
 # filtering criteria
 min_occ_user = 4;
 min_occ_prog = 1;
 
 top_n = 15; # performance computed on top N; 
 
 leave_k_out = 1; # perform leave k out. 
 
 # number of repetitions. 
 total_iteration = 3;
 
 # latent factor
 lf = 5;
 
 # recommendation algorithms 
 method_list = [ LMaFit(latent_factor = 5), RandUV(latent_factor = 5), \
                HierLat(latent_factor = 5) , NMF(latent_factor = 5) ];
 
 # main method. 
 result = experiment_leave_k_out(exp_name, daily_data_file, min_occ_user, min_occ_prog, \
             method_list,  leave_k_out, total_iteration, top_n);
 
 # display results (average RMSE). 
 for method_name, method_iter_perf in result.items():
     print 'Method: '+ method_name;
     print  '>>Average precision : %.5f' % (sum( x['prec']   for x in method_iter_perf)/len(method_iter_perf));
     print  '>>Average recall    : %.5f' % (sum( x['recall'] for x in method_iter_perf)/len(method_iter_perf));
     print  '>>Average rmse      : %.5f' % (sum( x['rmse']   for x in method_iter_perf)/len(method_iter_perf));
     #print method_iter_perf;
 
 #print result;
Example #7
0
    # filtering criteria
    min_occ_user = 35
    min_occ_prog = 300

    # specify the percentage of training and (1 - training_prec) is testing.
    training_prec = 0.5

    # number of repetitions.
    total_iteration = 5

    # latent factor
    lf = 10

    # recommendation algorithms
    method_list = [
        LMaFit(latent_factor=lf),
        RandUV(latent_factor=lf),
        HierLat(latent_factor=lf)
    ]

    # main method.
    result = experiment_rand_split(exp_name, daily_data_file, min_occ_user, min_occ_prog, \
                method_list,  training_prec, total_iteration)

    # display results (average RMSE).
    for method_name, method_iter_perf in result.items():
        print 'Method: ' + method_name
        print '>>Average performance RMSE: %.5f' % (
            sum(x for x in method_iter_perf) / len(method_iter_perf))

    #print result;