def experiment_leave_k_out(exp_name, daily_data_file, min_occ_user, min_occ_prog, \
                           method_list, leave_k_out, total_iteration, top_n, binary=False):
    '''
    Parameters
    ----------
    @param exp_name:        the experiment name (prefix).
    @param daily_data_file: the daily watch time data file.
    @param min_occ_user:    cold start user criterion (minimum occurrences per user).
    @param min_occ_prog:    cold start program criterion (minimum occurrences per program).
    @param method_list:     the list of methods to evaluate.
    @param leave_k_out:     leave k out for each user. The k must be strictly less than min_occ_user.
    @param total_iteration: the number of iterations to run for each method.
    @param top_n:           the top-N cut-off used in the per-iteration evaluation.
    @param binary:          if this is set to True then binary data is used (non-zero values set to 1).

    Returns
    ----------
    @return out: a dictionary mapping each method's unique_str() to its list of per-iteration results.
    '''

    if leave_k_out >= min_occ_user:
        raise ValueError('The k in the leave k out should be strictly less than min_occ_user.')

    # define lko_log style.
    lko_log = lambda msg: Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    # construct exp_id
    if binary:
        exp_id = 'lko_bi_' + exp_name + '_data' + str(hash(daily_data_file)) \
                 + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog) \
                 + '_k' + str(leave_k_out) + '_toiter' + str(total_iteration)
    else:
        exp_id = 'lko_' + exp_name + '_data' + str(hash(daily_data_file)) \
                 + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog) \
                 + '_k' + str(leave_k_out) + '_toiter' + str(total_iteration)

    lko_log('Experiment ID: ' + exp_id)

    # load data.
    lko_log('Read data...')
    reader = DailyWatchTimeReader()
    data = reader.read_file_with_minval(daily_data_file, min_occ_user, min_occ_prog)
    lko_log('Data loaded: ' + str(data))

    if binary:
        lko_log('Binarizing data...')
        data.binarize()
    else:
        # normalize each user's row so watch times are comparable across users.
        lko_log('Normalizing data...')
        data.normalize_row()

    result = {}

    for method in method_list:
        # do for each method

        perf_vect = []
        for iteration in range(total_iteration):
            # do for each iteration for each method.

            lko_log('Method: ' + method.unique_str() + ' Iteration: ' + str(iteration))

            # data split of the current iteration (cached so every method sees the same split).
            split_resource_str = 'exp' + exp_id + '_lvidx_iter' + str(iteration)
            split_dir = exp_id + '/lv_idx'
            leave_k_out_idx = URM.LoadResource(URM.RTYPE_RESULT, split_resource_str, split_dir)
            if not leave_k_out_idx:
                # randomly generate k items from each row/user.
                leave_k_out_idx = ds.leave_k_out(data, leave_k_out)
                URM.SaveResource(URM.RTYPE_RESULT, split_resource_str, leave_k_out_idx, split_dir)

            # split the k items per user into a separate (left-out) data set.
            [data_left, data_tr] = data.leave_k_out(leave_k_out_idx)

            iter_result = experiment_unit_leave_k_out(exp_id, method, \
                                                      data_tr, data_left, iteration, top_n)

            perf_vect.append(iter_result)

        result[method.unique_str()] = perf_vect

    return result
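# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): one way the driver above might be invoked.
# The data file name and the recommender objects are hypothetical placeholders;
# in practice method_list holds objects from this project that expose
# unique_str() and whatever interface experiment_unit_leave_k_out expects.
#
#   method_list = [SomeRecommender(), AnotherRecommender()]   # hypothetical methods
#   result = experiment_leave_k_out('demo', 'daily_watchtime.tsv',
#                                   min_occ_user=5, min_occ_prog=5,
#                                   method_list=method_list, leave_k_out=2,
#                                   total_iteration=3, top_n=10)
#   for method_str, perf_vect in result.items():
#       print method_str, perf_vect
# ---------------------------------------------------------------------------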
def experiment_leave_k_out_map(exp_name, daily_data_file, \
                               min_occ_user, min_occ_prog, num_user, num_prog, \
                               method_list, leave_k_out, total_iteration, max_rank, binary=False):
    '''
    Parameters
    ----------
    @param exp_name:        the experiment name (prefix).
    @param daily_data_file: a list of files.
    @param min_occ_user:    cold start user criterion.
    @param min_occ_prog:    cold start program criterion.
    @param num_user:        the number of users selected in the experiment.
    @param num_prog:        the number of programs selected in the experiment.
    @param method_list:     the list of methods to evaluate.
    @param leave_k_out:     leave k out for each user. The k must be strictly less than min_occ_user.
    @param total_iteration: the number of iterations to run for each method.
    @param max_rank:        the maximum rank position considered in the per-iteration evaluation.
    @param binary:          if this is set to True then binary data is used (non-zero values set to 1).

    Returns
    ----------
    @return out: a dictionary mapping each method's unique_str() to its list of per-iteration results.
    '''

    print 'Leave k out: k = ', str(leave_k_out)
    print 'Min_occ_user: ', str(min_occ_user)
    print 'Min_occ_prog: ', str(min_occ_prog)

    if leave_k_out >= min_occ_user:
        raise ValueError('The k in the leave k out [' + str(leave_k_out) +
                         '] should be strictly less than min_occ_user [' + str(min_occ_user) + '].')

    # define lko_log style.
    lko_log = lambda msg: Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    # hash the file name(s) into the experiment id.
    if isinstance(daily_data_file, list):
        hash_file_str = str(hash(tuple(daily_data_file)))
    else:
        hash_file_str = str(hash(daily_data_file))

    # construct exp_id
    if binary:
        exp_id = 'lko_bi_' + exp_name + '_data' + hash_file_str \
                 + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog) \
                 + '_nu' + str(num_user) + '_np' + str(num_prog) \
                 + '_k' + str(leave_k_out) + '_toiter' + str(total_iteration)
    else:
        exp_id = 'lko_' + exp_name + '_data' + hash_file_str \
                 + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog) \
                 + '_nu' + str(num_user) + '_np' + str(num_prog) \
                 + '_k' + str(leave_k_out) + '_toiter' + str(total_iteration)

    lko_log('Experiment ID: ' + exp_id)

    # load data.
    lko_log('Read data...')
    reader = DailyWatchTimeReader()
    data = reader.read_file_with_minval(daily_data_file, min_occ_user, min_occ_prog, num_user, num_prog)
    lko_log('Data loaded: ' + str(data))

    if binary:
        lko_log('Binarizing data...')
        data.binarize()
    else:
        # normalize each user's row so watch times are comparable across users.
        lko_log('Normalizing data...')
        data.normalize_row()

    result = {}

    for method in method_list:
        # do for each method

        perf_vect = []
        for iteration in range(total_iteration):
            # do for each iteration for each method.

            lko_log('Method: ' + method.unique_str() + ' Iteration: ' + str(iteration))

            # data split of the current iteration (cached so every method sees the same split).
            split_resource_str = 'exp' + exp_id + '_lvidx_iter' + str(iteration)
            split_dir = exp_id + '/lv_idx'
            leave_k_out_idx = URM.LoadResource(URM.RTYPE_RESULT, split_resource_str, split_dir)
            if not leave_k_out_idx:
                # randomly generate k items from each row/user.
                leave_k_out_idx = ds.leave_k_out(data, leave_k_out)
                URM.SaveResource(URM.RTYPE_RESULT, split_resource_str, leave_k_out_idx, split_dir)

            # split the k items per user into a separate (left-out) data set.
            [data_left, data_tr] = data.leave_k_out(leave_k_out_idx)

            iter_result = experiment_unit_leave_k_out_map(exp_id, method, \
                                                          data_tr, data_left, iteration, max_rank)

            perf_vect.append(iter_result)

        result[method.unique_str()] = perf_vect

    return result
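# ---------------------------------------------------------------------------
# For reference, a minimal sketch of an average-precision-at-max_rank metric of
# the kind the _map variant presumably aggregates per iteration (the actual
# computation lives in experiment_unit_leave_k_out_map, which is not shown
# here). The helper below is illustrative only and self-contained: it scores a
# ranked recommendation list against the set of left-out (relevant) items of a
# single user.
def _average_precision_at_k(ranked_items, relevant_items, max_rank):
    '''Illustrative AP@max_rank: mean of the precision values at each hit position.'''
    hits = 0
    precision_sum = 0.0
    for pos, item in enumerate(ranked_items[:max_rank], start=1):
        if item in relevant_items:
            hits += 1
            precision_sum += float(hits) / pos
    if not relevant_items:
        return 0.0
    return precision_sum / min(len(relevant_items), max_rank)

# Example: relevant items {2, 5} ranked at positions 1 and 3 with max_rank = 5
# give AP = (1/1 + 2/3) / 2 ~= 0.83.
# print _average_precision_at_k([2, 7, 5, 9, 1], set([2, 5]), 5)
# ---------------------------------------------------------------------------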
# Small smoke test of the leave-k-out split on a toy 3 x 5 feedback matrix.
# FeedbackData and ds (the data split utilities) come from this project's own
# modules and are assumed to be importable here.
import numpy as np
from scipy.sparse import csr_matrix

data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])   # arbitrary example values for the toy matrix
indices = np.array([0, 2, 4, 0, 2, 3, 1, 2, 4])
indptr = np.array([0, 3, 6, 9])

lo_data = csr_matrix((data, indices, indptr), shape=(3, 5)).tocoo()
fb_data = FeedbackData(lo_data.row.tolist(), lo_data.col.tolist(), lo_data.data.tolist(), 3, 5, \
                       np.array([]), np.array([]), np.array([]))

print 'Original data:'
print fb_data.get_sparse_matrix().todense()

# A manually specified split would look like:
# leave_k_out_idx = {}
# leave_k_out_idx[0] = set([4])
# leave_k_out_idx[1] = set([0, 3])
# leave_k_out_idx[2] = set([2])

# generate leave k indices.
leave_k_out_idx = ds.leave_k_out(fb_data, 2)

print 'leave_k_indices'
print leave_k_out_idx

[lo_data, tr_data] = fb_data.leave_k_out(leave_k_out_idx)

print 'Leave k out:'
print lo_data.get_sparse_matrix().todense()

print 'Remaining:'
print tr_data.get_sparse_matrix().todense()
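# ---------------------------------------------------------------------------
# Sanity check (illustrative): under the leave-k-out semantics described above
# -- k items per user are moved from the training part to the left-out part --
# the two matrices should not overlap, and each of the 3 toy users should end
# up with exactly k = 2 left-out entries.
lo_dense = lo_data.get_sparse_matrix().todense()
tr_dense = tr_data.get_sparse_matrix().todense()
assert np.all(np.multiply(lo_dense, tr_dense) == 0)    # no cell is non-zero in both parts
assert np.all((lo_dense != 0).sum(axis=1) == 2)        # exactly k = 2 left-out items per user
print 'Leave-k-out sanity checks passed.'
# ---------------------------------------------------------------------------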