Exemple #1
0
def experiment_leave_k_out(exp_name, daily_data_file, min_occ_user,
                           min_occ_prog, method_list, leave_k_out,
                           total_iteration, top_n, binary=False):
    '''
    Run a repeated leave-k-out evaluation for every method in method_list.

    Parameters
    ----------
    @param exp_name: the experiment name (used as a prefix inside the
         experiment id).
    @param daily_data_file: data source handed to
         DailyWatchTimeReader.read_file_with_minval; its hash() is embedded
         in the experiment id.
    @param min_occ_user: forwarded to the reader together with min_occ_prog;
         also the strict upper bound for leave_k_out.
    @param min_occ_prog: forwarded to the reader.
    @param method_list: iterable of method objects; each must provide
         unique_str().
    @param leave_k_out: leave k out for each user. The k must be strictly
         less than min_occ_user.
    @param total_iteration: number of iterations executed per method.
    @param top_n: forwarded unchanged to experiment_unit_leave_k_out.
    @param binary: if true the data is binarized (data.binarize()),
         otherwise it is row-normalized (data.normalize_row()).

    Returns
    ----------
    @return a dict mapping method.unique_str() to the list of per-iteration
         results returned by experiment_unit_leave_k_out.

    Raises
    ----------
    ValueError if leave_k_out >= min_occ_user.

    NOTE(review): the experiment id embeds hash(daily_data_file). For str
    inputs that value is randomized per process on Python 3 unless
    PYTHONHASHSEED is fixed, so stored splits may not be found again --
    confirm the target interpreter.
    '''

    if leave_k_out >= min_occ_user:
        raise ValueError(
            'The k in the leave k out should be strictly less than min_occ_user.'
        )

    def lko_log(msg):
        # every message of this experiment goes to the same logger category.
        return Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    # the experiment id only differs in its leading tag for binary runs.
    tag = 'lko_bi_' if binary else 'lko_'
    exp_id = (tag + exp_name
              + '_data' + str(hash(daily_data_file))
              + '_mu' + str(min_occ_user)
              + '_mp' + str(min_occ_prog)
              + '_k' + str(leave_k_out)
              + '_toiter' + str(total_iteration))
    lko_log('Experiment ID: ' + exp_id)

    lko_log('Read data...')
    data = DailyWatchTimeReader().read_file_with_minval(
        daily_data_file, min_occ_user, min_occ_prog)
    lko_log('Data loaded: ' + str(data))

    if binary:
        lko_log('Binarizing data...')
        data.binarize()
    else:
        lko_log('Normalizing data...')
        data.normalize_row()

    # all splits of this experiment are stored under one directory.
    split_dir = exp_id + '/lv_idx'

    outcome = {}
    for method in method_list:
        scores = []
        for round_no in range(total_iteration):
            lko_log('Method: ' + method.unique_str() + ' Iteration: ' +
                    str(round_no))

            # reuse the stored split of this iteration when one exists, so
            # every method is evaluated on the same held-out entries.
            split_key = 'exp' + exp_id + '_lvidx_iter' + str(round_no)
            held_out_idx = URM.LoadResource(URM.RTYPE_RESULT, split_key,
                                            split_dir)
            if not held_out_idx:
                # nothing stored yet: draw k items per row/user and keep them.
                held_out_idx = ds.leave_k_out(data, leave_k_out)
                URM.SaveResource(URM.RTYPE_RESULT, split_key, held_out_idx,
                                 split_dir)

            # first element: the held-out entries, second: the remainder.
            held_out, training = data.leave_k_out(held_out_idx)

            scores.append(
                experiment_unit_leave_k_out(exp_id, method, training,
                                            held_out, round_no, top_n))

        outcome[method.unique_str()] = scores

    return outcome
def experiment_leave_k_out(exp_name, daily_data_file, min_occ_user, min_occ_prog,
                           method_list, leave_k_out, total_iteration, top_n, binary=False):
    '''
    Evaluate each method in method_list with repeated leave-k-out splits.

    Parameters
    ----------
    @param exp_name: the experiment name (prefix inside the experiment id).
    @param daily_data_file: data source handed to
         UtilityDataReader.read_file_with_minval; its hash() becomes part of
         the experiment id.
    @param min_occ_user: forwarded to the reader; also the strict upper
         bound for leave_k_out.
    @param min_occ_prog: forwarded to the reader.
    @param method_list: iterable of method objects providing unique_str().
    @param leave_k_out: leave k out for each user. The k must be strictly
         less than min_occ_user.
    @param total_iteration: number of iterations executed per method.
    @param top_n: forwarded unchanged to experiment_unit_leave_k_out.
    @param binary: if true the data is binarized (non-zero set to 1 per the
         original note), otherwise data.normalize_row() is applied.

    Returns
    ----------
    @return a dict keyed by method.unique_str(); each value is the list of
         per-iteration results from experiment_unit_leave_k_out.

    Raises
    ----------
    ValueError if leave_k_out >= min_occ_user.

    NOTE(review): hash(daily_data_file) is part of the experiment id; str
    hashes are randomized per process on Python 3 unless PYTHONHASHSEED is
    fixed -- confirm the id is stable where stored splits must be reused.
    '''

    if leave_k_out >= min_occ_user:
        raise ValueError('The k in the leave k out should be strictly less than min_occ_user.')

    def lko_log(msg):
        # shared log style for this experiment.
        return Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    # assemble the experiment id; binary runs get their own leading tag.
    id_parts = [
        'lko_bi_' if binary else 'lko_',
        exp_name,
        '_data', str(hash(daily_data_file)),
        '_mu', str(min_occ_user),
        '_mp', str(min_occ_prog),
        '_k', str(leave_k_out),
        '_toiter', str(total_iteration),
    ]
    exp_id = ''.join(id_parts)
    lko_log('Experiment ID: ' + exp_id)

    # load data.
    lko_log('Read data...')
    data = UtilityDataReader().read_file_with_minval(
        daily_data_file, min_occ_user, min_occ_prog)
    lko_log('Data loaded: ' + str(data))

    if binary:
        lko_log('Binarizing data...')
        data.binarize()
    else:
        lko_log('Normalizing data...')
        data.normalize_row()

    split_dir = exp_id + '/lv_idx'

    def split_for(iteration):
        # fetch the stored split of this iteration, or create and store one.
        resource_name = 'exp' + exp_id + '_lvidx_iter' + str(iteration)
        idx = URM.LoadResource(URM.RTYPE_RESULT, resource_name, split_dir)
        if not idx:
            # randomly generate k items from each row/user (per original note).
            idx = ds.leave_k_out(data, leave_k_out)
            URM.SaveResource(URM.RTYPE_RESULT, resource_name, idx, split_dir)
        return idx

    result = {}
    for method in method_list:
        runs = []
        for iteration in range(total_iteration):
            lko_log('Method: ' + method.unique_str() + ' Iteration: ' + str(iteration))

            # separate the k held-out items from the remaining data.
            data_left, data_tr = data.leave_k_out(split_for(iteration))

            runs.append(experiment_unit_leave_k_out(
                exp_id, method, data_tr, data_left, iteration, top_n))

        result[method.unique_str()] = runs

    return result
    
    # NOTE(review): everything below follows `return result` at function-body
    # indentation, so it is unreachable as written. It reads like a stray
    # leave-k-out demo; `indices` and `indptr` are not defined in the visible
    # code -- confirm where this fragment belongs.

    # Build a 3x5 sparse matrix and convert it to COO so that the row / col /
    # data triplets can be read off below (presumably scipy.sparse.csr_matrix
    # -- TODO confirm the import).
    lo_data   = csr_matrix( (data,indices,indptr), shape=(3, 5)).tocoo();
    
    # Wrap the triplets in a FeedbackData. The 3 and 5 mirror the matrix shape
    # above; the meaning of the three empty arrays is not visible here.
    fb_data = FeedbackData(lo_data.row.tolist(), lo_data.col.tolist(), lo_data.data.tolist(), 3, 5,
                           np.array([]), np.array([]), np.array([]));
    
    print 'Original data:'
    print fb_data.get_sparse_matrix().todense();

    # Hand-written split kept for reference (disabled).
#     leave_k_out_idx = {};
#     leave_k_out_idx [0] = set([4]);
#     leave_k_out_idx [1] = set([0, 3]);
#     leave_k_out_idx [2] = set([2]);
    
    # generate leave k indices (k = 2 here). 
    leave_k_out_idx = ds.leave_k_out(fb_data, 2);
    
    print 'leave_k_indices'
    print leave_k_out_idx;
    
    # Split into the held-out part (first) and the remaining part (second).
    # Note that lo_data is rebound here from the COO matrix to the held-out data.
    [lo_data, tr_data] = fb_data.leave_k_out(leave_k_out_idx);
    
    print 'Leave k out:'
    print lo_data.get_sparse_matrix().todense();
    
    print 'Remaining:'
    print tr_data.get_sparse_matrix().todense();
    
    
    
    
Exemple #4
0
def experiment_leave_k_out_map(exp_name, daily_data_file,
                               min_occ_user, min_occ_prog, num_user, num_prog,
                               method_list, leave_k_out, total_iteration,
                               max_rank, binary=False):
    '''
    Run a repeated leave-k-out experiment for every method in method_list,
    delegating each iteration to experiment_unit_leave_k_out_map.

    Parameters
    ----------
    @param exp_name:        the experiment name (prefix inside the experiment id).
    @param daily_data_file: a single data file or a list of files; a list is
         converted to a tuple before hashing because lists are unhashable.
    @param min_occ_user:    cold start user criteria; also the strict upper
         bound for leave_k_out.
    @param min_occ_prog:    cold start program criteria.
    @param num_user:        the number of users selected in the experiment.
    @param num_prog:        the number of programs selected in the experiment.
    @param method_list:     iterable of method objects providing unique_str().
    @param leave_k_out:     leave k out for each user. The k must be strictly
         less than min_occ_user.
    @param total_iteration: number of iterations executed per method.
    @param max_rank:        forwarded unchanged to
         experiment_unit_leave_k_out_map.
    @param binary: if this is set to true then the binary data is used
         (data.binarize()); otherwise data.normalize_row() is applied.

    Returns
    ----------
    @return a dict mapping method.unique_str() to the list of per-iteration
         results returned by experiment_unit_leave_k_out_map.

    Raises
    ----------
    ValueError if leave_k_out >= min_occ_user.

    NOTE(review): the experiment id embeds hash() of the data file name(s);
    str hashes are randomized per process on Python 3 unless PYTHONHASHSEED
    is fixed -- confirm the id is stable where stored splits must be reused.
    '''

    # single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Leave k out: k = ' + str(leave_k_out))
    print('Min_occ_user: ' + str(min_occ_user))
    print('Min_occ_prog: ' + str(min_occ_prog))

    if leave_k_out >= min_occ_user:
        raise ValueError('The k in the leave k out [' + str(leave_k_out)
                         + '] should be strictly less than min_occ_user ['
                         + str(min_occ_user) + '].')

    def lko_log(msg):
        # shared log style for this experiment.
        return Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    # a list of files cannot be hashed directly; hash its tuple instead.
    if isinstance(daily_data_file, list):
        hash_file_str = str(hash(tuple(daily_data_file)))
    else:
        hash_file_str = str(hash(daily_data_file))

    # construct exp_id; binary runs only differ in the leading tag.
    exp_id = (('lko_bi_' if binary else 'lko_') + exp_name
              + '_data' + hash_file_str
              + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog)
              + '_nu' + str(num_user) + '_np' + str(num_prog)
              + '_k' + str(leave_k_out) + '_toiter' + str(total_iteration))
    lko_log('Experiment ID: ' + exp_id)

    # load data.
    lko_log('Read data...')
    reader = DailyWatchTimeReader()
    data = reader.read_file_with_minval(daily_data_file, min_occ_user,
                                        min_occ_prog, num_user, num_prog)
    lko_log('Data loaded: ' + str(data))

    if binary:
        lko_log('Binarizing data...')
        data.binarize()
    else:
        lko_log('Normalizing data...')
        data.normalize_row()

    # every split of this experiment lives in the same resource directory.
    split_dir = exp_id + '/lv_idx'

    result = {}
    for method in method_list:
        perf_vect = []
        for iteration in range(total_iteration):
            lko_log('Method: ' + method.unique_str() + ' Iteration: '
                    + str(iteration))

            # reuse the stored split of this iteration when one exists, so
            # all methods are compared on the same held-out entries.
            split_resource_str = 'exp' + exp_id + '_lvidx_iter' + str(iteration)
            leave_k_out_idx = URM.LoadResource(URM.RTYPE_RESULT,
                                               split_resource_str, split_dir)
            if not leave_k_out_idx:
                # randomly generate k items from each row/user (per the
                # original note) and store them for later runs.
                leave_k_out_idx = ds.leave_k_out(data, leave_k_out)
                URM.SaveResource(URM.RTYPE_RESULT, split_resource_str,
                                 leave_k_out_idx, split_dir)

            # first element: the held-out entries, second: the remainder.
            data_left, data_tr = data.leave_k_out(leave_k_out_idx)

            perf_vect.append(
                experiment_unit_leave_k_out_map(exp_id, method, data_tr,
                                                data_left, iteration,
                                                max_rank))

        result[method.unique_str()] = perf_vect

    return result
Exemple #5
0
    # NOTE(review): headerless fragment -- the enclosing definition and the
    # assignment of `data` used below are not visible here.

    # Column indices and row pointers for csr_matrix below. If that is
    # scipy.sparse's csr_matrix, indptr [0, 3, 6, 9] means three rows with
    # three stored entries each -- TODO confirm the import.
    indices = np.array([0, 2, 4, 0, 2, 3, 1, 2, 4])
    indptr = np.array([0, 3, 6, 9])

    # Convert to COO so the row / col / data triplets can be read off.
    lo_data = csr_matrix((data, indices, indptr), shape=(3, 5)).tocoo()

    # Wrap the triplets in a FeedbackData. The 3 and 5 mirror the matrix shape
    # above; the meaning of the three empty arrays is not visible here.
    fb_data = FeedbackData(lo_data.row.tolist(), lo_data.col.tolist(),
                           lo_data.data.tolist(), 3, 5, np.array([]),
                           np.array([]), np.array([]))

    print 'Original data:'
    print fb_data.get_sparse_matrix().todense()

    # Hand-written split kept for reference (disabled).
    #     leave_k_out_idx = {};
    #     leave_k_out_idx [0] = set([4]);
    #     leave_k_out_idx [1] = set([0, 3]);
    #     leave_k_out_idx [2] = set([2]);

    # generate leave k indices (k = 2 here).
    leave_k_out_idx = ds.leave_k_out(fb_data, 2)

    print 'leave_k_indices'
    print leave_k_out_idx

    # Split into the held-out part (first) and the remaining part (second).
    # Note that lo_data is rebound here from the COO matrix to the held-out data.
    [lo_data, tr_data] = fb_data.leave_k_out(leave_k_out_idx)

    print 'Leave k out:'
    print lo_data.get_sparse_matrix().todense()

    print 'Remaining:'
    print tr_data.get_sparse_matrix().todense()