Ejemplo n.º 1
0
 def subsample_row(self, sel_row_num):
     '''
     Build a sub-dataset from a sample of this dataset's rows.

     Row indices are drawn with ds.sample_num(self.num_row, sel_row_num) and
     the sub-dataset is produced by self.subdata_row for those indices.
     NOTE(review): any sparse-matrix conversion or in-place change of
     row/col/val would happen inside subdata_row; it is not visible here.

     Parameters
     ----------
     @param sel_row_num: the number of rows to select.

     Returns
     ----------
     @return out: a two-element list [sample_data, sel_idx]
     sample_data: the value returned by self.subdata_row for the drawn rows.
     sel_idx:     the row indices returned by ds.sample_num.
     '''

     # Draw the row indices first, then materialize the matching subset.
     chosen_rows = ds.sample_num(self.num_row, sel_row_num)
     subset = self.subdata_row(chosen_rows)

     return [subset, chosen_rows]
Ejemplo n.º 2
0
    def subsample_row(self, sel_row_num):
        '''
        Sample rows of this dataset and return the corresponding sub-dataset.

        The indices come from ds.sample_num(self.num_row, sel_row_num); the
        sub-dataset comes from self.subdata_row applied to those indices.
        NOTE(review): whether row/col/val are modified in place, and how the
        row slicing is carried out, depends on subdata_row -- confirm there.

        Parameters
        ----------
        @param sel_row_num: the number of rows to select.

        Returns
        ----------
        @return out: a two-element list [sample_data, sel_idx]
        sample_data: the result of self.subdata_row for the sampled rows.
        sel_idx:     the sampled row indices, as returned by ds.sample_num.
        '''

        # Pick the rows to keep.
        picked_idx = ds.sample_num(self.num_row, sel_row_num)

        # Slice the data set down to the picked rows and hand back both parts.
        return [self.subdata_row(picked_idx), picked_idx]
Ejemplo n.º 3
0
def experiment_coldstart_map(exp_name, daily_data_file,
                             min_occ_user, min_occ_prog, num_user, num_prog,
                             method_list, blind_k_out, total_iteration, max_rank,
                             binary=False):
    '''
    Run a cold-start "blind k out" experiment for every method in method_list.

    The data is read once and then either binarized or row-normalized. For
    each method and each iteration, a set of blind_k_out column indices is
    loaded from the resource store (or sampled and saved when none is stored),
    the data is split with data.blind_k_out, and
    experiment_unit_leave_k_out_map is run on the two parts.

    The stored blind index is keyed by the experiment id and the iteration
    number only, so every method is evaluated on the same split in a given
    iteration (assuming URM.LoadResource returns what URM.SaveResource stored).

    Parameters
    ----------
    @param exp_name:        the experiment name (used inside the experiment id).
    @param daily_data_file: a data file, or a list of data files, passed to the reader.
    @param min_occ_user:    cold start user criteria (passed to the reader).
    @param min_occ_prog:    cold start program criteria (passed to the reader).
    @param num_user:        the number of users selected in the experiment.
    @param num_prog:        the number of programs selected in the experiment.
    @param method_list:     the methods to evaluate; each must provide unique_str().
    @param blind_k_out:     the number of column indices blinded out per iteration.
                            Must be strictly less than min_occ_user.
    @param total_iteration: the number of iterations (splits) per method.
    @param max_rank:        forwarded unchanged to experiment_unit_leave_k_out_map.
    @param binary:          if true the data is binarized (data.binarize());
                            otherwise it is row-normalized (data.normalize_row()).

    Returns
    ----------
    @return result: a dict mapping method.unique_str() to a list holding the
                    value returned by experiment_unit_leave_k_out_map for each
                    iteration, in iteration order.

    Raises
    ----------
    ValueError: if blind_k_out >= min_occ_user.
    '''

    # Single-argument print(...) gives the same output under the Python 2
    # print statement (items joined by one space) and the Python 3 function.
    print(' '.join(['Blind k out: k = ', str(blind_k_out)]))
    # NOTE(review): this line was unreadable in the source; it is reconstructed
    # to report both occurrence thresholds -- confirm the intended wording.
    print(' '.join(['Min_occ_user: ', str(min_occ_user),
                    'Min_occ_prog: ', str(min_occ_prog)]))

    if blind_k_out >= min_occ_user:
        raise ValueError('The k in the leave k out [' + str(blind_k_out)
                         + '] should be strictly less than min_occ_user ['
                         + str(min_occ_user) + '].')

    # All progress messages of this experiment go to the experiment category.
    def lko_log(msg):
        return Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    # Lists are not hashable, so a list of files is hashed as a tuple.
    # NOTE(review): hash() of strings is only stable across runs when hash
    # randomization is off; if it is on, stored blind indices keyed by exp_id
    # will not be found again by a later process.
    if isinstance(daily_data_file, list):
        hash_file_str = str(hash(tuple(daily_data_file)))
    else:
        hash_file_str = str(hash(daily_data_file))

    # Construct exp_id; binary and non-binary runs differ only in the prefix.
    prefix = 'cst_bi_' if binary else 'cst_'
    exp_id = (prefix + exp_name + '_data' + hash_file_str
              + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog)
              + '_nu' + str(num_user) + '_np' + str(num_prog)
              + '_k' + str(blind_k_out) + '_toiter' + str(total_iteration))
    lko_log('Experiment ID: ' + exp_id)

    # Load data.
    lko_log('Read data...')
    reader = DailyWatchTimeReader()
    data = reader.read_file_with_minval(daily_data_file, min_occ_user,
                                        min_occ_prog, num_user, num_prog)
    lko_log('Data loaded: ' + str(data))

    if binary:
        lko_log('Binarizing data...')
        data.binarize()
    else:
        lko_log('Normalizing data...')
        data.normalize_row()

    result = {}
    split_dir = exp_id + '/blind_idx'

    for method in method_list:
        perf_vect = []

        for iteration in range(total_iteration):
            lko_log('Method: ' + method.unique_str() + ' Iteration: ' + str(iteration))

            # Reuse the stored split of this iteration when there is one.
            split_resource_str = 'exp' + exp_id + '_blind_idx_iter' + str(iteration)
            blind_out_idx = URM.LoadResource(URM.RTYPE_RESULT, split_resource_str, split_dir)
            # NOTE(review): this truthiness test also treats an empty stored
            # index as missing, and would raise if the stored index were a
            # multi-element numpy array -- confirm the type LoadResource returns.
            if not blind_out_idx:
                # Randomly generate k column indices to blind out and store them.
                blind_out_idx = ds.sample_num(data.num_col, blind_k_out)
                URM.SaveResource(URM.RTYPE_RESULT, split_resource_str, blind_out_idx, split_dir)

            lko_log('Blind index done.')

            # Split the blinded-out part from the training part.
            [data_tr, data_left] = data.blind_k_out(blind_out_idx)

            lko_log('Start index')
            iter_result = experiment_unit_leave_k_out_map(exp_id, method,
                                                          data_tr, data_left,
                                                          iteration, max_rank)
            perf_vect.append(iter_result)

        result[method.unique_str()] = perf_vect

    return result