def experiment_future_program(exp_name, previous_data_files, future_data_file, \
                              min_occ_user, min_occ_prog, method_list, top_k):
    '''
    Experiment entry point for future programs, evaluated by top-k precision:
    models are trained on previous days and tested on hits in a future day.

    Parameters
    ----------
    exp_name:    a human-readable experiment name.
    method_list: a list of recommendation models.

    Returns
    ----------
    result: a dictionary mapping each method's unique_str() to its performance.
    '''
    # define mcpl_log style.
    mcpl_log = lambda msg: Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    exp_id = exp_name + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog)

    mcpl_log('Experiment ID: ' + exp_id)

    reader = DailyWatchTimeReader()
    tr_data = reader.read_file_with_minval(previous_data_files, min_occ_user,
                                           min_occ_prog)
    te_data = reader.read_file_with_minval(future_data_file, min_occ_user,
                                           min_occ_prog)

    mcpl_log('Normalizing data ...')
    tr_data.normalize_row()
    # there is no need to normalize the test data because we evaluate hits.

    result = {}

    for method in method_list:
        # do for each method

        mcpl_log('Method: ' + method.unique_str())
        method_result = experiment_unit_future_program(exp_id, method, tr_data,
                                                       te_data, top_k)

        result[method.unique_str()] = method_result

    mcpl_log('Experiment Done [' + exp_id + ']')

    return result
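# A minimal usage sketch (hedged: the file paths below are placeholders, and
# the HierLat constructor signature is taken from a fragment later on this
# page). Train on two previous days, then count top-k hits on the next day.
from rs.algorithms.recommendation.HierLat import HierLat

if __name__ == '__main__':
    method_list = [HierLat(latent_factor=5)]
    result = experiment_future_program('future_demo',
                                       ['data/day1', 'data/day2'], 'data/day3',
                                       5, 5, method_list, 10)
    print result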
Example 2
def experiment_leave_k_out(exp_name, daily_data_file, min_occ_user, min_occ_prog, \
                           method_list, leave_k_out, total_iteration, top_n, binary = False):
    '''
    Leave-k-out experiment: for each user, k watched items are held out, and
    the models are evaluated on recovering them in the top-n recommendations.

    Parameters
    ----------
    @param exp_name: the experiment name (prefix).
    @param daily_data_file: the daily data file(s) to load.
    @param min_occ_user: minimum occurrences required per user.
    @param min_occ_prog: minimum occurrences required per program.
    @param method_list: a list of recommendation models.
    @param leave_k_out: the number of items left out for each user. The k must
         be strictly less than min_occ_user.
    @param binary: if set to True, binarized data is used (non-zeros set to 1).

    Returns
    ----------
    @return a dictionary mapping each method's unique_str() to a list of
        per-iteration results.
    '''

    if leave_k_out >= min_occ_user:
        raise ValueError(
            'The k in the leave k out should be strictly less than min_occ_user.'
        )

    # define lko_log style.
    lko_log = lambda msg: Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    # construct exp_id
    prefix = 'lko_bi_' if binary else 'lko_'
    exp_id = prefix + exp_name + '_data' + str(hash(daily_data_file)) \
                    + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog) \
                    + '_k' + str(leave_k_out) + '_toiter' + str(total_iteration)
    lko_log('Experiment ID: ' + exp_id)

    # load data.
    lko_log('Read data...')
    reader = DailyWatchTimeReader()
    data = reader.read_file_with_minval(daily_data_file, min_occ_user,
                                        min_occ_prog)
    lko_log('Data loaded: ' + str(data))

    if binary:
        lko_log('Binarizing data...')
        data.binarize()
    else:
        # normalize
        lko_log('Normalizing data...')
        data.normalize_row()

    result = {}

    for method in method_list:
        # do for each method

        perf_vect = []
        for iteration in range(total_iteration):
            # do for each iteration for each method.

            lko_log('Method: ' + method.unique_str() + ' Iteration: ' +
                    str(iteration))

            # data split of the current iteration.
            split_resource_str = 'exp' + exp_id + '_lvidx_iter' + str(
                iteration)
            split_dir = exp_id + '/lv_idx'
            leave_k_out_idx = URM.LoadResource(URM.RTYPE_RESULT,
                                               split_resource_str, split_dir)
            if not leave_k_out_idx:
                # randomly generate k items from each row/user.
                leave_k_out_idx = ds.leave_k_out(data, leave_k_out)
                URM.SaveResource(URM.RTYPE_RESULT, split_resource_str,
                                 leave_k_out_idx, split_dir)

            # split the k held-out items into a separate dataset.
            [data_left, data_tr] = data.leave_k_out(leave_k_out_idx)

            iter_result = experiment_unit_leave_k_out(exp_id, method, \
                                    data_tr, data_left, iteration, top_n)

            perf_vect.append(iter_result)

        result[method.unique_str()] = perf_vect

    return result
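# A minimal usage sketch (hedged: placeholder path and illustrative values;
# leave_k_out must stay strictly below min_occ_user or the guard above raises).
from rs.algorithms.recommendation.HierLat import HierLat

if __name__ == '__main__':
    result = experiment_leave_k_out('lko_demo', 'data/day1',
                                    min_occ_user=30, min_occ_prog=300,
                                    method_list=[HierLat(latent_factor=5)],
                                    leave_k_out=5, total_iteration=3,
                                    top_n=10, binary=False)
    print result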
Example 3
'''
This is a testing pipeline for the KDD_2014 algorithm.
 
Created on Jan 30, 2014

@author: Shiyu C. ([email protected])
'''

from rs.data.daily_watchtime import DailyWatchTimeReader
from rs.algorithms.recommendation.TriUHV import TriUHV
from rs.data.recdata import FeedbackData

if __name__ == '__main__':
    filename = "../../../datasample/agg_duid_pid_watchtime_genre/20131209_100000"

    # load data.
    reader = DailyWatchTimeReader()
    feedback_data = reader.read_file_with_minval(filename, 1, 1)
    feedback_data.normalize_row()

    # build a model with r = 5 latent factors.
    r = 5
    # the L_2 norm regularizer
    lamb = 0.2
    # the stopping delta value
    delta = 0.01
    # the maximum iteration number
    maxiter = 500

    TriUHV_model = TriUHV(r, lamb, delta, maxiter, verbose=True)
    TriUHV_model.train(feedback_data)
Example 5
def experiment_rand_split(exp_name, daily_data_file, min_occ_user, min_occ_prog, \
                          method_list,  training_prec, total_iteration):
    '''
    Random-split experiment: users are randomly partitioned into training and
    test sets, repeated for total_iteration rounds.

    Parameters
    ----------
    exp_name:      a human-readable experiment name.
    method_list:   a list of matrix completion models.
    training_prec: the fraction of rows (users) used for training.

    Returns
    ----------
    result: a dictionary mapping each method's unique_str() to a list of
        per-iteration results.
    '''
    # define mcpl_log style.
    mcpl_log = lambda msg: Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    #mcpl_log('Data ID: ' + hash(daily_data_file))

    # here we use a regular hash.
    exp_id = exp_name + '_data' + str(hash(daily_data_file)) + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog) \
                      + '_trprec' + str(training_prec) + '_toiter' + str(total_iteration)

    mcpl_log('Experiment ID: ' + exp_id)

    # save experiment splitting as resources.
    reader = DailyWatchTimeReader()
    data = reader.read_file_with_minval(daily_data_file, min_occ_user, min_occ_prog)

    # we normalize here before splitting.
    mcpl_log('Normalizing data...')
    data.normalize_row()

    result = {}

    for method in method_list:
        # do for each method

        perf_vect = []
        for iteration in range(total_iteration):
            # do for each iteration for each method.

            mcpl_log('Method: ' + method.unique_str() + ' Iteration: ' + str(iteration))

            # data split of the current iteration.
            split_resource_str = 'exp' + exp_id + '_split_iter' + str(iteration)
            split_dir = exp_id + '/split'
            split = URM.LoadResource(URM.RTYPE_RESULT, split_resource_str, split_dir)
            if not split:
                split = ds.split(data.num_row, training_prec)
                URM.SaveResource(URM.RTYPE_RESULT, split_resource_str, split, split_dir)

            [split_tr, split_te] = split
            data_tr = data.subdata_row(split_tr)
            data_te = data.subdata_row(split_te)

            iter_result = experiment_unit_rand_split(exp_id, method, data_tr, data_te, iteration)

            perf_vect.append(iter_result)

        result[method.unique_str()] = perf_vect

    mcpl_log('Experiment Done [' + exp_id + ']')

    return result
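# A minimal usage sketch (hedged: placeholder path; training_prec is read here
# as the training fraction, matching its use in ds.split above).
from rs.algorithms.recommendation.HierLat import HierLat

if __name__ == '__main__':
    result = experiment_rand_split('rsplit_demo', 'data/day1',
                                   min_occ_user=5, min_occ_prog=5,
                                   method_list=[HierLat(latent_factor=5)],
                                   training_prec=0.8, total_iteration=3)
    print result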
'''
This is a testing pipeline for the KDD_2014 algorithm.
 
Created on Jan 30, 2014

@author: Shiyu C. ([email protected])
'''
import numpy as np
from rs.data.daily_watchtime import DailyWatchTimeReader
from rs.algorithms.recommendation.HierLat import HierLat

if __name__ == '__main__':

    # load data.
    reader = DailyWatchTimeReader()

    #filename = "../../../datasample/agg_duid_pid_watchtime_genre/20131209_100000"
    #feedback_data = reader.read_file_with_minval(filename, 1, 1)

    filename = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131209/part-r-00000"
    #feedback_data = reader.read_file_with_minval(filename, 25, 300)
    feedback_data = reader.read_file_with_minval(filename, 35, 300)

    print feedback_data

    print 'Maximum Genre.'
    print np.max(feedback_data.meta['pggr_gr']) + 1

    print 'Normalizing data.'
'''
Test reading and combining multiple daily data files with DailyWatchTimeReader.

@author: jiayu.zhou
'''
from rs.data.daily_watchtime import DailyWatchTimeReader

if __name__ == '__main__':
    #daily_data_file = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/test_comb/test"
    #daily_data_file_p1 = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/test_comb/test1"
    #daily_data_file_p2 = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/test_comb/test2"

    daily_data_file = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131210/part"
    daily_data_file_p1 = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131210/part-1"
    daily_data_file_p2 = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131210/part-2"

    min_occ_user = 0
    min_occ_prog = 0

    reader = DailyWatchTimeReader()

    #[occur_duid1, occur_pid1, cnt_duid1, cnt_pid1] = reader.read_file_info(daily_data_file)

    #[occur_duid2, occur_pid2, cnt_duid2, cnt_pid2] = reader.read_file_info([daily_data_file_p1, daily_data_file_p2])

    data1 = reader.read_file_with_minval(daily_data_file, min_occ_user, min_occ_prog)
    print data1
    #print data1.get_sparse_matrix().todense()

    data2 = reader.read_file_with_minval([daily_data_file_p1, daily_data_file_p2], \
                                         min_occ_user, min_occ_prog)
    print data2
    #print data2.get_sparse_matrix().todense()
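    # A hedged sanity check that could close this test: reading the combined
    # file and reading its two parts should yield matrices of the same size
    # (num_row/num_col are the attributes other snippets on this page use).
    assert data1.num_row == data2.num_row
    assert data1.num_col == data2.num_col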
    min_occ_prog = 1000

    num_user = 10000
    num_prog = 3000

    total_iteration = 2

    iteration = 1
    # iteration out of total_iteration.

    leave_k_out = 20
    lafactor = 5

    method = HierLat(latent_factor=lafactor)
    hash_file_str = str(hash(tuple(daily_data_file)))

    reader = DailyWatchTimeReader()
    feedback_data = reader.read_file_with_minval(daily_data_file, min_occ_user,
                                                 min_occ_prog, num_user,
                                                 num_prog)

    exp_id = 'lko_bi_' + exp_name + '_data' + hash_file_str\
                      + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog) \
                      + '_nu' + str(num_user) + '_np' + str(num_prog) \
                      + '_k' + str(leave_k_out) + '_toiter' + str(total_iteration)

    result_resource_str = 'exp'      + exp_id + \
                          '_method'  + method.unique_str() + \
                          '_iter'    + str(iteration)
    sub_folder = exp_id + '/models/' + method.unique_str()
    # use a sub folder to store the experiment resource.
    trained_model = URM.LoadResource(URM.RTYPE_RESULT, result_resource_str,
                                     sub_folder)
    [method] = trained_model
Example 11
import sys

from rs.data.daily_watchtime import DailyWatchTimeReader

if __name__ == '__main__':
    filename = "../../datasample/agg_duid_pid_watchtime_genre/20131209_100000"
    #filename = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131209/part-r-00000";

    if len(sys.argv) == 1:
        print 'Use default sample data.'
    else:
        filename = sys.argv[1]

    print 'processing file', filename

    reader = DailyWatchTimeReader()
    #reader.readFile(filename);
    #[mapping_duid, mapping_pid, row, col, data, pggr_pg, pggr_gr] = \
    #    reader.read_file_with_minval(filename, 5, 5);
    dataStruct = reader.read_file_with_minval(filename, 1, 1)
    print dataStruct
    #print len(pggr_pg);
    #print len(pggr_gr);
    #print len(set(pggr_gr));

    #[occur_duid, occur_pid, cnt_duid, cnt_pid] = reader.readFileInfo(filename);
    #print cnt_duid;
    #print cnt_pid;

    #histplot(occur_duid.values());
    #histplot(occur_pid.values());
Example 12
def experiment_coldstart_map(exp_name,     daily_data_file,\
                    min_occ_user, min_occ_prog, num_user, num_prog,\
                    method_list, blind_k_out, total_iteration, max_rank, binary = False):
    '''
    Cold-start experiment: blind out k programs entirely from training and
    evaluate MAP on recovering them, repeated for total_iteration rounds.

    Parameters
    ----------
    @param exp_name:       the experiment name (prefix).
    @param daily_data_file: a list of files.
    @param min_occ_user:   cold-start user criterion.
    @param min_occ_prog:   cold-start program criterion.
    @param num_user:       the number of users selected in the experiment.
    @param num_prog:       the number of programs selected in the experiment.
    @param method_list:    a list of recommendation models.
    @param blind_k_out:    the number of programs blinded out. The k must be
         strictly less than min_occ_user.
    @param binary: if set to True, binarized data is used (non-zeros set to 1).

    Returns
    ----------
    @return a dictionary mapping each method's unique_str() to a list of
        per-iteration results.
    '''
    
    print 'Blind k out: k = ', str(blind_k_out)
    print 'Min_occ_user: ', str(min_occ_user)
    print 'Min_occ_prog: ', str(min_occ_prog)

    if blind_k_out >= min_occ_user:
        raise ValueError('The k in the leave k out [' + str(blind_k_out)
                         + '] should be strictly less than min_occ_user [' + str(min_occ_user) + '].')
    
    # define lko_log style.
    lko_log = lambda msg: Logger.Log(msg, Logger.MSG_CATEGORY_EXP)

    if isinstance(daily_data_file, list):
        hash_file_str = str(hash(tuple(daily_data_file)))
    else:
        hash_file_str = str(hash(daily_data_file))

    # construct exp_id
    prefix = 'cst_bi_' if binary else 'cst_'
    exp_id = prefix + exp_name + '_data' + hash_file_str \
                    + '_mu' + str(min_occ_user) + '_mp' + str(min_occ_prog) \
                    + '_nu' + str(num_user) + '_np' + str(num_prog) \
                    + '_k' + str(blind_k_out) + '_toiter' + str(total_iteration)
    lko_log('Experiment ID: ' + exp_id)

    # load data.
    lko_log('Read data...')
    reader = DailyWatchTimeReader()
    data = reader.read_file_with_minval(daily_data_file, min_occ_user,
                                        min_occ_prog, num_user, num_prog)
    lko_log('Data loaded: ' + str(data))

    if binary:
        lko_log('Binarizing data...')
        data.binarize()
    else:
        # normalize
        lko_log('Normalizing data...')
        data.normalize_row()
    
    result = {}

    for method in method_list:
        # do for each method

        perf_vect = []
        for iteration in range(total_iteration):
            # do for each iteration for each method.

            lko_log('Method: ' + method.unique_str() + ' Iteration: ' + str(iteration))

            # data split of the current iteration.
            split_resource_str = 'exp' + exp_id + '_blind_idx_iter' + str(iteration)
            split_dir = exp_id + '/blind_idx'
            blind_out_idx = URM.LoadResource(URM.RTYPE_RESULT, split_resource_str, split_dir)
            if not blind_out_idx:
                # randomly generate k programs (columns) to blind out.
                blind_out_idx = ds.sample_num(data.num_col, blind_k_out)
                URM.SaveResource(URM.RTYPE_RESULT, split_resource_str, blind_out_idx, split_dir)

            lko_log('Blind index done.')
            # split the k blinded programs into a separate dataset.
            [data_tr, data_left] = data.blind_k_out(blind_out_idx)

            lko_log('Start index')
            iter_result = experiment_unit_leave_k_out_map(exp_id, method, \
                                    data_tr, data_left, iteration, max_rank)

            perf_vect.append(iter_result)

        result[method.unique_str()] = perf_vect

    return result
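# A minimal usage sketch (hedged: placeholder paths and illustrative values;
# blind_k_out must stay strictly below min_occ_user, and max_rank caps the
# depth of the MAP evaluation in experiment_unit_leave_k_out_map).
from rs.algorithms.recommendation.HierLat import HierLat

if __name__ == '__main__':
    result = experiment_coldstart_map('cst_demo', ['data/day1', 'data/day2'],
                                      min_occ_user=35, min_occ_prog=300,
                                      num_user=10000, num_prog=3000,
                                      method_list=[HierLat(latent_factor=5)],
                                      blind_k_out=20, total_iteration=2,
                                      max_rank=100, binary=True)
    print result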
Example 13
'''
Test the functionality of the share_row_data function in the rs.data.recdata

Created on Feb 4, 2014

@author: jiayu.zhou
'''

from rs.data.daily_watchtime import DailyWatchTimeReader
from rs.data.recdata import share_row_data

if __name__ == '__main__':

    reader = DailyWatchTimeReader()

    filename1 = "../../datasample/agg_duid_pid_watchtime_genre/toy_small_day1"
    filename2 = "../../datasample/agg_duid_pid_watchtime_genre/toy_small_day2"

    fb_data1 = reader.read_file_with_minval(filename1, 0, 0)
    fb_data2 = reader.read_file_with_minval(filename2, 0, 0)

    print 'Matrix 1'
    print fb_data1.row_mapping
    print fb_data1.col_mapping
    print fb_data1.get_sparse_matrix().todense()

    print 'Matrix 2'
    print fb_data2.row_mapping
    print fb_data2.col_mapping
    print fb_data2.get_sparse_matrix().todense()
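    # The listing stops before share_row_data is actually exercised; a hedged
    # sketch of the intended call, assuming it aligns the two FeedbackData
    # objects on their shared rows (users) and returns the aligned pair:
    [shared1, shared2] = share_row_data(fb_data1, fb_data2)

    print 'Shared rows'
    print shared1.row_mapping
    print shared2.get_sparse_matrix().todense()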
Example 14
    # turn off screen display.
    Logger.GetInstance().display_level = 10

    mcpl_log('Data file: ' + filename)
    mcpl_log('ROVI daily file: ' + rovi_daily_file)

    # build ROVI daily mapping
    mcpl_log('Building ROVI daily mapping')
    rovi_mapping = {}
    with open(rovi_daily_file) as csvfile:
        rovi_reader = csv.reader(csvfile, delimiter='\t', quotechar='|')
        for row in rovi_reader:
            rovi_mapping[row[3]] = row[7]

    # load data from file and transform into a sparse matrix.
    reader = DailyWatchTimeReader()
    fbdata = reader.read_file_with_minval(filename, 5, 5)

    mat = coo_matrix((fbdata.data_val, (fbdata.data_row, fbdata.data_col)), \
                     shape = (fbdata.num_row, fbdata.num_col))
    # memo: if we do multiple days, we might use coo_matrix summation, but we need
    #       to align the program and user.

    # we have a mapping from program id to row.
    program_mapping = fbdata.col_mapping
    # from which we build a reverse mapping from row id to program
    # the reverse mapping allows us to find program ID from matrix position.
    program_inv_mapping = {y: x
                           for x, y in program_mapping.items()}
    # check the consistency.
    if not (len(program_mapping) == len(program_inv_mapping)):
        raise ValueError('Mapping inverse error!')
Example 15
"""
Created on Jan 31, 2014

@author: jiayu.zhou
"""
from rs.data.daily_watchtime import DailyWatchTimeReader


if __name__ == "__main__":
    filename = "../../datasample/agg_duid_pid_watchtime_genre/20131209_100000"
    reader = DailyWatchTimeReader()
    dataStruct = reader.read_file_with_minval(filename, 7, 1)
    print dataStruct

    print dataStruct.row_mapping

    print dataStruct.get_sparse_matrix().todense()

    print "-----------------"
    print ">>>subsample 3 rows"

    [subdata_row, subidx] = dataStruct.subsample_row(3)
    print subidx

    print subdata_row.get_sparse_matrix().todense()

    print subdata_row.row_mapping

    print "-----------------"
    print ">>>subsample 50% rows"
'''
This is a testing pipeline for the KDD_2014 algorithm.

Created on Feb 7, 2014

@author: Shiyu C. ([email protected])
'''

from rs.data.daily_watchtime import DailyWatchTimeReader
from rs.algorithms.recommendation.CF_ONMTF import CF_ONMTF

if __name__ == '__main__':
    filename = "../../../datasample/agg_duid_pid_watchtime_genre/20131209_100000"

    # load data.
    reader = DailyWatchTimeReader()
    feedback_data = reader.read_file_with_minval(filename, 1, 1)
    feedback_data.normalize_row()

    # build a model with r = 5 latent factors.
    r = 5
    # the L_2 norm regularizer
    lamb = 0.2
    # the stopping delta value
    delta = 0.01
    # the maximum iteration number
    maxiter = 500

    CF_ONMTF_model = CF_ONMTF(r, lamb, delta, maxiter, verbose=True)
    CF_ONMTF_model.train(feedback_data)
Example 17
'''
Created on Feb 13, 2014

@author: jiayu.zhou
'''

import scipy.io as sio

from rs.data.daily_watchtime import DailyWatchTimeReader

if __name__ == '__main__':
    daily_data_file = "/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131209/part-r-00000"
    reader = DailyWatchTimeReader()
    data = reader.read_file_with_minval(daily_data_file, 1, 1)

    data_mat = data.get_sparse_matrix()

    ### directly save the sparse matrix data structure to Matlab.
    #sio.savemat("/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131209.mat", {'data': data_mat})

    ###
    sio.savemat("/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131209_sparse.mat",
                {'data': data_mat.data, 'i': data_mat.row, 'j': data_mat.col, \
                 'm': data_mat.shape[0], 'n': data_mat.shape[1]})

    print 'Done'
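    # A hedged round-trip check (sketch): rebuild the COO matrix from the
    # saved components to confirm the export kept every entry.
    from scipy.sparse import coo_matrix
    saved = sio.loadmat("/Users/jiayu.zhou/Data/duid-program-watchTime-genre/20131209_sparse.mat")
    rebuilt = coo_matrix((saved['data'].ravel(),
                          (saved['i'].ravel(), saved['j'].ravel())),
                         shape=(int(saved['m']), int(saved['n'])))
    print rebuilt.shape == data_mat.shape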