Esempio n. 1
0
 def normalize_row(self):
     '''
     Row-normalize the stored sparse data and overwrite row/col/val in place.

     The matrix returned by ``self.get_sparse_matrix()`` is handed to the
     module-level ``normalize_row`` function, the result is converted to a
     coo matrix, and ``data_val`` / ``data_row`` / ``data_col`` are replaced
     with that matrix's ``data`` / ``row`` / ``col`` as plain Python lists.
     Nothing is returned.

     NOTE: duplicate coordinates are merged along the way. Per the original
           author's note, (row, col, 0.1) and (row, col, 0.2) come back as a
           single (row, col, 0.3) because of the coo to lil transformation
           (that conversion happens outside this method).
     '''
     sparse = self.get_sparse_matrix()
     # The bare name is looked up in module scope, so this calls the
     # normalize_row function rather than this method.
     row_normalized = normalize_row(sparse)
     as_coo = row_normalized.tocoo()

     self.data_val = as_coo.data.tolist()
     self.data_row = as_coo.row.tolist()
     self.data_col = as_coo.col.tolist()
Esempio n. 2
0
    def normalize_row(self):
        '''
        Normalize every row of the data and store the result back on self.

        Steps: obtain the matrix from ``self.get_sparse_matrix()``, run it
        through the module-level ``normalize_row`` function, convert the
        outcome to a coo matrix, and replace ``data_val``, ``data_row`` and
        ``data_col`` with that matrix's ``data``, ``row`` and ``col`` as
        lists. Nothing is returned.

        NOTE: repeated coordinates do not survive the round trip. According
              to the original note, (row, col, 0.1) plus (row, col, 0.2)
              comes back as one (row, col, 0.3) because of the coo to lil
              transformation (performed outside this method).
        '''
        # normalize_row here is the module-level function (looked up in
        # module scope), applied to each row of the matrix.
        coo = normalize_row(self.get_sparse_matrix()).tocoo()

        self.data_val = coo.data.tolist()
        self.data_row = coo.row.tolist()
        self.data_col = coo.col.tolist()
 # Scratch demo: build a small sparse matrix, pass it to normalize_row, then
 # walk the column sums of its transpose.
 # NOTE(review): the print statements below are Python 2 syntax.

 # Threshold compared against the absolute column sum in the loop at the bottom.
 minf = 0.0001
 
 # Start from an empty 5x5 coo matrix ...
 A = sp.coo_matrix((5,5))
 
 
 
 # ... and switch to lil format before the diagonals are assigned.
 A = A.tolil();
 
 # Alternative construction, left disabled:
 #A = sp.lil_matrix((5,5))
 b = np.arange(0,5)
 # The first superdiagonal (k=1) receives b[:-1]; the main diagonal receives b.
 A.setdiag(b[:-1], k=1)
 A.setdiag(b)
 print 'Dense A:'
 print A.todense()
 
 # The result is kept in C1 but is not used again in the visible lines.
 C1 = normalize_row(A);
 
 
 
 # From here on A refers to its own transpose.
 A = A.T
 print 'Dense A transpose:'
 print A.todense()
 
 
 
 # Sum along axis 0; the nested loop below treats the tolist() result as a
 # list of lists.
 sum_of_col = A.sum(0).tolist()
 print sum_of_col
 c = []
 for i in sum_of_col:
     for j in i:
         # NOTE(review): the body of this `if` is missing - the snippet is
         # cut off at this point.
         if math.fabs(j)<minf:
 # Mapping taken from fbdata.col_mapping; per the original note it maps a
 # program id to its position in the matrix.
 # NOTE(review): the original comment said "row", but the attribute is
 # col_mapping and the loop below reads columns (mat.getcol) - confirm.
 program_mapping = fbdata.col_mapping; 
 # Invert it (position -> program id) so that a program id can be found from
 # a matrix position.
 program_inv_mapping = {y: x for x, y in program_mapping.items()};
 # Consistency check: if two keys shared a value, the inverted dict would be
 # shorter than the original one.
 if not (len(program_mapping) == len(program_inv_mapping)):
     raise ValueError('Mapping inverse error!');
 program_num = len(program_mapping);
 
 # Compute pairwise similarity, but only when the file
 # filename + '.prgsim' does not exist yet.
 similarity_file = filename + '.prgsim';
 if not os.path.isfile(similarity_file):
     # Normalize each row (per the original note, one row is one user's data).
     mcpl_log('normalizing data...')
     mat = normalize_row(mat);
 
     mcpl_log('computing pairwise similarity...');
     # Total reported in the progress messages.
     # NOTE(review): the loop below only counts pairs with i < j, i.e.
     # program_num * (program_num - 1) / 2 of them, so progress never reaches
     # this total - confirm whether (program_num + 1) is intended.
     total_pair = program_num *  (program_num + 1) / 2;
     progress = 0;
     
     # program_num x program_num array of zeros; only the entries with i < j
     # are written in the visible code.
     cor_mat = np.zeros((program_num, program_num));
     for i in range(program_num):
         for j in range(program_num):
             if i < j:
                 progress += 1;
                 # Log every 1000th pair.
                 if progress % 1000 == 0:
                     mcpl_log('Computing '+ str(progress) + ' out of ' + str(total_pair));
                 
                 # Our similarity is defined as [1 - cosine distance], taken
                 # between the dense forms of columns i and j.
                 # presumably `cosine` is a cosine-distance function such as
                 # scipy.spatial.distance.cosine - verify against the imports.
                 cor_mat[i][j] = 1 - cosine(mat.getcol(i).todense(), mat.getcol(j).todense());
Esempio n. 5
0
    # Mapping taken from fbdata.col_mapping.
    # presumably program id -> matrix position; verify against the caller.
    program_mapping = fbdata.col_mapping
    # from which we build a reverse mapping (matrix position -> program id);
    # the reverse mapping allows us to find program ID from matrix position.
    program_inv_mapping = {y: x
                           for x, y in program_mapping.items()}
    # Consistency check: if two keys shared a value, the inverted dict would
    # be shorter than the original one.
    if not (len(program_mapping) == len(program_inv_mapping)):
        raise ValueError('Mapping inverse error!')
    program_num = len(program_mapping)

    # Compute pairwise similarity, but only when the file
    # filename + '.prgsim' does not exist yet.
    similarity_file = filename + '.prgsim'
    if not os.path.isfile(similarity_file):
        # Normalize each row (per the original note, one row is one user's
        # data).
        mcpl_log('normalizing data...')
        mat = normalize_row(mat)

        mcpl_log('computing pairwise similarity...')
        # Total reported in the progress messages.
        # NOTE(review): the loop below only counts pairs with i < j, i.e.
        # program_num * (program_num - 1) / 2 of them, so progress never
        # reaches this total - confirm whether (program_num + 1) is intended.
        total_pair = program_num * (program_num + 1) / 2
        progress = 0

        # program_num x program_num array of zeros.
        cor_mat = np.zeros((program_num, program_num))
        for i in range(program_num):
            for j in range(program_num):
                if i < j:
                    progress += 1
                    # Log every 1000th pair.
                    if progress % 1000 == 0:
                        mcpl_log('Computing ' + str(progress) + ' out of ' +
                                 str(total_pair))

                    # Our similarity is defined as [1 - cosine distance].
                    # NOTE(review): the statement this comment introduces is
                    # not part of this fragment - the snippet is cut off here.
Esempio n. 6
0
# Ad-hoc demo, executed only when the file is run as a script: builds a small
# sparse matrix, passes it to normalize_row, then derives one weight per
# column sum of the transpose.
# NOTE(review): the print statements below are Python 2 syntax.
if __name__ == '__main__':
    # Column sums whose absolute value is below this threshold get weight 0.
    minf = 0.0001

    # Start from an empty 5x5 coo matrix ...
    A = sp.coo_matrix((5, 5))

    # ... and switch to lil format before the diagonals are assigned.
    A = A.tolil()

    # Alternative construction, left disabled:
    #A = sp.lil_matrix((5,5))
    b = np.arange(0, 5)
    # The first superdiagonal (k=1) receives b[:-1]; the main diagonal
    # receives b.
    A.setdiag(b[:-1], k=1)
    A.setdiag(b)
    print 'Dense A:'
    print A.todense()

    # The result is kept in C1 but is not used again in the visible lines.
    C1 = normalize_row(A)

    # From here on A refers to its own transpose.
    A = A.T
    print 'Dense A transpose:'
    print A.todense()

    # Sum along axis 0; the nested loop below treats the tolist() result as
    # a list of lists.
    sum_of_col = A.sum(0).tolist()
    print sum_of_col
    c = []
    for i in sum_of_col:
        for j in i:
            # Avoid dividing by a (near-)zero sum: append 0 instead of the
            # reciprocal when |j| is below minf.
            if math.fabs(j) < minf:
                c.append(0)
            else:
                c.append(1 / j)