def normalize_row(self):
    '''
    Normalize every row of the stored sparse data, in place.

    The row/col/val triplets held on the instance are replaced by the
    triplets of the normalized matrix:

    1. ``self.get_sparse_matrix()`` supplies the matrix (per the original
       author's note, a COO matrix built from the stored triplets).
    2. The ``normalize_row`` helper normalizes each row (per the original
       note, this goes through a LIL matrix).
    3. The result is converted back to COO and its ``data`` / ``row`` /
       ``col`` arrays are stored as plain lists in ``data_val`` /
       ``data_row`` / ``data_col``.

    NOTE: duplicate coordinates are merged. If the input holds both
    (row, col, 0.1) and (row, col, 0.2), they are combined into a single
    (row, col, 0.3) entry before normalization, because of the COO -> LIL
    conversion.
    '''
    mat = self.get_sparse_matrix()
    # This call receives a matrix, not an instance, so it is expected to
    # reach a separate helper of the same name defined outside this block
    # (presumably at module level -- TODO confirm).
    mat = normalize_row(mat)  # normalize each row.
    mat = mat.tocoo()

    # Overwrite the stored triplets with the normalized ones.
    self.data_val = mat.data.tolist()
    self.data_row = mat.row.tolist()
    self.data_col = mat.col.tolist()
def normalize_row(self):
    '''
    Row-normalize the sparse data held by this object.

    Side effect: ``data_val``, ``data_row`` and ``data_col`` are
    overwritten with the values, row indices and column indices of the
    normalized matrix (as Python lists).

    Pipeline, as described by the original author: a COO sparse matrix is
    built from the stored triplets, converted to LIL for the
    normalization, and the normalized matrix is converted back to COO.

    NOTE: because of the COO -> LIL conversion, repeated coordinates are
    summed: (row, col, 0.1) together with (row, col, 0.2) is treated as
    one (row, col, 0.3) entry.
    '''
    # The inner ``normalize_row`` is called with a matrix; it is expected
    # to be a same-named helper defined outside this block.
    normalized = normalize_row(self.get_sparse_matrix())  # each row
    triplets = normalized.tocoo()

    self.data_val = triplets.data.tolist()
    self.data_row = triplets.row.tolist()
    self.data_col = triplets.col.tolist()
# Demo fragment: build a small 5x5 sparse matrix, run normalize_row on it,
# then start walking the axis-0 sums of its transpose.
# NOTE(review): this fragment stops at the `if` header on its last line; the
# branch bodies are not part of this span.

# Threshold compared against |j| in the loop at the bottom.
minf = 0.0001
# Start from an empty 5x5 COO matrix and convert it to LIL before filling it.
A = sp.coo_matrix((5,5))
A = A.tolil();
#A = sp.lil_matrix((5,5))
b = np.arange(0,5)
# First superdiagonal (k=1) gets 0..3, main diagonal gets 0..4.
A.setdiag(b[:-1], k=1)
A.setdiag(b)
print 'Dense A:'
print A.todense()
# Result of the row normalization; not used again within this fragment.
C1 = normalize_row(A);
A = A.T
print 'Dense A transpose:'
print A.todense()
# Sum along axis 0 of the transposed matrix, converted to nested lists
# (hence the two-level loop below).
sum_of_col = A.sum(0).tolist()
print sum_of_col
c = []
for i in sum_of_col:
    for j in i:
        # Sums whose magnitude is below minf get special handling
        # (branch body lies beyond this fragment).
        if math.fabs(j)<minf:
# Mapping from program id to its position in the matrix. The attribute is
# named ``col_mapping`` and programs are read with ``getcol`` below, so the
# position is presumably a column index -- TODO confirm.
program_mapping = fbdata.col_mapping
# Reverse mapping (matrix position -> program id); it lets us recover the
# program ID from a matrix position.
program_inv_mapping = {y: x for x, y in program_mapping.items()}
# Consistency check: the inverse ends up smaller when two program ids share
# one position.
if len(program_mapping) != len(program_inv_mapping):
    raise ValueError('Mapping inverse error!')
program_num = len(program_mapping)

# compute pairwise similarity
similarity_file = filename + '.prgsim'
if not os.path.isfile(similarity_file):
    # normalize data per user.
    mcpl_log('normalizing data...')
    mat = normalize_row(mat)

    mcpl_log('computing pairwise similarity...')
    # NOTE(review): the visible loop only counts pairs with i < j, i.e.
    # n * (n - 1) / 2 of them, while this total is n * (n + 1) / 2. In the
    # visible code the value only feeds the progress message; left unchanged
    # -- confirm intent.
    total_pair = program_num * (program_num + 1) / 2
    progress = 0
    cor_mat = np.zeros((program_num, program_num))
    for i in range(program_num):
        # Column i depends only on the outer index: densify it once per
        # outer iteration instead of once per (i, j) pair.
        col_i = mat.getcol(i).todense()
        for j in range(program_num):
            if i < j:
                progress += 1
                if progress % 1000 == 0:
                    mcpl_log('Computing ' + str(progress) + ' out of ' + str(total_pair))
                # Our similarity is defined as [1 - cosine distance].
                cor_mat[i][j] = 1 - cosine(col_i, mat.getcol(j).todense())
program_mapping = fbdata.col_mapping
# Build the reverse lookup (matrix position -> program id) so that a program
# ID can be recovered from a matrix position.
program_inv_mapping = dict(
    (position, program) for program, position in program_mapping.items())
# Sanity check: both directions must hold the same number of entries.
if len(program_inv_mapping) != len(program_mapping):
    raise ValueError('Mapping inverse error!')
program_num = len(program_mapping)

# Pairwise similarity is only computed when no '.prgsim' file exists yet.
similarity_file = filename + '.prgsim'
if not os.path.isfile(similarity_file):
    # Normalize the data per user first.
    mcpl_log('normalizing data...')
    mat = normalize_row(mat)

    mcpl_log('computing pairwise similarity...')
    cor_mat = np.zeros((program_num, program_num))
    total_pair = program_num * (program_num + 1) / 2
    progress = 0
    for i in range(program_num):
        for j in range(program_num):
            if i < j:
                progress += 1
                # Report progress every 1000 pairs.
                if progress % 1000 == 0:
                    mcpl_log(''.join(['Computing ', str(progress),
                                      ' out of ', str(total_pair)]))
                # Our similarity is defined as [1 - cosine distance].
if __name__ == '__main__':
    # Small manual demo: build a 5x5 sparse matrix, run normalize_row on it,
    # then compute guarded reciprocals of the axis-0 sums of its transpose.
    #
    # print is called with a single parenthesised argument throughout, which
    # produces the same output on Python 2 and is valid syntax on Python 3.

    # Sums whose absolute value is below this threshold are treated as zero.
    minf = 0.0001

    # Empty 5x5 matrix, converted to LIL before the diagonals are filled in.
    A = sp.coo_matrix((5, 5))
    A = A.tolil()
    # Alternative left by the original author: A = sp.lil_matrix((5,5))
    b = np.arange(0, 5)
    # First superdiagonal (k=1) gets 0..3, main diagonal gets 0..4.
    A.setdiag(b[:-1], k=1)
    A.setdiag(b)
    print('Dense A:')
    print(A.todense())
    # Result of the row normalization; not used again in the visible code.
    C1 = normalize_row(A)

    A = A.T
    print('Dense A transpose:')
    print(A.todense())
    # Sum along axis 0, converted to nested lists (hence the two-level loop).
    sum_of_col = A.sum(0).tolist()
    print(sum_of_col)

    # Reciprocal of every sum; (near-)zero sums map to 0 instead of dividing
    # by zero.
    c = []
    for i in sum_of_col:
        for j in i:
            c.append(0 if math.fabs(j) < minf else 1 / j)