def _load_URM(self, filePath, header=False, separator="::"):
    """
    Read a delimited interaction file into an IncrementalSparseMatrix builder.

    Every line longer than one character is split on ``separator``. The first
    three fields are taken as user id, item id and rating. Lines whose rating
    field is exactly the string "0" or "NaN" are skipped; all other ratings are
    converted with ``float`` and appended to the builder. User and item ids are
    handed to the builder as the raw strings found in the file.

    :param filePath: path of the text file to read
    :param header: when True, the first line of the file is discarded
    :param separator: string that delimits the fields of each line
    :return: the populated IncrementalSparseMatrix builder
    :raises IndexError: if a non-empty line has fewer than three fields
    :raises ValueError: if a rating field cannot be parsed as a float
    """
    URM_builder = IncrementalSparseMatrix(auto_create_col_mapper=True, auto_create_row_mapper=True)

    # The context manager guarantees the handle is released even when a
    # malformed line raises half-way through the file.
    with open(filePath, "r") as fileHandle:

        if header:
            fileHandle.readline()

        # The counter includes empty lines, it only drives the progress message.
        for numCells, line in enumerate(fileHandle, start=1):

            if numCells % 1000000 == 0:
                print("Processed {} cells".format(numCells))

            # A line holding at most a newline character carries no data.
            if len(line) <= 1:
                continue

            fields = line.split(separator)
            fields[-1] = fields[-1].replace("\n", "")

            rating = fields[2]

            # Explicit zeros and missing values are not stored in the sparse matrix.
            if rating == "0" or rating == "NaN":
                continue

            URM_builder.add_data_lists([fields[0]], [fields[1]], [float(rating)])

    return URM_builder
def test_IncrementalSparseMatrix_add_rows(self):
    """
    Fill an IncrementalSparseMatrix one row at a time, using the column
    pattern of a random CSR matrix and a constant value, then check that the
    result equals the random matrix with all its stored values set to that
    same constant.
    """
    import numpy as np

    n_rows, n_cols = 100, 200
    fill_value = 5.0

    randomMatrix = sps.random(n_rows, n_cols, density=0.01, format='csr')
    incrementalMatrix = IncrementalSparseMatrix(n_rows=n_rows, n_cols=n_cols)

    # Consecutive indptr entries delimit the column indices of each CSR row.
    row_bounds = zip(randomMatrix.indptr[:-1], randomMatrix.indptr[1:])

    for row, (start, end) in enumerate(row_bounds):
        incrementalMatrix.add_single_row(row, randomMatrix.indices[start:end], fill_value)

    # The reference keeps its sparsity pattern but takes the constant value everywhere.
    randomMatrix.data = np.ones_like(randomMatrix.data) * fill_value

    randomMatrix_incremental = incrementalMatrix.get_SparseMatrix()

    assert sparse_are_equals(randomMatrix, randomMatrix_incremental)
def test_IncrementalSparseMatrix_add_lists(self):
    """
    Feed the row, column and data arrays of a random COO matrix to an
    IncrementalSparseMatrix in a single call and check that the matrix it
    builds equals the random one.
    """
    shape = {"n_rows": 100, "n_cols": 200}

    randomMatrix = sps.random(shape["n_rows"], shape["n_cols"], density=0.01, format='coo')
    incrementalMatrix = IncrementalSparseMatrix(**shape)

    # Copies are passed so the builder never holds the reference matrix's own arrays.
    rows = randomMatrix.row.copy()
    cols = randomMatrix.col.copy()
    values = randomMatrix.data.copy()

    incrementalMatrix.add_data_lists(rows, cols, values)

    randomMatrix_incremental = incrementalMatrix.get_SparseMatrix()

    assert sparse_are_equals(randomMatrix, randomMatrix_incremental)