def test_vstack_with_single_vector(self):
    # Stacking one sparse vector should give a single-row matrix holding
    # the same values.
    values = [1, 0, 2, 0, 3, 4, 0, 5]
    wanted = SparseMatrix.from_list([values])
    stacked = SparseMatrix.vstack([SparseVector.from_list(values)])
    self.assertEqual(stacked.shape, (1, 8))
    self.assertEqual(stacked, wanted)
def test_identity_with_multiple_vectors(self):
    # A 4x5 identity is compared against the equivalent dense literal:
    # ones on the main diagonal, zeros everywhere else.
    dims = (4, 5)
    dense = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
    ]
    wanted = SparseMatrix.from_list(dense)
    actual = SparseMatrix.identity(dims)
    self.assertEqual(actual.shape, dims)
    self.assertEqual(actual, wanted)
def test_vstack_with_multiple_vectors(self):
    # Stacking 20 random length-30 vectors must equal the matrix built
    # directly from the same 20 rows.
    count, width = 20, 30
    arrays = [np.random.randint(0, 10, width) for _ in range(count)]
    vectors = [SparseVector.from_list(row) for row in arrays]
    wanted = SparseMatrix.from_list(arrays)
    stacked = SparseMatrix.vstack(vectors)
    self.assertEqual(stacked, wanted)
def test_from_list_with_no_unique_elements(self):
    # An all-zero 4x4 input must produce a matrix with no stored entries
    # that compares equal to the 4x4 zero matrix.
    size = 4
    mat = SparseMatrix.from_list([[0] * size for _ in range(size)])
    self.assertEqual(len(mat), 0)
    self.assertEqual(mat, SparseMatrix.zero((size, size)))
    # Column indices, values and row indices are all expected to be empty.
    for stored in (mat.cols, mat.data, mat.rows):
        self.assertTrue(np.array_equal(stored, np.array([])))
def test_transpose_using_non_square_identity_matrix(self):
    # Transposing the 7x9 identity should give the 9x7 identity.
    n_rows, n_cols = 7, 9
    original = SparseMatrix.identity((n_rows, n_cols))
    wanted = SparseMatrix.identity((n_cols, n_rows))
    transposed = original.T
    self.assertEqual(transposed.shape, (n_cols, n_rows))
    self.assertEqual(transposed, wanted)
def test_from_list_with_several_unique_elements(self):
    # One non-zero value per row: checks the stored column indices,
    # values and row indices against the dense input.
    mat = SparseMatrix.from_list([
        [0, 1, 0, 0],
        [0, 0, 0, 2],
        [4, 0, 0, 0],
        [0, 0, 3, 0],
    ])
    self.assertEqual(len(mat), 4)
    checks = (
        (mat.cols, [1, 3, 0, 2]),
        (mat.data, [1, 2, 4, 3]),
        (mat.rows, [0, 1, 2, 3]),
    )
    for actual, wanted in checks:
        self.assertTrue(np.array_equal(actual, np.array(wanted)))
def test_to_dense_with_zero(self):
    # The dense form of a 5x5 zero matrix should equal np.zeros of the
    # same shape and dtype.
    dims, kind = (5, 5), np.uint16
    sparse = SparseMatrix.zero(dims, dtype=kind)
    wanted = np.zeros(dims, dtype=kind)
    self.assertTrue(np.array_equal(sparse.to_dense(), wanted))
def read_database(pool, stream):
    """
    Reads a training database from the specified CSV stream using the
    specified processing pool to improve I/O performance.

    Every line produced by ``CsvIO.generate_lines`` is parsed with
    ``CsvIO.process_line`` (class kept, id skipped) and contributes one
    class label and one matrix row.  An empty stream yields an empty
    database with a ``(0, 0)`` matrix instead of raising.

    :param pool: The processing pool to use.
    :param stream: The CSV stream to read from.
    :return: A database filled with data to train on.
    """
    classes = []
    cols = []
    data = []
    rows = []
    processor = partial(CsvIO.process_line, skip_class=False, skip_id=True)
    results = pool.map(processor, CsvIO.generate_lines(stream))
    for index, result in enumerate(results):
        classes.append(result.classz)
        cols.extend(result.cols)
        data.extend(result.data)
        # Every stored value of this line belongs to matrix row `index`.
        rows.extend([index] * len(result.cols))
    # One row per parsed line: deriving the count from max(rows) would
    # drop trailing lines that have no stored values and would fail on an
    # empty stream.
    row_count = len(classes)
    col_count = max(cols) + 1 if cols else 0
    # np.asarray converts without forcing an extra copy; the former
    # np.array(..., copy=False) raises ValueError on NumPy 2.x whenever a
    # copy is unavoidable, which is always the case for Python lists.
    return TrainingDatabase(
        np.asarray(classes, dtype=np.uint8),
        SparseMatrix(np.asarray(data, dtype=np.uint16),
                     np.asarray(rows, dtype=np.uint32),
                     np.asarray(cols, dtype=np.uint32),
                     (row_count, col_count)))
def test_to_dense_with_random(self):
    # Round trip: a random 10x10 uint16 array converted to sparse and
    # back to dense should be unchanged.
    source = np.random.randint(0, 5, (10, 10), dtype=np.uint16)
    sparse = SparseMatrix.from_list(source)
    snapshot = np.copy(source)
    self.assertTrue(np.array_equal(sparse.to_dense(), snapshot))
def test_transpose_using_square_identity_matrix(self):
    # The transpose of a 5x5 identity should equal a copy of the original.
    dims = (5, 5)
    identity = SparseMatrix.identity(dims)
    untouched = copy(identity)
    flipped = identity.T
    self.assertEqual(flipped.shape, dims)
    self.assertEqual(flipped, untouched)
def select(self, classz):
    """
    Builds the sub-matrix made up of every training example whose
    classification equals the specified class.

    :param classz: The class to select.
    :return: A sparse matrix of data specific to a class.
    """
    selected = []
    # np.where on the comparison gives the positions of the matching
    # labels; the row at each such position of the counts matrix is kept.
    for index in np.where(self.classes == classz)[0]:
        selected.append(self.counts.get_row(index))
    return SparseMatrix.vstack(selected)
def test_get_rows_using_simple_matrix(self):
    # get_rows() should hand back each row of the matrix, in order, as a
    # sparse vector.
    mat = SparseMatrix.from_list([[0, 2, 0], [0, 0, 3], [1, 0, 0]])
    expected = [
        SparseVector.from_list([0, 2, 0]),
        SparseVector.from_list([0, 0, 3]),
        SparseVector.from_list([1, 0, 0])
    ]
    # Materialise the rows so their number can be checked: zip() stops at
    # the shorter input, so without this a get_rows() that produced too
    # few (or no) rows would pass unnoticed.
    result = list(mat.get_rows())
    self.assertEqual(len(result), len(expected))
    for ex, res in zip(expected, result):
        self.assertEqual(res, ex)
def shuffle(self):
    """
    Shuffles this training database, re-arranging the order of both the
    classes and the data rows.

    The classes are shuffled in place; the counts matrix is rebuilt from
    its shuffled rows using the dtype of the existing data.
    """
    rows = list(self.counts.get_rows())
    # The generator state is captured once and restored before each
    # shuffle so both start from the same state, which is meant to keep
    # every class label paired with its row.
    snapshot = np.random.get_state()
    np.random.set_state(snapshot)
    np.random.shuffle(self.classes)
    np.random.set_state(snapshot)
    np.random.shuffle(rows)
    self.counts = SparseMatrix.vstack(rows, dtype=self.counts.data.dtype)
def test_get_row_using_simple_matrix(self):
    # get_row(i) should return row i of the matrix as a sparse vector.
    dense = [[0, 2, 0], [0, 0, 3], [1, 0, 0]]
    mat = SparseMatrix.from_list(dense)
    wanted = [SparseVector.from_list(row) for row in dense]
    fetched = [mat.get_row(index) for index in range(3)]
    for want, got in zip(wanted, fetched):
        self.assertEqual(want, got)
def test_read_database_with_single_row(self):
    # A single CSV line: leading id, nine data values, trailing class.
    src = "0,0,1,0,2,0,3,4,0,5,1"
    expected = TrainingDatabase(
        np.ones(1, dtype=np.uint8),
        SparseMatrix.from_list([[0, 1, 0, 2, 0, 3, 4, 0, 5]]))
    # The expected matrix is built from the data values only, so its
    # column indices and width are shifted by one to line up with what
    # read_database is expected to return.
    expected.counts.cols = expected.counts.cols + 1
    n_rows, n_cols = expected.counts.shape
    expected.counts.shape = (n_rows, n_cols + 1)
    with Pool(processes=4) as pool, StringIO(src) as stream:
        result = CsvIO.read_database(pool, stream)
        self.assertEqual(result, expected)
def test_read_database_with_multiple_rows(self):
    # Three CSV lines, each: leading id, nine data values, trailing class.
    src = ("0,0,1,0,2,0,3,4,0,5,1\n"
           "1,6,0,7,0,8,9,0,10,0,2\n"
           "2,0,11,0,12,0,13,14,0,15,1\n")
    dense = [
        [0, 1, 0, 2, 0, 3, 4, 0, 5],
        [6, 0, 7, 0, 8, 9, 0, 10, 0],
        [0, 11, 0, 12, 0, 13, 14, 0, 15],
    ]
    expected = TrainingDatabase(
        np.array([1, 2, 1], dtype=np.uint8),
        SparseMatrix.from_list(dense))
    # The expected matrix is built from the data values only, so its
    # column indices and width are shifted by one to line up with what
    # read_database is expected to return.
    expected.counts.cols = expected.counts.cols + 1
    n_rows, n_cols = expected.counts.shape
    expected.counts.shape = (n_rows, n_cols + 1)
    with Pool(processes=4) as pool, StringIO(src) as stream:
        result = CsvIO.read_database(pool, stream)
        self.assertEqual(result, expected)
def test_identity_with_single_vector(self):
    # A 1x5 identity is a single row with a one in the first column.
    dims = (1, 5)
    wanted = SparseMatrix.from_list([[1, 0, 0, 0, 0]])
    actual = SparseMatrix.identity(dims)
    self.assertEqual(actual.shape, dims)
    self.assertEqual(actual, wanted)
def test_init_throws_when_row_and_column_dimensions_are_unequal(self):
    # The row index array has 10 entries but the column index array only
    # 8, so construction is expected to be rejected.
    # The builtin int replaces the np.int alias, which has been removed
    # from NumPy and would raise AttributeError before the constructor
    # is even called.
    with self.assertRaises(ValueError):
        SparseMatrix(np.zeros(10),
                     np.zeros(10, int),
                     np.zeros(8, int),
                     (10, 10))
def test_init_throws_when_row_dtype_is_not_integral(self):
    # The row index array is floating point, so construction is expected
    # to be rejected.
    # The builtins float and int replace the np.float / np.int aliases,
    # which have been removed from NumPy and would raise AttributeError
    # before the constructor is even called.
    with self.assertRaises(ValueError):
        SparseMatrix(np.zeros(10),
                     np.zeros(10, float),
                     np.zeros(10, int),
                     (10, 10))