コード例 #1
0
    def test_vstack_with_single_vector(self):
        """
        Stacking one sparse vector should give a single-row matrix that
        holds the same elements.
        """
        reference = SparseMatrix.from_list([[1, 0, 2, 0, 3, 4, 0, 5]])

        only_row = SparseVector.from_list([1, 0, 2, 0, 3, 4, 0, 5])
        stacked = SparseMatrix.vstack([only_row])

        self.assertEqual(stacked.shape, (1, 8))
        self.assertEqual(stacked, reference)
コード例 #2
0
    def test_identity_with_multiple_vectors(self):
        """
        A 4x5 identity matrix should equal the matrix built from a dense
        list with ones on the main diagonal and zeros elsewhere.
        """
        shape = (4, 5)
        dense = [
            [1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0],
            [0, 0, 1, 0, 0],
            [0, 0, 0, 1, 0],
        ]

        reference = SparseMatrix.from_list(dense)
        identity = SparseMatrix.identity(shape)

        self.assertEqual(identity.shape, shape)
        self.assertEqual(identity, reference)
コード例 #3
0
    def test_vstack_with_multiple_vectors(self):
        """
        Stacking twenty random 30-element vectors should equal the matrix
        built directly from the same twenty arrays.
        """
        samples = []
        for _ in range(20):
            samples.append(np.random.randint(0, 10, 30))

        # Build one sparse vector per sample, in the same order.
        stackable = [SparseVector.from_list(sample) for sample in samples]

        reference = SparseMatrix.from_list(samples)
        stacked = SparseMatrix.vstack(stackable)

        self.assertEqual(stacked, reference)
コード例 #4
0
 def test_from_list_with_no_unique_elements(self):
     """
     An all-zero 4x4 list should produce a matrix with no stored elements
     that equals the 4x4 zero matrix.
     """
     all_zero = [[0, 0, 0, 0] for _ in range(4)]
     mat = SparseMatrix.from_list(all_zero)

     self.assertEqual(len(mat), 0)
     self.assertEqual(mat, SparseMatrix.zero((4, 4)))

     # Every storage array is expected to be empty.
     for name in ("cols", "data", "rows"):
         stored = getattr(mat, name)
         self.assertTrue(np.array_equal(stored, np.array([])))
コード例 #5
0
    def test_transpose_using_non_square_identity_matrix(self):
        """
        Transposing a 7x9 identity matrix should give the 9x7 identity
        matrix.
        """
        source = SparseMatrix.identity((7, 9))
        flipped_shape = (9, 7)

        reference = SparseMatrix.identity(flipped_shape)
        transposed = source.T

        self.assertEqual(transposed.shape, flipped_shape)
        self.assertEqual(transposed, reference)
コード例 #6
0
 def test_from_list_with_several_unique_elements(self):
     """
     A 4x4 list with four non-zero entries should store exactly those
     entries, with matching column, data and row arrays.
     """
     dense = [
         [0, 1, 0, 0],
         [0, 0, 0, 2],
         [4, 0, 0, 0],
         [0, 0, 3, 0],
     ]
     mat = SparseMatrix.from_list(dense)

     self.assertEqual(len(mat), 4)

     # Expected contents of each storage array, checked in this order.
     stored = (
         ("cols", [1, 3, 0, 2]),
         ("data", [1, 2, 4, 3]),
         ("rows", [0, 1, 2, 3]),
     )
     for name, values in stored:
         actual = getattr(mat, name)
         self.assertTrue(np.array_equal(actual, np.array(values)))
コード例 #7
0
    def test_to_dense_with_zero(self):
        """
        Converting a 5x5 ``uint16`` zero matrix to dense form should give
        an all-zero array that compares equal to ``np.zeros``.
        """
        shape = (5, 5)
        sparse = SparseMatrix.zero(shape, dtype=np.uint16)

        reference = np.zeros(shape, dtype=np.uint16)
        dense = sparse.to_dense()

        self.assertTrue(np.array_equal(dense, reference))
コード例 #8
0
ファイル: io.py プロジェクト: wbknez/word-categorization
    def read_database(pool, stream):
        """
        Reads a training database from the specified CSV stream using the
        specified processing pool to improve I/O performance.

        Every line produced by ``CsvIO.generate_lines`` is handed to
        ``CsvIO.process_line`` (class kept, id skipped).  The per-line
        results are concatenated into one set of data/row/column arrays, with
        the row index of each entry being the position of its line.

        :param pool: The processing pool to use.
        :param stream: The CSV stream to read from.
        :return: A database filled with data to train on.
        """
        classes = []
        cols = []
        data = []
        rows = []

        processor = partial(CsvIO.process_line, skip_class=False, skip_id=True)
        results = pool.map(processor, CsvIO.generate_lines(stream))

        for index, result in enumerate(results):
            classes.append(result.classz)
            cols.extend(result.cols)
            data.extend(result.data)
            rows.extend([index] * len(result.cols))

        # One row per parsed line: ``classes`` receives an entry for every
        # line, whereas ``rows`` only receives entries for lines that carry
        # data, so ``max(rows) + 1`` would drop trailing lines without
        # entries and leave the matrix misaligned with ``classes``.  The
        # column count falls back to zero when no entries were read at all.
        shape = (len(classes), max(cols, default=-1) + 1)

        # ``np.asarray`` converts the lists without forcing an extra copy;
        # ``np.array(..., copy=False)`` raises on NumPy 2.0+ because turning
        # a list into an array always requires a copy.
        return TrainingDatabase(
            np.asarray(classes, dtype=np.uint8),
            SparseMatrix(np.asarray(data, dtype=np.uint16),
                         np.asarray(rows, dtype=np.uint32),
                         np.asarray(cols, dtype=np.uint32),
                         shape))
コード例 #9
0
    def test_to_dense_with_random(self):
        """
        Converting a matrix built from a random 10x10 ``uint16`` array back
        to dense form should reproduce that array.
        """
        source = np.random.randint(0, 5, (10, 10), dtype=np.uint16)
        sparse = SparseMatrix.from_list(source)

        reference = source.copy()
        dense = sparse.to_dense()

        self.assertTrue(np.array_equal(dense, reference))
コード例 #10
0
    def test_transpose_using_square_identity_matrix(self):
        """
        Transposing a 5x5 identity matrix should give a matrix equal to a
        copy of the original.
        """
        shape = (5, 5)
        source = SparseMatrix.identity(shape)

        reference = copy(source)
        transposed = source.T

        self.assertEqual(transposed.shape, shape)
        self.assertEqual(transposed, reference)
コード例 #11
0
    def select(self, classz):
        """
        Builds the sub-matrix made of every row of ``counts`` whose entry in
        ``classes`` equals the specified class.

        :param classz: The class to select.
        :return: A sparse matrix of data specific to a class.
        """
        matches = self.classes == classz
        # ``np.where`` returns one index array per axis; take the first.
        row_indices = np.where(matches)[0]

        selected = []
        for row_index in row_indices:
            selected.append(self.counts.get_row(row_index))

        return SparseMatrix.vstack(selected)
コード例 #12
0
    def test_get_rows_using_simple_matrix(self):
        """
        ``get_rows`` should produce every row of a 3x3 matrix, in order, as
        a sparse vector.
        """
        mat = SparseMatrix.from_list([[0, 2, 0], [0, 0, 3], [1, 0, 0]])

        expected = [
            SparseVector.from_list([0, 2, 0]),
            SparseVector.from_list([0, 0, 3]),
            SparseVector.from_list([1, 0, 0])
        ]
        # Materialise the rows so their number can be checked: ``zip`` stops
        # at the shorter input, so missing rows would otherwise go unnoticed.
        result = list(mat.get_rows())

        self.assertEqual(len(result), len(expected))
        for ex, res in zip(expected, result):
            self.assertEqual(res, ex)
コード例 #13
0
    def shuffle(self):
        """
        Shuffles this training database in place: the classes and the rows
        of the count matrix are each shuffled, then the count matrix is
        rebuilt from the shuffled rows using its original data type.
        """
        shuffled_rows = list(self.counts.get_rows())

        # Both shuffles start from the same generator state, which is meant
        # to keep every class paired with its row of counts.
        rng_state = np.random.get_state()

        np.random.set_state(rng_state)
        np.random.shuffle(self.classes)

        np.random.set_state(rng_state)
        np.random.shuffle(shuffled_rows)

        data_type = self.counts.data.dtype
        self.counts = SparseMatrix.vstack(shuffled_rows, dtype=data_type)
コード例 #14
0
    def test_get_row_using_simple_matrix(self):
        """
        ``get_row`` should return each row of a 3x3 matrix as the sparse
        vector built from that row's dense values.
        """
        mat = SparseMatrix.from_list([[0, 2, 0], [0, 0, 3], [1, 0, 0]])

        wanted = [
            SparseVector.from_list(dense_row)
            for dense_row in ([0, 2, 0], [0, 0, 3], [1, 0, 0])
        ]
        fetched = [mat.get_row(index) for index in range(3)]

        for wanted_row, fetched_row in zip(wanted, fetched):
            self.assertEqual(wanted_row, fetched_row)
コード例 #15
0
    def test_read_database_with_single_row(self):
        """
        Reading a one-line CSV stream should give a database with a single
        class and a single row of counts.
        """
        src = "0,0,1,0,2,0,3,4,0,5,1"

        with Pool(processes=4) as pool, StringIO(src) as stream:
            classes = np.ones(1, dtype=np.uint8)
            dense = [[0, 1, 0, 2, 0, 3, 4, 0, 5]]
            expected = TrainingDatabase(classes,
                                        SparseMatrix.from_list(dense))

            # The expected column indices and matrix width are one larger
            # than those of the dense list above - presumably because of the
            # leading id column; confirm against CsvIO.process_line.
            counts = expected.counts
            counts.cols = counts.cols + 1
            counts.shape = (counts.shape[0], counts.shape[1] + 1)

            result = CsvIO.read_database(pool, stream)

            self.assertEqual(result, expected)
コード例 #16
0
    def test_read_database_with_multiple_rows(self):
        """
        Reading a three-line CSV stream should give a database with three
        classes and three rows of counts.
        """
        src = ("0,0,1,0,2,0,3,4,0,5,1\n"
               "1,6,0,7,0,8,9,0,10,0,2\n"
               "2,0,11,0,12,0,13,14,0,15,1\n")

        with Pool(processes=4) as pool, StringIO(src) as stream:
            classes = np.array([1, 2, 1], dtype=np.uint8)
            dense = [
                [0, 1, 0, 2, 0, 3, 4, 0, 5],
                [6, 0, 7, 0, 8, 9, 0, 10, 0],
                [0, 11, 0, 12, 0, 13, 14, 0, 15],
            ]
            expected = TrainingDatabase(classes,
                                        SparseMatrix.from_list(dense))

            # The expected column indices and matrix width are one larger
            # than those of the dense list above - presumably because of the
            # leading id column; confirm against CsvIO.process_line.
            counts = expected.counts
            counts.cols = counts.cols + 1
            counts.shape = (counts.shape[0], counts.shape[1] + 1)

            result = CsvIO.read_database(pool, stream)

            self.assertEqual(result, expected)
コード例 #17
0
    def test_identity_with_single_vector(self):
        """
        A 1x5 identity matrix should equal the matrix built from a single
        dense row whose first element is one.
        """
        shape = (1, 5)

        reference = SparseMatrix.from_list([[1, 0, 0, 0, 0]])
        identity = SparseMatrix.identity(shape)

        self.assertEqual(identity.shape, shape)
        self.assertEqual(identity, reference)
コード例 #18
0
 def test_init_throws_when_row_and_column_dimensions_are_unequal(self):
     """
     Constructing a matrix from ten row indices but only eight column
     indices should raise ``ValueError``.
     """
     # The builtin ``int`` replaces the ``np.int`` alias, which was removed
     # in NumPy 1.24 and would raise ``AttributeError`` before the
     # constructor is ever reached.
     with self.assertRaises(ValueError):
         SparseMatrix(np.zeros(10), np.zeros(10, int),
                      np.zeros(8, int), (10, 10))
コード例 #19
0
 def test_init_throws_when_row_dtype_is_not_integral(self):
     """
     Constructing a matrix whose row indices are floating point should
     raise ``ValueError``.
     """
     # The builtin ``float``/``int`` replace the ``np.float``/``np.int``
     # aliases, which were removed in NumPy 1.24 and would raise
     # ``AttributeError`` before the constructor is ever reached.
     with self.assertRaises(ValueError):
         SparseMatrix(np.zeros(10), np.zeros(10, float),
                      np.zeros(10, int), (10, 10))