Example #1
0
 def test_sparse_matrix(self):
     """Compare make_sparse_matrix() against a hand-built 2x4 CSR matrix."""
     encoder = FeatureEncoder(Rows(TestFeatureEncoder.rows))
     actual = encoder.make_sparse_matrix()

     # Coordinate triplets for the reference matrix: the stored values and
     # the (row, column) position of each one.
     values = (3, 2, 1, 3, 2, 1)
     row_idx = (0, 0, 0, 1, 1, 1)
     col_idx = (0, 1, 2, 0, 1, 3)
     expected = csr_matrix((values, (row_idx, col_idx)),
                           shape=(2, 4),
                           dtype=int32)

     def to_string(mat):
         # Dense rendering squeezed onto one line for the failure message.
         return str(mat.toarray()).replace('\n', ',')

     # The element-wise difference has no stored entries (nnz == 0) when
     # the two matrices agree.
     difference = abs(actual - expected)
     message = "{} != {}".format(to_string(expected), to_string(actual))
     self.assertEqual(difference.nnz, 0, message)
Example #2
0
 def test_sparse_matrix(self):
     """Check the matrix from make_sparse_matrix() entry by entry.

     The reference is a 2x4 int32 CSR matrix; the test passes when the
     absolute difference between the two matrices has nnz == 0.
     """
     source_rows = Rows(TestFeatureEncoder.rows)
     produced = FeatureEncoder(source_rows).make_sparse_matrix()

     # (data, (row indices, column indices)) form of the csr_matrix
     # constructor.
     data = (3, 2, 1, 3, 2, 1)
     coordinates = ((0, 0, 0, 1, 1, 1), (0, 1, 2, 0, 1, 3))
     reference = csr_matrix((data, coordinates), shape=(2, 4), dtype=int32)

     def one_line(matrix):
         # Flatten the dense repr so the failure message stays on one line.
         dense_text = str(matrix.toarray())
         return dense_text.replace('\n', ',')

     mismatches = abs(produced - reference).nnz
     failure_text = "{} != {}".format(one_line(reference),
                                      one_line(produced))
     self.assertEqual(mismatches, 0, failure_text)
Example #3
0
    def test_multirow_token_generator(self):
        """Check the tokens yielded by _multirow_token_generator.

        Columns 1 and 2 are declared as text columns and named "text1" and
        "text2".  For each of the first two rows the generator must yield,
        in order, one 'text<i>=<t>' token for every element t of column i.

        The generator output is collected in full before it is compared:
        indexing `expected` token by token would raise IndexError (instead
        of a readable assertion failure) if the generator yielded more
        tokens than expected.
        """
        rows = Rows(TestFeatureEncoder.rows)
        rows.names = ("id", "text1", "text2", "label")
        rows.text_columns = (1, 2)
        fe = FeatureEncoder(rows)

        for instance in range(2):
            row = rows.rows[instance]

            expected = []
            for i in range(1, 3):
                expected.extend('text{}={}'.format(i, t) for t in row[i])

            received = list(fe._multirow_token_generator(row))

            # A single list comparison covers order, content and length,
            # and reports a diff on failure.
            self.assertListEqual(expected, received)
Example #4
0
    def test_multirow_token_generator(self):
        """Check the tokens yielded by _multirow_token_generator.

        Columns 1 and 2 are declared as text columns and named "text1" and
        "text2".  For each of the first two rows the generator must yield,
        in order, one 'text<i>=<t>' token for every element t of column i.

        The generator output is collected in full before it is compared:
        indexing `expected` token by token would raise IndexError (instead
        of a readable assertion failure) if the generator yielded more
        tokens than expected.
        """
        rows = Rows(TestFeatureEncoder.rows)
        rows.names = ("id", "text1", "text2", "label")
        rows.text_columns = (1, 2)
        fe = FeatureEncoder(rows)

        for instance in range(2):
            row = rows.rows[instance]

            expected = []
            for i in range(1, 3):
                expected.extend('text{}={}'.format(i, t) for t in row[i])

            received = list(fe._multirow_token_generator(row))

            # A single list comparison covers order, content and length,
            # and reports a diff on failure.
            self.assertListEqual(expected, received)
Example #5
0
 def test_make_vocabulary(self):
     """After make_sparse_matrix(), vocabulary must map a..d to 0..3."""
     encoder = FeatureEncoder(Rows(TestFeatureEncoder.rows))
     # Only the vocabulary attribute is inspected; the returned matrix is
     # discarded.
     encoder.make_sparse_matrix()
     expected_vocabulary = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
     self.assertEqual(expected_vocabulary, encoder.vocabulary)
Example #6
0
 def test_make_vocabulary(self):
     """Verify the vocabulary attribute once the sparse matrix is built.

     Expected mapping: 'a' -> 0, 'b' -> 1, 'c' -> 2, 'd' -> 3.
     """
     source_rows = Rows(TestFeatureEncoder.rows)
     feature_encoder = FeatureEncoder(source_rows)
     feature_encoder.make_sparse_matrix()  # return value is not needed here

     wanted = {'a': 0, 'b': 1, 'c': 2, 'd': 3}
     self.assertEqual(wanted, feature_encoder.vocabulary)
Example #7
0
 def test_init(self):
     """Check the attributes of a FeatureEncoder built from rows alone.

     Right after construction, vocabulary is None, id_col is 0 and
     label_col is -1.
     """
     encoder = FeatureEncoder(Rows(TestFeatureEncoder.rows))

     no_vocabulary = None
     first_column, last_column = 0, -1
     self.assertEqual(no_vocabulary, encoder.vocabulary)
     self.assertEqual(first_column, encoder.id_col)
     self.assertEqual(last_column, encoder.label_col)