Beispiel #1
0
    def test_stats_sparse(self):
        """Check stats() on a sparse 5x5 identity matrix and on its first three rows.

        Each expected row holds the six values stats() reports for one column.
        """
        identity = csr_matrix(np.eye(5))
        diagonal_column = [0, 1, 0.2, 0, 4, 1]
        np.testing.assert_equal(stats(identity), [diagonal_column] * 5)

        # Keep only the first three rows, so that the last two columns
        # consist of zero elements only.
        top_rows = identity[:3]
        hit_column = [0, 1, 1 / 3, 0, 2, 1]
        all_zero_column = [0, 0, 0, 0, 3, 0]
        np.testing.assert_equal(
            stats(top_rows),
            [hit_column] * 3 + [all_zero_column] * 2,
        )
Beispiel #2
0
    def test_stats_weights(self):
        """Check weighted stats() on a float table and on an object table.

        For the object-dtype table the test asserts that passing weights
        gives the same result as calling stats() without them.
        """
        numeric = np.array([[0.0, 1.0], [2.0, 3.0]])
        weights = np.array([1, 3])
        expected = [
            [0, 2, 1.5, 0, 0, 2],
            [1, 3, 2.5, 0, 0, 2],
        ]
        np.testing.assert_equal(stats(numeric, weights), expected)

        as_objects = np.array([[0, 1], [2, 3]], dtype=object)
        unweighted = stats(as_objects)
        np.testing.assert_equal(stats(as_objects, weights), unweighted)
Beispiel #3
0
 def test_stats_sparse(self):
     """
     Stats should not fail when trying to calculate mean on sparse data.
     GH-2357

     The dense and sparse results are compared element-wise. Comparing
     ``a.all() == b.all()`` would only compare two booleans and pass for
     almost any pair of results.
     """
     # Local import so this method does not depend on a module-level alias.
     import numpy as np

     data = Table("iris")
     sparse_x = sp.csr_matrix(data.X)
     # NOTE(review): relies on iris having no zero feature values, because
     # sparse input may report implicit zeros differently - confirm.
     # assert_allclose tolerates rounding differences between the dense and
     # sparse mean computations.
     np.testing.assert_allclose(stats(sparse_x), stats(data.X))
Beispiel #4
0
 def test_stats_sparse(self):
     """
     Stats should not fail when trying to calculate mean on sparse data.
     GH-2357

     The dense and sparse results are compared element-wise. Comparing
     ``a.all() == b.all()`` would only compare two booleans and pass for
     almost any pair of results.
     """
     # Local import so this method does not depend on a module-level alias.
     import numpy as np

     data = Table("iris")
     sparse_x = sp.csr_matrix(data.X)
     # NOTE(review): relies on iris having no zero feature values, because
     # sparse input may report implicit zeros differently - confirm.
     # assert_allclose tolerates rounding differences between the dense and
     # sparse mean computations.
     np.testing.assert_allclose(stats(sparse_x), stats(data.X))
Beispiel #5
0
    def test_stats_weights(self):
        """Check stats() with row weights on a float table and an object table.

        The object-dtype case asserts that the weighted and unweighted
        results are equal.
        """
        row_weights = np.array([1, 3])

        float_table = np.array([[0.0, 1.0], [2.0, 3.0]])
        weighted = stats(float_table, row_weights)
        np.testing.assert_equal(
            weighted,
            [
                [0, 2, 1.5, 0, 0, 2],
                [1, 3, 2.5, 0, 0, 2],
            ],
        )

        object_table = np.array([[0, 1], [2, 3]], dtype=object)
        np.testing.assert_equal(
            stats(object_table, row_weights), stats(object_table)
        )
Beispiel #6
0
 def test_stats(self):
     """Check stats() on a 2x2 float table with one NaN, then on an empty table."""
     table = np.array([[0.0, 1.0], [2.0, np.nan]])
     np.testing.assert_equal(
         stats(table),
         [
             [0, 2, 1, 0, 0, 2],
             [1, 1, 1, 0, 1, 1],
         ],
     )

     # A table with no rows should give results ~like metas.
     no_rows = table[:0]
     empty_column = [np.inf, -np.inf, 0, 0, 0, 0]
     np.testing.assert_equal(stats(no_rows), [empty_column, empty_column])
Beispiel #7
0
 def test_stats(self):
     """Check stats() for a float table containing a NaN and for zero rows."""
     values = np.array([[0.0, 1.0], [2.0, np.nan]])
     with_nan_expected = [[0, 2, 1, 0, 0, 2], [1, 1, 1, 0, 1, 1]]
     np.testing.assert_equal(stats(values), with_nan_expected)

     # An empty table should return ~like metas.
     emptied = values[:0]
     zero_rows_expected = [[np.inf, -np.inf, 0, 0, 0, 0] for _ in range(2)]
     np.testing.assert_equal(stats(emptied), zero_rows_expected)
Beispiel #8
0
    def test_stats_sparse(self):
        """Check stats() on a sparse identity matrix, whole and truncated to 3 rows."""
        sparse_eye = csr_matrix(np.eye(5))
        expected_full = [[0, 1, .2, 0, 4, 1] for _ in range(5)]
        np.testing.assert_equal(stats(sparse_eye), expected_full)

        # Dropping the last two rows leaves the last two columns with
        # just zero elements.
        truncated = sparse_eye[:3]
        expected_truncated = (
            [[0, 1, 1/3, 0, 2, 1] for _ in range(3)]
            + [[0, 0, 0, 0, 3, 0] for _ in range(2)]
        )
        np.testing.assert_equal(stats(truncated), expected_truncated)
 def test_stats_sparse(self):
     """Check stats() on a sparse 5x5 identity matrix."""
     sparse_eye = csr_matrix(np.eye(5))
     column_stats = [0, 1, .2, 0, 4, 1]
     np.testing.assert_equal(stats(sparse_eye), [column_stats] * 5)
Beispiel #10
0
 def test_coef_table_multiple(self):
     """Check the coefficient table built from a model fitted on "zoo".

     Asserts that stats() over the table's metas has length 1, that the
     table has one row per domain attribute plus one, and that the first
     row has one entry per class value.
     """
     data = Table("zoo")
     model = LogisticRegressionLearner()(data)
     coefficients = create_coef_table(model)

     meta_stats = stats(coefficients.metas, None)
     self.assertEqual(1, len(meta_stats))

     domain = model.domain
     self.assertEqual(len(coefficients), len(domain.attributes) + 1)
     self.assertEqual(len(coefficients[0]), len(domain.class_var.values))
 def test_coef_table_single(self):
     """Check the coefficient table built from a model fitted on "titanic".

     Asserts that stats() over the table's metas has length 1, that the
     table has one row per domain attribute plus one, and that the first
     row has exactly one entry.
     """
     data = Table("titanic")
     model = LogisticRegressionLearner()(data)
     coefficients = create_coef_table(model)

     meta_stats = stats(coefficients.metas, None)
     self.assertEqual(1, len(meta_stats))

     expected_rows = len(model.domain.attributes) + 1
     self.assertEqual(len(coefficients), expected_rows)
     self.assertEqual(len(coefficients[0]), 1)
Beispiel #12
0
 def test_coef_table_single(self):
     """Check the shape of the coefficient table for the "titanic" model.

     The table is expected to have len(attributes) + 1 rows and a single
     entry in its first row; stats() over its metas must have length 1.
     """
     titanic = Table("titanic")
     learner = LogisticRegressionLearner()
     fitted = learner(titanic)
     table = create_coef_table(fitted)

     self.assertEqual(1, len(stats(table.metas, None)))

     attribute_count = len(fitted.domain.attributes)
     self.assertEqual(len(table), attribute_count + 1)

     first_row = table[0]
     self.assertEqual(len(first_row), 1)
Beispiel #13
0
 def test_stats_non_numeric(self):
     """Check stats() on an object table of strings with one "" per column."""
     strings = np.array(
         [
             ['', 'a', 'b'],
             ['a', '', 'b'],
             ['a', 'b', ''],
         ],
         dtype=object,
     )
     per_column = [np.inf, -np.inf, 0, 0, 1, 2]
     np.testing.assert_equal(stats(strings), [per_column] * 3)
 def test_coef_table_multiple(self):
     """Check the shape of the coefficient table for the "zoo" model.

     The table is expected to have len(attributes) + 1 rows and one entry
     per class value in its first row; stats() over its metas must have
     length 1.
     """
     zoo = Table("zoo")
     learner = LogisticRegressionLearner()
     fitted = learner(zoo)
     table = create_coef_table(fitted)

     self.assertEqual(1, len(stats(table.metas, None)))

     attribute_count = len(fitted.domain.attributes)
     self.assertEqual(len(table), attribute_count + 1)

     class_values = fitted.domain.class_var.values
     self.assertEqual(len(table[0]), len(class_values))
Beispiel #15
0
 def test_stats_non_numeric(self):
     """Check stats() on a 3x3 object table where "" sits on the diagonal."""
     rows = [
         ['', 'a', 'b'],
         ['a', '', 'b'],
         ['a', 'b', ''],
     ]
     table = np.array(rows, dtype=object)
     expected = [[np.inf, -np.inf, 0, 0, 1, 2] for _ in range(3)]
     np.testing.assert_equal(stats(table), expected)
Beispiel #16
0
 def test_stats_non_numeric(self):
     """Check stats() on an object table mixing strings, NaN and integers.

     In the expected values, "" and NaN are counted in the fifth entry of
     a column's stats, while 0 is counted in the sixth.
     """
     mixed = np.array(
         [
             ["", "a", np.nan, 0],
             ["a", "", np.nan, 1],
             ["a", "b", 0, 0],
         ],
         dtype=object,
     )
     # Last two entries per column: (fifth, sixth) as expected by the test.
     tail_counts = [(1, 2), (1, 2), (2, 1), (0, 3)]
     expected = [
         [np.inf, -np.inf, 0, 0, fifth, sixth] for fifth, sixth in tail_counts
     ]
     np.testing.assert_equal(stats(mixed), expected)
Beispiel #17
0
 def test_stats_non_numeric(self):
     """Check stats() on an object table holding strings, NaN and integers.

     The expectation counts "" and NaN in the fifth entry of each column's
     stats and every other value, including 0, in the sixth.
     """
     rows = [
         ["", "a", np.nan, 0],
         ["a", "", np.nan, 1],
         ["a", "b", 0, 0],
     ]
     table = np.array(rows, dtype=object)

     head = [np.inf, -np.inf, 0, 0]
     expected = [
         head + [1, 2],
         head + [1, 2],
         head + [2, 1],
         head + [0, 3],
     ]
     np.testing.assert_equal(stats(table), expected)
Beispiel #18
0
    def test_stats_long_string_mem_use(self):
        """Check that one very long string does not make stats() much slower.

        stats() is timed on a 1000x1000 object array of one-character
        strings, then again after one cell is replaced by a 2000-character
        string. The second run must take less than twice the first plus a
        0.1 s grace period.
        """
        X = np.full((1000, 1000), "a", dtype=object)
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump with system clock adjustments and may be too coarse for this.
        t = time.perf_counter()
        stats(X)
        t_a = time.perf_counter() - t  # time for an array with constant-len strings

        # Add one very long string
        X[0, 0] = "a" * 2000

        # The implementation of stats() in Orange 3.30.2 used .astype("str")
        # internally. X.astype("str") would take ~1000x the memory as X,
        # because its type would be "<U2000" (the length of the longest string).
        # That is about 7.5 GiB of memory on a 64-bit Linux system

        # Because memory use is hard to measure directly, we measure time
        # instead, as memory allocation of such big tables takes time. On
        # Marko's Linux system .astype("str") took ~3 seconds.
        t = time.perf_counter()
        stats(X)
        t_b = time.perf_counter() - t
        self.assertLess(t_b, 2 * t_a + 0.1)  # some grace period
Beispiel #19
0
 def test_stats_weights_sparse(self):
     """Check weighted stats() on a sparse 2x2 table."""
     dense = np.array([[0.0, 1.0], [2.0, 3.0]])
     sparse_table = csr_matrix(dense)
     row_weights = np.array([1, 3])
     expected = [
         [0, 2, 1.5, 0, 1, 1],
         [1, 3, 2.5, 0, 0, 2],
     ]
     np.testing.assert_equal(stats(sparse_table, row_weights), expected)
Beispiel #20
0
 def test_stats(self):
     """Check stats() on a 2x2 float table whose last cell is NaN."""
     table = np.array([[0.0, 1.0], [2.0, np.nan]])
     expected = [
         [0, 2, 1, 0, 0, 2],
         [1, 1, 1, 0, 1, 1],
     ]
     np.testing.assert_equal(stats(table), expected)
Beispiel #21
0
 def test_stats_weights_sparse(self):
     """Check stats() with row weights when the table is a CSR matrix."""
     weights = np.array([1, 3])
     table = csr_matrix(np.array([[0.0, 1.0], [2.0, 3.0]]))
     weighted = stats(table, weights)
     np.testing.assert_equal(
         weighted,
         [[0, 2, 1.5, 0, 1, 1], [1, 3, 2.5, 0, 0, 2]],
     )