Ejemplo n.º 1
0
    def test_replacement(self):
        """Check that random imputation fills unknowns and nothing else.

        Each column is imputed on its own and then the whole table at once.
        The source table must still hold its NaNs afterwards, and every
        value that was known must be carried over unchanged.
        """
        raw = [
            [1.0, np.nan, 0.0],
            [2.0, 1.0, 3.0],
            [np.nan, np.nan, np.nan],
        ]
        missing = np.isnan(raw)

        attributes = (
            data.DiscreteVariable("A", values=["0", "1", "2"]),
            data.ContinuousVariable("B"),
            data.ContinuousVariable("C"),
        )
        domain = data.Domain(attributes)
        table = data.Table.from_numpy(domain, np.array(raw))

        # Build all three per-column variables before evaluating any of them.
        imputed_vars = [impute.Random()(table, domain[i]) for i in range(3)]
        for var in imputed_vars:
            column = var.compute_value(table)
            self.assertTrue(np.all(np.isfinite(column)))

        imputed = preprocess.Impute(method=impute.Random())(table)
        self.assertTrue(np.all(np.isfinite(imputed.X)))

        # Original data should keep unknowns
        still_missing = np.isnan(table.X)
        self.assertTrue(np.all(missing == still_missing))
        # Entries that were known must be identical in the imputed table.
        self.assertTrue(np.all(imputed.X[~missing] == table.X[~missing]))
Ejemplo n.º 2
0
 def createinstance(params):
     """Build the preprocessor selected by ``params["method"]``.

     ``params`` is copied first, so the caller's mapping is not modified.
     When no "method" entry is present, ``ImputeEditor.Average`` is used.

     Returns ``None`` for ``ImputeEditor.NoImputation`` and a preprocessor
     instance otherwise. A method matching none of the explicit cases is
     looked up in ``ImputeEditor.Imputers``; an unregistered method raises
     whatever that lookup raises.
     """
     params = dict(params)
     method = params.pop("method", ImputeEditor.Average)

     if method == ImputeEditor.NoImputation:
         return None
     if method == ImputeEditor.Average:
         return preprocess.Impute()
     if method == ImputeEditor.Model:
         return preprocess.Impute(method=preprocess.impute.Model())
     if method == ImputeEditor.DropRows:
         return RemoveNaNRows()
     if method == ImputeEditor.DropColumns:
         return preprocess.RemoveNaNColumns()

     # NOTE(review): each registry entry also carries default options. The
     # previous code merged them with the remaining ``params`` into a dict
     # that was never used; that dead computation is dropped here. Neither
     # the defaults nor ``params`` are forwarded to the imputer -- confirm
     # whether they should be.
     imputer, _unused_defaults = ImputeEditor.Imputers[method]
     return preprocess.Impute(method=imputer)
Ejemplo n.º 3
0
    def test_replacement(self):
        """Check that ``impute.Default`` substitutes the given constant."""
        raw = np.array([
            [1.0, np.nan, 0.0],
            [2.0, 1.0, 3.0],
            [np.nan, np.nan, np.nan],
        ])
        table = Table.from_numpy(None, raw)

        # Single column: the unknown in column 0 is replaced by 0.0.
        var1 = impute.Default(0.0)(table, 0)
        self.assertTrue(np.all(np.isfinite(var1.compute_value(table))))
        first_column = var1.compute_value(table)
        self.assertTrue(all(first_column == [1.0, 2.0, 0.0]))

        # Whole table: every unknown is replaced by 42.
        idata = preprocess.Impute(method=impute.Default(42))(table)
        expected = [
            [1.0, 42., 0.0],
            [2.0, 1.0, 3.0],
            [42., 42., 42.],
        ]
        np.testing.assert_allclose(idata.X, expected)
Ejemplo n.º 4
0
    def test_replacement(self):
        """Check model-based imputation per column and on a whole table.

        Covers a majority model on the discrete column, a mean model on a
        continuous column, and a simple-tree model applied through
        ``preprocess.Impute``.
        """
        from Orange.classification import MajorityLearner, SimpleTreeLearner
        from Orange.regression import MeanLearner

        raw = [
            [1.0, np.nan, 0.0],
            [2.0, 1.0, 3.0],
            [np.nan, np.nan, np.nan],
        ]
        missing = np.isnan(raw)

        attributes = (
            data.DiscreteVariable("A", values=["0", "1", "2"]),
            data.ContinuousVariable("B"),
            data.ContinuousVariable("C"),
        )
        domain = data.Domain(attributes)
        table = data.Table.from_numpy(domain, np.array(raw))

        majority_var = impute.Model(MajorityLearner())(table, domain[0])
        self.assertTrue(
            np.all(np.isfinite(majority_var.compute_value(table))))
        # The missing entry may be filled with either observed value.
        self.assertTrue(
            np.all(majority_var.compute_value(table) == [1., 2., 1.])
            or np.all(majority_var.compute_value(table) == [1., 2., 2.]))

        mean_var = impute.Model(MeanLearner())(table, domain[1])
        self.assertTrue(np.all(np.isfinite(mean_var.compute_value(table))))
        self.assertTrue(np.all(mean_var.compute_value(table) == [1., 1., 1.]))

        tree_imputer = preprocess.Impute(impute.Model(SimpleTreeLearner()))
        imputed = tree_imputer(table)

        # Original data should keep unknowns
        self.assertTrue(np.all(np.isnan(table.X) == missing))
        self.assertTrue(np.all(imputed.X[~missing] == table.X[~missing]))

        a_imputer = imputed.domain["A"].compute_value
        self.assertIsInstance(a_imputer, impute.ReplaceUnknownsModel)

        # Applied to the whole table: one finite value per row.
        whole_column = a_imputer(table)
        self.assertEqual(whole_column.shape, (len(table),))
        self.assertTrue(np.all(np.isfinite(whole_column)))

        # Applied to a single row: a one-element finite result.
        single_value = a_imputer(table[-1])
        self.assertEqual(single_value.shape, (1,))
        self.assertTrue(np.all(np.isfinite(single_value)))
Ejemplo n.º 5
0
    def test_replacement(self):
        """Check model-based imputation on a table that has a class variable.

        Covers a majority model on the discrete column, a mean model on a
        continuous column, and a simple-tree model applied through
        ``preprocess.Impute``.
        """
        raw = [
            [1.0, np.nan, 0.0],
            [2.0, 1.0, 3.0],
            [np.nan, np.nan, np.nan],
        ]
        missing = np.isnan(raw)

        attributes = (
            data.DiscreteVariable("A", values=("0", "1", "2")),
            data.ContinuousVariable("B"),
            data.ContinuousVariable("C"),
        )
        # the class is here to ensure the backmapper in model does not
        # run and raise exception
        class_var = data.DiscreteVariable("Z", values=("P", "M"))
        domain = data.Domain(attributes, class_var)
        class_values = [0] * 3
        table = data.Table.from_numpy(domain, np.array(raw), class_values)

        majority_var = impute.Model(MajorityLearner())(table, domain[0])
        self.assertTrue(
            np.all(np.isfinite(majority_var.compute_value(table))))
        # The missing entry may be filled with either observed value.
        self.assertTrue(
            np.all(majority_var.compute_value(table) == [1., 2., 1.])
            or np.all(majority_var.compute_value(table) == [1., 2., 2.]))

        mean_var = impute.Model(MeanLearner())(table, domain[1])
        self.assertTrue(np.all(np.isfinite(mean_var.compute_value(table))))
        self.assertTrue(np.all(mean_var.compute_value(table) == [1., 1., 1.]))

        tree_imputer = preprocess.Impute(impute.Model(SimpleTreeLearner()))
        imputed = tree_imputer(table)

        # Original data should keep unknowns
        self.assertTrue(np.all(np.isnan(table.X) == missing))
        self.assertTrue(np.all(imputed.X[~missing] == table.X[~missing]))

        a_imputer = imputed.domain["A"].compute_value
        self.assertIsInstance(a_imputer, impute.ReplaceUnknownsModel)

        # Applied to the whole table: one finite value per row.
        whole_column = a_imputer(table)
        self.assertEqual(whole_column.shape, (len(table), ))
        self.assertTrue(np.all(np.isfinite(whole_column)))

        # Applied to a single row: a one-element finite result.
        single_value = a_imputer(table[-1])
        self.assertEqual(single_value.shape, (1, ))
        self.assertTrue(np.all(np.isfinite(single_value)))
Ejemplo n.º 6
0
 def test_imputer(self):
     """Default imputation must leave no NaN in the imports-85 attributes."""
     source = data.Table(test_filename('datasets/imports-85.tab'))
     imputed = preprocess.Impute()(source)
     nan_mask = np.isnan(imputed.X)
     self.assertFalse(nan_mask.any())
 def test_imputer(self):
     """Default imputation must leave no NaN in the auto-mpg attributes."""
     source = data.Table('auto-mpg')
     imputed = preprocess.Impute()(source)
     nan_mask = np.isnan(imputed.X)
     self.assertFalse(nan_mask.any())