def test_replacement(self):
    """Random imputation fills every unknown and leaves the source table intact."""
    missing = np.nan
    raw = [
        [1.0, missing, 0.0],
        [2.0, 1.0, 3.0],
        [missing, missing, missing],
    ]
    unknowns = np.isnan(raw)
    attributes = (
        data.DiscreteVariable("A", values=["0", "1", "2"]),
        data.ContinuousVariable("B"),
        data.ContinuousVariable("C"),
    )
    domain = data.Domain(attributes)
    table = data.Table.from_numpy(domain, np.array(raw))

    # Per-variable imputation: build all three imputed variables first,
    # then evaluate each of them on the table, in attribute order.
    imputed_vars = [impute.Random()(table, domain[index]) for index in range(3)]
    for variable in imputed_vars:
        column = variable.compute_value(table)
        self.assertTrue(np.all(np.isfinite(column)))

    # Whole-table imputation through the preprocessor.
    random_imputer = preprocess.Impute(method=impute.Random())
    itable = random_imputer(table)
    self.assertTrue(np.all(np.isfinite(itable.X)))

    # Original data should keep unknowns
    self.assertTrue(np.all(unknowns == np.isnan(table.X)))
    # Values that were known before imputation must be carried over as-is.
    known = ~unknowns
    self.assertTrue(np.all(itable.X[known] == table.X[known]))
def createinstance(params):
    """Build the preprocessor selected by the ``"method"`` entry of ``params``.

    Parameters
    ----------
    params
        Anything accepted by ``dict()``. Its ``"method"`` entry selects the
        imputation strategy; when absent, ``ImputeEditor.Average`` is used.
        The caller's object is not modified.

    Returns
    -------
    ``None`` for ``ImputeEditor.NoImputation``; ``RemoveNaNRows()`` or
    ``preprocess.RemoveNaNColumns()`` for the two drop methods; otherwise a
    ``preprocess.Impute`` instance.

    Raises
    ------
    Whatever the lookup in ``ImputeEditor.Imputers`` raises for a method that
    is neither handled explicitly nor registered there.
    """
    # Work on a copy so that pop() does not mutate the caller's mapping.
    params = dict(params)
    method = params.pop("method", ImputeEditor.Average)

    if method == ImputeEditor.NoImputation:
        return None
    if method == ImputeEditor.Average:
        return preprocess.Impute()
    if method == ImputeEditor.Model:
        return preprocess.Impute(method=preprocess.impute.Model())
    if method == ImputeEditor.DropRows:
        return RemoveNaNRows()
    if method == ImputeEditor.DropColumns:
        return preprocess.RemoveNaNColumns()

    # Every other method comes from the registry of (imputer, defaults) pairs.
    # NOTE(review): the previous code merged the registered defaults with the
    # remaining ``params`` but never used the result, so those values have
    # never reached the imputer. The dead merge is dropped here; confirm
    # whether they were meant to be forwarded.
    imputer, _unused_defaults = ImputeEditor.Imputers[method]
    return preprocess.Impute(method=imputer)
def test_replacement(self):
    """Default-value imputation substitutes the configured constant for unknowns."""
    missing = np.nan
    raw = [
        [1.0, missing, 0.0],
        [2.0, 1.0, 3.0],
        [missing, missing, missing],
    ]
    table = Table.from_numpy(None, np.array(raw))

    # Single column: the unknown in the first column is replaced by 0.0.
    first_column = impute.Default(0.0)(table, 0)
    self.assertTrue(np.all(np.isfinite(first_column.compute_value(table))))
    elementwise_match = first_column.compute_value(table) == [1.0, 2.0, 0.0]
    self.assertTrue(all(elementwise_match))

    # Whole table: every unknown becomes 42, known values are unchanged.
    fill_with_42 = impute.Default(42)
    imputer = preprocess.Impute(method=fill_with_42)
    idata = imputer(table)
    expected = [
        [1.0, 42., 0.0],
        [2.0, 1.0, 3.0],
        [42., 42., 42.],
    ]
    np.testing.assert_allclose(idata.X, expected)
def test_replacement(self):
    """Model-based imputation fills unknowns and keeps the source table intact."""
    from Orange.classification import MajorityLearner, SimpleTreeLearner
    from Orange.regression import MeanLearner

    missing = np.nan
    raw = [
        [1.0, missing, 0.0],
        [2.0, 1.0, 3.0],
        [missing, missing, missing],
    ]
    unknowns = np.isnan(raw)
    attributes = (
        data.DiscreteVariable("A", values=["0", "1", "2"]),
        data.ContinuousVariable("B"),
        data.ContinuousVariable("C"),
    )
    domain = data.Domain(attributes)
    table = data.Table.from_numpy(domain, np.array(raw))

    # Discrete column "A" imputed with a majority model.
    majority_var = impute.Model(MajorityLearner())(table, domain[0])
    self.assertTrue(np.all(np.isfinite(majority_var.compute_value(table))))
    # Two outcomes are accepted; they differ only in the value chosen for
    # the last row, whose entries are all unknown.
    accepted = ([1., 2., 1.], [1., 2., 2.])
    self.assertTrue(
        any(np.all(majority_var.compute_value(table) == candidate)
            for candidate in accepted))

    # Continuous column "B" imputed with a mean model.
    mean_var = impute.Model(MeanLearner())(table, domain[1])
    self.assertTrue(np.all(np.isfinite(mean_var.compute_value(table))))
    self.assertTrue(np.all(mean_var.compute_value(table) == [1., 1., 1.]))

    # Whole-table imputation with a simple tree learner.
    tree_imputer = preprocess.Impute(impute.Model(SimpleTreeLearner()))
    itable = tree_imputer(table)

    # Original data should keep unknowns
    self.assertTrue(np.all(np.isnan(table.X) == unknowns))
    known = ~unknowns
    self.assertTrue(np.all(itable.X[known] == table.X[known]))

    # The imputed "A" variable must work on a whole table ...
    Aimp = itable.domain["A"].compute_value
    self.assertIsInstance(Aimp, impute.ReplaceUnknownsModel)
    whole_column = Aimp(table)
    self.assertEqual(whole_column.shape, (len(table),))
    self.assertTrue(np.all(np.isfinite(whole_column)))

    # ... and on a single row taken from it.
    single_value = Aimp(table[-1])
    self.assertEqual(single_value.shape, (1,))
    self.assertTrue(np.all(np.isfinite(single_value)))
def test_replacement(self):
    """Model-based imputation on a table that also carries a class variable."""
    missing = np.nan
    raw = [
        [1.0, missing, 0.0],
        [2.0, 1.0, 3.0],
        [missing, missing, missing],
    ]
    unknowns = np.isnan(raw)
    attributes = (
        data.DiscreteVariable("A", values=("0", "1", "2")),
        data.ContinuousVariable("B"),
        data.ContinuousVariable("C"),
    )
    # the class is here to ensure the backmapper in model does not
    # run and raise exception
    class_var = data.DiscreteVariable("Z", values=("P", "M"))
    domain = data.Domain(attributes, class_var)
    table = data.Table.from_numpy(domain, np.array(raw), [0] * 3)

    # Discrete column "A" imputed with a majority model.
    majority_var = impute.Model(MajorityLearner())(table, domain[0])
    self.assertTrue(np.all(np.isfinite(majority_var.compute_value(table))))
    # Two outcomes are accepted; they differ only in the value chosen for
    # the last row, whose entries are all unknown.
    accepted = ([1., 2., 1.], [1., 2., 2.])
    self.assertTrue(
        any(np.all(majority_var.compute_value(table) == candidate)
            for candidate in accepted))

    # Continuous column "B" imputed with a mean model.
    mean_var = impute.Model(MeanLearner())(table, domain[1])
    self.assertTrue(np.all(np.isfinite(mean_var.compute_value(table))))
    self.assertTrue(np.all(mean_var.compute_value(table) == [1., 1., 1.]))

    # Whole-table imputation with a simple tree learner.
    tree_imputer = preprocess.Impute(impute.Model(SimpleTreeLearner()))
    itable = tree_imputer(table)

    # Original data should keep unknowns
    self.assertTrue(np.all(np.isnan(table.X) == unknowns))
    known = ~unknowns
    self.assertTrue(np.all(itable.X[known] == table.X[known]))

    # The imputed "A" variable must work on a whole table ...
    Aimp = itable.domain["A"].compute_value
    self.assertIsInstance(Aimp, impute.ReplaceUnknownsModel)
    whole_column = Aimp(table)
    self.assertEqual(whole_column.shape, (len(table), ))
    self.assertTrue(np.all(np.isfinite(whole_column)))

    # ... and on a single row taken from it.
    single_value = Aimp(table[-1])
    self.assertEqual(single_value.shape, (1, ))
    self.assertTrue(np.all(np.isfinite(single_value)))
def test_imputer(self):
    """The default imputer leaves no NaN in X of the imports-85 data set."""
    source_path = test_filename('datasets/imports-85.tab')
    original = data.Table(source_path)
    default_imputer = preprocess.Impute()
    imputed = default_imputer(original)
    contains_nan = np.isnan(imputed.X).any()
    self.assertFalse(contains_nan)
def test_imputer(self):
    """The default imputer leaves no NaN in X of the auto-mpg data set."""
    original = data.Table('auto-mpg')
    default_imputer = preprocess.Impute()
    imputed = default_imputer(original)
    contains_nan = np.isnan(imputed.X).any()
    self.assertFalse(contains_nan)