Esempio n. 1
0
    def test_mcmc(self):
        """Train an AutoClean model and check the arrays it exposes.

        Builds a base dataset, trains with a fixed number of clusters and
        iterations, then verifies for each stored quantity that:

        * its leading axis has one entry per iteration,
        * the probability arrays sum to one along their last axis,
        * ``impute_rows`` returns one result per requested iteration, and
          the first result has the same shape as the rows passed in.
        """
        n_levels = 5
        n_iterations = 100
        n_clusters = 7

        self.create_base_dataset(
            sample_size=1000, num_numeric=3, num_cat=4, num_levels=n_levels
        )
        cleaner = R.AutoClean()
        cleaner.train_model(
            self._data, nclusters=n_clusters, niter=n_iterations
        )

        def assert_normalized(probs, axis, expected_shape):
            # Summing over `axis` must give an array of ones of the
            # expected shape.
            totals = probs.sum(axis=axis)
            self.assertTrue(np.allclose(totals, np.ones(expected_shape)))

        # Regression quantities: dimensions come from the fixture's beta.
        true_beta = self._beta
        n_predictors = true_beta.shape[0]
        n_responses = true_beta.shape[1]
        self.assertEqual(
            cleaner.coefficients.shape,
            (n_iterations, n_predictors, n_responses),
        )
        self.assertEqual(
            cleaner.residual_variance.shape,
            (n_iterations, n_responses, n_responses),
        )

        # Atom probabilities, keyed by numeric variable name.
        self.assertEqual(len(cleaner.atom_probs), n_responses)
        self.assertEqual(
            cleaner.atom_probs["X1"].shape, (n_iterations, n_clusters, 1)
        )
        assert_normalized(
            cleaner.atom_probs["X1"], 2, (n_iterations, n_clusters)
        )

        # Atom error probabilities carry one extra trailing axis of size 2.
        self.assertEqual(len(cleaner.atom_error_probs), n_responses)
        self.assertEqual(
            cleaner.atom_error_probs["X1"].shape,
            (n_iterations, n_clusters, 1, 2),
        )
        assert_normalized(
            cleaner.atom_error_probs["X1"], 3, (n_iterations, n_clusters, 1)
        )

        # Level probabilities, keyed by categorical variable name.
        self.assertEqual(len(cleaner.level_probs), self._ncat)
        self.assertEqual(
            cleaner.level_probs["cat1"].shape,
            (n_iterations, n_clusters, n_levels),
        )
        assert_normalized(
            cleaner.level_probs["cat1"], 2, (n_iterations, n_clusters)
        )

        # Observation probabilities have n_levels + 1 entries on the last axis.
        self.assertEqual(len(cleaner.level_observation_probs), self._ncat)
        self.assertEqual(
            cleaner.level_observation_probs["cat1"].shape,
            (n_iterations, n_clusters, n_levels, n_levels + 1),
        )
        assert_normalized(
            cleaner.level_observation_probs["cat1"],
            3,
            (n_iterations, n_clusters, n_levels),
        )

        # Imputation: a few rows, at a handful of iterations.
        rows_to_impute = self._data.iloc[[3, 9, 24], :]
        chosen_iterations = [50, 60, 70, 75, 90]

        imputed = cleaner.impute_rows(rows_to_impute, chosen_iterations)
        self.assertEqual(len(imputed), len(chosen_iterations))
        self.assertEqual(imputed[0].shape, rows_to_impute.shape)
Esempio n. 2
0
 def test_autoclean_construction(self):
     """Smoke test: build an AutoClean object and train it briefly.

     Makes no assertions; the test passes as long as construction and
     ``train_model`` (3 clusters, 10 iterations) complete without raising.
     NOTE(review): relies on ``self._data`` being populated elsewhere,
     presumably in ``setUp`` -- confirm.
     """
     cleaner = R.AutoClean()
     cleaner.train_model(self._data, niter=10, nclusters=3)