def setUpClass(cls):
    """Fit the crabs model a single time and keep it on the class.

    Fitting is expensive, so it is done once here; the fitted model can be
    copied wherever a separate instance is needed.
    """
    cls.data = crabs.mixed()

    fitted = MixableCondGaussianModel('crabs').fit(cls.data)
    cls.model = fitted
    fitted.mode = 'model'

    # Columns of the crabs data: 'species', 'sex', 'FL', 'RW', 'CL', 'CW', 'BD'
    cls.cols = list(cls.data.columns)
class TestParallelProcessing(unittest.TestCase):
    """Compare predictions made with and without ``parallel_processing``."""

    def setUp(self):
        """Load ``test_iris.mixed()`` and fit a model with ``fit_algo="map"``."""
        self.data = test_iris.mixed()
        self.model = MixableCondGaussianModel("TestMod")
        self.model.fit(df=self.data, fit_algo="map")

    @staticmethod
    def _sepal_splits():
        """Return equiinterval splits (args ``[10]``) on sepal width and length."""
        # TODO: use Split instead of SplitTuple
        return [
            SplitTuple(name='sepal_width', method='equiinterval', args=[10]),
            SplitTuple(name='sepal_length', method='equiinterval', args=[10]),
        ]

    def _predict(self, parallel, pred, split):
        """Set ``parallel_processing`` on the model, then run ``predict``."""
        self.model.parallel_processing = parallel
        return self.model.predict(predict=pred, splitby=split)

    def _assert_parallel_equals_serial(self, pred, split):
        """Predict in parallel first, then serially, and require equal frames."""
        df_parallel = self._predict(True, pred, split)
        df_serial = self._predict(False, pred, split)
        self.assertTrue(df_parallel.equals(df_serial))

    def test_prob(self):
        """A 'probability' aggregation gives the same result in both modes."""
        pred = [
            'sepal_width',
            'sepal_length',
            Aggregation(['sepal_length', 'sepal_width'], method='probability',
                        yields=None, args=None),
        ]
        split = self._sepal_splits()
        self._assert_parallel_equals_serial(pred, split)

    def test_maximum(self):
        """A 'maximum' aggregation gives the same result in both modes."""
        # TODO: use Aggregation instead of AggregationTuple
        pred = [
            'sepal_width',
            'sepal_length',
            AggregationTuple(name=['species'], method='maximum',
                             yields='species', args=[]),
        ]
        split = self._sepal_splits()
        self._assert_parallel_equals_serial(pred, split)
def test_samplequality(self):
    """Refit on 250 and on 500 samples; the larger refit must be closer.

    "Closer" means a smaller sum of squared differences between the refitted
    model's ``_mu`` and ``self.model._mu``.
    """

    def refit_error(n_samples, model_name):
        # Draw from the fitted model, fit a new model on the draw, and
        # measure how far its _mu is from the original one.
        drawn = self.model._sample(n_samples)
        frame = pd.DataFrame(data=drawn, columns=self.model.names)
        refit = MixCondGauss(model_name)
        refit.fit(df=frame)
        deviation = (np.array(refit._mu).ravel()
                     - np.array(self.model._mu).ravel())
        return (deviation ** 2).sum()

    error_250 = refit_error(250, "Allbus_test1")
    error_500 = refit_error(500, "Allbus_test2")
    self.assertTrue(error_500 < error_250)
class TestMethods(unittest.TestCase):
    """Tests for naming, sampling and marginalising a fitted ``MixCondGauss``."""

    def setUp(self):
        """Load ``ta.mixed()`` and fit a model named "TestMod" on it."""
        self.data = ta.mixed()
        self.model = MixCondGauss("TestMod")
        self.model.fit(df=self.data)

    def _refit_error(self, n_samples, model_name):
        """Sample from the model, refit on the sample, return the ``_mu`` error.

        The error is the sum of squared differences between the flattened
        ``_mu`` of the refitted model and that of ``self.model``.
        """
        drawn = self.model._sample(n_samples)
        frame = pd.DataFrame(data=drawn, columns=self.model.names)
        refit = MixCondGauss(model_name)
        refit.fit(df=frame)
        deviation = (np.array(refit._mu).ravel()
                     - np.array(self.model._mu).ravel())
        return (deviation ** 2).sum()

    def test_basics(self):
        """The model keeps its name and ``_sample(5)`` has length 5."""
        self.assertEqual(self.model.name, 'TestMod')
        self.assertEqual(len(self.model._sample(5)), 5)

    def test_samplequality(self):
        """A refit on 500 samples has a smaller ``_mu`` error than one on 250."""
        error_250 = self._refit_error(250, "Allbus_test1")
        error_500 = self._refit_error(500, "Allbus_test2")
        self.assertTrue(error_500 < error_250)

    def test_onlycats(self):
        """Keeping only 'sex', the first sampled value has type ``str``."""
        margmod = self.model.copy().marginalize(keep=['sex'])
        first_value = margmod._sample(1)[0][0]
        self.assertEqual(type(first_value), str)

    def test_onlynumericals(self):
        """Keeping only 'age', the first sampled value has type ``float``."""
        margmod = self.model.copy().marginalize(keep=['age'])
        first_value = margmod._sample(1)[0][0]
        self.assertEqual(type(first_value), float)
def setUp(self):
    """Load ``test_iris.mixed()`` and fit a "TestMod" model with ``fit_algo="map"``."""
    iris_frame = test_iris.mixed()
    self.data = iris_frame

    self.model = MixableCondGaussianModel("TestMod")
    self.model.fit(df=iris_frame, fit_algo="map")
def setUp(self):
    """Load ``ta.mixed()`` and fit a ``MixCondGauss`` named "TestMod" on it."""
    frame = ta.mixed()
    self.data = frame

    self.model = MixCondGauss("TestMod")
    self.model.fit(df=frame)
# Run a series of example predict() queries against the current model and
# print each result.

# Maximum of FL, split by sex and by equidistant RW values.
res = model.predict(
    ['sex', 'RW', Aggregation(FL, method='maximum', yields='FL')],
    splitby=[Split(sex), Split(RW, method='equidist')],
)
print(res)

# Density over species.
res = model.predict(['species', Density(species)], splitby=Split(species))
print(res)

# Density over equidistant FL values.
res = model.predict(
    ['FL', Density(FL)],
    splitby=Split(FL, method='equidist'),
)
print(res)

# Maximum of FL without any split.
res = model.predict([Aggregation(FL, method='maximum', yields='FL')])
print(res)

# Maximum of FL split by sex, without returning the sex column itself.
res = model.predict(
    [Aggregation(FL, method='maximum', yields='FL')],
    splitby=[Split(sex)],
)
print(res)

# Same query, this time also returning the sex column.
res = model.predict(
    ['sex', Aggregation(FL, method='maximum', yields='FL')],
    splitby=[Split(sex)],
)
print(res)

# Probability over equiinterval FL splits.
res = model.predict(
    ['FL', Probability(FL)],
    splitby=Split(FL, method='equiinterval'),
)
print(res)

# Density over sex.
res = model.predict(['sex', Density(sex)], splitby=Split(sex))
print(res)

# Density over sex combined with the maximum of FL.
res = model.predict(
    ['sex', Density(sex), Aggregation(FL, method='maximum', yields='FL')],
    splitby=[Split(sex)],
)
print(res)

# Rebind `model` to a CondGaussian fitted on `data` and repeat the sex density.
model = CondGaussian("my_model").fit(df=data)
res = model.predict(['sex', Density(sex)], splitby=Split(sex))
print(res)