예제 #1
0
 def setUpClass(cls):
     """Fit the shared crabs model a single time for the whole class.

     Fitting is expensive, so it happens once here; the fitted model can
     be copied wherever a separate instance is needed.
     """
     cls.data = crabs.mixed()
     unfitted = MixableCondGaussianModel('crabs')
     cls.model = unfitted.fit(cls.data)
     cls.model.mode = 'model'
     # Columns of the crabs data:
     # 'species', 'sex', 'FL', 'RW', 'CL', 'CW', 'BD'
     cls.cols = [*cls.data.columns]
예제 #2
0
class TestParallelProcessing(unittest.TestCase):
    """Check that predict() gives equal frames with and without parallel processing."""

    def setUp(self):
        """Fit a fresh model on the mixed iris data with the 'map' fit algorithm."""
        self.data = test_iris.mixed()
        self.model = MixableCondGaussianModel("TestMod")
        self.model.fit(df=self.data, fit_algo="map")

    @staticmethod
    def _equiinterval_splits():
        """Build the 'equiinterval' splits (args=[10]) on both sepal columns."""
        # TODO: use Split instead of SplitTuple
        return [
            SplitTuple(name=column, method='equiinterval', args=[10])
            for column in ('sepal_width', 'sepal_length')
        ]

    def _predict_parallel_then_serial(self, pred, split):
        """Run the same query with parallel_processing on, then off.

        Returns the pair ``(parallel_result, serial_result)``. The flag is
        left set to ``False`` on the model afterwards.
        """
        results = []
        for use_parallel in (True, False):
            self.model.parallel_processing = use_parallel
            results.append(self.model.predict(predict=pred, splitby=split))
        parallel_result, serial_result = results
        return parallel_result, serial_result

    def test_prob(self):
        """A 'probability' aggregation must not depend on the processing mode."""
        probability = Aggregation(
            ['sepal_length', 'sepal_width'],
            method='probability',
            yields=None,
            args=None,
        )
        pred = ['sepal_width', 'sepal_length', probability]
        split = self._equiinterval_splits()

        df_parallel, df_serial = self._predict_parallel_then_serial(pred, split)
        self.assertTrue(df_parallel.equals(df_serial))

    def test_maximum(self):
        """A 'maximum' aggregation must not depend on the processing mode."""
        # TODO: use Aggregation instead of AggregationTuple
        maximum = AggregationTuple(
            name=['species'],
            method='maximum',
            yields='species',
            args=[],
        )
        pred = ['sepal_width', 'sepal_length', maximum]
        split = self._equiinterval_splits()

        df_parallel, df_serial = self._predict_parallel_then_serial(pred, split)
        self.assertTrue(df_parallel.equals(df_serial))
예제 #3
0
    def test_samplequality(self):
        """Refitting on 500 samples must recover ``_mu`` better than on 250.

        For each sample size, samples are drawn from the fitted model, a
        new model is fitted on them, and the summed squared difference
        between the flattened ``_mu`` of both models is taken as the error.
        """

        def squared_mu_error(sample_count, model_name):
            # Draw from the reference model and fit a new model on the draws.
            drawn = self.model._sample(sample_count)
            frame = pd.DataFrame(data=drawn, columns=self.model.names)
            refit = MixCondGauss(model_name)
            refit.fit(df=frame)
            difference = (np.array(refit._mu).ravel() -
                          np.array(self.model._mu).ravel())
            return (difference ** 2).sum()

        error_250 = squared_mu_error(250, "Allbus_test1")
        error_500 = squared_mu_error(500, "Allbus_test2")

        self.assertTrue(error_500 < error_250)
예제 #4
0
class TestMethods(unittest.TestCase):
    """Tests of naming, sampling and marginalization on a fitted MixCondGauss."""

    def setUp(self):
        """Fit a fresh "TestMod" model on the mixed data before every test."""
        self.data = ta.mixed()
        self.model = MixCondGauss("TestMod")
        self.model.fit(df=self.data)

    def _squared_mu_error(self, sample_count, model_name):
        """Return the summed squared ``_mu`` difference of a refit model.

        Draws ``sample_count`` samples from ``self.model``, fits a new
        MixCondGauss called ``model_name`` on them and compares the
        flattened ``_mu`` of the two models.
        """
        drawn = self.model._sample(sample_count)
        frame = pd.DataFrame(data=drawn, columns=self.model.names)
        refit = MixCondGauss(model_name)
        refit.fit(df=frame)
        difference = (np.array(refit._mu).ravel() -
                      np.array(self.model._mu).ravel())
        return (difference ** 2).sum()

    def test_basics(self):
        """The model keeps its name and returns the requested sample count."""
        self.assertEqual(self.model.name, 'TestMod')
        drawn = self.model._sample(5)
        self.assertEqual(len(drawn), 5)

    def test_samplequality(self):
        """Refitting on 500 samples must recover ``_mu`` better than on 250."""
        error_250 = self._squared_mu_error(250, "Allbus_test1")
        error_500 = self._squared_mu_error(500, "Allbus_test2")

        self.assertTrue(error_500 < error_250)

    def test_onlycats(self):
        """Sampling a model marginalized to 'sex' yields a ``str`` value."""
        sex_only = self.model.copy().marginalize(keep=['sex'])
        first_value = sex_only._sample(1)[0][0]
        self.assertEqual(type(first_value), str)

    def test_onlynumericals(self):
        """Sampling a model marginalized to 'age' yields a ``float`` value."""
        age_only = self.model.copy().marginalize(keep=['age'])
        first_value = age_only._sample(1)[0][0]
        self.assertEqual(type(first_value), float)
예제 #5
0
 def setUp(self):
     """Fit a fresh "TestMod" model on the mixed iris data with the 'map' algorithm."""
     self.data = test_iris.mixed()
     fresh_model = MixableCondGaussianModel("TestMod")
     self.model = fresh_model
     fresh_model.fit(df=self.data, fit_algo="map")
예제 #6
0
 def setUp(self):
     """Fit a fresh "TestMod" MixCondGauss model on the mixed data."""
     self.data = ta.mixed()
     fresh_model = MixCondGauss("TestMod")
     self.model = fresh_model
     fresh_model.fit(df=self.data)
예제 #7
0
    # NOTE(review): `model`, `data` and the bare names FL, RW, sex and species
    # are defined outside this excerpt — presumably field handles of the
    # fitted model; confirm against the enclosing function.
    # Each query below calls model.predict() and prints the result.

    # 'sex', 'RW' and the 'maximum' aggregation over FL, split by sex and by
    # RW (method 'equidist').
    res = model.predict(['sex', 'RW', Aggregation(FL, method='maximum', yields='FL')], splitby=[Split(sex), Split(RW, method='equidist')])
    print(res)

    # 'species' with Density(species), split by species.
    res = model.predict(['species', Density(species)], splitby=Split(species))
    print(res)

    # 'FL' with Density(FL), split by FL (method 'equidist').
    res = model.predict(['FL', Density(FL)], splitby=Split(FL, method='equidist'))
    print(res)

    # The 'maximum' aggregation over FL alone, with no splitby argument.
    res = model.predict([Aggregation(FL, method='maximum', yields='FL')])
    print(res)

    # The same aggregation split by sex, without 'sex' in the predict list.
    res = model.predict([Aggregation(FL, method='maximum', yields='FL')], splitby=[Split(sex)])
    print(res)

    # As above, but with 'sex' also included in the predict list.
    res = model.predict(['sex', Aggregation(FL, method='maximum', yields='FL')], splitby=[Split(sex)])
    print(res)

    # 'FL' with Probability(FL), split by FL (method 'equiinterval').
    res = model.predict(['FL', Probability(FL)], splitby=Split(FL, method='equiinterval'))
    print(res)

    # 'sex' with Density(sex), split by sex.
    res = model.predict(['sex', Density(sex)], splitby=Split(sex))
    print(res)

    # Density(sex) and the 'maximum' aggregation over FL in one query,
    # split by sex.
    res = model.predict(['sex', Density(sex), Aggregation(FL, method='maximum', yields='FL')], splitby=[Split(sex)])
    print(res)

    # Rebind `model` to a CondGaussian named "my_model" fitted on `data`
    # (assumes fit() returns the model — TODO confirm), then repeat the
    # sex/Density query on it.
    model = CondGaussian("my_model").fit(df=data)
    res = model.predict(['sex', Density(sex)], splitby=Split(sex))
    print(res)