예제 #1
0
 def test_maximum_for_categorical_model(self):
     """Check the maximum of a model fitted on two categorical columns.

     In the fixture 'foo' is the most frequent value of column 'B'
     (4 of 7 rows) and 'hey' is the most frequent value of column 'C'
     (6 of 7 rows); the test expects ``_maximum()`` to equal
     ``['foo', 'hey']``.
     """
     data = pd.DataFrame({'B': np.array(['foo', 'bar', 'foo', 'foo', 'bar', 'foo', 'bar']),
                          'C': np.array(['hey', 'hey', 'hey', 'hey', 'hey', 'hey', 'ho'])},
                         columns=['B', 'C'])
     kde_model = KDEModel('kde_model')
     kde_model.fit(data)
     # assertEqual instead of assertTrue(a == b): same pass/fail outcome, but
     # a failure report shows the value that was actually returned.
     self.assertEqual(kde_model._maximum(), ['foo', 'hey'], 'maximum was not correctly calculated')
예제 #2
0
 def test_mixed_categorical_numerical_model(self):
     """Fit a model on one numerical and one categorical column and check a density.

     The density evaluated at ``[1, 'foo']`` must match 0.1 when compared
     with two decimal places.
     """
     numeric_a = np.array([1, 2, 3, 2, 3, 4, 5])
     categorical_b = np.array(['foo', 'bar', 'foo', 'foo', 'bar', 'foo', 'bar'])
     frame = pd.DataFrame({'A': numeric_a, 'B': categorical_b}, columns=['A', 'B'])

     model = KDEModel('kde_model')
     model.fit(frame)

     density = model._density([1, 'foo'])
     self.assertAlmostEqual(density, 0.1, places=2, msg='density is not calculated correctly')
예제 #3
0
    def test_maximum(self):
        """Check the maximum of a model fitted on two numerical columns.

        Every coordinate returned by ``_maximum()`` is compared with 3.0,
        with one comparison per entry of the model's ``fields``.
        """
        column_b = np.array([0, 2, 2, 3, 3, 3, 4, 4, 6])
        column_a = np.array([1, 2, 3, 3, 3, 3, 3, 4, 5])
        frame = pd.DataFrame({'B': column_b, 'A': column_a}, columns=['B', 'A'])
        expected = np.array([3., 3.])

        model = KDEModel('kde_model')
        model.fit(frame)
        found = model._maximum()

        # Walk the model fields in order and compare coordinate by coordinate.
        for position, _ in enumerate(model.fields):
            self.assertAlmostEqual(found[position], expected[position])
예제 #4
0
 def test_kde_storage(self):
     """Check that density queries leave a usable entry in the model's ``kde`` attribute.

     Two densities are evaluated that share the values 'foo' and 'hey';
     afterwards the entry stored under the key "['foo', 'hey']" must be
     truthy.
     """
     column_order = ['A', 'B', 'C']
     values = {
         'A': np.array([1, 2, 3, 2, 3, 4, 5]),
         'B': np.array(['foo', 'bar', 'foo', 'foo', 'bar', 'foo', 'bar']),
         'C': np.array(['hey', 'hey', 'hey', 'hey', 'hey', 'hey', 'ho']),
     }
     frame = pd.DataFrame(values, columns=column_order)

     model = KDEModel('kde_model')
     model.fit(frame)

     # Two queries that differ only in the trailing numeric value.
     for numeric_value in (2, 3):
         model._density(['foo', 'hey', numeric_value])

     storage_key = "['foo', 'hey']"
     self.assertTrue(model.kde[storage_key], 'kde storage does not work properly')
예제 #5
0
 def test_conditionout(self):
     """Check the model data before and after ``_conditionout``.

     First the fitted model's data (sorted by 'A') must equal the input
     frame.  Then the bounds of the second field's domain are set to 2 and
     4, ``_conditionout(keep='B', remove='A')`` is called, and the model
     data (sorted by 'A') must equal the input rows whose 'A' value lies
     between 2 and 4.
     """
     def sorted_by_a(frame):
         # Order rows by 'A' and renumber them so ``equals`` compares like with like.
         return frame.sort_values(by='A').reset_index(drop=True)

     column_order = ['B', 'A']
     original = pd.DataFrame({'B': np.array([2, 4, 7, 7, 7, 4, 1]),
                              'A': np.array([1, 2, 3, 3, 3, 4, 5])},
                             columns=column_order)
     model = KDEModel('kde_model')
     model.fit(original)
     self.assertTrue(sorted_by_a(model.data).equals(original),
                     "input data was not passed properly to the model")

     # Change domains of dimension A
     model.fields[1]['domain'].setlowerbound(2)
     model.fields[1]['domain'].setupperbound(4)

     # Condition and marginalize model
     model._conditionout(keep='B', remove='A')

     # Control data: the input rows with 'A' between 2 and 4
     expected = pd.DataFrame({'B': np.array([4, 7, 7, 7, 4]),
                              'A': np.array([2, 3, 3, 3, 4])},
                             columns=column_order)
     self.assertTrue(sorted_by_a(model.data).equals(expected),
                     "model data was not marginalized and conditioned properly")
예제 #6
0
 def test_predict(self):
     """Check a prediction made by marginalizing and conditioning a four-column model.

     The model is reduced to 'C' and 'D', the domain of 'C' is bounded by
     2 and 3, and the model is then reduced to 'D' alone.  The first
     coordinate of ``_maximum()`` must match 3.0 when compared with two
     decimal places.
     """
     column_order = ['A', 'B', 'C', 'D']
     values = {
         'A': np.array([1, 2, 3, 2, 3, 4, 5]),
         'B': np.array([1, 2, 2, 3, 3, 4, 5]),
         'C': np.array([1, 2, 3, 3, 3, 4, 5]),
         'D': np.array([0, 3, 3, 4, 4, 6, 7]),
     }
     frame = pd.DataFrame(values, columns=column_order)

     model = KDEModel('kde_model')
     model.fit(frame)

     # Drop every dimension except 'C' and 'D'.
     model.marginalize(keep=['C', 'D'])

     # Bound 'C' (upper bound first, then lower bound) and reduce to 'D'.
     model.byname('C')['domain'].setupperbound(3)
     model.byname('C')['domain'].setlowerbound(2)
     model.marginalize(keep=['D'])

     # First coordinate of the maximum reported for the remaining dimension.
     predicted = model._maximum()[0]
     self.assertAlmostEqual(predicted, 3.0, places=2, msg='prediction is not correct')