def test_compute_category_modes(self):
        """Check the per-key values reported by ``Imputer.get_category_modes``.

        NOTE(review): 'brown' and 'black' each occur once under 'hair', so
        the second assertion depends on how get_category_modes breaks ties --
        confirm against the Imputer implementation.
        """
        training_rows = [
            {'hair': 'brown', 'eyes': 'blue'},
            {'hair': 'black', 'eyes': 'blue'},
        ]

        category_modes = Imputer().get_category_modes(training_rows)

        # 'blue' is the only 'eyes' value present in the training rows.
        self.assertEqual(category_modes['eyes'], 'blue')
        self.assertEqual(category_modes['hair'], 'brown')
    def test_imputes_continuous_features_using_training_set(self):
        """A ``None`` entry is filled in from the training values of its column."""
        training_rows = [
            [0, 0, 1],
            [0, 10, 2],
        ]
        rows_with_gap = [
            [1, None, 0],
        ]

        result = Imputer().impute_continuous(training_rows, rows_with_gap)

        # The missing middle slot is expected to become 5.0; the training
        # column at that position holds 0 and 10.
        first_row = list(result[0])
        self.assertEqual(first_row, [1.0, 5.0, 0.0])
    def test_removes_continuous_features_when_training_is_nil_for_all_samples_of_a_feature(self):
        """A column that is ``None`` in every training row is dropped from the output."""
        # The middle column has no usable training value at all.
        training_rows = [
            [0, None, 1],
            [0, None, 2],
        ]
        complete_rows = [
            [1, 2, 3],
        ]

        result = Imputer().impute_continuous(training_rows, complete_rows)

        # Only the first and last columns are expected to survive.
        first_row = list(result[0])
        self.assertEqual(first_row, [1.0, 3.0])
    def test_imputes_categorical_features_using_training_set(self):
        """A ``None`` categorical value is filled in from the training rows."""
        training_rows = [
            {'hair': 'brown', 'eyes': 'blue'},
            {'hair': 'black', 'eyes': 'blue'},
        ]
        rows_with_gap = [
            {'hair': 'brown', 'eyes': None},
        ]

        result = Imputer().impute_categorical(training_rows, rows_with_gap)

        # 'eyes' is expected to become 'blue', the only 'eyes' value seen
        # in the training rows; 'hair' was already present and is unchanged.
        expected_row = {'hair': 'brown', 'eyes': 'blue'}
        self.assertEqual(result[0], expected_row)
Example #5
0
    def __init__(self, training_features_to_targets, features_to_targets):
        """Derive event ids, targets and vectorized features from the inputs.

        Entries of both arguments are read positionally: index ``0`` is
        converted to an ``int`` event id, index ``1`` is a mapping with
        ``'categorical'`` and ``'continuous'`` keys, and index ``2`` is the
        target. Only index ``1`` is read from the training entries.

        Args:
            training_features_to_targets: entries whose feature groups are
                stored on the instance and passed to the imputer as the
                training reference.
            features_to_targets: entries whose ids, targets and (imputed)
                features populate this instance.
        """
        def feature_group(entries, group):
            # Collect one named feature group from every entry.
            return [entry[1][group] for entry in entries]

        self.event_ids = [int(entry[0]) for entry in features_to_targets]
        self.targets = [entry[2] for entry in features_to_targets]

        self.training_categorical_features = feature_group(
            training_features_to_targets, 'categorical')
        self.training_continuous_features = feature_group(
            training_features_to_targets, 'continuous')

        imputer = Imputer()

        # NOTE: no scaling is applied to the continuous features; a scaling
        # step was previously commented out at this point.
        imputed_continuous = imputer.impute_continuous(
            self.training_continuous_features,
            feature_group(features_to_targets, 'continuous'),
        )

        imputed_categorical = imputer.impute_categorical(
            self.training_categorical_features,
            feature_group(features_to_targets, 'categorical'),
        )

        self.features = self.vectorize(imputed_categorical, imputed_continuous)