Example #1
0
    def _compute_targets_distribution(self, conditions):
        """Return the distribution of the random label self.targets|conditions.

        `conditions` is a dict {feature_col: val}. It must contain a value
        for every column listed in self.conditions.

        Returns a pair (distribution, classes): the first row returned by the
        forest's `predict_proba`, and the `classes_` attribute of that same
        forest, which serves as the class mapping for looking up the entries
        of `distribution`.

        Raises BLE(ValueError) if a conditional column is missing from
        `conditions`.
        """
        if not set(self.conditions).issubset(set(conditions.keys())):
            raise BLE(ValueError(
                'Must specify values for all the conditionals.\n'
                'Received: {}\n'
                'Expected: {}'.format(conditions, self.conditions_numerical +
                self.conditions_categorical)))

        # Are there any category values in conditions which never appeared
        # during training? If yes, we need to run the partial RF, which is
        # queried with the numerical conditions only.
        unseen = any(
            conditions[cat] not in self.categories_to_val_map[cat]
            for cat in self.conditions_categorical)

        X_numerical = [conditions[col] for col in self.conditions_numerical]
        if unseen:
            forest = self.rf_partial
            features = X_numerical
        else:
            X_categorical = [conditions[col] for col in
                self.conditions_categorical]
            X_categorical = utils.binarize_categorical_row(
                self.conditions_categorical, self.categories_to_val_map,
                X_categorical)
            forest = self.rf_full
            features = np.hstack((X_numerical, X_categorical))

        # predict_proba takes one row per sample; present the single query as
        # a (1, n_features) array instead of a flat sequence.
        distribution = forest.predict_proba(np.atleast_2d(features))

        # Read the class labels from the forest that produced the
        # distribution, so labels and probabilities always line up.
        return distribution[0], forest.classes_
Example #2
0
    def _compute_targets_distribution(self, conditions):
        """Return the distribution of the random label self.targets|conditions.

        `conditions` is a dict {feature_col: val} and must supply a value for
        every column in self.conditions.

        Returns (distribution, classes), where `distribution` is the first
        row of `predict_proba` from the forest that was queried and `classes`
        is that forest's `classes_` attribute (the class mapping used to look
        up entries of `distribution`).

        Raises BLE(ValueError) when `conditions` does not cover all the
        conditional columns.
        """
        if not set(self.conditions).issubset(set(conditions.keys())):
            raise BLE(
                ValueError('Must specify values for all the conditionals.\n'
                           'Received: {}\n'
                           'Expected: {}'.format(
                               conditions, self.conditions_numerical +
                               self.conditions_categorical)))

        # Are there any category values in conditions which never appeared
        # during training? If yes, we need to run the partial RF, which only
        # receives the numerical conditions.
        unseen = any(
            conditions[cat] not in self.categories_to_val_map[cat]
            for cat in self.conditions_categorical
        )

        X_numerical = [conditions[col] for col in self.conditions_numerical]
        if unseen:
            # Wrap the single query as one row: predict_proba expects a
            # (n_samples, n_features) input, not a flat list.
            distribution = self.rf_partial.predict_proba(
                np.atleast_2d(X_numerical))
            classes = self.rf_partial.classes_
        else:
            X_categorical = [
                conditions[col] for col in self.conditions_categorical
            ]
            X_categorical = utils.binarize_categorical_row(
                self.conditions_categorical, self.categories_to_val_map,
                X_categorical)
            distribution = self.rf_full.predict_proba(
                np.atleast_2d(np.hstack((X_numerical, X_categorical))))
            # The probabilities came from the full forest, so the class
            # labels must come from the full forest as well.
            classes = self.rf_full.classes_
        return distribution[0], classes
Example #3
0
    def _compute_targets_distribution(self, conditions):
        """Return the conditional mean of the `targets` and the noise scale.

        `conditions` is a dict {feature_col: val} that must hold a value for
        every column in self.conditions; otherwise ValueError is raised.

        The result is a pair: the first prediction of the selected model, and
        that model's noise attribute (the scale of the Gaussian noise).
        """
        provided = set(conditions.keys())
        if not set(self.conditions).issubset(provided):
            expected = self.conditions_numerical + self.conditions_categorical
            message = (
                'Must specify values for all the conditionals.\n'
                'Received: {}\n'
                'Expected: {}').format(conditions, expected)
            raise ValueError(message)

        # Flag each categorical condition whose value never appeared during
        # training; a single such value forces the partial model.
        novelty_flags = [
            conditions[cat] not in self.categories_to_val_map[cat]
            for cat in self.conditions_categorical
        ]
        has_unseen_category = any(novelty_flags)

        numerical_row = [conditions[col] for col in self.conditions_numerical]
        n_numerical = len(self.conditions_numerical)

        if has_unseen_category:
            # Partial model: numerical conditions only, as a single row.
            features = np.array([numerical_row])
            assert features.shape == (1, n_numerical)
            predictions = self.mr_partial.predict(features)
            noise = self.mr_partial_noise
            return predictions[0], noise

        # Full model: numerical conditions followed by the binarized
        # categorical conditions, again as a single row.
        raw_categorical = [
            conditions[col] for col in self.conditions_categorical
        ]
        encoded = utils.binarize_categorical_row(
            self.conditions_categorical, self.categories_to_val_map,
            raw_categorical)
        features = np.concatenate(([numerical_row], [encoded]), axis=1)
        assert features.shape == (1, n_numerical + len(encoded))
        predictions = self.mr_full.predict(features)
        noise = self.mr_full_noise
        return predictions[0], noise
    def _compute_targets_distribution(self, conditions):
        """Compute the conditional mean of the `targets` and the noise scale.

        Takes a conditions dict {feature_col: val} covering every column in
        self.conditions, and returns (mean, noise): the first prediction of
        the chosen model and the scale of the Gaussian noise stored for that
        model.

        Raises BLE(ValueError) if some conditional column has no value.
        """
        required = set(self.conditions)
        if not required.issubset(set(conditions.keys())):
            template = ('Must specify values for all the conditionals.\n'
                        'Received: {}\n'
                        'Expected: {}')
            raise BLE(ValueError(template.format(
                conditions,
                self.conditions_numerical + self.conditions_categorical)))

        # A categorical value that was never seen during training cannot be
        # binarized, so such queries fall back to the partial model.
        use_partial = any([
            conditions[name] not in self.categories_to_val_map[name]
            for name in self.conditions_categorical
        ])

        numeric_values = [
            conditions[name] for name in self.conditions_numerical
        ]

        if use_partial:
            model = self.mr_partial
            row = np.array([numeric_values])
            expected_shape = (1, len(self.conditions_numerical))
        else:
            model = self.mr_full
            categorical_values = [
                conditions[name] for name in self.conditions_categorical
            ]
            binarized = utils.binarize_categorical_row(
                self.conditions_categorical, self.categories_to_val_map,
                categorical_values)
            row = np.concatenate(([numeric_values], [binarized]), axis=1)
            expected_shape = (
                1, len(self.conditions_numerical) + len(binarized))

        # The query must be exactly one row of the expected width.
        assert row.shape == expected_shape
        predictions = model.predict(row)
        if use_partial:
            noise = self.mr_partial_noise
        else:
            noise = self.mr_full_noise

        return predictions[0], noise