Beispiel #1
0
    def _combine_distributions(self,
                               input_data,
                               missing_strategy,
                               method=PROBABILITY_CODE):
        """Builds the final predicted distribution from the models' votes.

        The votes distribution generated for the input data is collected
        from the models and merged into a single, non-normalized
        distribution. Depending on the method parameter, probability, votes
        or the confidence are used to weight the models.

        """

        def split_votes(multi_model):
            """Votes distribution of one multimodel for the input data"""
            return multi_model.generate_votes_distribution(
                input_data,
                missing_strategy=missing_strategy,
                method=method)

        if len(self.models_splits) <= 1:
            # A single group of models: the multimodel already stored in
            # the object generates all the votes
            all_votes = split_votes(self.multi_model)
        else:
            # Several chunks of models: a multimodel is built for each chunk
            # in turn and its votes are accumulated sequentially
            all_votes = MultiVoteList([])
            for chunk in self.models_splits:
                chunk_multi_model = MultiModel(
                    self._get_models(chunk),
                    api=self.api,
                    fields=self.fields,
                    class_names=self.class_names)
                all_votes.extend(split_votes(chunk_multi_model))

        return all_votes.combine_to_distribution(normalize=False)
Beispiel #2
0
    def predict_probability(self, input_data,
                            missing_strategy=LAST_PREDICTION,
                            compact=False):
        """Predicts by combining the predictions of the component models.

        For classification models, predicts a probability for
        each possible output class, based on input values.  The input
        fields must be a dictionary keyed by field name or field ID.

        For regressions, the prediction is the sum of the component
        models' (optionally weighted) predictions divided by the number of
        predictions, or by the sum of the weights when weights are set.

        :param input_data: Input data to be predicted
        :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy
                                 for missing fields
        :param compact: For classifications: if False, prediction is
                        returned as a list of maps, one per class, with the
                        keys "category" and "probability" mapped to the
                        name of the class and its probability,
                        respectively.  If True, returns the list of
                        probabilities only, positionally aligned with
                        `self.class_names`.
                        For regressions: if False, returns a map with the
                        single key "prediction". If True, returns a
                        single element list containing the prediction.
        """
        votes = MultiVoteList([])
        if not self.missing_numerics:
            check_no_missing_numerics(input_data, self.fields)

        for models_split in self.models_splits:
            # Build the local predictor for every component of the split
            models = []
            for model in models_split:
                if get_resource_type(model) == "fusion":
                    models.append(Fusion(model, api=self.api))
                else:
                    models.append(SupervisedModel(model, api=self.api))

            votes_split = []
            for model in models:
                try:
                    prediction = model.predict_probability(
                        input_data,
                        missing_strategy=missing_strategy,
                        compact=True)
                except ValueError:
                    # logistic regressions can raise this error if they
                    # have missing_numerics=False and some numeric missings
                    # are found
                    continue

                if self.regression:
                    # compact predictions are lists: keep the only element
                    prediction = prediction[0]
                if self.weights is not None:
                    prediction = self.weigh(prediction, model.resource_id)
                # we need to check that all classes in the fusion
                # are also in the composing model
                if not self.regression and \
                        self.class_names != model.class_names:
                    try:
                        prediction = rearrange_prediction(
                            model.class_names,
                            self.class_names,
                            prediction)
                    except AttributeError:
                        # class_names should be defined, but just in case
                        pass
                votes_split.append(prediction)

            votes.extend(votes_split)

        if self.regression:
            # NOTE(review): models skipped on ValueError add no prediction,
            # but sum(self.weights) presumably still counts their weight;
            # with no weights and every model skipped this division raises
            # ZeroDivisionError -- confirm both are intended.
            total_weight = len(votes.predictions) if self.weights is None \
                else sum(self.weights)
            prediction = sum(votes.predictions) / float(total_weight)
            if compact:
                return [prediction]
            return {"prediction": prediction}

        output = votes.combine_to_distribution(normalize=True)
        if not compact:
            output = [{'category': class_name,
                       'probability': probability}
                      for class_name, probability in
                      zip(self.class_names, output)]

        return output