def _combine_distributions(self, input_data, missing_strategy,
                           method=PROBABILITY_CODE):
    """Builds the final predicted distribution out of the models' votes.

    Every model contributes its predicted distribution. Depending on
    the `method` parameter, probability, votes or confidence are used
    to weight the models.

    :param input_data: Input data to be predicted
    :param missing_strategy: strategy applied to missing fields, handed
                             over unchanged to the underlying multimodels
    :param method: code selecting how the models are weighted
    """
    if len(self.models_splits) <= 1:
        # A single group of models: the multimodel already stored in
        # the object generates all the votes
        votes = self.multi_model.generate_votes_distribution(
            input_data,
            missing_strategy=missing_strategy,
            method=method)
        return votes.combine_to_distribution(normalize=False)

    # Several chunks of models: each chunk is wrapped in its own
    # multimodel, one after the other, and their votes are accumulated
    votes = MultiVoteList([])
    for split in self.models_splits:
        split_models = self._get_models(split)
        split_multi_model = MultiModel(split_models,
                                       api=self.api,
                                       fields=self.fields,
                                       class_names=self.class_names)
        votes.extend(split_multi_model.generate_votes_distribution(
            input_data,
            missing_strategy=missing_strategy,
            method=method))
    return votes.combine_to_distribution(normalize=False)
def predict_probability(self, input_data,
                        missing_strategy=LAST_PREDICTION,
                        compact=False):
    """For classification models, predicts a probability for each
    possible output class, based on input values. The input fields
    must be a dictionary keyed by field name or field ID.

    For regressions, the predictions of the component models are summed
    and divided by the number of collected predictions or, when the
    fusion has weights, by the sum of the weights.

    :param input_data: Input data to be predicted
    :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy
                             for missing fields
    :param compact: For classifications: if False, the prediction is
                    returned as a list of maps, one per class, with the
                    keys "category" and "probability" mapped to the
                    name of the class and its probability,
                    respectively. If True, returns the list of
                    probabilities, in the same order as
                    `self.class_names`.
                    For regressions: if False, returns the map
                    {"prediction": value}. If True, returns the single
                    element list [value].
    """
    votes = MultiVoteList([])
    if not self.missing_numerics:
        check_no_missing_numerics(input_data, self.fields)

    for models_split in self.models_splits:
        # All the local models of the split are built before any of them
        # is asked for a prediction
        models = [
            Fusion(model, api=self.api)
            if get_resource_type(model) == "fusion"
            else SupervisedModel(model, api=self.api)
            for model in models_split]
        votes_split = []
        for model in models:
            try:
                prediction = model.predict_probability(
                    input_data,
                    missing_strategy=missing_strategy,
                    compact=True)
            except ValueError:
                # logistic regressions can raise this error if they
                # have missing_numerics=False and some numeric missings
                # are found
                continue
            if self.regression:
                # compact regression predictions are single element lists
                prediction = prediction[0]
            if self.weights is not None:
                prediction = self.weigh(prediction, model.resource_id)
            # we need to check that all classes in the fusion
            # are also in the composing model
            if not self.regression and \
                    self.class_names != model.class_names:
                try:
                    prediction = rearrange_prediction(
                        model.class_names,
                        self.class_names,
                        prediction)
                except AttributeError:
                    # class_names should be defined, but just in case
                    pass
            votes_split.append(prediction)
        votes.extend(votes_split)

    if self.regression:
        # NOTE(review): if every model was skipped above and there are
        # no weights, total_weight is presumably 0 and the division
        # raises ZeroDivisionError -- confirm whether that can happen
        total_weight = len(votes.predictions) if self.weights is None \
            else sum(self.weights)
        prediction = sum(votes.predictions) / float(total_weight)
        if compact:
            return [prediction]
        return {"prediction": prediction}

    distribution = votes.combine_to_distribution(normalize=True)
    if compact:
        return distribution
    return [{'category': class_name, 'probability': probability}
            for class_name, probability
            in zip(self.class_names, distribution)]