def predict(self, input_data, method=PLURALITY_CODE, full=False):
    """Makes a prediction based on the prediction made by every model.

    :param input_data: Test data to be used as input
    :param method: numeric key code for the following combination
                   methods in classifications/regressions:
          0 - majority vote (plurality)/ average: PLURALITY_CODE
          1 - confidence weighted majority vote / error weighted:
              CONFIDENCE_CODE
          2 - probability weighted majority vote / average:
              PROBABILITY_CODE
    :param full: Boolean; when True the combiner is asked for the full
                 dictionary of prediction attributes instead of a bare
                 prediction value.
    """
    # When only one group of models is found you use the
    # corresponding multimodel to predict.
    # Each per-model prediction is tagged with its 1-based "order" in
    # the ensemble and an individual vote "count" of 1.
    votes_split = []
    for order, predict_fn in enumerate(self.predict_functions, start=1):
        prediction = predict_fn(input_data)
        prediction.update({"order": order, "count": 1})
        votes_split.append(prediction)
    votes = MultiVote(votes_split, boosting_offsets=self.boosting_offsets)
    options = None
    if self.boosting is not None and not self.regression:
        # Boosted classifications need the objective field's category
        # list to combine the per-class boosted scores.
        categories = [
            d[0] for d in
            self.fields[self.objective_id]["summary"]["categories"]]
        options = {"categories": categories}
    result = votes.combine(method=method, options=options, full=full)
    if isinstance(result, dict):
        # The internal vote count is not part of the public prediction;
        # pop() avoids a KeyError if the combiner omitted the key
        # (the original `del result['count']` would raise in that case).
        result.pop('count', None)
    return result
def aggregate_multivote(multivote, options, labels, models_per_label,
                        ordered, models_order, label_separator=None):
    """Aggregate the model's predictions for multi-label fields in a
    concatenated format into a final prediction
    """
    if label_separator is None:
        label_separator = ","
    predictions = multivote.predictions
    if ordered and models_per_label == 1:
        # as multi-labeled models are created from end to start votes
        # must be reversed to match
        predictions.reverse()
    else:
        # Re-sort the votes according to the given model positions.
        ordered_pairs = sorted(zip(models_order, predictions),
                               key=lambda pair: pair[0])
        predictions = [vote for _, vote in ordered_pairs]
    if labels is None or \
            len(labels) * models_per_label != len(predictions):
        sys.exit("Failed to make a multi-label prediction. No"
                 " valid label info is found.")
    chosen_labels = []
    chosen_confidences = []
    # When several models vote on each label, first combine every group
    # of per-label votes into a single prediction.
    if models_per_label > 1:
        grouped = [predictions[start:start + models_per_label]
                   for start in range(0, len(predictions),
                                      models_per_label)]
        combined = []
        for group in grouped:
            info = MultiVote(group).combine(method=AGGREGATION,
                                            full=True, options=options)
            combined.append({'prediction': info["prediction"],
                             'confidence': info["confidence"]})
        predictions = combined
    for index, vote in enumerate(predictions):
        # A truthy per-label prediction means the label applies.
        if ast.literal_eval(vote['prediction']):
            chosen_labels.append(labels[index])
            chosen_confidences.append(str(vote['confidence']))
    return [label_separator.join(chosen_labels),
            label_separator.join(chosen_confidences)]
def predict(self, input_data, by_name=True, method=PLURALITY_CODE,
            with_confidence=False, options=None):
    """Makes a prediction based on the prediction made by every model.

    The method parameter is a numeric key to the following combination
    methods in classifications/regressions:
        0 - majority vote (plurality)/ average: PLURALITY_CODE
        1 - confidence weighted majority vote / error weighted:
            CONFIDENCE_CODE
        2 - probability weighted majority vote / average:
            PROBABILITY_CODE
        3 - threshold filtered vote / doesn't apply: THRESHOLD_CODE
    """
    if len(self.models_splits) == 1:
        # A single group of models: the stored multimodel votes directly.
        split_votes = self.multi_model.generate_votes(input_data,
                                                      by_name=by_name)
        votes = MultiVote(split_votes.predictions)
    else:
        # Several chunks of models: each chunk is retrieved and used
        # sequentially, accumulating its votes for the prediction.
        votes = MultiVote([])
        for chunk in self.models_splits:
            chunk_models = [retrieve_resource(self.api, model_id,
                                              query_string=ONLY_MODEL)
                            for model_id in chunk]
            multi_model = MultiModel(chunk_models, api=self.api)
            split_votes = multi_model.generate_votes(input_data,
                                                     by_name=by_name)
            votes.extend(split_votes.predictions)
    return votes.combine(method=method, with_confidence=with_confidence,
                         options=options)
def predict(self, input_data, by_name=True, method=PLURALITY_CODE):
    """Makes a prediction based on the prediction made by every model.

    :param input_data: Test data to be used as input
    :param by_name: Boolean that is set to True if field_names (as
                    alternative to field ids) are used in the
                    input_data dict
    :param method: numeric key code for the following combination
                   methods in classifications/regressions:
          0 - majority vote (plurality)/ average: PLURALITY_CODE
          1 - confidence weighted majority vote / error weighted:
              CONFIDENCE_CODE
          2 - probability weighted majority vote / average:
              PROBABILITY_CODE
    """
    # When only one group of models is found you use the
    # corresponding multimodel to predict.
    # Normalize the input to the positional layout that the per-model
    # prediction functions expect.
    args = self.format_input_data(input_data, by_name=by_name)
    all_votes = [predict_fn(*args)
                 for predict_fn in self.predict_functions]
    votes = MultiVote(all_votes, boosting_offsets=self.boosting_offsets)
    combine_options = None
    if self.boosting is not None and not self.regression:
        # Boosted classifications need the objective field's categories.
        summary = self.fields[self.objective_id]["summary"]
        combine_options = {
            "categories": [pair[0] for pair in summary["categories"]]}
    return votes.combine(method=method, options=combine_options)
def aggregate_multivote(multivote, options, labels, models_per_label,
                        ordered, models_order, label_separator=None):
    """Aggregate the model's predictions for multi-label fields in a
    concatenated format into a final prediction

    :param multivote: MultiVote object holding the individual predictions
    :param options: options used when combining grouped per-label votes
    :param labels: list of labels of the multi-label field
    :param models_per_label: number of models voting on each label
    :param ordered: whether predictions follow the models' creation order
    :param models_order: positions used to sort the predictions when they
                         are not ordered
    :param label_separator: separator used to join the predicted labels
                            (defaults to ",")
    """
    if label_separator is None:
        label_separator = ","
    predictions = multivote.predictions
    if ordered and models_per_label == 1:
        # as multi-labeled models are created from end to start votes
        # must be reversed to match
        predictions.reverse()
    else:
        predictions = [prediction for (_, prediction)
                       in sorted(zip(models_order, predictions),
                                 key=lambda x: x[0])]
    if (labels is None or
            len(labels) * models_per_label != len(predictions)):
        sys.exit("Failed to make a multi-label prediction. No"
                 " valid label info is found.")
    prediction_list = []
    confidence_list = []
    # In the following case, we must vote each label using the models
    # in the ensemble and the chosen method
    if models_per_label > 1:
        label_predictions = [predictions[i: i + models_per_label] for i in
                             range(0, len(predictions), models_per_label)]
        predictions = []
        for label_prediction in label_predictions:
            label_multivote = MultiVote(label_prediction)
            prediction, confidence = label_multivote.combine(
                method=AGGREGATION, with_confidence=True, options=options)
            predictions.append({'prediction': prediction,
                                'confidence': confidence})
    # enumerate replaces the original range(0, len(predictions)) indexing
    for vote_index, vote in enumerate(predictions):
        # A truthy per-label prediction means the label applies.
        if ast.literal_eval(vote['prediction']):
            prediction_list.append(labels[vote_index])
            confidence_list.append(str(vote['confidence']))
    return [label_separator.join(prediction_list),
            label_separator.join(confidence_list)]
def predict(self, input_data, by_name=True, method=PLURALITY_CODE,
            with_confidence=False):
    """Makes a prediction based on the prediction made by every model.

    The method parameter is a numeric key to the following combination
    methods in classifications/regressions:
        0 - majority vote (plurality)/ average: PLURALITY_CODE
        1 - confidence weighted majority vote / error weighted:
            CONFIDENCE_CODE
        2 - probability weighted majority vote / average:
            PROBABILITY_CODE
    """
    votes = MultiVote([])
    # Vote with each chunk of models in turn, pooling every chunk's
    # predictions into a single MultiVote before combining.
    for chunk in self.models_splits:
        chunk_models = [retrieve_model(self.api, model_id)
                        for model_id in chunk]
        chunk_votes = MultiModel(chunk_models).generate_votes(
            input_data, by_name=by_name)
        votes.extend(chunk_votes.predictions)
    return votes.combine(method=method, with_confidence=with_confidence)
def predict(self, input_data, by_name=True, method=PLURALITY_CODE):
    """Makes a prediction based on the prediction made by every model.

    The method parameter is a numeric key to the following combination
    methods in classifications/regressions:
        0 - majority vote (plurality)/ average: PLURALITY_CODE
        1 - confidence weighted majority vote / error weighted:
            CONFIDENCE_CODE
        2 - probability weighted majority vote / average:
            PROBABILITY_CODE
    """
    votes = MultiVote([])
    # enumerate replaces the range(0, len(self.models)) indexing idiom;
    # the order index is kept in each vote row so the combiner can
    # track which model produced it.
    for order, model in enumerate(self.models):
        prediction, confidence, distribution, instances = model.predict(
            input_data, by_name=by_name, with_confidence=True)
        votes.append_row([prediction, confidence, order,
                          distribution, instances])
    return votes.combine(method=method)
len(labels) * models_per_label != len(predictions)): sys.exit("Failed to make a multi-label prediction. No" " valid label info is found.") prediction_list = [] confidence_list = [] # In the following case, we must vote each label using the models # in the ensemble and the chosen method if models_per_label > 1: label_predictions = [predictions[i: i + models_per_label] for i in range(0, len(predictions), models_per_label)] predictions = [] for label_prediction in label_predictions: label_multivote = MultiVote(label_prediction) prediction, confidence = label_multivote.combine( method=method, with_confidence=True, options=options) predictions.append({'prediction': prediction, 'confidence': confidence}) for vote_index in range(0, len(predictions)): if ast.literal_eval(predictions[vote_index]['prediction']): prediction_list.append(labels[vote_index]) confidence = str(predictions[vote_index]['confidence']) confidence_list.append(confidence) prediction = [label_separator.join(prediction_list), label_separator.join(confidence_list)] elif method == COMBINATION: predictions = multivote.predictions global_distribution = [] for prediction in predictions: prediction_category = None prediction_instances = 0
def predict(self, input_data, method=None, options=None,
            missing_strategy=LAST_PREDICTION, operating_point=None,
            operating_kind=None, median=False, full=False):
    """Makes a prediction based on the prediction made by every model.

    :param input_data: Test data to be used as input
    :param method: **deprecated**. Please check the `operating_kind`
                   attribute. Numeric key code for the following
                   combination methods in classifications/regressions:
          0 - majority vote (plurality)/ average: PLURALITY_CODE
          1 - confidence weighted majority vote / error weighted:
              CONFIDENCE_CODE
          2 - probability weighted majority vote / average:
              PROBABILITY_CODE
          3 - threshold filtered vote / doesn't apply: THRESHOLD_CODE
    :param options: Options to be used in threshold filtered votes.
    :param missing_strategy: numeric key for the individual model's
                             prediction method. See the model predict
                             method.
    :param operating_point: In classification models, this is the point
                            of the ROC curve where the model will be
                            used at. The operating point can be defined
                            in terms of:
                            - the positive_class, the class that is
                              important to predict accurately
                            - its kind: probability, confidence or voting
                            - its threshold: the minimum established for
                              the positive_class to be predicted.
                            The operating_point is then defined as a map
                            with three attributes, e.g.:
                               {"positive_class": "Iris-setosa",
                                "kind": "probability",
                                "threshold": 0.5}
    :param operating_kind: "probability", "confidence" or "votes". Sets
                           the property that decides the prediction.
                           Used only if no operating_point is used
    :param median: Uses the median of each individual model's predicted
                   node as individual prediction for the specified
                   combination method.
    :param full: Boolean that controls whether to include the
                 prediction's attributes. By default, only the
                 prediction is produced. If set to True, the rest of
                 available information is added in a dictionary format.
                 The dictionary keys can be:
                  - prediction: the prediction value
                  - confidence: prediction's confidence
                  - probability: prediction's probability
                  - path: rules that lead to the prediction
                  - count: number of training instances supporting the
                    prediction
                  - next: field to check in the next split
                  - min: minim value of the training instances in the
                    predicted node
                  - max: maximum value of the training instances in the
                    predicted node
                  - median: median of the values of the training
                    instances in the predicted node
                  - unused_fields: list of fields in the input data that
                    are not being used in the model
    """
    # Checks and cleans input_data leaving the fields used in the model
    new_data = self.filter_input_data(input_data, add_unused_fields=full)
    unused_fields = None
    if full:
        input_data, unused_fields = new_data
    else:
        input_data = new_data

    # Strips affixes for numeric values and casts to the final field type
    cast(input_data, self.fields)

    if median and method is None:
        # predictions with median are only available with old combiners
        method = PLURALITY_CODE

    if method is None and operating_point is None and \
            operating_kind is None and not median:
        # operating_point has precedence over operating_kind. If no
        # combiner is set, default operating kind is "probability"
        operating_kind = "probability"

    if operating_point:
        if self.regression:
            raise ValueError("The operating_point argument can only be"
                             " used in classifications.")
        prediction = self.predict_operating(
            input_data, missing_strategy=missing_strategy,
            operating_point=operating_point)
        if full:
            return prediction
        return prediction["prediction"]

    if operating_kind:
        if self.regression:
            # for regressions, operating_kind defaults to the old
            # combiners
            method = 1 if operating_kind == "confidence" else 0
            return self.predict(
                input_data, method=method, options=options,
                missing_strategy=missing_strategy, operating_point=None,
                operating_kind=None, full=full)
        return self.predict_operating_kind(
            input_data, missing_strategy=missing_strategy,
            operating_kind=operating_kind)

    if len(self.models_splits) > 1:
        # If there's more than one chunk of models, they must be
        # sequentially used to generate the votes for the prediction
        votes = MultiVote([], boosting_offsets=self.boosting_offsets)
        for models_split in self.models_splits:
            models = self._get_models(models_split)
            multi_model = MultiModel(models, api=self.api,
                                     fields=self.fields)
            votes_split = multi_model._generate_votes(
                input_data, missing_strategy=missing_strategy,
                unused_fields=unused_fields)
            if median:
                for prediction in votes_split.predictions:
                    prediction['prediction'] = prediction['median']
            votes.extend(votes_split.predictions)
    else:
        # When only one group of models is found you use the
        # corresponding multimodel to predict
        votes_split = self.multi_model._generate_votes(
            input_data, missing_strategy=missing_strategy,
            unused_fields=unused_fields)
        votes = MultiVote(votes_split.predictions,
                          boosting_offsets=self.boosting_offsets)
        if median:
            for prediction in votes.predictions:
                prediction['prediction'] = prediction['median']

    if self.boosting is not None and not self.regression:
        # Boosted classifications need the objective field's categories
        categories = [
            d[0] for d in
            self.fields[self.objective_id]["summary"]["categories"]]
        options = {"categories": categories}
    result = votes.combine(method=method, options=options, full=full)

    if full:
        # Report only the input fields that no model in the ensemble used
        unused_fields = set(input_data.keys())
        for prediction in votes.predictions:
            unused_fields = unused_fields.intersection(
                set(prediction.get("unused_fields", [])))
        if not isinstance(result, dict):
            result = {"prediction": result}
        result['unused_fields'] = list(unused_fields)

    return result
sys.exit("Failed to make a multi-label prediction. No" " valid label info is found.") prediction_list = [] confidence_list = [] # In the following case, we must vote each label using the models # in the ensemble and the chosen method if models_per_label > 1: label_predictions = [ predictions[i:i + models_per_label] for i in range(0, len(predictions), models_per_label) ] predictions = [] for label_prediction in label_predictions: label_multivote = MultiVote(label_prediction) prediction, confidence = label_multivote.combine( method=method, with_confidence=True, options=options) predictions.append({ 'prediction': prediction, 'confidence': confidence }) for vote_index in range(0, len(predictions)): if ast.literal_eval(predictions[vote_index]['prediction']): prediction_list.append(labels[vote_index]) confidence = str(predictions[vote_index]['confidence']) confidence_list.append(confidence) prediction = [ label_separator.join(prediction_list), label_separator.join(confidence_list) ] elif method == COMBINATION: predictions = multivote.predictions