def predict(self, input_data, method=PLURALITY_CODE, full=False):
        """Combines the votes cast by every local predict function.

        :param input_data: Test data to be used as input
        :param method: numeric key code for the following combination
                       methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
        :param full: forwarded unchanged to the votes combiner
        :return: the combiner's result; when it is a dictionary its
                 'count' key is removed before returning
        """

        # Each predict function casts one vote, tagged with its 1-based
        # position in the list and a unit count
        votes_split = []
        for order, fun in enumerate(self.predict_functions, 1):
            vote = fun(input_data)
            vote.update({"order": order, "count": 1})
            votes_split.append(vote)
        votes = MultiVote(votes_split, boosting_offsets=self.boosting_offsets)

        # Boosted classifications need the objective field categories
        options = None
        if not (self.boosting is None or self.regression):
            objective_summary = self.fields[self.objective_id]["summary"]
            options = {
                "categories": [
                    category[0]
                    for category in objective_summary["categories"]]}

        combined = votes.combine(method=method, options=options, full=full)
        if isinstance(combined, dict):
            del combined['count']
        return combined
Example #2
0
def aggregate_multivote(multivote,
                        options,
                        labels,
                        models_per_label,
                        ordered,
                        models_order,
                        label_separator=None):
    """Aggregate the models' predictions for multi-label fields in a
       concatenated format into a final prediction

    :param multivote: object whose `predictions` attribute is the list of
                      votes (dictionaries with 'prediction' and
                      'confidence' keys). It is not modified.
    :param options: options forwarded to the per-label votes combiner
                    (only used when models_per_label > 1)
    :param labels: list of labels; len(labels) * models_per_label must
                   equal the number of votes
    :param models_per_label: number of consecutive votes that belong to
                             each label
    :param ordered: when true and there is a single model per label, the
                    votes are used in reverse order
    :param models_order: sort keys, one per vote, used to order the votes
                         in every other case
    :param label_separator: string used to join the output (default ",")
    :return: two-element list: the joined labels whose vote evaluates to
             a true value and their joined confidences
    :raises SystemExit: when the labels don't match the number of votes
    """

    if label_separator is None:
        label_separator = ","
    predictions = multivote.predictions

    if ordered and models_per_label == 1:
        # as multi-labeled models are created from end to start votes
        # must be reversed to match. A reversed copy is built so that the
        # caller's multivote keeps its order and repeated calls agree
        predictions = list(reversed(predictions))
    else:
        predictions = [
            prediction
            for (_, prediction) in sorted(zip(models_order, predictions),
                                          key=lambda x: x[0])
        ]

    if (labels is None or len(labels) * models_per_label != len(predictions)):
        sys.exit("Failed to make a multi-label prediction. No"
                 " valid label info is found.")
    prediction_list = []
    confidence_list = []
    # In the following case, we must vote each label using the models
    # in the ensemble and the chosen method

    if models_per_label > 1:
        label_predictions = [
            predictions[i:i + models_per_label]
            for i in range(0, len(predictions), models_per_label)
        ]
        predictions = []
        for label_prediction in label_predictions:
            label_multivote = MultiVote(label_prediction)
            prediction_info = label_multivote.combine(method=AGGREGATION,
                                                      full=True,
                                                      options=options)
            predictions.append({
                'prediction': prediction_info["prediction"],
                'confidence': prediction_info["confidence"]
            })
    for vote_index, vote_prediction in enumerate(predictions):
        # the vote's prediction is a literal in string form
        if ast.literal_eval(vote_prediction['prediction']):
            prediction_list.append(labels[vote_index])
            confidence = str(vote_prediction['confidence'])
            confidence_list.append(confidence)
    prediction = [
        label_separator.join(prediction_list),
        label_separator.join(confidence_list)
    ]
    return prediction
Example #3
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE,
                with_confidence=False, options=None):
        """Gathers the votes of every model and combines them.

           The method parameter is a numeric key to the following combination
           methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
              3 - threshold filtered vote / doesn't apply:
                  THRESHOLD_CODE

           by_name is forwarded to the votes generation, while
           with_confidence and options are forwarded to the combiner.
        """

        if len(self.models_splits) <= 1:
            # A single group of models: its multimodel is already
            # available, so it generates all the votes
            single_votes = self.multi_model.generate_votes(input_data,
                                                           by_name=by_name)
            votes = MultiVote(single_votes.predictions)
        else:
            # Several chunks of models: each chunk is retrieved and used
            # in turn, and its votes are accumulated
            votes = MultiVote([])
            for models_split in self.models_splits:
                models = []
                for model_id in models_split:
                    models.append(retrieve_resource(
                        self.api, model_id, query_string=ONLY_MODEL))
                chunk_model = MultiModel(models, api=self.api)
                chunk_votes = chunk_model.generate_votes(input_data,
                                                         by_name=by_name)
                votes.extend(chunk_votes.predictions)
        return votes.combine(method=method, with_confidence=with_confidence,
                             options=options)
Example #4
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE,
                with_confidence=False, options=None):
        """Predicts by combining the votes generated by every model.

           The method parameter is a numeric key to the following combination
           methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
              3 - threshold filtered vote / doesn't apply:
                  THRESHOLD_CODE

           The by_name flag goes to the votes generation; with_confidence
           and options go to the final combination.
        """

        several_splits = len(self.models_splits) > 1
        if several_splits:
            # More than one chunk of models: they are loaded and used
            # sequentially, gathering the votes of each chunk
            votes = MultiVote([])
            for split_ids in self.models_splits:
                split_models = [
                    retrieve_resource(self.api, model_id,
                                      query_string=ONLY_MODEL)
                    for model_id in split_ids]
                split_votes = MultiModel(
                    split_models, api=self.api).generate_votes(
                        input_data, by_name=by_name)
                votes.extend(split_votes.predictions)
        else:
            # Only one group of models: the stored multimodel predicts
            split_votes = self.multi_model.generate_votes(input_data,
                                                          by_name=by_name)
            votes = MultiVote(split_votes.predictions)
        return votes.combine(method=method, with_confidence=with_confidence,
                             options=options)
Example #5
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE):
        """Combines the votes cast by every local predict function.

        :param input_data: Test data to be used as input
        :param by_name: Boolean that is set to True if field_names (as
                        alternative to field ids) are used in the
                        input_data dict
        :param method: numeric key code for the following combination
                       methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
        :return: the result of combining all the votes
        """

        # The input data is formatted once and then handed, unpacked as
        # positional arguments, to each predict function
        input_data_array = self.format_input_data(input_data, by_name=by_name)
        votes_split = [fun(*input_data_array)
                       for fun in self.predict_functions]
        votes = MultiVote(votes_split, boosting_offsets=self.boosting_offsets)

        # Boosted classifications need the objective field categories
        options = None
        if not (self.boosting is None or self.regression):
            objective_summary = self.fields[self.objective_id]["summary"]
            options = {
                "categories": [
                    category[0]
                    for category in objective_summary["categories"]]}

        return votes.combine(method=method, options=options)
Example #6
0
def aggregate_multivote(multivote, options, labels, models_per_label, ordered,
                        models_order, label_separator=None):
    """Aggregate the models' predictions for multi-label fields in a
       concatenated format into a final prediction

    :param multivote: object whose `predictions` attribute is the list of
                      votes (dictionaries with 'prediction' and
                      'confidence' keys). It is not modified.
    :param options: options forwarded to the per-label votes combiner
                    (only used when models_per_label > 1)
    :param labels: list of labels; len(labels) * models_per_label must
                   equal the number of votes
    :param models_per_label: number of consecutive votes that belong to
                             each label
    :param ordered: when true and there is a single model per label, the
                    votes are used in reverse order
    :param models_order: sort keys, one per vote, used to order the votes
                         in every other case
    :param label_separator: string used to join the output (default ",")
    :return: two-element list: the joined labels whose vote evaluates to
             a true value and their joined confidences
    :raises SystemExit: when the labels don't match the number of votes
    """

    if label_separator is None:
        label_separator = ","
    predictions = multivote.predictions

    if ordered and models_per_label == 1:
        # as multi-labeled models are created from end to start votes
        # must be reversed to match. A reversed copy is built so that the
        # caller's multivote keeps its order and repeated calls agree
        predictions = list(reversed(predictions))
    else:
        predictions = [prediction for (_, prediction)
                       in sorted(zip(models_order, predictions),
                                 key=lambda x: x[0])]

    if (labels is None or
            len(labels) * models_per_label != len(predictions)):
        sys.exit("Failed to make a multi-label prediction. No"
                 " valid label info is found.")
    prediction_list = []
    confidence_list = []
    # In the following case, we must vote each label using the models
    # in the ensemble and the chosen method

    if models_per_label > 1:
        label_predictions = [predictions[i: i + models_per_label] for
                             i in range(0, len(predictions),
                                        models_per_label)]
        predictions = []
        for label_prediction in label_predictions:
            label_multivote = MultiVote(label_prediction)
            prediction, confidence = label_multivote.combine(
                method=AGGREGATION, with_confidence=True, options=options)
            predictions.append({'prediction': prediction,
                                'confidence': confidence})
    for vote_index, vote in enumerate(predictions):
        # the vote's prediction is a literal in string form
        if ast.literal_eval(vote['prediction']):
            prediction_list.append(labels[vote_index])
            confidence = str(vote['confidence'])
            confidence_list.append(confidence)
    prediction = [label_separator.join(prediction_list),
                  label_separator.join(confidence_list)]
    return prediction
Example #7
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE,
                with_confidence=False):
        """Gathers the votes of every chunk of models and combines them.

           The method parameter is a numeric key to the following combination
           methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE

           by_name is forwarded to the votes generation and
           with_confidence to the final combination.
        """
        votes = MultiVote([])
        # Each chunk of models is retrieved and used in turn, and its
        # votes are added to the global ones
        for split_ids in self.models_splits:
            split_models = []
            for model_id in split_ids:
                split_models.append(retrieve_model(self.api, model_id))
            split_votes = MultiModel(split_models).generate_votes(
                input_data, by_name=by_name)
            votes.extend(split_votes.predictions)
        return votes.combine(method=method, with_confidence=with_confidence)
Example #8
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE):
        """Asks every model for its prediction and combines them all.

           The method parameter is a numeric key to the following combination
           methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
        """

        votes = MultiVote([])
        for order, model in enumerate(self.models):
            # each model answers with a 4-element sequence
            prediction, confidence, distribution, instances = model.predict(
                input_data, by_name=by_name, with_confidence=True)
            # the model's position is stored as the third item of the row
            votes.append_row(
                [prediction, confidence, order, distribution, instances])

        return votes.combine(method=method)
Example #9
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE):
        """Predicts by combining the votes of the local predict functions.

        :param input_data: Test data to be used as input
        :param by_name: Boolean that is set to True if field_names (as
                        alternative to field ids) are used in the
                        input_data dict
        :param method: numeric key code for the following combination
                       methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
        :return: the result of combining all the votes
        """

        # Format the input once; every predict function receives it
        # unpacked as positional arguments
        formatted_input = self.format_input_data(input_data, by_name=by_name)
        collected_votes = []
        for predict_function in self.predict_functions:
            collected_votes.append(predict_function(*formatted_input))

        votes = MultiVote(collected_votes,
                          boosting_offsets=self.boosting_offsets)

        # Only boosted classifications pass options to the combiner: the
        # categories of the objective field
        is_boosted_classification = (self.boosting is not None
                                     and not self.regression)
        if is_boosted_classification:
            summary = self.fields[self.objective_id]["summary"]
            options = {"categories": [item[0]
                                      for item in summary["categories"]]}
        else:
            options = None

        return votes.combine(method=method, options=options)
Example #10
0
                    len(labels) * models_per_label != len(predictions)):
                sys.exit("Failed to make a multi-label prediction. No"
                         " valid label info is found.")
            prediction_list = []
            confidence_list = []
            # In the following case, we must vote each label using the models
            # in the ensemble and the chosen method

            if models_per_label > 1:
                label_predictions = [predictions[i: i + models_per_label] for
                                     i in range(0, len(predictions),
                                                models_per_label)]
                predictions = []
                for label_prediction in label_predictions:
                    label_multivote = MultiVote(label_prediction)
                    prediction, confidence = label_multivote.combine(
                        method=method, with_confidence=True, options=options)
                    predictions.append({'prediction': prediction,
                                        'confidence': confidence})
            for vote_index in range(0, len(predictions)):
                if ast.literal_eval(predictions[vote_index]['prediction']):
                    prediction_list.append(labels[vote_index])
                    confidence = str(predictions[vote_index]['confidence'])
                    confidence_list.append(confidence)
            prediction = [label_separator.join(prediction_list),
                          label_separator.join(confidence_list)]
        elif method == COMBINATION:
            predictions = multivote.predictions
            global_distribution = []
            for prediction in predictions:
                prediction_category = None
                prediction_instances = 0
Example #11
0
    def predict(self,
                input_data,
                method=None,
                options=None,
                missing_strategy=LAST_PREDICTION,
                operating_point=None,
                operating_kind=None,
                median=False,
                full=False):
        """Makes a prediction based on the prediction made by every model.

        The arguments are checked in this order: `operating_point` first,
        then `operating_kind`, and only when neither applies the votes of
        the models are combined using `method`.

        :param input_data: Test data to be used as input
        :param method: **deprecated**. Please check the `operating_kind`
                       attribute. Numeric key code for the following
                       combination methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
              3 - threshold filtered vote / doesn't apply:
                  THRESHOLD_CODE
        :param options: Options to be used in threshold filtered votes.
                        For boosted classifications they are replaced by
                        the categories of the objective field.
        :param missing_strategy: numeric key for the individual model's
                                 prediction method. See the model predict
                                 method.
        :param operating_point: In classification models, this is the point of
                                the ROC curve where the model will be used at.
                                The operating point can be defined in terms of:
                                  - the positive_class, the class that is
                                    important to predict accurately
                                  - its kind: probability, confidence or voting
                                  - its threshold: the minimum established
                                    for the positive_class to be predicted.
                                    The operating_point is then defined as a
                                    map with three attributes, e.g.:
                                       {"positive_class": "Iris-setosa",
                                        "kind": "probability",
                                        "threshold": 0.5}
        :param operating_kind: "probability", "confidence" or "votes". Sets the
                               property that decides the prediction.
                               Used only if no operating_point is used
        :param median: Uses the median of each individual model's predicted
                       node as individual prediction for the specified
                       combination method.
        :param full: Boolean that controls whether to include the prediction's
                     attributes. By default, only the prediction is produced.
                     If set to True, the rest of available information is
                     added in a dictionary format. The dictionary keys can be:
                      - prediction: the prediction value
                      - confidence: prediction's confidence
                      - probability: prediction's probability
                      - path: rules that lead to the prediction
                      - count: number of training instances supporting the
                               prediction
                      - next: field to check in the next split
                      - min: minimum value of the training instances in the
                             predicted node
                      - max: maximum value of the training instances in the
                             predicted node
                      - median: median of the values of the training instances
                                in the predicted node
                      - unused_fields: list of fields in the input data that
                                       are not being used in the model
        :raises ValueError: when `operating_point` is used in a regression
        """

        # Checks and cleans input_data leaving the fields used in the model
        new_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        unused_fields = None
        # when full is set, the filter result is unpacked as the pair
        # (filtered input data, unused fields)
        if full:
            input_data, unused_fields = new_data
        else:
            input_data = new_data

        # Strips affixes for numeric values and casts to the final field type
        cast(input_data, self.fields)

        if median and method is None:
            # predictions with median are only available with old combiners
            method = PLURALITY_CODE

        if method is None and operating_point is None and \
            operating_kind is None and not median:
            # operating_point has precedence over operating_kind. If no
            # combiner is set, default operating kind is "probability"
            operating_kind = "probability"

        # A given operating_point short-circuits everything below
        if operating_point:
            if self.regression:
                raise ValueError("The operating_point argument can only be"
                                 " used in classifications.")
            prediction = self.predict_operating( \
                input_data,
                missing_strategy=missing_strategy,
                operating_point=operating_point)
            if full:
                return prediction
            return prediction["prediction"]

        if operating_kind:
            if self.regression:
                # for regressions, operating_kind defaults to the old
                # combiners
                method = 1 if operating_kind == "confidence" else 0
                # NOTE(review): `median` is not forwarded in this recursive
                # call, so it falls back to False, and the already filtered
                # and cast input_data is filtered and cast again -- confirm
                # both are intended
                return self.predict( \
                    input_data, method=method,
                    options=options, missing_strategy=missing_strategy,
                    operating_point=None, operating_kind=None, full=full)
            prediction = self.predict_operating_kind( \
                input_data,
                missing_strategy=missing_strategy,
                operating_kind=operating_kind)
            # NOTE(review): returned as is, whatever the value of `full`
            # (the operating_point branch extracts "prediction" instead) --
            # confirm against the predict_operating_kind output
            return prediction

        if len(self.models_splits) > 1:
            # If there's more than one chunk of models, they must be
            # sequentially used to generate the votes for the prediction
            votes = MultiVote([], boosting_offsets=self.boosting_offsets)

            for models_split in self.models_splits:
                models = self._get_models(models_split)
                multi_model = MultiModel(models,
                                         api=self.api,
                                         fields=self.fields)

                votes_split = multi_model._generate_votes(
                    input_data,
                    missing_strategy=missing_strategy,
                    unused_fields=unused_fields)
                # with median, each vote's prediction is replaced by the
                # median stored in that same vote
                if median:
                    for prediction in votes_split.predictions:
                        prediction['prediction'] = prediction['median']
                votes.extend(votes_split.predictions)
        else:
            # When only one group of models is found you use the
            # corresponding multimodel to predict
            votes_split = self.multi_model._generate_votes(
                input_data,
                missing_strategy=missing_strategy,
                unused_fields=unused_fields)

            votes = MultiVote(votes_split.predictions,
                              boosting_offsets=self.boosting_offsets)
            # with median, each vote's prediction is replaced by the
            # median stored in that same vote
            if median:
                for prediction in votes.predictions:
                    prediction['prediction'] = prediction['median']

        # Boosted classifications overwrite any options received with the
        # categories of the objective field
        if self.boosting is not None and not self.regression:
            categories = [ \
                d[0] for d in
                self.fields[self.objective_id]["summary"]["categories"]]
            options = {"categories": categories}
        result = votes.combine(method=method, options=options, full=full)
        if full:
            # unused_fields is recomputed here: an input field is kept only
            # if every vote lists it in its own "unused_fields"
            unused_fields = set(input_data.keys())
            for prediction in votes.predictions:
                unused_fields = unused_fields.intersection( \
                    set(prediction.get("unused_fields", [])))
            # a bare prediction is wrapped so the list can be attached
            if not isinstance(result, dict):
                result = {"prediction": result}
            result['unused_fields'] = list(unused_fields)

        return result
Example #12
0
                sys.exit("Failed to make a multi-label prediction. No"
                         " valid label info is found.")
            prediction_list = []
            confidence_list = []
            # In the following case, we must vote each label using the models
            # in the ensemble and the chosen method

            if models_per_label > 1:
                label_predictions = [
                    predictions[i:i + models_per_label]
                    for i in range(0, len(predictions), models_per_label)
                ]
                predictions = []
                for label_prediction in label_predictions:
                    label_multivote = MultiVote(label_prediction)
                    prediction, confidence = label_multivote.combine(
                        method=method, with_confidence=True, options=options)
                    predictions.append({
                        'prediction': prediction,
                        'confidence': confidence
                    })
            for vote_index in range(0, len(predictions)):
                if ast.literal_eval(predictions[vote_index]['prediction']):
                    prediction_list.append(labels[vote_index])
                    confidence = str(predictions[vote_index]['confidence'])
                    confidence_list.append(confidence)
            prediction = [
                label_separator.join(prediction_list),
                label_separator.join(confidence_list)
            ]
        elif method == COMBINATION:
            predictions = multivote.predictions