Esempio n. 1
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE,
                with_confidence=False, options=None):
        """Makes a prediction based on the prediction made by every model.

           The method parameter is a numeric key to the following combination
           methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
              3 - threshold filtered vote / doesn't apply:
                  THRESHOLD_CODE
        """

        if len(self.models_splits) > 1:
            # If there's more than one chunck of models, they must be
            # sequentially used to generate the votes for the prediction
            votes = MultiVote([])
            for models_split in self.models_splits:
                models = [retrieve_resource(self.api, model_id,
                                            query_string=ONLY_MODEL)
                          for model_id in models_split]
                multi_model = MultiModel(models, api=self.api)
                votes_split = multi_model.generate_votes(input_data,
                                                         by_name=by_name)
                votes.extend(votes_split.predictions)
        else:
            # When only one group of models is found you use the
            # corresponding multimodel to predict
            votes_split = self.multi_model.generate_votes(input_data,
                                                          by_name=by_name)
            votes = MultiVote(votes_split.predictions)
        return votes.combine(method=method, with_confidence=with_confidence,
                             options=options)
Esempio n. 2
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE,
                with_confidence=False, options=None):
        """Makes a prediction based on the prediction made by every model.

           The method parameter is a numeric key to the following combination
           methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
              3 - threshold filtered vote / doesn't apply:
                  THRESHOLD_CODE
        """

        if len(self.models_splits) > 1:
            # If there's more than one chunck of models, they must be
            # sequentially used to generate the votes for the prediction
            votes = MultiVote([])
            for models_split in self.models_splits:
                models = [retrieve_resource(self.api, model_id,
                                            query_string=ONLY_MODEL)
                          for model_id in models_split]
                multi_model = MultiModel(models, api=self.api)
                votes_split = multi_model.generate_votes(input_data,
                                                         by_name=by_name)
                votes.extend(votes_split.predictions)
        else:
            # When only one group of models is found you use the
            # corresponding multimodel to predict
            votes_split = self.multi_model.generate_votes(input_data,
                                                          by_name=by_name)
            votes = MultiVote(votes_split.predictions)
        return votes.combine(method=method, with_confidence=with_confidence,
                             options=options)
Esempio n. 3
0
    def predict(self, input_data, by_name=True, method=PLURALITY_CODE,
                with_confidence=False):
        """Makes a prediction based on the prediction made by every model.

           The method parameter is a numeric key to the following combination
           methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
        """
        votes = MultiVote([])
        for models_split in self.models_splits:
            models = [retrieve_model(self.api, model_id)
                      for model_id in models_split]
            multi_model = MultiModel(models)
            votes_split = multi_model.generate_votes(input_data,
                                                     by_name=by_name)
            votes.extend(votes_split.predictions)
        return votes.combine(method=method, with_confidence=with_confidence)
Esempio n. 4
0
    def predict(self,
                input_data,
                method=None,
                options=None,
                missing_strategy=LAST_PREDICTION,
                operating_point=None,
                operating_kind=None,
                median=False,
                full=False):
        """Makes a prediction based on the prediction made by every model.

        :param input_data: Test data to be used as input
        :param method: **deprecated**. Please check the `operating_kind`
                       attribute. Numeric key code for the following
                       combination methods in classifications/regressions:
              0 - majority vote (plurality)/ average: PLURALITY_CODE
              1 - confidence weighted majority vote / error weighted:
                  CONFIDENCE_CODE
              2 - probability weighted majority vote / average:
                  PROBABILITY_CODE
              3 - threshold filtered vote / doesn't apply:
                  THRESHOLD_CODE
        :param options: Options to be used in threshold filtered votes.
        :param missing_strategy: numeric key for the individual model's
                                 prediction method. See the model predict
                                 method.
        :param operating_point: In classification models, this is the point of
                                the ROC curve where the model will be used at.
                                The operating point can be defined in terms of:
                                  - the positive_class, the class that is
                                    important to predict accurately
                                  - its kind: probability, confidence or voting
                                  - its threshold: the minimum established
                                    for the positive_class to be predicted.
                                    The operating_point is then defined as a
                                    map with three attributes, e.g.:
                                       {"positive_class": "Iris-setosa",
                                        "kind": "probability",
                                        "threshold": 0.5}
        :param operating_kind: "probability", "confidence" or "votes". Sets the
                               property that decides the prediction.
                               Used only if no operating_point is used
        :param median: Uses the median of each individual model's predicted
                       node as individual prediction for the specified
                       combination method.
        :param full: Boolean that controls whether to include the prediction's
                     attributes. By default, only the prediction is produced.
                     If set to True, the rest of available information is
                     added in a dictionary format. The dictionary keys can be:
                      - prediction: the prediction value
                      - confidence: prediction's confidence
                      - probability: prediction's probability
                      - path: rules that lead to the prediction
                      - count: number of training instances supporting the
                               prediction
                      - next: field to check in the next split
                      - min: minim value of the training instances in the
                             predicted node
                      - max: maximum value of the training instances in the
                             predicted node
                      - median: median of the values of the training instances
                                in the predicted node
                      - unused_fields: list of fields in the input data that
                                       are not being used in the model
        """

        # Checks and cleans input_data leaving the fields used in the model
        new_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        unused_fields = None
        if full:
            input_data, unused_fields = new_data
        else:
            input_data = new_data

        # Strips affixes for numeric values and casts to the final field type
        cast(input_data, self.fields)

        if median and method is None:
            # predictions with median are only available with old combiners
            method = PLURALITY_CODE

        if method is None and operating_point is None and \
            operating_kind is None and not median:
            # operating_point has precedence over operating_kind. If no
            # combiner is set, default operating kind is "probability"
            operating_kind = "probability"

        if operating_point:
            if self.regression:
                raise ValueError("The operating_point argument can only be"
                                 " used in classifications.")
            prediction = self.predict_operating( \
                input_data,
                missing_strategy=missing_strategy,
                operating_point=operating_point)
            if full:
                return prediction
            return prediction["prediction"]

        if operating_kind:
            if self.regression:
                # for regressions, operating_kind defaults to the old
                # combiners
                method = 1 if operating_kind == "confidence" else 0
                return self.predict( \
                    input_data, method=method,
                    options=options, missing_strategy=missing_strategy,
                    operating_point=None, operating_kind=None, full=full)
            prediction = self.predict_operating_kind( \
                input_data,
                missing_strategy=missing_strategy,
                operating_kind=operating_kind)
            return prediction

        if len(self.models_splits) > 1:
            # If there's more than one chunk of models, they must be
            # sequentially used to generate the votes for the prediction
            votes = MultiVote([], boosting_offsets=self.boosting_offsets)

            for models_split in self.models_splits:
                models = self._get_models(models_split)
                multi_model = MultiModel(models,
                                         api=self.api,
                                         fields=self.fields)

                votes_split = multi_model._generate_votes(
                    input_data,
                    missing_strategy=missing_strategy,
                    unused_fields=unused_fields)
                if median:
                    for prediction in votes_split.predictions:
                        prediction['prediction'] = prediction['median']
                votes.extend(votes_split.predictions)
        else:
            # When only one group of models is found you use the
            # corresponding multimodel to predict
            votes_split = self.multi_model._generate_votes(
                input_data,
                missing_strategy=missing_strategy,
                unused_fields=unused_fields)

            votes = MultiVote(votes_split.predictions,
                              boosting_offsets=self.boosting_offsets)
            if median:
                for prediction in votes.predictions:
                    prediction['prediction'] = prediction['median']

        if self.boosting is not None and not self.regression:
            categories = [ \
                d[0] for d in
                self.fields[self.objective_id]["summary"]["categories"]]
            options = {"categories": categories}
        result = votes.combine(method=method, options=options, full=full)
        if full:
            unused_fields = set(input_data.keys())
            for prediction in votes.predictions:
                unused_fields = unused_fields.intersection( \
                    set(prediction.get("unused_fields", [])))
            if not isinstance(result, dict):
                result = {"prediction": result}
            result['unused_fields'] = list(unused_fields)

        return result