Esempio n. 1
0
def i_create_a_proportional_local_prediction(step, data=None):
    """Step: predict locally using the proportional missing strategy.

    Parses the JSON-encoded ``data`` (defaulting to an empty object),
    asks the shared local model for a full prediction with
    ``missing_strategy=1`` (proportional), and stores the result cast to
    list form (with confidence) in ``world.local_prediction``.
    """
    input_data = json.loads(data if data is not None else "{}")
    prediction = world.local_model.predict(
        input_data, missing_strategy=1, full=True)
    world.local_prediction = cast_prediction(
        prediction, to="list", confidence=True)
Esempio n. 2
0
    def batch_predict(self,
                      input_data_list,
                      output_file_path=None,
                      reuse=False,
                      missing_strategy=LAST_PREDICTION,
                      headers=None,
                      to_file=True,
                      use_median=False):
        """Makes predictions for a list of input data.

           When the to_file argument is set to True, the predictions
           generated for each model are stored in an output
           file. The name of the file will use the following syntax:
                model_[id of the model]__predictions.csv
           For instance, when using model/50c0de043b563519830001c2 to predict,
           the output file name will be
                model_50c0de043b563519830001c2__predictions.csv
            On the contrary, if it is False, the function returns a list
            of MultiVote objects with the model's predictions.

            :param input_data_list: list of input data dicts, or list of
                rows (lists) when `headers` is also given
            :param output_file_path: directory for the per-model output
                files (only used when `to_file` is True)
            :param reuse: skip models whose predictions file already exists
            :param missing_strategy: strategy each model applies to
                missing input fields
            :param headers: field names matching each row when
                `input_data_list` contains lists
            :param to_file: write CSV files (True) or return MultiVote
                objects (False)
            :param use_median: for regression models, use the median
                instead of the mean prediction
            :raises ValueError: on empty or inconsistent input data
        """
        if not input_data_list:
            # Previously this crashed with IndexError on the [0] access
            # below; fail with an explicit message instead.
            raise ValueError("The input data list is empty.")
        add_headers = (isinstance(input_data_list[0], list)
                       and headers is not None
                       and len(headers) == len(input_data_list[0]))
        if not add_headers and not isinstance(input_data_list[0], dict):
            raise ValueError("Input data list is not a dictionary or the"
                             " headers and input data information are not"
                             " consistent.")
        if not to_file:
            votes = []

        for model in self.models:
            out = None
            if to_file:
                output_file = get_predictions_file_name(
                    model.resource_id, output_file_path)
                if reuse:
                    # EAFP: if a readable predictions file is already
                    # there, skip recomputing this model's predictions.
                    try:
                        open(output_file).close()
                        continue
                    except IOError:
                        pass
                try:
                    out = UnicodeWriter(output_file)
                except IOError:
                    raise Exception("Cannot find %s directory." %
                                    output_file_path)

            if out:
                out.open_writer()
            try:
                for index, input_data in enumerate(input_data_list):
                    if add_headers:
                        # Build a dict from the headers and the row values.
                        input_data = dict(zip(headers, input_data))
                    prediction = model.predict(
                        input_data,
                        missing_strategy=missing_strategy,
                        full=True)
                    if model.regression and use_median:
                        # The median can be a more robust estimator than
                        # the mean for regression predictions.
                        prediction["prediction"] = prediction["median"]
                    if to_file:
                        prediction = cast_prediction(prediction,
                                                     to="list",
                                                     confidence=True,
                                                     distribution=True,
                                                     count=True)
                        out.writerow(prediction)
                    else:
                        if len(votes) <= index:
                            votes.append(MultiVote([]))
                        votes[index].append(prediction)
            finally:
                # Close the writer even if a prediction or write fails,
                # so the output file handle is never leaked.
                if out:
                    out.close_writer()
        if not to_file:
            return votes