Ejemplo n.º 1
0
def local_prediction(models, test_reader, output, args, exclude=None):
    """Predict each row of the test reader with a local supervised model.

    Only the first entry of `models` is used to build the local model.
    Every row read from `test_reader` is converted with
    `test_reader.dict(..., filtering=False)`, predicted with `full=True`
    (adding the operating point when `args.operating_point_` has a value)
    and passed to `write_prediction` together with the original row,
    `output`, `args.prediction_info` and `exclude`.

    """
    # Only one model at present
    predictor = SupervisedModel(models[0], api=args.retrieve_api_)

    predict_options = {"full": True}
    if has_value(args.operating_point_):
        predict_options["operating_point"] = args.operating_point_

    for row in test_reader:
        row_as_dict = test_reader.dict(row, filtering=False)
        result = predictor.predict(row_as_dict, **predict_options)
        write_prediction(result, output, args.prediction_info, row, exclude)
Ejemplo n.º 2
0
    def predict_probability(self, input_data,
                            missing_strategy=LAST_PREDICTION,
                            compact=False):
        """Combines the predictions of the composing models for input_data.

        Each composing model is asked for its compact
        `predict_probability` output; the results are optionally weighted
        with `self.weigh` and then combined.

        :param input_data: Input data to be predicted
        :param missing_strategy: Missing strategy forwarded to every
                                 composing model (defaults to
                                 LAST_PREDICTION)
        :param compact: Selects the shape of the returned value.

                        Classification, compact=False: a list of maps, one
                        per entry of `self.class_names`, with the keys
                        "category" and "probability".
                        Classification, compact=True: the value returned by
                        `votes.combine_to_distribution(normalize=True)`.
                        Regression, compact=False: a map with the single
                        key "prediction".
                        Regression, compact=True: a single element list
                        containing the prediction.
        """
        votes = MultiVoteList([])
        if not self.missing_numerics:
            check_no_missing_numerics(input_data, self.fields)

        for models_split in self.models_splits:
            # Local models are instantiated one split at a time
            models = []
            for model in models_split:
                if get_resource_type(model) == "fusion":
                    models.append(Fusion(model, api=self.api))
                else:
                    models.append(SupervisedModel(model, api=self.api))

            votes_split = []
            for model in models:
                try:
                    prediction = model.predict_probability(
                        input_data,
                        missing_strategy=missing_strategy,
                        compact=True)
                except ValueError:
                    # logistic regressions can raise this error if they
                    # have missing_numerics=False and some numeric missings
                    # are found. The model is left out of the vote.
                    continue

                if self.regression:
                    # compact regression output is a single element list
                    prediction = prediction[0]
                if self.weights is not None:
                    prediction = self.weigh(prediction, model.resource_id)
                # we need to check that all classes in the fusion
                # are also in the composing model
                if not self.regression and \
                        self.class_names != model.class_names:
                    try:
                        prediction = rearrange_prediction(
                            model.class_names,
                            self.class_names,
                            prediction)
                    except AttributeError:
                        # class_names should be defined, but just in case
                        pass
                votes_split.append(prediction)

            votes.extend(votes_split)

        if self.regression:
            # NOTE(review): when every model was skipped above and there are
            # no weights, total_weight is 0 and the division raises
            # ZeroDivisionError -- confirm whether callers expect that.
            total_weight = len(votes.predictions) if self.weights is None \
                else sum(self.weights)
            prediction = sum(votes.predictions) / float(total_weight)
            if compact:
                output = [prediction]
            else:
                output = {"prediction": prediction}
        else:
            output = votes.combine_to_distribution(normalize=True)
            if not compact:
                output = [{'category': class_name,
                           'probability': probability}
                          for class_name, probability in
                          zip(self.class_names, output)]

        return output
Ejemplo n.º 3
0
    def __init__(self, fusion, api=None, max_models=None):
        """Builds the local fusion from a fusion resource.

        :param fusion: Fusion reference handed to `get_resource_dict`
                       together with the "fusion" resource type
        :param api: Connection used to retrieve the fusion and its
                    composing models. When None, a `BigML(storage=STORAGE)`
                    connection is created
        :param max_models: Maximum number of model ids per split in
                           `self.models_splits`. When None, all the model
                           ids are kept in a single split
        :raises ValueError: if the type of any composing model is not in
                            LOCAL_SUPERVISED
        """

        if api is None:
            self.api = BigML(storage=STORAGE)
        else:
            self.api = api
        self.resource_id = None
        # NOTE(review): `models_ids` is never assigned again in this
        # constructor; the attribute filled below is `model_ids`. Kept in
        # case external code reads it.
        self.models_ids = None
        self.objective_id = None
        self.distribution = None
        self.models_splits = []
        self.cache_get = None
        self.regression = False
        self.fields = None
        self.class_names = None
        self.importance = {}

        self.resource_id, fusion = get_resource_dict(
            fusion, "fusion", api=self.api)

        if 'object' in fusion:
            fusion = fusion.get('object', {})
        self.model_ids, self.weights = get_models_weight(fusion['models'])

        for model_id in self.model_ids:
            if get_resource_type(model_id) not in LOCAL_SUPERVISED:
                # the message placeholder is filled with the offending model
                raise ValueError("The resource %s has not an allowed"
                                 " supervised model type." % (model_id,))
        self.importance = fusion.get('importance', [])
        self.missing_numerics = fusion.get('missing_numerics', True)
        if fusion.get('fusion'):
            self.fields = fusion.get('fusion', {}).get("fields")
            self.objective_id = fusion.get("objective_field")

        number_of_models = len(self.model_ids)

        # Downloading the model information to cache it
        if self.api.storage is not None:
            for model_id in self.model_ids:
                if get_resource_type(model_id) == "fusion":
                    Fusion(model_id, api=self.api)
                else:
                    SupervisedModel(model_id, api=self.api)

        if max_models is None:
            self.models_splits = [self.model_ids]
        else:
            self.models_splits = [self.model_ids[index:(index + max_models)]
                                  for index
                                  in range(0, number_of_models, max_models)]

        if self.fields:
            # The distribution is taken from the first summary key found
            # among 'bins', 'counts' and 'categories'
            summary = self.fields[self.objective_id]['summary']
            if 'bins' in summary:
                distribution = summary['bins']
            elif 'counts' in summary:
                distribution = summary['counts']
            elif 'categories' in summary:
                distribution = summary['categories']
            else:
                distribution = []
            self.distribution = distribution

        # NOTE(review): when the resource has no 'fusion' entry, self.fields
        # is still None here and this subscript raises TypeError.
        objective_field = self.fields[self.objective_id]
        self.regression = objective_field.get('optype') == 'numeric'

        if not self.regression:
            categories = objective_field['summary']['categories']
            # categories in the order given by the summary
            self.objective_categories = [category for category, _
                                         in categories]
            self.class_names = sorted(self.objective_categories)

        ModelFields.__init__(
            self, self.fields,
            objective_id=self.objective_id)
0
def create_local_supervised_ensemble(step):
    """Store a local ensemble and a local model built from its first id.

    The ensemble is created through `SupervisedModel` from
    `world.ensemble_id`; the model is created with `Model` from the first
    entry of the ensemble's `model_ids`. Both use `world.api`.
    """
    ensemble = SupervisedModel(world.ensemble_id, world.api)
    world.local_ensemble = ensemble
    first_model_id = ensemble.model_ids[0]
    world.local_model = Model(first_model_id, world.api)
Ejemplo n.º 5
0
def i_create_a_local_supervised_model(step, model_type=None):
    """Store in `world.local_model` a SupervisedModel for a world resource.

    The resource is `world.model` when `model_type` is None; otherwise it
    is the attribute of `world` named by `model_type`.
    """
    resource = (world.model if model_type is None
                else getattr(world, model_type))
    world.local_model = SupervisedModel(resource)