def local_prediction(models, test_reader, output, args, exclude=None):
    """Build a local model from the first resource in ``models`` and use
    it to predict every row produced by ``test_reader``, writing each
    prediction to ``output``.

    :param models: list of model resources (only the first one is used)
    :param test_reader: iterable of test input rows
    :param output: destination the predictions are written to
    :param args: parsed command-line arguments object
    :param exclude: optional fields to exclude when writing predictions
    """
    # Only one model at present
    local_model = SupervisedModel(models[0], api=args.retrieve_api_)
    prediction_args = {"full": True}
    if has_value(args.operating_point_):
        prediction_args["operating_point"] = args.operating_point_
    for input_data in test_reader:
        row = test_reader.dict(input_data, filtering=False)
        prediction_info = local_model.predict(row, **prediction_args)
        write_prediction(prediction_info, output,
                         args.prediction_info, input_data, exclude)
def predict_probability(self, input_data, missing_strategy=LAST_PREDICTION,
                        compact=False):
    """Predict a probability for each possible output class by combining
    the component models' predictions.

    The input fields must be a dictionary keyed by field name or field
    ID. For regressions, the output is a single averaged prediction.

    :param input_data: Input data to be predicted
    :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy
                             for missing fields
    :param compact: If False, classification output is a list of maps,
                    one per class, with "prediction" and "probability"
                    keys; if True, a bare list of probabilities ordered
                    by the sorted class names. For regressions, a map or
                    a single-element list respectively.
    """
    votes = MultiVoteList([])
    if not self.missing_numerics:
        check_no_missing_numerics(input_data, self.fields)

    for models_split in self.models_splits:
        # Instantiate the local component models for this split.
        local_models = [
            Fusion(model, api=self.api)
            if get_resource_type(model) == "fusion"
            else SupervisedModel(model, api=self.api)
            for model in models_split]

        split_predictions = []
        for local_model in local_models:
            try:
                prediction = local_model.predict_probability(
                    input_data,
                    missing_strategy=missing_strategy,
                    compact=True)
            except ValueError:
                # logistic regressions can raise this error if they
                # have missing_numerics=False and some numeric missings
                # are found
                continue
            if self.regression:
                # compact regression predictions are one-element lists
                prediction = prediction[0]
            if self.weights is not None:
                prediction = self.weigh(prediction, local_model.resource_id)
            if not self.regression and \
                    self.class_names != local_model.class_names:
                # we need to check that all classes in the fusion
                # are also in the composing model
                try:
                    prediction = rearrange_prediction(
                        local_model.class_names,
                        self.class_names,
                        prediction)
                except AttributeError:
                    # class_names should be defined, but just in case
                    pass
            split_predictions.append(prediction)
        votes.extend(split_predictions)

    if self.regression:
        # Weighted (or plain) average of the numeric predictions.
        if self.weights is None:
            total_weight = len(votes.predictions)
        else:
            total_weight = sum(self.weights)
        prediction = sum(votes.predictions) / float(total_weight)
        output = [prediction] if compact else {"prediction": prediction}
    else:
        output = votes.combine_to_distribution(normalize=True)
        if not compact:
            output = [
                {'category': class_name, 'probability': probability}
                for class_name, probability
                in zip(self.class_names, output)]
    return output
def __init__(self, fusion, api=None, max_models=None):
    """Build a local Fusion from a fusion resource ID or structure.

    :param fusion: fusion resource ID or resource dict
    :param api: BigML API connection (a default, storage-backed one is
                created when None)
    :param max_models: maximum number of component models handled per
                       split (None means a single split with all models)
    :raises ValueError: if any component model type is not supported
                        locally
    """
    if api is None:
        self.api = BigML(storage=STORAGE)
    else:
        self.api = api
    self.resource_id = None
    # BUG FIX: this attribute was misspelled `models_ids`; the rest of
    # the constructor reads/writes `model_ids`.
    self.model_ids = None
    self.objective_id = None
    self.distribution = None
    self.models_splits = []
    self.cache_get = None
    self.regression = False
    self.fields = None
    self.class_names = None
    self.importance = {}
    self.resource_id, fusion = get_resource_dict( \
        fusion, "fusion", api=self.api)

    if 'object' in fusion:
        fusion = fusion.get('object', {})
    self.model_ids, self.weights = get_models_weight( \
        fusion['models'])
    model_types = [get_resource_type(model) for model in self.model_ids]

    # Reject any component whose type has no local implementation.
    for model_id, model_type in zip(self.model_ids, model_types):
        if model_type not in LOCAL_SUPERVISED:
            # BUG FIX: the %s placeholder was never substituted, so the
            # raised message literally contained "%s".
            raise ValueError("The resource %s has not an allowed"
                             " supervised model type." % model_id)
    self.importance = fusion.get('importance', [])
    self.missing_numerics = fusion.get('missing_numerics', True)

    if fusion.get('fusion'):
        self.fields = fusion.get( \
            'fusion', {}).get("fields")
        self.objective_id = fusion.get("objective_field")

    number_of_models = len(self.model_ids)

    # Downloading the model information to cache it
    if self.api.storage is not None:
        for model_id in self.model_ids:
            if get_resource_type(model_id) == "fusion":
                Fusion(model_id, api=self.api)
            else:
                SupervisedModel(model_id, api=self.api)

    if max_models is None:
        self.models_splits = [self.model_ids]
    else:
        self.models_splits = [self.model_ids[index:(index + max_models)]
                              for index in range(0, number_of_models,
                                                 max_models)]

    if self.fields:
        summary = self.fields[self.objective_id]['summary']
        if 'bins' in summary:
            distribution = summary['bins']
        elif 'counts' in summary:
            distribution = summary['counts']
        elif 'categories' in summary:
            distribution = summary['categories']
        else:
            distribution = []
        self.distribution = distribution

    # NOTE(review): assumes `self.fields` is populated by this point —
    # a fusion resource lacking a 'fusion' section would fail here.
    self.regression = \
        self.fields[self.objective_id].get('optype') == 'numeric'

    if not self.regression:
        objective_field = self.fields[self.objective_id]
        categories = objective_field['summary']['categories']
        classes = [category[0] for category in categories]
        self.class_names = sorted(classes)
        self.objective_categories = [category for \
            category, _ in self.fields[self.objective_id][ \
            "summary"]["categories"]]

    ModelFields.__init__( \
        self, self.fields,
        objective_id=self.objective_id)
def create_local_supervised_ensemble(step):
    """Store a local SupervisedModel for the current ensemble in ``world``,
    plus a local Model built from its first component model.
    """
    ensemble = SupervisedModel(world.ensemble_id, world.api)
    world.local_ensemble = ensemble
    world.local_model = Model(ensemble.model_ids[0], world.api)
def i_create_a_local_supervised_model(step, model_type=None):
    """Store a local SupervisedModel in ``world.local_model``, built from
    ``world.model`` or from the ``world`` attribute named by ``model_type``.
    """
    model = world.model if model_type is None else getattr(world, model_type)
    world.local_model = SupervisedModel(model)