def local_predict(models, test_reader, output, args, options=None,
                  exclude=None):
    """Predict every row of `test_reader` locally and write the results.

    When `models` holds exactly one entry it is wrapped in a local `Model`;
    otherwise all entries are wrapped in a local `Ensemble` and the
    combination settings (`args.method`, `options`, `args.median`) are
    forwarded to its `predict` call.

    """
    is_single = len(models) == 1
    predict_args = dict(by_name=test_reader.has_headers(),
                        with_confidence=True,
                        missing_strategy=args.missing_strategy)
    if not is_single:
        predictor = Ensemble(models, max_models=args.max_batch_models)
        predict_args.update(method=args.method,
                            options=options,
                            median=args.median)
    else:
        predictor = Model(models[0])

    for row in test_reader:
        row_by_header = dict(zip(test_reader.raw_headers, row))
        prediction = predictor.predict(row_by_header, **predict_args)
        if is_single and args.median and predictor.tree.regression:
            # median-based output is only handled here for a single
            # regression model: the last element of the result replaces
            # the first one
            prediction[0] = prediction[-1]
        # only the first two elements of the result are written out
        write_prediction(prediction[:2], output,
                         args.prediction_info, row, exclude)
def local_predict(models, test_reader, output, args, options=None,
                  exclude=None):
    """Predict every row of `test_reader` locally and write the results.

    When `models` holds exactly one entry it is wrapped in a local `Model`;
    otherwise all entries are wrapped in a local `Ensemble` and the
    combination settings (`args.method`, `options`, `args.median`) are
    forwarded to its `predict` call.

    """
    is_single = len(models) == 1
    predict_args = dict(by_name=test_reader.has_headers(),
                        with_confidence=True,
                        missing_strategy=args.missing_strategy)
    if not is_single:
        predictor = Ensemble(models, max_models=args.max_batch_models)
        predict_args.update(method=args.method,
                            options=options,
                            median=args.median)
    else:
        predictor = Model(models[0])

    for row in test_reader:
        row_by_header = dict(zip(test_reader.raw_headers, row))
        prediction = predictor.predict(row_by_header, **predict_args)
        if is_single and args.median and predictor.tree.regression:
            # median-based output is only handled here for a single
            # regression model: the last element of the result replaces
            # the first one
            prediction[0] = prediction[-1]
        # only the first two elements of the result are written out
        write_prediction(prediction[:2], output,
                         args.prediction_info, row, exclude)
def local_predict(models, test_reader, output, args, options=None,
                  exclude=None):
    """Predict every row of `test_reader` locally and write the results.

    When `models` holds exactly one entry it is wrapped in a local `Model`;
    otherwise all entries are wrapped in a local `Ensemble` and the
    combination settings (`args.method`, `options`, `args.median`) are
    forwarded to its `predict` call. A truthy `args.operating_point_` is
    forwarded as `operating_point` in both cases.

    """
    is_single = len(models) == 1
    predict_args = dict(full=True, missing_strategy=args.missing_strategy)
    if not is_single:
        predictor = Ensemble(models, max_models=args.max_batch_models,
                             api=args.retrieve_api_)
        predict_args.update(method=args.method,
                            options=options,
                            median=args.median)
    else:
        predictor = Model(models[0], api=args.retrieve_api_)
    if args.operating_point_:
        predict_args["operating_point"] = args.operating_point_

    for row in test_reader:
        row_by_header = dict(zip(test_reader.raw_headers, row))
        prediction = predictor.predict(row_by_header, **predict_args)
        if is_single and args.median and predictor.tree.regression:
            # median-based output is only handled here for a single
            # regression model: the "median" entry replaces "prediction"
            prediction["prediction"] = prediction["median"]
        write_prediction(prediction, output,
                         args.prediction_info, row, exclude)
def local_predict(models, test_reader, output, args, options=None,
                  exclude=None):
    """Predict every row of `test_reader` locally and write the results.

    When `models` holds exactly one entry it is wrapped in a local `Model`;
    otherwise all entries are wrapped in a local `Ensemble` and the
    combination settings (`args.method`, `options`, `args.median`) are
    forwarded to its `predict` call. A truthy `args.operating_point_` is
    forwarded as `operating_point` in both cases.

    """
    is_single = len(models) == 1
    predict_args = dict(full=True, missing_strategy=args.missing_strategy)
    if not is_single:
        predictor = Ensemble(models, max_models=args.max_batch_models,
                             api=args.retrieve_api_)
        predict_args.update(method=args.method,
                            options=options,
                            median=args.median)
    else:
        predictor = Model(models[0], api=args.retrieve_api_)
    if args.operating_point_:
        predict_args["operating_point"] = args.operating_point_

    for row in test_reader:
        row_by_header = dict(zip(test_reader.raw_headers, row))
        prediction = predictor.predict(row_by_header, **predict_args)
        if is_single and args.median and predictor.tree.regression:
            # median-based output is only handled here for a single
            # regression model: the "median" entry replaces "prediction"
            prediction["prediction"] = prediction["median"]
        write_prediction(prediction, output,
                         args.prediction_info, row, exclude)
def test_local_model(model_name):
    """Run a stored local model over its test CSV and log every prediction.

    The model is loaded from the first file matching
    ``MODEL_STORAGE/<model_name>/model_*``. Each row of
    ``DATASET_STORAGE/<model_name>/<model_name>_test.csv`` is predicted,
    echoed to stdout, and appended exactly once to
    ``PREDICT_STORAGE/<model_name>/local_model_result/PREDICT.txt``.

    Raises:
        IndexError: if no model file is found for `model_name`.
    """
    # Create local_model object
    print("Creating local model from file .... ")
    model_files = glob.glob(os.path.join(MODEL_STORAGE, model_name, "model_*"))
    if not model_files:
        # Same exception type as the former bare `[0]` lookup, but with a
        # message that says what is missing.
        raise IndexError(
            "No model file matching 'model_*' found in "
            + os.path.join(MODEL_STORAGE, model_name))
    local_model = Model(model_files[0])

    predict_storage = os.path.join(PREDICT_STORAGE, model_name)
    predict_storage_local = os.path.join(predict_storage, "local_model_result")
    for directory in (predict_storage, predict_storage_local):
        if not os.path.exists(directory):
            print("Creating predict directory .... ")
            os.makedirs(directory)

    print("Start predicting .... ")
    print(" Opening testing data")
    test_data_path = os.path.join(DATASET_STORAGE, model_name, model_name) + "_test.csv"
    result_path = os.path.join(predict_storage_local, "PREDICT.txt")
    separator = "================================="
    with open(test_data_path, 'r') as test_handler, open(result_path, 'w') as fh:
        reader = csv.DictReader(test_handler)
        for counter, input_data in enumerate(reader, 1):
            print(separator)
            print("===== Prediction ", counter, " ========")
            print(separator)
            print("Input testing data : ", input_data)
            predict_result = local_model.predict(input_data)
            print(">> Prediction : ", predict_result, "\n")

            # Build the report for this row only. The previous version kept
            # appending to one ever-growing buffer and wrote the whole
            # buffer on every row, so earlier predictions were duplicated
            # in the output file.
            report = "".join([
                separator + "\n",
                "===== Prediction " + str(counter) + " ========\n",
                separator + "\n",
                "Input testing data : " + str(input_data) + "\n",
                ">> Prediction : " + str(predict_result) + "\n\n",
            ])
            fh.write(report)
class BigMLTreeExtractor(TreeExtractor):
    """Tree extractor backed by a BigML decision-tree model.

    With ``black_box=False`` predictions and leaves come from a local
    `Model` object. With ``black_box=True`` every prediction is an HTTP
    request to the BigML prediction endpoint, and listing leaves is not
    supported.
    """

    def __init__(self, data, epsilon=0.01, rounding=None, black_box=False):
        """Load the model registered under `data` and initialise the base.

        `data` is the key looked up in `models` (white-box) or
        `black_box_models` (black-box) to obtain the BigML model id.
        `epsilon` and `rounding` are passed on to `TreeExtractor.__init__`.
        """
        self.black_box = black_box

        if not self.black_box:
            model_id = models[data]

            # retrieve a model from local storage or from bigml.io
            # (only works for public models)
            try:
                self.model = Model('model/{}'.format(model_id),
                                   api=BigML(storage=STORAGE))
            except ValueError:
                self.model = Model('public/model/{}'.format(model_id),
                                   api=BigML(storage=STORAGE))
            self.leaves = self.model.tree.get_leaves()
        else:
            logging.info('Extracting a Black Box Model')
            self.model_id = black_box_models[data]

            # get the black-box model with the real credentials for sanity
            # checks
            try:
                self.model = Model('model/{}'.format(self.model_id),
                                   api=BigML(username='******',
                                             api_key=BB_KEY))
            except ValueError:
                self.model = Model('public/model/{}'.format(self.model_id),
                                   api=BigML(storage=STORAGE))

            # connection whose auth string is appended to prediction URLs
            self.connection = BigML()

        TreeExtractor.__init__(self, epsilon, rounding)

    def get_classes(self):
        """Return the objective field's category names as strings.

        Returns None implicitly when no field matches the objective id.
        """
        tree = self.model.tree
        for key, val in util.sort_fields(tree.fields):
            if key == tree.objective_id:
                return [str(x[0]) for x in val['summary']['categories']]

    def get_features(self):
        """
        Parse the BigML tree model to get all the features

        Numeric fields become `ContFeature` (with the summary minimum and
        maximum), categorical fields become `CatFeature` (with the sorted
        category names). The objective field is skipped.

        Raises:
            ValueError: for any other field optype.
        """
        features = []
        tree = self.model.tree
        for key, val in util.sort_fields(tree.fields):
            if key and key != tree.objective_id:
                ftype = str(val['optype'])
                if ftype == "numeric":
                    features.append(
                        ContFeature(str(val['name']), key,
                                    val['summary']['minimum'],
                                    val['summary']['maximum']))
                elif ftype == "categorical":
                    categories = sorted(
                        str(name)
                        for (name, _) in val['summary']['categories'])
                    features.append(
                        CatFeature(str(val['name']), key, list(categories)))
                else:
                    raise ValueError("Unknown feature type {}".format(ftype))
        return features

    def make_prediction(self, query):
        """Predict for `query` and return the matching `LeafID`.

        White-box mode queries the local model; black-box mode POSTs the
        query to the BigML prediction endpoint.
        """
        if not self.black_box:
            res = self.model.predict(query, add_confidence=True,
                                     add_distribution=True, add_path=True,
                                     add_next=True)

            # simulate the "fields" information in the prediction response
            features = get_features_on_prediction_path(res['path'])
            features = features.union(set(query.keys()))

            if res['next']:
                features = features.union({str(res['next'])})

            res_id = LeafID(res['prediction'], res['confidence'],
                            self.rounding, features)
            return res_id
        else:
            logging.info('{}: {}'.format(self.queryCount, query))
            headers = {'content-type': 'application/json'}
            url = 'https://bigml.io/prediction' + self.connection.auth

            payload = {
                "model": "public/model/{}".format(self.model_id),
                "input_data": query
            }
            r = requests.post(url, data=json.dumps(payload), headers=headers)

            # Parenthesised single-argument form: prints the same text under
            # the Python 2 print statement and the Python 3 print function.
            print('request took {} ms'.format(
                1000 * r.elapsed.total_seconds()))

            res = r.json()

            # items() instead of iteritems(): works on Python 2 and 3
            fields = [
                str(f['name']) for (k, f) in res['fields'].items()
                if k != res['objective_field']
            ]

            # list(...) so the first value can be indexed on Python 3, where
            # dict.values() returns a view
            res_id = LeafID(list(res['prediction'].values())[0],
                            res['confidence'], self.rounding, fields)
            logging.info('{}'.format(res_id))
            return res_id

    def get_leaves(self):
        """Return ``(LeafID, path)`` pairs for every leaf of the local model.

        Raises:
            NotImplementedError: in black-box mode.
        """
        if not self.black_box:
            paths = [
                parse_path(leaf['path'], self.features)
                for leaf in self.leaves
            ]

            return [(LeafID(leaf['output'], leaf['confidence'],
                            self.rounding, predicate_names(path)), path)
                    for (leaf, path) in zip(self.leaves, paths)]
        else:
            raise NotImplementedError()
def localSinglePred(model, vals1, vals2):
    """Predict locally for a single ("features1", "features2") input.

    `model` is handed to `Model` as given (the original comment calls it a
    model id). `vals1` and `vals2` become the values of the "features1" and
    "features2" input fields.

    Returns the result of `Model.predict`. The prediction used to be
    computed and discarded, so the function always returned None.
    """
    input_data2 = {"features1": vals1, "features2": vals2}
    local_model = Model(model)  # model Id
    return local_model.predict(input_data2)
# Requires BigML Python bindings
#
# Install via: pip install bigml
#
# or clone it:
#   git clone https://github.com/bigmlcom/python.git

from bigml.model import Model
from bigml.api import BigML

# Credentials must not be hard-coded in source. When no username/API key
# is passed, the bindings take them from the BIGML_USERNAME and
# BIGML_API_KEY environment variables.
# NOTE(review): any key that was ever committed in this file should be
# treated as leaked and rotated.
api = BigML(dev_mode=True, domain="bigml.io")

# Downloads and generates a local version of the model, if it
# hasn't been downloaded previously.
model = Model('model/5900dbaf014404467d000811', api=api)

# To make predictions fill the desired input_data
# (e.g. {"petal length": 1, "sepal length": 3})
# as first parameter in next line.
prediction = model.predict({}, with_confidence=True)

# The result is a list of three elements: prediction, confidence and
# distribution
from bigml.api import BigML
from bigml.model import Model

# Credentials must not be hard-coded in source. When no username/API key
# is passed, the bindings take them from the BIGML_USERNAME and
# BIGML_API_KEY environment variables.
# NOTE(review): any key that was ever committed in this file should be
# treated as leaked and rotated.
api = BigML()

# Build source -> dataset -> model remotely, waiting for each resource to
# finish before creating the next one.
source = api.create_source('https://static.bigml.com/csv/diabetes.csv')
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)
model = api.create_model(dataset)
api.ok(model)

# Predict locally with the finished model.
local_model = Model(model)
input_data = {
    "age": 65,
    "bmi": 36,
    "plasma glucose": 180,
    "pregnancies": 3,
}
# Keep the result instead of discarding it.
prediction = local_model.predict(input_data, add_confidence=True)
#!/usr/bin/env python from bigml.api import BigML from bigml.model import Model from bigml.ensemble import Ensemble from bigml.anomaly import Anomaly api = BigML(dev_mode=True) model = api.get_model("model/563a1c7a3cd25747430023ce") prediction = api.create_prediction(model, {"petal length": 4.07, "sepal width": 3.15, "petal width": 1.51}) local_model = Model("model/56430eb8636e1c79b0001f90", api=api) prediction = local_model.predict( {"petal length": 0.96, "sepal width": 4.1, "petal width": 2.52}, 2, add_confidence=True, multiple=3 ) local_model = Ensemble("ensemble/564a02d5636e1c79b5006e13", api=api) local_model = Ensemble("ensemble/564a081bc6c19b6cf3011c60", api=api) prediction = local_model.predict( {"petal length": 0.95, "sepal width": 3.9, "petal width": 1.51, "sepal length": 7.0}, method=2, add_confidence=True ) local_ensemble = Ensemble("ensemble/564623d4636e1c79b00051f7", api=api) prediction = local_ensemble.predict({"Price": 5.8, "Grape": "Pinot Grigio", "Country": "Italy", "Rating": 92}, True) local_anomaly = Anomaly("anomaly/564c5a76636e1c3d52000007", api=api) prediction = local_anomaly.anomaly_score( {"petal length": 4.07, "sepal width": 3.15, "petal width": 1.51, "sepal length": 6.02, "species": "Iris-setosa"}, True, ) prediction = local_anomaly.anomaly_score(
class BigMLTreeExtractor(TreeExtractor):
    """Tree extractor backed by a BigML decision-tree model.

    With ``black_box=False`` predictions and leaves come from a local
    `Model` object. With ``black_box=True`` every prediction is an HTTP
    request to the BigML prediction endpoint, and listing leaves is not
    supported.
    """

    def __init__(self, data, epsilon=0.01, rounding=None, black_box=False):
        """Load the model registered under `data` and initialise the base.

        `data` is the key looked up in `models` (white-box) or
        `black_box_models` (black-box) to obtain the BigML model id.
        `epsilon` and `rounding` are passed on to `TreeExtractor.__init__`.
        """
        self.black_box = black_box

        if not self.black_box:
            model_id = models[data]

            # retrieve a model from local storage or from bigml.io
            # (only works for public models)
            try:
                self.model = Model('model/{}'.format(model_id),
                                   api=BigML(storage=STORAGE))
            except ValueError:
                self.model = Model('public/model/{}'.format(model_id),
                                   api=BigML(storage=STORAGE))
            self.leaves = self.model.tree.get_leaves()
        else:
            logging.info('Extracting a Black Box Model')
            self.model_id = black_box_models[data]

            # get the black-box model with the real credentials for sanity
            # checks
            try:
                self.model = Model('model/{}'.format(self.model_id),
                                   api=BigML(username='******',
                                             api_key=BB_KEY))
            except ValueError:
                self.model = Model('public/model/{}'.format(self.model_id),
                                   api=BigML(storage=STORAGE))

            # connection whose auth string is appended to prediction URLs
            self.connection = BigML()

        TreeExtractor.__init__(self, epsilon, rounding)

    def get_classes(self):
        """Return the objective field's category names as strings.

        Returns None implicitly when no field matches the objective id.
        """
        tree = self.model.tree
        for key, val in util.sort_fields(tree.fields):
            if key == tree.objective_id:
                return [str(x[0]) for x in val['summary']['categories']]

    def get_features(self):
        """
        Parse the BigML tree model to get all the features

        Numeric fields become `ContFeature` (with the summary minimum and
        maximum), categorical fields become `CatFeature` (with the sorted
        category names). The objective field is skipped.

        Raises:
            ValueError: for any other field optype.
        """
        features = []
        tree = self.model.tree
        for key, val in util.sort_fields(tree.fields):
            if key and key != tree.objective_id:
                ftype = str(val['optype'])
                if ftype == "numeric":
                    features.append(
                        ContFeature(str(val['name']), key,
                                    val['summary']['minimum'],
                                    val['summary']['maximum']))
                elif ftype == "categorical":
                    categories = sorted(
                        str(name)
                        for (name, _) in val['summary']['categories'])
                    features.append(
                        CatFeature(str(val['name']), key, list(categories)))
                else:
                    raise ValueError("Unknown feature type {}".format(ftype))
        return features

    def make_prediction(self, query):
        """Predict for `query` and return the matching `LeafID`.

        White-box mode queries the local model; black-box mode POSTs the
        query to the BigML prediction endpoint.
        """
        if not self.black_box:
            res = self.model.predict(query, add_confidence=True,
                                     add_distribution=True, add_path=True,
                                     add_next=True)

            # simulate the "fields" information in the prediction response
            features = get_features_on_prediction_path(res['path'])
            features = features.union(set(query.keys()))

            if res['next']:
                features = features.union({str(res['next'])})

            res_id = LeafID(res['prediction'], res['confidence'],
                            self.rounding, features)
            return res_id
        else:
            logging.info('{}: {}'.format(self.queryCount, query))
            headers = {'content-type': 'application/json'}
            url = 'https://bigml.io/prediction' + self.connection.auth

            payload = {
                "model": "public/model/{}".format(self.model_id),
                "input_data": query
            }
            r = requests.post(url, data=json.dumps(payload), headers=headers)

            # Parenthesised single-argument form: prints the same text under
            # the Python 2 print statement and the Python 3 print function.
            print('request took {} ms'.format(
                1000 * r.elapsed.total_seconds()))

            res = r.json()

            # items() instead of iteritems(): works on Python 2 and 3
            fields = [
                str(f['name']) for (k, f) in res['fields'].items()
                if k != res['objective_field']
            ]

            # list(...) so the first value can be indexed on Python 3, where
            # dict.values() returns a view
            res_id = LeafID(list(res['prediction'].values())[0],
                            res['confidence'], self.rounding, fields)
            logging.info('{}'.format(res_id))
            return res_id

    def get_leaves(self):
        """Return ``(LeafID, path)`` pairs for every leaf of the local model.

        Raises:
            NotImplementedError: in black-box mode.
        """
        if not self.black_box:
            paths = [
                parse_path(leaf['path'], self.features)
                for leaf in self.leaves
            ]

            return [(LeafID(leaf['output'], leaf['confidence'],
                            self.rounding, predicate_names(path)), path)
                    for (leaf, path) in zip(self.leaves, paths)]
        else:
            raise NotImplementedError()