Exemple #1
0
def local_predict(models, test_reader, output, args, options=None,
                  exclude=None):
    """Predict each row of `test_reader` locally and write the results.

    One entry in `models` is wrapped in a local `Model`; several entries
    are combined through a local `Ensemble`, which additionally receives
    the combination `method`, `options` and `median` settings. The first
    two elements of every prediction are passed to `write_prediction`
    along with the raw input row.

    """
    is_single = len(models) == 1
    predict_kwargs = dict(
        by_name=test_reader.has_headers(),
        with_confidence=True,
        missing_strategy=args.missing_strategy)
    if is_single:
        predictor = Model(models[0])
    else:
        predictor = Ensemble(models, max_models=args.max_batch_models)
        predict_kwargs["method"] = args.method
        predict_kwargs["options"] = options
        predict_kwargs["median"] = args.median
    for row in test_reader:
        row_by_header = dict(zip(test_reader.raw_headers, row))
        result = predictor.predict(row_by_header, **predict_kwargs)
        # Only a single regression model can report its median: the last
        # element of the result then replaces the predicted value.
        use_median = is_single and args.median and predictor.tree.regression
        if use_median:
            result[0] = result[-1]
        write_prediction(result[0:2], output, args.prediction_info, row,
                         exclude)
Exemple #2
0
def local_predict(models, test_reader, output, args, options=None,
                  exclude=None):
    """Run local predictions for all the test rows and store them.

    Uses a local `Model` when `models` holds exactly one element and a
    local `Ensemble` otherwise. Every row read from `test_reader` is
    keyed by `test_reader.raw_headers` before being predicted.

    """
    single_model = len(models) == 1
    by_name = test_reader.has_headers()
    settings = {}
    settings["by_name"] = by_name
    settings["with_confidence"] = True
    settings["missing_strategy"] = args.missing_strategy
    if not single_model:
        local_model = Ensemble(models, max_models=args.max_batch_models)
        # settings that only make sense when combining several models
        settings.update(method=args.method, options=options,
                        median=args.median)
    else:
        local_model = Model(models[0])
    for fields in test_reader:
        named_fields = dict(zip(test_reader.raw_headers, fields))
        prediction = local_model.predict(named_fields, **settings)
        if single_model and args.median and local_model.tree.regression:
            # a single regression model can answer with the median, which
            # is found at the end of the prediction
            prediction[0] = prediction[-1]
        head = prediction[0: 2]
        write_prediction(head, output, args.prediction_info, fields, exclude)
Exemple #3
0
def local_predict(models, test_reader, output, args, options=None,
                  exclude=None):
    """Produce local predictions for the test rows and write them out.

    The predictor is a local `Model` for a single entry in `models` and a
    local `Ensemble` for several; both are built with
    `args.retrieve_api_`. Predictions are requested with `full=True` and
    are handled as mappings below.

    """
    single_model = len(models) == 1
    predict_options = {}
    predict_options["full"] = True
    predict_options["missing_strategy"] = args.missing_strategy
    if not single_model:
        local_model = Ensemble(models, max_models=args.max_batch_models,
                               api=args.retrieve_api_)
        predict_options.update(method=args.method, options=options,
                               median=args.median)
    else:
        local_model = Model(models[0], api=args.retrieve_api_)
    if args.operating_point_:
        # forwarded only when an operating point has been set
        predict_options["operating_point"] = args.operating_point_

    for fields in test_reader:
        named_fields = dict(zip(test_reader.raw_headers, fields))
        prediction = local_model.predict(named_fields, **predict_options)
        if single_model and args.median and local_model.tree.regression:
            # single regression models may report the median as the
            # predicted value
            prediction["prediction"] = prediction["median"]
        write_prediction(prediction, output, args.prediction_info, fields,
                         exclude)
Exemple #4
0
def local_predict(models, test_reader, output, args, options=None,
                  exclude=None):
    """Predict the rows of `test_reader` with local models.

    A lone model is loaded as a `Model`, several models as an `Ensemble`
    (which also gets the `method`, `options` and `median` settings). Each
    resulting prediction is passed to `write_prediction` together with
    the input row it came from.

    """
    only_one = len(models) == 1
    params = dict(full=True, missing_strategy=args.missing_strategy)
    if only_one:
        predictor = Model(models[0], api=args.retrieve_api_)
    else:
        predictor = Ensemble(models,
                             max_models=args.max_batch_models,
                             api=args.retrieve_api_)
        params.update(method=args.method,
                      options=options,
                      median=args.median)
    if args.operating_point_:
        params["operating_point"] = args.operating_point_

    for row in test_reader:
        row_by_header = dict(zip(test_reader.raw_headers, row))
        result = predictor.predict(row_by_header, **params)
        # the median may stand in for the prediction only when a single
        # regression model is used
        swap_in_median = (only_one and args.median
                          and predictor.tree.regression)
        if swap_in_median:
            result["prediction"] = result["median"]
        write_prediction(result, output, args.prediction_info, row, exclude)
def test_local_model(model_name):
    """Predict every row of a model's test CSV with a local model.

    The model is loaded from the first file matching ``model_*`` under
    ``MODEL_STORAGE/<model_name>``. Rows are read from
    ``DATASET_STORAGE/<model_name>/<model_name>_test.csv`` and each
    prediction is printed and appended to
    ``PREDICT_STORAGE/<model_name>/local_model_result/PREDICT.txt``.

    Raises IndexError when no model file is found.
    """
    # Create local_model object
    print("Creating local model from file .... ")
    model_dir = os.path.join(MODEL_STORAGE, model_name)
    model_files = glob.glob(os.path.join(model_dir, "model_*"))
    if not model_files:
        # Same exception type as indexing the empty list, but with a message
        # that says what is missing.
        raise IndexError("no 'model_*' file found in " + model_dir)
    local_model = Model(model_files[0])

    predict_storage = os.path.join(PREDICT_STORAGE, model_name)
    predict_storage_local = os.path.join(predict_storage, "local_model_result")
    for directory in (predict_storage, predict_storage_local):
        if not os.path.exists(directory):
            print("Creating predict directory .... ")
            os.makedirs(directory)

    print("Start predicting .... ")
    print("    Opening testing data")
    test_data_path = os.path.join(DATASET_STORAGE, model_name, model_name) + "_test.csv"
    result_path = os.path.join(predict_storage_local, "PREDICT.txt")
    separator = "================================="
    with open(test_data_path, 'r') as test_handler, open(result_path, 'w') as fh:
        reader = csv.DictReader(test_handler)
        for counter, input_data in enumerate(reader, 1):
            print(separator)
            print("=====  Prediction ", counter, "  ========")
            print(separator)
            print("Input testing data : ", input_data)
            predict_result = local_model.predict(input_data)
            print(">> Prediction : ", predict_result, "\n")
            # Write only this row's record. The previous version kept
            # appending to one buffer and rewrote the whole buffer on every
            # iteration, duplicating all earlier records in the file.
            fh.writelines([
                separator + "\n",
                "=====  Prediction " + str(counter) + "  ========\n",
                separator + "\n",
                "Input testing data : " + str(input_data) + "\n",
                ">> Prediction : " + str(predict_result) + "\n\n",
            ])
Exemple #6
0
class BigMLTreeExtractor(TreeExtractor):
    """Tree extractor whose target is a BigML decision-tree model.

    With ``black_box=False`` predictions are computed by a local copy of
    the model; with ``black_box=True`` every prediction is an HTTP request
    to the BigML prediction endpoint.
    """

    def __init__(self, data, epsilon=0.01, rounding=None, black_box=False):
        """Load the BigML model registered under `data`.

        data -- key into the module-level `models` mapping (white-box) or
                `black_box_models` mapping (black-box)
        epsilon, rounding -- forwarded to `TreeExtractor.__init__`
        black_box -- when true, predictions are requested remotely
        """
        self.black_box = black_box

        if not self.black_box:
            model_id = models[data]
            # retrieve a model from local storage or from bigml.io
            # (only works for public models)
            try:
                self.model = Model('model/{}'.format(model_id),
                                   api=BigML(storage=STORAGE))
            except ValueError:
                self.model = Model('public/model/{}'.format(model_id),
                                   api=BigML(storage=STORAGE))
            self.leaves = self.model.tree.get_leaves()
        else:
            logging.info('Extracting a Black Box Model')
            self.model_id = black_box_models[data]

            # get the black-box model with the real credentials for sanity
            # checks
            try:
                self.model = Model('model/{}'.format(self.model_id),
                                   api=BigML(username='******',
                                             api_key=BB_KEY))
            except ValueError:
                self.model = Model('public/model/{}'.format(self.model_id),
                                   api=BigML(storage=STORAGE))

            # its `auth` string is appended to the prediction URL in
            # make_prediction
            self.connection = BigML()

        # Called last, after self.model is set.
        # NOTE(review): presumably the base initializer relies on it - confirm.
        TreeExtractor.__init__(self, epsilon, rounding)

    def get_classes(self):
        """Return the category names of the objective field as strings.

        Returns None when no field matches the tree's objective id.
        """
        tree = self.model.tree
        for key, val in util.sort_fields(tree.fields):
            if key == tree.objective_id:
                return [str(x[0]) for x in val['summary']['categories']]

    def get_features(self):
        """
        Parse the BigML tree model to get all the features

        Numeric fields become `ContFeature` objects (with the minimum and
        maximum found in the field summary) and categorical fields become
        `CatFeature` objects (with their sorted category names). The
        objective field is skipped. Raises ValueError for any other type.
        """
        features = []
        tree = self.model.tree
        for key, val in util.sort_fields(tree.fields):
            if not key or key == tree.objective_id:
                continue
            ftype = str(val['optype'])
            if ftype == "numeric":
                features.append(
                    ContFeature(str(val['name']), key,
                                val['summary']['minimum'],
                                val['summary']['maximum']))
            elif ftype == "categorical":
                # sorted() already builds a new list; no extra copy needed
                categories = sorted(
                    str(name) for (name, _) in val['summary']['categories'])
                features.append(
                    CatFeature(str(val['name']), key, categories))
            else:
                raise ValueError("Unknown feature type {}".format(ftype))
        return features

    def make_prediction(self, query):
        """Predict `query` and return the `LeafID` describing the answer.

        query -- mapping of input field names to values
        """
        if not self.black_box:
            res = self.model.predict(query,
                                     add_confidence=True,
                                     add_distribution=True,
                                     add_path=True,
                                     add_next=True)

            # simulate the "fields" information in the prediction response
            features = get_features_on_prediction_path(res['path'])
            features = features.union(set(query.keys()))
            if res['next']:
                features = features.union({str(res['next'])})

            return LeafID(res['prediction'], res['confidence'],
                          self.rounding, features)

        # NOTE(review): queryCount is not set in this class; presumably it
        # is maintained by TreeExtractor - confirm.
        logging.info('{}: {}'.format(self.queryCount, query))
        headers = {'content-type': 'application/json'}
        url = 'https://bigml.io/prediction' + self.connection.auth

        payload = {
            "model": "public/model/{}".format(self.model_id),
            "input_data": query
        }

        r = requests.post(url, data=json.dumps(payload), headers=headers)
        # print() with one parenthesised argument behaves the same on
        # Python 2 and Python 3
        print('request took {} ms'.format(1000 * r.elapsed.total_seconds()))

        res = r.json()
        # items() and next(iter(...)) work on both Python 2 and 3, unlike
        # iteritems() and indexing the result of values()
        fields = [
            str(f['name']) for (k, f) in res['fields'].items()
            if k != res['objective_field']
        ]
        predicted = next(iter(res['prediction'].values()))
        res_id = LeafID(predicted, res['confidence'], self.rounding, fields)
        logging.info('{}'.format(res_id))

        return res_id

    def get_leaves(self):
        """Return a list of ``(LeafID, path)`` pairs, one per model leaf.

        Only available in white-box mode; raises NotImplementedError for a
        black-box model.
        """
        if self.black_box:
            raise NotImplementedError()

        # NOTE(review): self.features is not set in this class; presumably
        # it comes from TreeExtractor - confirm.
        paths = [
            parse_path(leaf['path'], self.features) for leaf in self.leaves
        ]

        return [(LeafID(leaf['output'], leaf['confidence'], self.rounding,
                        predicate_names(path)), path)
                for (leaf, path) in zip(self.leaves, paths)]
Exemple #7
0
    def localSinglePred(model, vals1, vals2):
        """Predict locally for one pair of feature values.

        model -- value handed to `Model` to build the local model
                 (a model id, per the original comment)
        vals1, vals2 -- values sent as the "features1" and "features2"
                        input fields

        Returns whatever `Model.predict` returns for that input.
        """
        input_data2 = {"features1": vals1, "features2": vals2}
        local_model = Model(model)  # model Id
        # Return the prediction: it used to be computed and then discarded,
        # so callers always received None.
        return local_model.predict(input_data2)
# Requires BigML Python bindings
#
# Install via: pip install bigml
#
# or clone it:
#   git clone https://github.com/bigmlcom/python.git

from bigml.model import Model
from bigml.api import BigML

# Downloads and generates a local version of the model, if it
# hasn't been downloaded previously.

# Credentials are no longer hard-coded here: with no username/api_key
# arguments the BigML connection takes them from the BIGML_USERNAME and
# BIGML_API_KEY environment variables.
model = Model('model/5900dbaf014404467d000811',
              api=BigML(dev_mode=True,
                        domain="bigml.io"))

# To make predictions fill the desired input_data
# (e.g. {"petal length": 1, "sepal length": 3})
# as first parameter in next line.
model.predict({}, with_confidence=True)

# The result is a list of three elements: prediction, confidence and
# distribution

Exemple #9
0
from bigml.api import BigML
from bigml.model import Model

# Credentials are no longer hard-coded here: with no arguments the BigML
# connection takes them from the BIGML_USERNAME and BIGML_API_KEY
# environment variables.
api = BigML()

# Build source -> dataset -> model remotely, waiting for each resource to
# be finished before creating the next one.
source = api.create_source('https://static.bigml.com/csv/diabetes.csv')
api.ok(source)
dataset = api.create_dataset(source)
api.ok(dataset)
model = api.create_model(dataset)
api.ok(model)

# Predict locally with the model that was just created.
local_model = Model(model)
input_data = {"age": 65, "bmi": 36, "plasma glucose": 180, "pregnancies": 3}
local_model.predict(input_data, add_confidence=True)
Exemple #10
0
#!/usr/bin/env python

from bigml.api import BigML
from bigml.model import Model
from bigml.ensemble import Ensemble
from bigml.anomaly import Anomaly

# Connection shared by every call below; no credentials are passed here.
api = BigML(dev_mode=True)
# Remote prediction: fetch the model and create a prediction through the API.
model = api.get_model("model/563a1c7a3cd25747430023ce")
prediction = api.create_prediction(model, {"petal length": 4.07, "sepal width": 3.15, "petal width": 1.51})

# Local prediction with a single model.
# NOTE(review): the meaning of the positional `2` and of `multiple=3` depends
# on the signature of Model.predict in the installed bindings - confirm.
local_model = Model("model/56430eb8636e1c79b0001f90", api=api)
prediction = local_model.predict(
    {"petal length": 0.96, "sepal width": 4.1, "petal width": 2.52}, 2, add_confidence=True, multiple=3
)

# Local prediction with an ensemble.
# NOTE(review): the first Ensemble is rebound by the very next line before it
# is ever used for a prediction.
local_model = Ensemble("ensemble/564a02d5636e1c79b5006e13", api=api)
local_model = Ensemble("ensemble/564a081bc6c19b6cf3011c60", api=api)
prediction = local_model.predict(
    {"petal length": 0.95, "sepal width": 3.9, "petal width": 1.51, "sepal length": 7.0}, method=2, add_confidence=True
)

# Local prediction with a second ensemble.
# NOTE(review): the positional `True` is the second argument of
# Ensemble.predict - confirm which option it switches on.
local_ensemble = Ensemble("ensemble/564623d4636e1c79b00051f7", api=api)
prediction = local_ensemble.predict({"Price": 5.8, "Grape": "Pinot Grigio", "Country": "Italy", "Rating": 92}, True)

# Local anomaly score; `prediction` is rebound once more with its result.
local_anomaly = Anomaly("anomaly/564c5a76636e1c3d52000007", api=api)
prediction = local_anomaly.anomaly_score(
    {"petal length": 4.07, "sepal width": 3.15, "petal width": 1.51, "sepal length": 6.02, "species": "Iris-setosa"},
    True,
)
prediction = local_anomaly.anomaly_score(
Exemple #11
0
class BigMLTreeExtractor(TreeExtractor):
    """Tree extractor whose target is a BigML decision-tree model.

    With ``black_box=False`` predictions are computed by a local copy of
    the model; with ``black_box=True`` every prediction is an HTTP request
    to the BigML prediction endpoint.
    """

    def __init__(self, data, epsilon=0.01, rounding=None, black_box=False):
        """Load the BigML model registered under `data`.

        data -- key into the module-level `models` mapping (white-box) or
                `black_box_models` mapping (black-box)
        epsilon, rounding -- forwarded to `TreeExtractor.__init__`
        black_box -- when true, predictions are requested remotely
        """
        self.black_box = black_box

        if not self.black_box:
            model_id = models[data]
            # retrieve a model from local storage or from bigml.io
            # (only works for public models)
            try:
                self.model = Model('model/{}'.format(model_id),
                                   api=BigML(storage=STORAGE))
            except ValueError:
                self.model = Model('public/model/{}'.format(model_id),
                                   api=BigML(storage=STORAGE))
            self.leaves = self.model.tree.get_leaves()
        else:
            logging.info('Extracting a Black Box Model')
            self.model_id = black_box_models[data]

            # get the black-box model with the real credentials for sanity
            # checks
            try:
                self.model = Model('model/{}'.format(self.model_id),
                                   api=BigML(username='******',
                                             api_key=BB_KEY))
            except ValueError:
                self.model = Model('public/model/{}'.format(self.model_id),
                                   api=BigML(storage=STORAGE))

            # its `auth` string is appended to the prediction URL in
            # make_prediction
            self.connection = BigML()

        # Called last, after self.model is set.
        # NOTE(review): presumably the base initializer relies on it - confirm.
        TreeExtractor.__init__(self, epsilon, rounding)

    def get_classes(self):
        """Return the category names of the objective field as strings.

        Returns None when no field matches the tree's objective id.
        """
        tree = self.model.tree
        for key, val in util.sort_fields(tree.fields):
            if key == tree.objective_id:
                return [str(x[0]) for x in val['summary']['categories']]

    def get_features(self):
        """
        Parse the BigML tree model to get all the features

        Numeric fields become `ContFeature` objects (with the minimum and
        maximum found in the field summary) and categorical fields become
        `CatFeature` objects (with their sorted category names). The
        objective field is skipped. Raises ValueError for any other type.
        """
        features = []
        tree = self.model.tree
        for key, val in util.sort_fields(tree.fields):
            if not key or key == tree.objective_id:
                continue
            ftype = str(val['optype'])
            if ftype == "numeric":
                features.append(ContFeature(str(val['name']), key,
                                            val['summary']['minimum'],
                                            val['summary']['maximum']))
            elif ftype == "categorical":
                # sorted() already builds a new list; no extra copy needed
                categories = sorted(str(name) for (name, _)
                                    in val['summary']['categories'])
                features.append(CatFeature(str(val['name']), key,
                                           categories))
            else:
                raise ValueError("Unknown feature type {}".format(ftype))
        return features

    def make_prediction(self, query):
        """Predict `query` and return the `LeafID` describing the answer.

        query -- mapping of input field names to values
        """
        if not self.black_box:
            res = self.model.predict(query,
                                     add_confidence=True,
                                     add_distribution=True,
                                     add_path=True,
                                     add_next=True)

            # simulate the "fields" information in the prediction response
            features = get_features_on_prediction_path(res['path'])
            features = features.union(set(query.keys()))
            if res['next']:
                features = features.union({str(res['next'])})

            return LeafID(res['prediction'], res['confidence'],
                          self.rounding, features)

        # NOTE(review): queryCount is not set in this class; presumably it
        # is maintained by TreeExtractor - confirm.
        logging.info('{}: {}'.format(self.queryCount, query))
        headers = {'content-type': 'application/json'}
        url = 'https://bigml.io/prediction'+self.connection.auth

        payload = {
            "model": "public/model/{}".format(self.model_id),
            "input_data": query
        }

        r = requests.post(url, data=json.dumps(payload), headers=headers)
        # print() with one parenthesised argument behaves the same on
        # Python 2 and Python 3
        print('request took {} ms'.format(1000*r.elapsed.total_seconds()))

        res = r.json()
        # items() and next(iter(...)) work on both Python 2 and 3, unlike
        # iteritems() and indexing the result of values()
        fields = [str(f['name']) for (k, f) in res['fields'].items()
                  if k != res['objective_field']]
        predicted = next(iter(res['prediction'].values()))
        res_id = LeafID(predicted, res['confidence'], self.rounding, fields)
        logging.info('{}'.format(res_id))

        return res_id

    def get_leaves(self):
        """Return a list of ``(LeafID, path)`` pairs, one per model leaf.

        Only available in white-box mode; raises NotImplementedError for a
        black-box model.
        """
        if self.black_box:
            raise NotImplementedError()

        # NOTE(review): self.features is not set in this class; presumably
        # it comes from TreeExtractor - confirm.
        paths = [parse_path(leaf['path'], self.features)
                 for leaf in self.leaves]

        return [(LeafID(leaf['output'], leaf['confidence'],
                        self.rounding, predicate_names(path)), path)
                for (leaf, path) in zip(self.leaves, paths)]