Example #1
def local_predict(models,
                  test_reader,
                  output,
                  args,
                  options=None,
                  exclude=None):
    """Get local predictions and combine them to get a final prediction

    """
    single_model = len(models) == 1
    kwargs = {"full": True, "missing_strategy": args.missing_strategy}
    if single_model:
        local_model = Model(models[0], api=args.retrieve_api_)
    else:
        local_model = Ensemble(models,
                               max_models=args.max_batch_models,
                               api=args.retrieve_api_)
        kwargs.update({
            "method": args.method,
            "options": options,
            "median": args.median
        })
    if args.operating_point_:
        kwargs.update({"operating_point": args.operating_point_})

    for input_data in test_reader:
        input_data_dict = dict(zip(test_reader.raw_headers, input_data))
        prediction = local_model.predict(input_data_dict, **kwargs)
        if single_model and args.median and local_model.tree.regression:
            # only single models' predictions can be based on the median value
            prediction["prediction"] = prediction["median"]
        write_prediction(prediction, output, args.prediction_info, input_data,
                         exclude)
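For a quick standalone check of the same idea, a local prediction can be made directly with the bindings instead of going through this helper. A minimal sketch, assuming hypothetical credentials and reusing a model id that appears in a later example:

from bigml.api import BigML
from bigml.model import Model

# Hypothetical credentials; substitute your own username and API key.
api = BigML("myuser", "myapikey")
local_model = Model("model/563a1c7a3cd25747430023ce", api=api)

# With full=True the result is a dict holding the prediction plus any
# related details (confidence, distribution, ...), as used above.
prediction = local_model.predict({"petal length": 4.2}, full=True)
print(prediction["prediction"])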
Example #2
def local_predict(models,
                  test_reader,
                  output,
                  args,
                  options=None,
                  exclude=None):
    """Get local predictions and combine them to get a final prediction

    """
    single_model = len(models) == 1
    test_set_header = test_reader.has_headers()
    kwargs = {
        "by_name": test_set_header,
        "with_confidence": True,
        "missing_strategy": args.missing_strategy
    }
    if single_model:
        local_model = Model(models[0])
    else:
        local_model = Ensemble(models, max_models=args.max_batch_models)
        kwargs.update({
            "method": args.method,
            "options": options,
            "median": args.median
        })
    for input_data in test_reader:
        input_data_dict = dict(zip(test_reader.raw_headers, input_data))
        prediction = local_model.predict(input_data_dict, **kwargs)
        if single_model and args.median and local_model.tree.regression:
            # only single models' predictions can be based on the median value
            prediction[0] = prediction[-1]
        write_prediction(prediction[0:2], output, args.prediction_info,
                         input_data, exclude)
Example #3
    def test_ensemble(self, test_file):
        assert self.authenticated, 'Not authenticated!'
        
        # download a local copy of the ensemble
        self.logger.info('Creating local ensemble')
        local_ensemble = Ensemble(self.ensemble_res,api=self.api)
        
        # make the Fields object
        source = self.api.get_source(self.source_res)
        fields = Fields(source['object']['fields'])
        
        self.logger.info('Reading test data and generating predictions')
        true_labels = []
        predict_labels = []
        pr = Profile()
        pr.enable()
        with open(test_file) as fid:
            test_reader = csv.reader(fid)
            # skip the header line
            test_reader.next()
            for row in test_reader:
                row_list = [val for val in row]
                true_labels.append(row_list.pop())
                instance = fields.pair(row_list)
                predict_labels.append(local_ensemble.predict(instance,
                                                         by_name=False,
                                                         method=1))

        pr.disable()
        ps = Stats(pr)
        self.predict_time = ps.total_tt
#        eval_args = {'combiner':1}
#        evaluation = self.api.create_evaluation(self.ensemble_res,test_data,eval_args)
#        check_resource(evaluation['resource'],self.api.get_evaluation)   
#        evaluation = self.api.get_evaluation(evaluation['resource'])
#        matrix = evaluation['object']['result']['model']['confusion_matrix']
#        self.predict_time = evaluation['object']['status']['elapsed']/1000
        if self.regression:
            self.results = (predict_labels,true_labels)
        else:
            self.results = make_confusion_matrix(true_labels,predict_labels)
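make_confusion_matrix is this benchmark's own helper and is not shown above; a minimal sketch of such a function, under the assumption that it only needs the two label lists (not the author's actual implementation):

from collections import Counter

def make_confusion_matrix(true_labels, predict_labels):
    # Count (true, predicted) pairs; both axes follow the same
    # sorted label order: rows are true labels, columns predictions.
    counts = Counter(zip(true_labels, predict_labels))
    labels = sorted(set(true_labels) | set(predict_labels))
    return [[counts.get((t, p), 0) for p in labels] for t in labels]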
Example #4
def export_code(args, api=None):
    """Generates the plugin code in the language required by the user

    """
    args.language = args.language or "javascript"

    if args.model is not None and args.language in EXPORTS:

        local_model = EXPORTS[args.language](args.model, api=api)
        generate_output(local_model, args, model_type="model")

    if args.ensemble is not None and args.language in EXPORTS:
        local_ensemble = Ensemble(args.ensemble, api=api)

        for model_id in local_ensemble.model_ids:
            local_model = EXPORTS[args.language]( \
                model_id,
                api=api,
                fields=local_ensemble.fields,
                boosting=local_ensemble.boosting)
            generate_output(local_model, args, model_type="model")
Example #5
import os

from bigml.api import BigML
from bigml.ensemble import Ensemble

CURRENT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
CACHE_DIR = os.path.join(CURRENT_DIR, "cache")

api = BigML("deven96", "81795cceca568fff4115d5c047071728a0700673", storage=CACHE_DIR)
predictions = {
        "toxicity": "",
        "identity_hatred": "",
        "threat": "",
        "obscene": "",
        "severe_toxicity": "",
        "insult": "",
        }
ensembles = predictions.copy()
ensembles["toxicity"] = Ensemble('ensemble/5ddd1a3f1efc925827001f7a', api)
ensembles["identity_hatred"] = Ensemble('ensemble/5ddd1b6f5a213904ee0000ca', api)
ensembles["threat"] = Ensemble('ensemble/5ddd282959f5c31acc001a01', api)  
ensembles["obscene"] = Ensemble('ensemble/5ddd1ad959f5c31acc001999', api)
ensembles["severe_toxicity"] = Ensemble('ensemble/5ddd1aab1efc925827001f7d', api)
ensembles["insult"] = Ensemble('ensemble/5ddd1b3c5e269e4886001b8c', api)

def get_predictions(input_data):
    """
    Returns the strings gotten from each toxicity 
    api
    """
    
    for key, ensemble in ensembles.items():
        predictions[key] = ensemble.predict({"comment_text":input_data}, full=True)
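A short usage sketch for the function above; the sample comment is made up, and the exact keys of each full prediction dict depend on the ensemble type:

get_predictions("this is a perfectly polite comment")
for key, prediction in predictions.items():
    # With full=True each entry is a dict; "prediction" holds the label.
    print(key, prediction["prediction"])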
Example #6
def create_local_ensemble_with_list(step, number_of_models):
    world.local_ensemble = Ensemble(world.models[-int(number_of_models):], world.api)
Example #7
def create_local_ensemble(step):
    world.local_ensemble = Ensemble(world.ensemble_id, world.api)
Example #8
def compute_output(api,
                   args,
                   training_set,
                   test_set=None,
                   output=None,
                   objective_field=None,
                   description=None,
                   field_attributes=None,
                   types=None,
                   dataset_fields=None,
                   model_fields=None,
                   name=None,
                   training_set_header=True,
                   test_set_header=True,
                   model_ids=None,
                   votes_files=None,
                   resume=False,
                   fields_map=None,
                   test_field_attributes=None,
                   test_types=None):
    """ Creates one or more models using the `training_set` or uses the ids
    of previously created BigML models to make predictions for the `test_set`.

    """
    source = None
    dataset = None
    model = None
    models = None
    fields = None
    other_label = OTHER
    ensemble_ids = []
    multi_label_data = None
    multi_label_fields = []
    local_ensemble = None

    # It is compulsory to have a description to publish either datasets or
    # models
    if (not description
            and (args.black_box or args.white_box or args.public_dataset)):
        sys.exit("You should provide a description to publish.")

    # When using --max-categories, it is compulsory to specify also the
    # objective_field
    if args.max_categories > 0 and objective_field is None:
        sys.exit("When --max-categories is used, you must also provide the"
                 " --objective field name or column number")

    # When using --new-fields, it is compulsory to specify also a dataset
    # id
    if args.new_fields and not args.dataset:
        sys.exit("To use --new-fields you must also provide a dataset id"
                 " to generate the new dataset from it.")

    path = u.check_dir(output)
    session_file = "%s%s%s" % (path, os.sep, SESSIONS_LOG)
    csv_properties = {}
    # If logging is required, open the file for logging
    log = None
    if args.log_file:
        u.check_dir(args.log_file)
        log = args.log_file
        # If --clear_logs the log files are cleared
        if args.clear_logs:
            try:
                open(log, 'w', 0).close()
            except IOError:
                pass

    # labels to be used in multi-label expansion
    labels = (map(str.strip, args.labels.split(','))
              if args.labels is not None else None)
    if labels is not None:
        labels = sorted([label.decode("utf-8") for label in labels])

    # multi_label file must be preprocessed to obtain a new extended file
    if args.multi_label and training_set is not None:
        (training_set, multi_label_data) = ps.multi_label_expansion(
            training_set,
            training_set_header,
            objective_field,
            args,
            path,
            labels=labels,
            session_file=session_file)
        training_set_header = True
        objective_field = multi_label_data["objective_name"]
        all_labels = l.get_all_labels(multi_label_data)
        if not labels:
            labels = all_labels
    else:
        all_labels = labels

    source, resume, csv_properties, fields = ps.source_processing(
        training_set,
        test_set,
        training_set_header,
        test_set_header,
        api,
        args,
        resume,
        name=name,
        description=description,
        csv_properties=csv_properties,
        field_attributes=field_attributes,
        types=types,
        multi_label_data=multi_label_data,
        session_file=session_file,
        path=path,
        log=log)
    if args.multi_label and source:
        multi_label_data = l.get_multi_label_data(source)
        (objective_field, labels, all_labels,
         multi_label_fields) = l.multi_label_sync(objective_field, labels,
                                                  multi_label_data, fields,
                                                  multi_label_fields)

    datasets, resume, csv_properties, fields = pd.dataset_processing(
        source,
        training_set,
        test_set,
        fields,
        objective_field,
        api,
        args,
        resume,
        name=name,
        description=description,
        dataset_fields=dataset_fields,
        multi_label_data=multi_label_data,
        csv_properties=csv_properties,
        session_file=session_file,
        path=path,
        log=log)

    if datasets:
        dataset = datasets[0]

    # If test_split is used, split the dataset in a training and a test dataset
    # according to the given split
    if args.test_split > 0:
        dataset, test_dataset, resume = pd.split_processing(
            dataset,
            api,
            args,
            resume,
            name=name,
            description=description,
            multi_label_data=multi_label_data,
            session_file=session_file,
            path=path,
            log=log)
        datasets[0] = dataset

    # Check if the dataset has a categorical objective field and it
    # has a max_categories limit for categories
    if args.max_categories > 0 and len(datasets) == 1:
        objective_id = fields.field_id(fields.objective_field)
        if pd.check_max_categories(fields.fields[objective_id]):
            distribution = pd.get_categories_distribution(
                dataset, objective_id)
            if distribution and len(distribution) > args.max_categories:
                categories = [element[0] for element in distribution]
                other_label = pd.create_other_label(categories, other_label)
                datasets, resume = pd.create_categories_datasets(
                    dataset,
                    distribution,
                    fields,
                    args,
                    api,
                    resume,
                    session_file=session_file,
                    path=path,
                    log=log,
                    other_label=other_label)
        else:
            sys.exit("The provided objective field is not categorical nor "
                     "a full terms only text field. "
                     "Only these fields can be used with"
                     "  --max-categories")

    # If multi-dataset flag is on, generate a new dataset from the given
    # list of datasets
    if args.multi_dataset:
        dataset, resume = pd.create_new_dataset(
            datasets,
            api,
            args,
            resume,
            name=name,
            description=description,
            fields=fields,
            dataset_fields=dataset_fields,
            objective_field=objective_field,
            session_file=session_file,
            path=path,
            log=log)
        datasets = [dataset]

    # Check if the dataset has a generators file associated with it, and
    # generate a new dataset with the specified field structure
    if args.new_fields:
        dataset, resume = pd.create_new_dataset(
            dataset,
            api,
            args,
            resume,
            name=name,
            description=description,
            fields=fields,
            dataset_fields=dataset_fields,
            objective_field=objective_field,
            session_file=session_file,
            path=path,
            log=log)
        datasets[0] = dataset
    if args.multi_label and dataset and multi_label_data is None:
        multi_label_data = l.get_multi_label_data(dataset)
        (objective_field, labels, all_labels,
         multi_label_fields) = l.multi_label_sync(objective_field, labels,
                                                  multi_label_data, fields,
                                                  multi_label_fields)

    if dataset:
        # retrieves max_categories data, if any
        args.max_categories = get_metadata(dataset, 'max_categories',
                                           args.max_categories)
        other_label = get_metadata(dataset, 'other_label', other_label)

    models, model_ids, ensemble_ids, resume = pm.models_processing(
        datasets,
        models,
        model_ids,
        objective_field,
        fields,
        api,
        args,
        resume,
        name=name,
        description=description,
        model_fields=model_fields,
        session_file=session_file,
        path=path,
        log=log,
        labels=labels,
        multi_label_data=multi_label_data,
        other_label=other_label)
    if models:
        model = models[0]
        single_model = len(models) == 1

    # If multi-label flag is set and no training_set was provided, label
    # info is extracted from the user_metadata. If models belong to an
    # ensemble, the ensemble must be retrieved to get the user_metadata.
    if model and args.multi_label and multi_label_data is None:
        if len(ensemble_ids) > 0 and isinstance(ensemble_ids[0], dict):
            resource = ensemble_ids[0]
        elif belongs_to_ensemble(model):
            ensemble_id = get_ensemble_id(model)
            resource = r.get_ensemble(ensemble_id,
                                      api=api,
                                      verbosity=args.verbosity,
                                      session_file=session_file)
        else:
            resource = model
        multi_label_data = l.get_multi_label_data(resource)

    # We update the model's public state if needed
    if model:
        if isinstance(model, basestring):
            if not args.evaluate:
                query_string = MINIMUM_MODEL
            else:
                query_string = r.FIELDS_QS
                model = u.check_resource(model,
                                         api.get_model,
                                         query_string=query_string)
        if (args.black_box or args.white_box
                or r.shared_changed(args.shared, model)):
            model_args = {}
            if r.shared_changed(args.shared, model):
                model_args.update(shared=args.shared)
            if args.black_box or args.white_box:
                model_args.update(r.set_publish_model_args(args))
            if model_args:
                model = r.update_model(model,
                                       model_args,
                                       args,
                                       api=api,
                                       path=path,
                                       session_file=session_file)
                models[0] = model

    # We get the fields of the model if we haven't got
    # them yet and need them
    if model and not args.evaluate and test_set:
        # If more than one model, use the full field structure
        if (not single_model and not args.multi_label
                and belongs_to_ensemble(model)):
            if len(ensemble_ids) > 0:
                ensemble_id = ensemble_ids[0]
            else:
                ensemble_id = get_ensemble_id(model)
            local_ensemble = Ensemble(ensemble_id, api=api)
        fields, objective_field = pm.get_model_fields(
            model,
            csv_properties,
            args,
            single_model=single_model,
            multi_label_data=multi_label_data,
            local_ensemble=local_ensemble)

    # Fills in all_labels from user_metadata
    if args.multi_label and not all_labels:
        (objective_field, labels, all_labels,
         multi_label_fields) = l.multi_label_sync(objective_field, labels,
                                                  multi_label_data, fields,
                                                  multi_label_fields)
    if model:
        # retrieves max_categories data, if any
        args.max_categories = get_metadata(model, 'max_categories',
                                           args.max_categories)
        other_label = get_metadata(model, 'other_label', other_label)
    # If predicting
    if models and has_test(args) and not args.evaluate:
        models_per_label = 1
        test_dataset = None

        if args.multi_label:
            # When prediction starts from existing models, the
            # multi_label_fields can be retrieved from the user_metadata
            # in the models
            if args.multi_label_fields is None and multi_label_fields:
                multi_label_field_names = [
                    field[1] for field in multi_label_fields
                ]
                args.multi_label_fields = ",".join(multi_label_field_names)
            test_set = ps.multi_label_expansion(test_set,
                                                test_set_header,
                                                objective_field,
                                                args,
                                                path,
                                                labels=labels,
                                                session_file=session_file,
                                                input_flag=True)[0]
            test_set_header = True

        # Remote predictions: predictions are computed as batch predictions
        # in bigml.com except when --no-batch flag is set on or multi-label
        # or max-categories are used
        if (args.remote and not args.no_batch and not args.multi_label
                and not args.method in [THRESHOLD_CODE, COMBINATION]):
            # create test source from file
            test_name = "%s - test" % name
            if args.test_source is None:
                (test_source, resume, csv_properties,
                 test_fields) = ps.test_source_processing(
                     test_set,
                     test_set_header,
                     api,
                     args,
                     resume,
                     name=test_name,
                     description=description,
                     field_attributes=test_field_attributes,
                     types=test_types,
                     session_file=session_file,
                     path=path,
                     log=log)
            else:
                test_source_id = bigml.api.get_source_id(args.test_source)
                test_source = api.check_resource(test_source_id,
                                                 api.get_source)
            if args.test_dataset is None:
                # create test dataset from test source
                dataset_args = r.set_basic_dataset_args(
                    test_name, description, args)
                test_dataset, resume = pd.alternative_dataset_processing(
                    test_source,
                    "test",
                    dataset_args,
                    api,
                    args,
                    resume,
                    session_file=session_file,
                    path=path,
                    log=log)
            else:
                test_dataset_id = bigml.api.get_dataset_id(args.test_dataset)
                test_dataset = api.check_resource(test_dataset_id,
                                                  api.get_dataset)

            csv_properties.update(objective_field=None,
                                  objective_field_present=False)
            test_fields = pd.get_fields_structure(test_dataset, csv_properties)

            batch_prediction_args = r.set_batch_prediction_args(
                name,
                description,
                args,
                fields=fields,
                dataset_fields=test_fields,
                fields_map=fields_map)

            remote_predict(model,
                           test_dataset,
                           batch_prediction_args,
                           args,
                           api,
                           resume,
                           prediction_file=output,
                           session_file=session_file,
                           path=path,
                           log=log)
        else:
            models_per_label = args.number_of_models
            if (args.multi_label and len(ensemble_ids) > 0
                    and args.number_of_models == 1):
                # use case where ensembles are read from a file
                models_per_label = len(models) / len(ensemble_ids)
            predict(test_set,
                    test_set_header,
                    models,
                    fields,
                    output,
                    objective_field,
                    args,
                    api=api,
                    log=log,
                    resume=resume,
                    session_file=session_file,
                    labels=labels,
                    models_per_label=models_per_label,
                    other_label=other_label,
                    multi_label_data=multi_label_data)

    # When combine_votes flag is used, retrieve the predictions files saved
    # in the comma separated list of directories and combine them
    if votes_files:
        model_id = re.sub(r'.*(model_[a-f0-9]{24})__predictions\.csv$', r'\1',
                          votes_files[0]).replace("_", "/")
        try:
            model = u.check_resource(model_id, api.get_model)
        except ValueError, exception:
            sys.exit("Failed to get model %s: %s" % (model_id, str(exception)))

        local_model = Model(model)
        message = u.dated("Combining votes.\n")
        u.log_message(message, log_file=session_file, console=args.verbosity)
        combine_votes(votes_files, local_model.to_prediction, output,
                      args.method)
Example #9
def create_local_ensemble(step):
    world.local_ensemble = Ensemble(world.ensemble_id, world.api)
    world.local_model = Model(world.local_ensemble.model_ids[0], world.api)
Example #10
def i_create_local_ensemble_from_file(step, export_file):
    world.local_ensemble = Ensemble(res_filename(export_file))
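The step above builds the ensemble from a previously exported JSON file (res_filename is the test suite's path helper). A minimal sketch of producing such an export, assuming credentials and an ensemble id of your own and a bindings version that provides api.export:

from bigml.api import BigML
from bigml.ensemble import Ensemble

api = BigML("myuser", "myapikey")  # hypothetical credentials
# Save the ensemble (and its component models) to a local JSON file...
api.export("ensemble/5ddd1a3f1efc925827001f7a", filename="my_ensemble.json")
# ...which can then back a fully offline local ensemble.
local_ensemble = Ensemble("my_ensemble.json")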
Example #11
def create_local_ensemble_with_list_of_local_models(step, number_of_models):
    local_models = [
        Model(model) for model in world.models[-int(number_of_models):]
    ]
    world.local_ensemble = Ensemble(local_models, world.api)
Example #12
from bigml.ensemble import Ensemble
# Downloads and generates a local version of the ensemble, if it
# hasn't been downloaded previously.
from bigml.api import BigML
ensemble = Ensemble('ensemble/5ddd1a3f1efc925827001f7a',
                    api=BigML("deven96",
                              "81795cceca568fff4115d5c047071728a0700673",
                              domain="bigml.io"))
# To make predictions fill the desired input_data in next line.
input_data = {}
ret = ensemble.predict({'comment_text': "f**k you c**t"}, full=True)
print(ret)
Example #13
from bigml.api import BigML
from bigml.model import Model
from bigml.ensemble import Ensemble
from bigml.anomaly import Anomaly

api = BigML(dev_mode=True)
model = api.get_model("model/563a1c7a3cd25747430023ce")
prediction = api.create_prediction(model, {"petal length": 4.07, "sepal width": 3.15, "petal width": 1.51})

local_model = Model("model/56430eb8636e1c79b0001f90", api=api)
prediction = local_model.predict(
    {"petal length": 0.96, "sepal width": 4.1, "petal width": 2.52}, 2, add_confidence=True, multiple=3
)

local_model = Ensemble("ensemble/564a02d5636e1c79b5006e13", api=api)
local_model = Ensemble("ensemble/564a081bc6c19b6cf3011c60", api=api)
prediction = local_model.predict(
    {"petal length": 0.95, "sepal width": 3.9, "petal width": 1.51, "sepal length": 7.0}, method=2, add_confidence=True
)

local_ensemble = Ensemble("ensemble/564623d4636e1c79b00051f7", api=api)
prediction = local_ensemble.predict({"Price": 5.8, "Grape": "Pinot Grigio", "Country": "Italy", "Rating": 92}, True)

local_anomaly = Anomaly("anomaly/564c5a76636e1c3d52000007", api=api)
prediction = local_anomaly.anomaly_score(
    {"petal length": 4.07, "sepal width": 3.15, "petal width": 1.51, "sepal length": 6.02, "species": "Iris-setosa"},
    True,
)
prediction = local_anomaly.anomaly_score(
    {"petal length": 0.96, "sepal width": 4.1, "petal width": 2.51, "sepal length": 6.02, "species": "Iris-setosa"},
    True,
)
prediction = local_anomaly.anomaly_score({"petal length": 0.96, "sepal width": 4.1, "petal width": 2.51}, True)

api.pprint(prediction)
Example #14
# Requires BigML Python bindings
#
# Install via: pip install bigml
#
# or clone it:
#   git clone https://github.com/bigmlcom/python.git
from bigml.ensemble import Ensemble
# Downloads and generates a local version of the ensemble, if it
# hasn't been downloaded previously.
from bigml.api import BigML
ensemble = Ensemble('ensemble/5cacf3dceba31d30ba000d60',
                    api=BigML("rshelton",
                              "adabd734dd2a2af5cb4e49176f0eb472cfa8ce5a",
                              domain="bigml.io"))
# To make predictions fill the desired input_data in next line.
input_data = {}
ensemble.predict(input_data, full=True)
#
# input_data: dict for the input values
# (e.g. {"petal length": 1, "sepal length": 3})
# full: if set to True, the output will be a dictionary that includes all the
# available information in the predicted node. The attributes vary depending
# on the ensemble type. Please check:
# https://bigml.readthedocs.io/en/latest/#local-ensemble-s-predictions
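Following the comment above, a small sketch of reading the full prediction; the input values come from the comment's own example, and the confidence key is an assumption that holds for typical classification and regression ensembles:

result = ensemble.predict({"petal length": 1, "sepal length": 3}, full=True)
print(result["prediction"])         # the predicted class or value
print(result.get("confidence"))     # extra detail, when available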
Example #15
        }]
    })
API.ok(SCRIPT_model_or_ensemble)

EXECUTION_model_or_ensemble = API.create_execution(
    SCRIPT_model_or_ensemble['resource'],
    {'inputs': [["ts-id", training_set]]})
API.ok(EXECUTION_model_or_ensemble)

model_or_ensemble = EXECUTION_model_or_ensemble["object"]["execution"]["result"]

# Locally store the model or ensemble

if model_or_ensemble[:1] == 'e':
    global local_ensemble
    local_ensemble = Ensemble(model_or_ensemble)
    picklEoR = local_ensemble
else:
    global local_model
    local_model = Model(model_or_ensemble)
    picklEoR = local_model

# Batch prediction to check if the model is accurate
batch_prediction = API.create_batch_prediction(model_or_ensemble, testing_set,
                                               {"all_fields": True})
API.ok(batch_prediction)
API.download_batch_prediction(batch_prediction,
                              filename=(filename[:-4] +
                                        "-Model-or-Ensemble-Check.csv"))

# Store the data that has been created from this Python file
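The snippet stops at this comment; a minimal sketch of the storing step it announces, assuming pickle is acceptable for the local Model/Ensemble object kept in picklEoR and using a hypothetical file name derived from filename as above:

import pickle

with open(filename[:-4] + "-model-or-ensemble.pkl", "wb") as handle:
    pickle.dump(picklEoR, handle)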
Example #16
    def _predict_nba(request, context):
        """

        :param request: iterable sequence of bundled rows
        :return: string
        """
        # Disable caching by uncommenting the following two lines
        #md = (('qlik-cache', 'no-store'),)
        #context.send_initial_metadata(md)

        params = []

        # Iterate over bundled rows to retrieve data
        for request_rows in request:
            # Iterate over rows
            for row in request_rows.rows:
                # Retrieve the string value of the parameter and append it
                # to params. Each row carries a single column, so duals has
                # length 1 and [0] takes that one value.
                param = [d.strData for d in row.duals][0]
                print('param:', param)
                params.append(param)

        print('params:', params)

        # Possible selections to predict
        opt_selections = ['Kevin Durant',
                          'Allen Iverson',
                          'Carmelo Anthony',
                          'Isaiah Thomas',
                          'Cory Jefferson',
                          'Robbie Hummel',
                          'Wesley Johnson']

        # Check selections
        if len(params) == 1 and any([selection in params for selection in opt_selections]):
            selection = params[0].split(' ')  # list of first name and last name
            file = 'NBA_data/Demo_predictPPGPk_{}_{}'.format(selection[0], selection[1])

            with open(file, 'rb') as f:
                data = pickle.load(f)
            print('data:', data)
            correct_res = data['NBA PPG']
            del data['NBA PPG']

            try:
                # Use pre-trained ensemble
                ensemble_link = 'ensemble/5727212049c4a15ca1004b77'
                ensemble = Ensemble(ensemble_link, api=BigML(dev_mode=True, domain='bigml.io'))  # saves locally
            except:
                err = sys.exc_info()
                logging.error('Unexpected error: {}, {}, {}'.format(
                    err[2].tb_frame.f_code.co_filename, err[2].tb_lineno,
                    err[1]))

            # Predict data using the trained ensemble
            res = ensemble.predict(data, with_confidence=True)
            print('res:', res)

            result = 'Predicted number of PPG: {} <br> ' \
                     'Correct number of PPG: {} <br>' \
                     'Confidence: {}'.format(round(res[0], 1), correct_res, round(res[1], 1))
        else:
            result = 'Not possible to predict.'

        # Create an iterable of dual with the result
        duals = iter([SSE.Dual(strData=result)])

        # Yield the row data as bundled rows
        yield SSE.BundledRows(rows=[SSE.Row(duals=duals)])
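Note that the older-style with_confidence=True call above returns a sequence, which is why the code indexes res[0] and res[1]; recent bindings return the same information as a dict when called with full=True, as in Examples #1 and #5. A sketch of the equivalent call, assuming a bindings version that supports it:

res = ensemble.predict(data, full=True)
result = 'Predicted number of PPG: {} <br> Confidence: {}'.format(
    round(res["prediction"], 1), round(res.get("confidence", 0), 1))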