Exemple #1
0
def create_ensembles(datasets, ensemble_ids, ensemble_args, args,
                     number_of_ensembles=1,
                     api=None, path=None, session_file=None, log=None):
    """Create ensembles from input data

    """

    if api is None:
        api = bigml.api.BigML()
    ensembles = ensemble_ids[:]
    existing_ensembles = len(ensembles)
    model_ids = []
    ensemble_args_list = []
    if isinstance(ensemble_args, list):
        ensemble_args_list = ensemble_args
    if args.dataset_off and args.evaluate:
        args.test_dataset_ids = datasets[:]
    if not args.multi_label:
        datasets = datasets[existing_ensembles:]
    if number_of_ensembles > 0:
        message = dated("Creating %s.\n" %
                        plural("ensemble", number_of_ensembles))
        log_message(message, log_file=session_file,
                    console=args.verbosity)
        inprogress = []
        for i in range(0, number_of_ensembles):
            wait_for_available_tasks(inprogress, args.max_parallel_ensembles,
                                     api, "ensemble",
                                     wait_step=args.number_of_models)

            if ensemble_args_list:
                ensemble_args = ensemble_args_list[i]

            if args.dataset_off and args.evaluate:
                multi_dataset = args.test_dataset_ids[:]
                del multi_dataset[i + existing_ensembles]
                ensemble = api.create_ensemble(multi_dataset,
                                               ensemble_args,
                                               retries=None)
            else:
                ensemble = api.create_ensemble(datasets, ensemble_args,
                                               retries=None)
            ensemble_id = check_resource_error(ensemble,
                                               "Failed to create ensemble: ")
            log_message("%s\n" % ensemble_id, log_file=log)
            ensemble_ids.append(ensemble_id)
            inprogress.append(ensemble_id)
            ensembles.append(ensemble)
            log_created_resources("ensembles", path, ensemble_id,
                                  mode='a')
        models, model_ids = retrieve_ensembles_models(ensembles, api, path)
        if number_of_ensembles < 2 and args.verbosity:
            message = dated("Ensemble created: %s\n" %
                            get_url(ensemble))
            log_message(message, log_file=session_file,
                        console=args.verbosity)
            if args.reports:
                report(args.reports, path, ensemble)

    return ensembles, ensemble_ids, models, model_ids
Exemple #2
0
def create_ensembles(datasets,
                     ensemble_ids,
                     ensemble_args,
                     args,
                     number_of_ensembles=1,
                     api=None,
                     path=None,
                     session_file=None,
                     log=None):
    """Create ensembles from input data

    """
    if api is None:
        api = bigml.api.BigML()
    ensembles = ensemble_ids[:]
    model_ids = []
    ensemble_args_list = []
    if isinstance(ensemble_args, list):
        ensemble_args_list = ensemble_args
    if number_of_ensembles > 0:
        message = dated("Creating %s.\n" %
                        plural("ensemble", number_of_ensembles))
        log_message(message, log_file=session_file, console=args.verbosity)
        query_string = ALL_FIELDS_QS
        inprogress = []
        for i in range(0, number_of_ensembles):
            wait_for_available_tasks(inprogress,
                                     args.max_parallel_ensembles,
                                     api.get_ensemble,
                                     "ensemble",
                                     query_string=query_string,
                                     wait_step=args.number_of_models)

            if ensemble_args_list:
                ensemble_args = ensemble_args_list[i]
            ensemble = api.create_ensemble(datasets, ensemble_args)
            ensemble_id = check_resource_error(ensemble,
                                               "Failed to create ensemble: ")
            log_message("%s\n" % ensemble_id, log_file=log)
            ensemble_ids.append(ensemble_id)
            inprogress.append(ensemble_id)
            ensembles.append(ensemble)
            log_created_resources("ensembles",
                                  path,
                                  ensemble_id,
                                  open_mode='a')
        models, model_ids = retrieve_ensembles_models(ensembles, api, path)
        if number_of_ensembles < 2 and args.verbosity:
            message = dated("Ensemble created: %s.\n" % get_url(ensemble))
            log_message(message, log_file=session_file, console=args.verbosity)
            if args.reports:
                report(args.reports, path, ensemble)

    return ensembles, ensemble_ids, models, model_ids
Exemple #3
0
def create_ensembles(datasets, ensemble_ids, ensemble_args, args,
                     number_of_ensembles=1,
                     api=None, path=None, session_file=None, log=None):
    """Create ensembles from input data

    """
    if api is None:
        api = bigml.api.BigML()
    ensembles = ensemble_ids[:]
    model_ids = []
    ensemble_args_list = []
    if isinstance(ensemble_args, list):
        ensemble_args_list = ensemble_args
    if number_of_ensembles > 0:
        message = dated("Creating %s.\n" %
                        plural("ensemble", number_of_ensembles))
        log_message(message, log_file=session_file,
                    console=args.verbosity)
        query_string = ALL_FIELDS_QS
        inprogress = []
        for i in range(0, number_of_ensembles):
            wait_for_available_tasks(inprogress, args.max_parallel_ensembles,
                                     api.get_ensemble, "ensemble",
                                     query_string=query_string,
                                     wait_step=args.number_of_models)

            if ensemble_args_list:
                ensemble_args = ensemble_args_list[i]
            ensemble = api.create_ensemble(datasets, ensemble_args)
            ensemble_id = check_resource_error(ensemble,
                                               "Failed to create ensemble: ")
            log_message("%s\n" % ensemble_id, log_file=log)
            ensemble_ids.append(ensemble_id)
            inprogress.append(ensemble_id)
            ensembles.append(ensemble)
            log_created_resources("ensembles", path, ensemble_id,
                                  open_mode='a')
        models, model_ids = retrieve_ensembles_models(ensembles, api, path)
        if number_of_ensembles < 2 and args.verbosity:
            message = dated("Ensemble created: %s.\n" %
                            get_url(ensemble))
            log_message(message, log_file=session_file,
                        console=args.verbosity)
            if args.reports:
                report(args.reports, path, ensemble)

    return ensembles, ensemble_ids, models, model_ids
Exemple #4
0
def create_ensemble(dataset, ensemble_args, args, api=None, path=None,
                    session_file=None, log=None):
    """Create ensemble from input data

    """
    if api is None:
        api = bigml.api.BigML()
    message = dated("Creating ensemble.\n")
    log_message(message, log_file=session_file,
                console=args.verbosity)
    ensemble = api.create_ensemble(dataset, ensemble_args)
    log_created_resources("ensemble", path,
                          bigml.api.get_ensemble_id(ensemble))
    check_resource_error(ensemble, "Failed to create ensemble: ")
    log_message("%s\n" % ensemble['resource'], log_file=log)

    return ensemble
Exemple #5
0
def create_ensembles(dataset, ensemble_ids, ensemble_args, args,
                     number_of_ensembles=1,
                     api=None, path=None, session_file=None, log=None):
    """Create ensembles from input data

    """
    if api is None:
        api = bigml.api.BigML()
    ensembles = ensemble_ids[:]
    model_ids = []
    ensemble_args_list = []
    if isinstance(ensemble_args, list):
        ensemble_args_list = ensemble_args
    if number_of_ensembles > 0:
        message = dated("Creating %s.\n" %
                        plural("ensemble", number_of_ensembles))
        log_message(message, log_file=session_file,
                    console=args.verbosity)
        query_string = ALL_FIELDS_QS
        for i in range(0, number_of_ensembles):
            if i % args.max_parallel_ensembles == 0 and i > 0:
                try:
                    ensembles[i - 1] = check_resource(
                        ensembles[i - 1], api.get_ensemble,
                        query_string=query_string)
                except ValueError, exception:
                    sys.exit("Failed to get a finished ensemble: %s" %
                             str(exception))
            if ensemble_args_list:
                ensemble_args = ensemble_args_list[i]
            ensemble = api.create_ensemble(dataset, ensemble_args)
            ensemble_id = check_resource_error(ensemble,
                                               "Failed to create ensemble: ")
            log_message("%s\n" % ensemble_id, log_file=log)
            ensemble_ids.append(ensemble_id)
            ensembles.append(ensemble)
            log_created_resources("ensembles", path, ensemble_id,
                                  open_mode='a')

        models, model_ids = retrieve_ensembles_models(ensembles, api, path)
        if number_of_ensembles < 2 and args.verbosity:
            message = dated("Ensemble created: %s.\n" %
                            get_url(ensemble))
            log_message(message, log_file=session_file,
                        console=args.verbosity)
Exemple #6
0
def create_ensemble(dataset,
                    ensemble_args,
                    args,
                    api=None,
                    path=None,
                    session_file=None,
                    log=None):
    """Create ensemble from input data

    """
    if api is None:
        api = bigml.api.BigML()
    message = dated("Creating ensemble.\n")
    log_message(message, log_file=session_file, console=args.verbosity)
    ensemble = api.create_ensemble(dataset, ensemble_args)
    log_created_resources("ensemble", path,
                          bigml.api.get_ensemble_id(ensemble))
    check_resource_error(ensemble, "Failed to create ensemble: ")
    log_message("%s\n" % ensemble['resource'], log_file=log)

    return ensemble
source_test = api.create_source('test.csv')

# DATASET CREATION

dataset_train = api.create_dataset(source_train) 
datatest_test = api.create_dataset(source_test)

# TRAIN / VAL SPLIT CREATION

trainset = api.create_dataset(dataset_train, {"name": "Train 80% ", "sample_rate": 0.8})
valset = api.create_dataset(dataset_train, {"name": "Validation 20% ", "sample_rate": 0.8, "out_of_bag": True})

# MODEL ENSEMBLE

ensemble_args = {"objective_field": "SeriousDlqin2yrs"}
ensemble = api.create_ensemble(trainset, ensemble_args)

# EVALUATION ENSEMBLE

evaluation = api.create_evaluation(ensemble, valset)

# BATCH PREDICTION ON VAL  

prediction_args = {"name": "prediction"}
batch_prediction = api.create_batch_prediction(ensemble, valset, {
    "header": True,
    "all_fields": True,
    "probabilities": True})

# BATCH PREDICTION ON TEST
prediction_args = {"name": "prediction"}