def optimize_model_parameter_split(x, y, model_name=None, loss_function="accuracy", parameter=None, max_evals=100, n_folds=5, isWrite=True, times=1, problem_pattern="classification"):
    """
    hyperopt model turning
    """
    if model_name == None and parameter == None:
        print "you must set parameter or model_name"
        return None
    elif parameter != None:
        param = parameter
    elif model_name != None:
        param = parameter_dictionary[model_name]
    else:
        return None

    x_trains = []
    x_tests = []
    y_trains = []
    y_tests = []

    for _ in xrange(times) if False else range(times):
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            x, y, test_size=0.0125)
        x_trains.append(x_train)
        x_tests.append(x_test)
        y_trains.append(y_train)
        y_tests.append(y_test)

    trials = Trials()
    function = lambda param: optimize_model_function_split(
        param, x_trains, x_tests, y_trains, y_tests, loss_function)
    print(param)
    print("========================================================================")
    best_param = fmin(function, param,
                      algo=tpe.suggest, max_evals=max_evals, trials=trials)
    print("========================================================================")
    print("write result to csv files")

    # write the csv file
    if isWrite:
        datas = []
        for trial_data in trials.trials:
            print(trial_data)
            trial_parameter_dictionary = {}
            trial_parameter_dictionary['model'] = model_name
            trial_parameter_dictionary['tid'] = trial_data['misc']['tid']
            for key, value in trial_data['misc']['vals'].items():
                print(key, value[0])
                trial_parameter_dictionary[key] = value[0]
            trial_parameter_dictionary['loss'] = trial_data['result']['loss']
            trial_parameter_dictionary[
                'status'] = trial_data['result']['status']
            datas.append(trial_parameter_dictionary)
        filename = str(time.time()) + ".csv"
        dictionary_in_list_convert_to_csv(datas, filename)

    print(trials.statuses())
    return best_param
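A minimal usage sketch for the function above, assuming the helpers it references
(parameter_dictionary, optimize_model_function_split, dictionary_in_list_convert_to_csv)
are importable and that "xgboost" is a hypothetical key of parameter_dictionary:

import numpy as np

x = np.random.rand(200, 10)           # toy feature matrix
y = np.random.randint(0, 2, 200)      # toy binary labels
best = optimize_model_parameter_split(x, y, model_name="xgboost",
                                      loss_function="accuracy",
                                      max_evals=20, times=3, isWrite=False)
print(best)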
def main():

    #with open(RESULTS_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    pprint.pprint(cache_data)
    #return

    #with open(RESULTS_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    for alg in ALGORITHMS:
    #        if ALGORITHMS[alg]["do_hyper"]:
    #            ALGORITHMS[alg]["opt_param"] = cache_data[alg]["opt_param"]

    #### STEP 1 - Get classified pointcloud ####

    environment = PointCloudEnvironment(my_print, TERRAIN_ASSESSMENT_FILE,
                                        POINTCLOUD_FILE)
    coverable_points = environment.coverable_pcd.points
    traversable_points = environment.traversable_pcd.points
    motion_planner = MotionPlanner(my_print, environment.traversable_pcd)

    #If from terrain assessment file:
    #with open(TERRAIN_ASSESSMENT_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    coverable_points = cache_data["coverable_points"]
    #    traversable_points = cache_data["traversable_points"]
    #traversable_pcd = PointCloud(my_print, points= traversable_points)
    #motion_planner = MotionPlanner(my_print, traversable_pcd)

    #### STEP 2 - Hyper parameters ####
    for algorithm_key, algorithm in ALGORITHMS.items():
        if algorithm["do_hyper"]:
            trials = Trials()
            hyper_optimizer = HyptoOptimizer(save_data, algorithm, my_print,
                                             HYPER_START_POS, motion_planner,
                                             coverable_points)
            if algorithm_key == "BA*":
                opt_param = fmin(hyper_optimizer.hyper_test_bastar,
                                 space=(hp.uniform('angle_offset', 0, np.pi * 2),
                                        hp.uniform('step_size', 0.5, 1),
                                        hp.uniform('visited_threshold', 0.25, 0.5)),
                                 algo=tpe.suggest,
                                 max_evals=HYPER_MAX_EVAL,
                                 trials=trials)
            elif algorithm_key == "Inward Spiral":
                opt_param = fmin(hyper_optimizer.hyper_test_inward_spiral,
                                 space=(hp.uniform('step_size', 0.5, 1),
                                        hp.uniform('visited_threshold', 0.25, 0.5)),
                                 algo=tpe.suggest,
                                 max_evals=HYPER_MAX_EVAL,
                                 trials=trials)
            elif algorithm_key == "Sampled BA*":
                coverage_2 = algorithm["hyper_min_coverage"] / 100
                opt_param = fmin(
                    hyper_optimizer.hyper_test_sampled_bastar_param,
                    space=(hp.uniform('coverage_1', 0.25, coverage_2),
                           hp.uniform('coverage_2', coverage_2 - 0.025, coverage_2),
                           hp.uniform('max_distance', 1, 10),
                           hp.uniform('max_distance_part_II', 1, 20),
                           hp.uniform('max_iterations', 30, 150),
                           hp.uniform('min_bastar_coverage', 0.005, 0.05),
                           hp.uniform('min_spiral_length', 2, 100),
                           hp.uniform('nbr_of_angles', 0.6, 8.4),
                           hp.uniform('step_size', 0.66, 1.33),
                           hp.uniform('visited_threshold', 0.25, 0.5)),
                    algo=tpe.suggest,
                    max_evals=HYPER_MAX_EVAL,
                    trials=trials)
            print(trials.statuses())
            algorithm["opt_param"] = opt_param
            algorithm["hyper_data"] = trials.trials
            ALGORITHMS[algorithm_key] = algorithm
            save_data(ALGORITHMS)

    #### STEP 3 - Full tests ####
    for start_point_nr in range(NUMBER_OF_START_POINTS):
        #start_point = get_random_point(traversable_points)
        start_point = start_points[start_point_nr]
        print("Start point " + str(start_point_nr) + ": " + str(start_point))

        for algorithm_key, algorithm in ALGORITHMS.items():
            if algorithm["do_experiment"]:
                experimenter = Experimenter(algorithm, print)
                parameters = None
                if "opt_param" in algorithm:
                    parameters = algorithm["opt_param"]

                cpp = algorithm["cpp"](my_print, motion_planner,
                                       coverable_points,
                                       algorithm["experiment_time_limit"],
                                       parameters)

                if "sample_specific_stats" in algorithm:
                    experimenter.perform_sample_cpp(cpp, start_point,
                                                    start_point_nr)
                    algorithm["sample_specific_stats"].append(
                        experimenter.sample_specific_stats)
                else:
                    experimenter.perform_cpp(cpp, start_point, start_point_nr)

                algorithm["experiment_results"].append(experimenter.results)
                ALGORITHMS[algorithm_key] = algorithm
                save_data(ALGORITHMS)
Example #3
import pickle
import time

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


def objective(x):
    return {
        'loss': x**2,
        'status': STATUS_OK,
        'eval_time': time.time(),
        'other_stuff': {
            'type': None,
            'value': [0, 1, 2]
        },
        'attachments': {
            'time_module': pickle.dumps(time.time)
        }
    }


trials = Trials()
best = fmin(objective,
            space=hp.uniform('x', -10, 10),
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

print(best)
print(trials.trials[0])
print(trials.results[0])
print(trials.losses()[0])
print(trials.statuses()[0])
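The 'attachments' entry returned by the objective above is kept out of trials.results;
it can be read back per trial. A short sketch, assuming the fmin call above has run:

# fetch the pickled attachment stored by the objective for the first trial
msg = trials.trial_attachments(trials.trials[0])['time_module']
time_module = pickle.loads(msg)
print(time_module)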
def main():

    #with open(RESULTS_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    pprint.pprint(cache_data)
    #return

    with open(RESULTS_FILE, 'rb') as cached_pcd_file:
        cache_data = pickle.load(cached_pcd_file)
        ALGORITHMS = deepcopy(cache_data)
        for alg in ALGORITHMS:
            ALGORITHMS[alg]["do_hyper"] = False
            ALGORITHMS[alg]["cpp"] = (
                lambda print_fn, motion_planner, cov_points, time_limit, parameters:
                RandomBAstar3(print_fn, motion_planner,
                              PointCloud(print_fn, points=cov_points),
                              time_limit, parameters))

    #### STEP 1 - Get classified pointcloud ####

    environment = PointCloudEnvironment(my_print, TERRAIN_ASSESSMENT_FILE,
                                        POINTCLOUD_FILE)
    coverable_points = environment.coverable_pcd.points
    traversable_points = environment.traversable_pcd.points
    motion_planner = MotionPlanner(my_print, environment.traversable_pcd)

    #If from terrain assessment file:
    #with open(TERRAIN_ASSESSMENT_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    coverable_points = cache_data["coverable_points"]
    #    traversable_points = cache_data["traversable_points"]
    #traversable_pcd = PointCloud(my_print, points= traversable_points)
    #motion_planner = MotionPlanner(my_print, traversable_pcd)

    #### STEP 2 - Hyper parameters ####
    for algorithm_key, algorithm in ALGORITHMS.items():
        if algorithm["do_hyper"]:
            trials = Trials()
            hyper_optimizer = HyptoOptimizer(save_data, algorithm, my_print,
                                             HYPER_START_POS, motion_planner,
                                             coverable_points)
            opt_param = fmin(
                hyper_optimizer.hyper_test_newest_sampled_bastar_param,
                space=(hp.uniform('ba_exploration', 0.75, 0.95),
                       hp.uniform('max_distance', 1, 5),
                       hp.uniform('max_distance_part_II', 4, 10),
                       hp.uniform('min_bastar_cost_per_coverage', 5000, 10000),
                       hp.uniform('min_spiral_cost_per_coverage', 10000, 20000),
                       hp.uniform('step_size', 0.5, 1.0),
                       hp.uniform('visited_threshold', 0.25, 0.5)),
                algo=tpe.suggest,
                max_evals=HYPER_MAX_EVAL,
                trials=trials)
            print(trials.statuses())
            algorithm["opt_param"] = opt_param
            algorithm["hyper_data"] = trials.trials
            ALGORITHMS[algorithm_key] = algorithm
            save_data(ALGORITHMS)

    #### STEP 3 - Full tests ####
    for start_point_nr in range(NUMBER_OF_START_POINTS):
        #start_point = get_random_point(traversable_points)
        start_point = start_points[start_point_nr]
        print("Start point " + str(start_point_nr) + ": " + str(start_point))

        for algorithm_key, algorithm in ALGORITHMS.items():
            if algorithm["do_experiment"]:
                experimenter = Experimenter(algorithm, print)
                parameters = None
                if "opt_param" in algorithm:
                    parameters = algorithm["opt_param"]

                cpp = algorithm["cpp"](my_print, motion_planner,
                                       coverable_points,
                                       algorithm["experiment_time_limit"],
                                       parameters)

                if "sample_specific_stats" in algorithm:
                    experimenter.perform_sample_cpp(cpp, start_point,
                                                    start_point_nr)
                    algorithm["sample_specific_stats"].append(
                        experimenter.sample_specific_stats)
                else:
                    experimenter.perform_cpp(cpp, start_point, start_point_nr)

                algorithm["experiment_results"].append(experimenter.results)
                ALGORITHMS[algorithm_key] = algorithm
                save_data(ALGORITHMS)
def optimize_model_parameter_validation(x, y, model_name=None, loss_function="accuracy", parameter=None, max_evals=100, n_folds=5, isWrite=True, problem_pattern="classification"):
    """
    hyperopt model turning
    """
    if model_name == None and parameter == None:
        print "you must set parameter or model_name"
        return None
    elif parameter != None:
        param = parameter
    elif model_name != None:
        param = parameter_dictionary[model_name]
    else:
        return None

    validation_indexs = []

    if problem_pattern == "classification":
        for train_index, test_index in cross_validation.StratifiedKFold(y, n_folds=n_folds):
            validation_indexs.append((train_index, test_index))
    else:
        for train_index, test_index in cross_validation.KFold(len(y), n_folds=n_folds):
            validation_indexs.append((train_index, test_index))

    trials = Trials()
    function = lambda param: optimize_model_function(
        param, x, y, validation_indexs, loss_function)
    print(param)
    print("========================================================================")
    best_param = fmin(function, param,
                      algo=tpe.suggest, max_evals=max_evals, trials=trials)
    print("========================================================================")
    print("write result to csv files")

    # write the csv file
    if isWrite:
        datas = []
        for trial_data in trials.trials:
            print(trial_data)
            trial_parameter_dictionary = {}
            trial_parameter_dictionary['model'] = model_name
            trial_parameter_dictionary['tid'] = trial_data['misc']['tid']
            for key, value in trial_data['misc']['vals'].items():
                print(key, value[0])
                trial_parameter_dictionary[key] = value[0]
            trial_parameter_dictionary['loss'] = trial_data['result']['loss']
            trial_parameter_dictionary[
                'status'] = trial_data['result']['status']
            datas.append(trial_parameter_dictionary)
        filename = str(time.time()) + ".csv"
        dictionary_in_list_convert_to_csv(datas, filename)

    print(trials.statuses())
    return best_param

def model_evaluation(clf, x, y, evaluate_function_name, labeled_type, label_convert_type="normal"):
    if evaluate_function_name == "accuracy":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
        # hyperopt minimizes, so negate metrics where higher is better
        score = -score
    elif evaluate_function_name == "logloss":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
    elif evaluate_function_name == "mean_squared_error":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
    elif evaluate_function_name == "gini":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
        score = -score
    elif evaluate_function_name == "rmsle":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
    elif evaluate_function_name == "auc":
        # some models return a single probability column from predict_proba,
        # others return one column per class
        y_pred = clf.predict_proba(x)
        if y_pred.ndim > 1:
            y_pred = y_pred[:, 1]
        score = evaluate_function(y, y_pred, evaluate_function_name)
        score = -score
    elif evaluate_function_name == "rmspe":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
    return score
Example #6
class HyperoptImpl:

    def __init__(self, estimator=None, max_evals=50, cv=5, handle_cv_failure=False, 
                scoring='accuracy', best_score=0.0, max_opt_time=None, max_eval_time=None, 
                pgo:Optional[PGO]=None, show_progressbar=True, args_to_scorer=None,
                verbose=False):
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = LogisticRegression()
        else:
            self.estimator = estimator
        self.search_space = hp.choice('meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self._trials = Trials()
        self.max_opt_time = max_opt_time
        self.max_eval_time = max_eval_time
        self.show_progressbar = show_progressbar
        if args_to_scorer is not None:
            self.args_to_scorer = args_to_scorer
        else:
            self.args_to_scorer = {}
        self.verbose = verbose


    def fit(self, X_train, y_train):
        opt_start_time = time.time()
        self.cv = check_cv(self.cv, y = y_train, classifier=True) #TODO: Replace the classifier flag value by using tags?
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")

            trainable = create_instance_from_hyperopt_search_space(self.estimator, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(trainable, X_train, y_train, cv=self.cv, scoring=self.scoring, args_to_scorer=self.args_to_scorer)
                logger.debug("Successful trial of hyperopt with hyperparameters:{}".format(params))
            except BaseException as e:
                #If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score  = scorer(trained, X_validation, y_validation, **self.args_to_scorer)
                    execution_time = time.time() - start
                    y_pred_proba = trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(e, trainable.to_json()))
                    raise e
            return cv_score, logloss, execution_time
            
            
        def proc_train_test(params, X_train, y_train, return_dict):
            return_dict['params'] = copy.deepcopy(params)
            try:
                score, logloss, execution_time = hyperopt_train_test(params, X_train=X_train, y_train=y_train)
                return_dict['loss'] = self.best_score - score
                return_dict['time'] = execution_time
                return_dict['log_loss'] = logloss
                return_dict['status'] = STATUS_OK
            except BaseException as e:
                logger.warning(f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} with hyperparams: {params}, setting status to FAIL")
                return_dict['status'] = STATUS_FAIL
                return_dict['error_msg'] = f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} with hyperparams: {params}"
                if self.verbose:
                    print(return_dict['error_msg'])

        def get_final_trained_estimator(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            trainable = create_instance_from_hyperopt_search_space(self.estimator, params)
            trained = trainable.fit(X_train, y_train)
            return trained

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and ((current_time - opt_start_time) > self.max_opt_time) :
                # if max optimization time set, and we have crossed it, exit optimization completely
                sys.exit(0)
            if self.max_eval_time:
                # Run the evaluation in a subprocess that can be interrupted
                manager = multiprocessing.Manager()
                proc_dict = manager.dict()
                p = multiprocessing.Process(
                    target=proc_train_test,
                    args=(params, X_train, y_train, proc_dict))
                p.start()
                p.join(self.max_eval_time)
                if p.is_alive():
                    p.terminate()
                    p.join()
                    logger.warning(f"Maximum alloted evaluation time exceeded. with hyperparams: {params}, setting status to FAIL")
                    proc_dict['status'] = STATUS_FAIL
                if 'status' not in proc_dict:
                    logger.warning(f"Corrupted results, setting status to FAIL")
                    proc_dict['status'] = STATUS_FAIL
            else:
                proc_dict = {}
                proc_train_test(params, X_train, y_train, proc_dict)
            return proc_dict

        try:
            fmin(f, self.search_space, algo=tpe.suggest, max_evals=self.max_evals,
                 trials=self._trials, rstate=np.random.RandomState(SEED),
                 show_progressbar=self.show_progressbar)
        except SystemExit:
            logger.warning('Maximum allotted optimization time exceeded. Optimization exited prematurely')
        except AllTrialsFailed:
            self._best_estimator = None
            if STATUS_OK not in self._trials.statuses():
                raise ValueError('Error from hyperopt, none of the trials succeeded.')

        try :
            best_params = space_eval(self.search_space, self._trials.argmin)
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'.format(
                    self.best_score - self._trials.average_best_error(), self.max_evals, best_params
                )
            )
            trained = get_final_trained_estimator(best_params, X_train, y_train)
            self._best_estimator = trained
        except BaseException as e :
            logger.warning('Unable to extract the best parameters from optimization, the error: {}'.format(e))
            self._best_estimator = None

        return self

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        if self._best_estimator is None:
            raise ValueError("Can not predict as the best estimator is None. Either an attempt to call `predict` "
        "before calling `fit` or all the trials during `fit` failed.")
        trained = self._best_estimator
        try:
            predictions = trained.predict(X_eval)
        except ValueError as e:
            logger.warning("ValueError in predicting using Hyperopt:{}, the error is:{}".format(trained, e))
            predictions = None

        return predictions

    def summary(self):
        """Table summarizing the trial results (ID, loss, time, log_loss, status).

Returns
-------
result : DataFrame"""
        def make_record(trial_dict):
            # metrics missing from the result (e.g. for failed trials) default to NaN
            return {
                'name': f'p{trial_dict["tid"]}',
                'tid': trial_dict['tid'],
                'loss': trial_dict['result'].get('loss', float('nan')),
                'time': trial_dict['result'].get('time', float('nan')),
                'log_loss': trial_dict['result'].get('log_loss', float('nan')),
                'status': trial_dict['result']['status']}
        records = [make_record(td) for td in self._trials.trials]
        result = pd.DataFrame.from_records(records, index='name')
        return result

    def get_pipeline(self, pipeline_name=None, astype='lale'):
        """Retrieve one of the trials.

Parameters
----------
pipeline_name : union type, default None

    - string
        Key for table returned by summary(), return a trainable pipeline.

    - None
        When not specified, return the best trained pipeline found.

astype : 'lale' or 'sklearn', default 'lale'
    Type of resulting pipeline.

Returns
-------
result : Trained operator if best, trainable operator otherwise.
"""
        if pipeline_name is None:
            result = getattr(self, '_best_estimator', None)
        else:
            tid = int(pipeline_name[1:])
            params = self._trials.trials[tid]['result']['params']
            result = create_instance_from_hyperopt_search_space(
                self.estimator, params)
        if result is None or astype == 'lale':
            return result
        assert astype == 'sklearn', astype
        return result.export_to_sklearn_pipeline()
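A rough usage sketch of the wrapper above, assuming the unshown helpers and module-level
names it relies on (hyperopt_search_space, create_instance_from_hyperopt_search_space,
cross_val_score_track_trials, logger, SEED, ...) are available in the same module:

from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
opt = HyperoptImpl(max_evals=10, cv=3, scoring='accuracy', show_progressbar=False)
opt.fit(X, y)                  # runs the TPE search over the default estimator's space
print(opt.summary())           # per-trial loss / time / log_loss / status table
best = opt.get_pipeline()      # best trained pipeline, or None if every trial failed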
Example #7
def wikiLearn():
    """
    不是特别懂
    """
    # 1、简单的函数
    from hyperopt import fmin, tpe, hp
    best = fmin(fn=lambda x: x ** 2,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)
    # 2. An objective that returns a loss and an OK status
    from hyperopt import fmin, tpe, hp, STATUS_OK
    def objective(x):
        return {'loss': x ** 2, 'status': STATUS_OK }
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)
    # 3. Returning a dict with extra information
    import pickle
    import time
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
    def objective(x):
        return {
            'loss': x ** 2,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {'type': None, 'value': [0, 1, 2]},
            # -- attachments are handled differently
            'attachments': {'time_module': pickle.dumps(time.time)}
        }
    trials = Trials()
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100,
                trials=trials)
    print(best)
    print(trials.trials)
    print(trials.results)
    print(trials.losses())
    print(trials.statuses())
    # not yet clear what attachments are for
    msg = trials.trial_attachments(trials.trials[5])['time_module']
    time_module = pickle.loads(msg)
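    # (note) attachments are meant for bulky per-trial artifacts: they are kept
    # out of the result dict so the trial records stay small, and are read back
    # with trials.trial_attachments(trial)[key], as done above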
    from hyperopt import hp
    space = hp.choice('a',
                      [
                          ('case 1', 1 + hp.lognormal('c1', 0, 1)),
                          ('case 2', hp.uniform('c2', -10, 10))
                      ])
    import hyperopt.pyll.stochastic
    print(hyperopt.pyll.stochastic.sample(space))
    # hp.choice(label, options)
    # hp.randint(label, upper)                  # integer in [0, upper)
    # hp.uniform(label, low, high)
    # hp.quniform(label, low, high, q)          # round(uniform(low, high) / q) * q
    # hp.loguniform(label, low, high)
    # hp.qloguniform(label, low, high, q)       # round(exp(uniform(low, high)) / q) * q
    # hp.normal(label, mu, sigma)
    # hp.qnormal(label, mu, sigma, q)           # round(normal(mu, sigma) / q) * q
    # hp.lognormal(label, mu, sigma)
    # hp.qlognormal(label, mu, sigma, q)        # round(exp(normal(mu, sigma)) / q) * q
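    # quick illustrative check of two of the distributions listed above
    # (labels 'lr' and 'layers' are just examples)
    qspace = {'lr': hp.loguniform('lr', -5, 0),
              'layers': hp.quniform('layers', 1, 5, 1)}
    print(hyperopt.pyll.stochastic.sample(qspace))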
    # 4. Usage with sklearn-style model selection
    from hyperopt import hp
    space = hp.choice('classifier_type', [
        {
            'type': 'naive_bayes',
        },
        {
            'type': 'svm',
            'C': hp.lognormal('svm_C', 0, 1),
            'kernel': hp.choice('svm_kernel', [
                {'ktype': 'linear'},
                {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
            ]),
        },
        {
            'type': 'dtree',
            'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
            'max_depth': hp.choice('dtree_max_depth',
                                   [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
            'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
        },
    ])
    # 5. scope.define (still not fully figured out)
    import hyperopt.pyll
    from hyperopt.pyll import scope
    @scope.define
    def foo(a, b=0):
        print('running foo', a, b)
        return a + b / 2
    # -- this will print 0, foo is called as usual.
    print(foo(0))
    # In describing search spaces you can use `foo` as you
    # would in normal Python. These two calls will not actually call foo,
    # they just record that foo should be called to evaluate the graph.
    space1 = scope.foo(hp.uniform('a', 0, 10))
    space2 = scope.foo(hp.uniform('a', 0, 10), hp.normal('b', 0, 1))
    # -- this will print a pyll.Apply node
    print(space1)
    # -- this will draw a sample by running foo()
    print(hyperopt.pyll.stochastic.sample(space1))
Example #8
def best_net(
        # output
        best_net_filename,
        best_preproc_filename,
        best_params_filename,
        best_stage_results_filename,
        # search process parameters:
        overfit,
        max_evals,
        trials_filename,
        overwrite_trials,
        temp_net_filename,
        temp_preproc_filename,
        temp_stage_results_filename,
        dataset_dir,
        dataset_name=None,
        dataset_size=None,
        dataset_static=False,
        dataset_inmem=False,
        shuffle_train=True,
        seed=42,
        image_data_format=K.image_data_format(),
        cache_datasets=False,
        preproc='default',
        min_epochs=3,
        max_epochs=50,
        retrain=False):
    # from the search space we get:
    # no_xval, no_test,
    # features, img_side, resolution_degrees, grayscale, angle_encoding, force_xy, bounding, dropout,
    # batch_size, optimizer, lr, optimizer_kwargs

    if os.path.isfile(trials_filename) and not overwrite_trials:
        with open(trials_filename, 'rb') as trials_file:
            trials = pickle.load(trials_file)
    else:
        space = get_space(overfit)

        trials = Trials()

        def objective(args):
            args = fix_args(**args)

            safe_save(trials, trials_filename)

            print('experiment', objective.iter, '/', max_evals, '; best err:',
                  objective.min)
            print('training args:', args)

            try:
                resolution_degrees = None
                err = train(
                    temp_net_filename, temp_preproc_filename,
                    temp_stage_results_filename, dataset_dir, dataset_name,
                    dataset_size, dataset_static, dataset_inmem, shuffle_train,
                    seed, image_data_format, args['no_xval'], args['no_test'],
                    cache_datasets, args['features'], args['img_side'],
                    resolution_degrees, args['grayscale'], preproc,
                    args['angle_encoding'], args['force_xy'], args['bounding'],
                    args['n_classes'], args['convs_per_block'],
                    args['skip_layer_connections'], args['dropout'],
                    args['l2_penalty'], args['batch_size'], args['optimizer'],
                    args['lr'], args['optimizer_kwargs'], min_epochs,
                    max_epochs, args['stages'], retrain)
            except Exception:
                print('model training failed!')
                traceback.print_exc()
                return {'status': STATUS_FAIL}
            else:
                if err < objective.min:
                    shutil.copyfile(temp_net_filename, best_net_filename)
                    shutil.copyfile(temp_preproc_filename,
                                    best_preproc_filename)
                    shutil.copyfile(temp_stage_results_filename,
                                    best_stage_results_filename)
                    all_args = args.copy()
                    all_args.update({
                        'dataset_dir': dataset_dir,
                        'dataset_name': dataset_name,
                        'dataset_size': dataset_size,
                        'dataset_static': dataset_static,
                        'dataset_inmem': dataset_inmem,
                        'shuffle_train': shuffle_train,
                        'image_data_format': image_data_format,
                        'cache_datasets': cache_datasets,
                        'resolution_degrees': resolution_degrees,
                        'preproc': preproc,
                        'min_epochs': min_epochs,
                        'max_epochs': max_epochs,
                        'retrain': retrain
                    })

                    with open(best_params_filename, 'wb') as best_params_file:
                        pickle.dump(all_args, best_params_file)
                    print('NEW BEST FOUND')
                objective.min = min(err, objective.min)

                # return {'loss': train_loss_at_best_xval, 'true_loss': -best_corr_xval, 'status': STATUS_OK,
                #                 'model': model}
                return {'loss': err, 'status': STATUS_OK, 'args': args}
            finally:
                objective.iter += 1
                gc.collect()  # try to help free some memory...

        objective.min = numpy.inf
        objective.iter = 1

        # by default, tpe.suggest runs 20 random configurations in the beginning to get a rough map of the space
        # to override this behaviour, use this:
        # algo = lambda *args, **kwargs: tpe.suggest(*args, n_startup_jobs=5,**kwargs)
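        # an equivalent spelling with functools (assuming it is imported):
        # algo = functools.partial(tpe.suggest, n_startup_jobs=5)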

        algo = tpe.suggest

        fmin(objective,
             space,
             algo,
             max_evals,
             trials,
             rstate=numpy.random.RandomState(42))

        safe_save(trials, trials_filename)

    # pick the best trial among those that finished with STATUS_OK
    ok_id = []
    for status_id, status in enumerate(trials.statuses()):
        if status == STATUS_OK:
            ok_id.append(status_id)

    losses = trials.losses()
    losses = [numpy.inf if loss is None else loss for loss in losses]
    min_loss_id = min(ok_id, key=lambda trial_id: losses[trial_id])
    best_args = trials.results[min_loss_id]

    return best_args
# why does hp.randint('x', 10) always return same number?
# works without space=____
trials = Trials()
best = fmin(objective,
            space=space,
            # space=hp.quniform('x', -10, 10, .00001),
            algo=tpe.suggest,
            max_evals=10,
            trials=trials)

# the "syntax error" the editor flagged was the Python 2 print statement;
# with print() these lines are valid Python 3
print(best)
print(hyperopt.space_eval(space, best))
# prints a loss entry for every trial, regardless of status
print(trials.losses())
print(trials.statuses())
print(trials.results)
print(best)

# msg = trials.trial_attachments(trials.trials[5])['time_module']
# time_module = pickle.loads(msg)

# print time_module
# print msg

space = hp.choice('a',
                  [
                      ('case 1', 1 + hp.randint('c1', 10)),
                      ('case 2', hp.uniform('c2', -10, 10))
                  ])
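On the opening question: hp.randint('x', 10), like the hp.choice above, builds a
search-space expression node rather than drawing a number, so inspecting it directly
always shows the same thing; values are only drawn when the node is sampled or
searched. A small check, assuming hyperopt is importable:

import hyperopt.pyll.stochastic
from hyperopt import hp

node = hp.randint('x', 10)      # a pyll expression node, not an integer
print(node)
# sampling the node draws a (usually different) integer in [0, 10) each call
for _ in range(3):
    print(hyperopt.pyll.stochastic.sample(node))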