def define_search_space(objective, starting_params):
    if objective == "rmse":
        prediction = starting_params["collaborative_params"][
            "prediction_network_params"]
        rmse_space = {
            'lr': hp.qlognormal("lr", np.log(prediction["lr"]),
                                0.5 * prediction["lr"], 0.05 * prediction["lr"]),
            'epochs': hp.quniform('epochs', prediction["epochs"] - 20,
                                  prediction["epochs"] + 20, 5),
            'kernel_l2': hp.choice('kernel_l2', [
                0.0,
                hp.qloguniform('kernel_l2_choice', np.log(1e-9), np.log(1e-5), 5e-9)
            ]),
            'batch_size': hp.qloguniform('batch_size', np.log(512), np.log(1023), 512),
            # 'batch_size': hp.choice('batch_size', [512]),
            'conv_depth': hp.quniform('conv_depth', 1, prediction["conv_depth"] + 1, 1),
            'gaussian_noise': hp.qlognormal('gaussian_noise',
                                            np.log(prediction["gaussian_noise"]),
                                            0.5 * prediction["gaussian_noise"], 0.005),
            'network_depth': hp.quniform('network_depth', 1,
                                         prediction['network_depth'] + 1, 1),
            'n_dims': hp.quniform('n_dims', starting_params["n_dims"] - 32,
                                  starting_params["n_dims"] + 64, 16),
        }
        return rmse_space
    if objective == "ndcg":
        embedding = starting_params["collaborative_params"]["user_item_params"]
        ndcg_space = {
            'gcn_lr': hp.qlognormal("gcn_lr", np.log(embedding["gcn_lr"]),
                                    0.5 * embedding["gcn_lr"], 0.05 * embedding["gcn_lr"]),
            'gcn_epochs': hp.quniform('gcn_epochs', embedding["gcn_epochs"],
                                      embedding["gcn_epochs"] + 20, 5),
            'gaussian_noise': hp.qlognormal('gaussian_noise',
                                            np.log(embedding["gaussian_noise"]),
                                            1.0 * embedding["gaussian_noise"], 0.005),
            'margin': hp.quniform('margin', 0.8, 1.8, 0.2),
            'n_dims': hp.quniform('n_dims', starting_params["n_dims"],
                                  starting_params["n_dims"] + 96, 16),
        }
        return ndcg_space
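# A minimal call sketch for the function above. The nested key names mirror
# those read inside it; the numeric starting values are illustrative
# assumptions, not taken from the source.
starting_params = {
    "n_dims": 64,
    "collaborative_params": {
        "prediction_network_params": {
            "lr": 1e-3, "epochs": 50, "conv_depth": 2,
            "gaussian_noise": 0.1, "network_depth": 3,
        },
    },
}
rmse_space = define_search_space("rmse", starting_params)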
def main():
    client = Client()
    print('n. clients:', len(client))
    digits = load_digits()
    X = MinMaxScaler().fit_transform(digits.data)
    y = digits.target

    pre_processing = hp.choice('preproc_algo', [
        scope.PCA(
            n_components=1 + hp.qlognormal(
                'pca_n_comp', np.log(10), np.log(10), 1),
            whiten=hp.choice(
                'pca_whiten', [False, True])),
        scope.GMM(
            n_components=1 + hp.qlognormal(
                'gmm_n_comp', np.log(100), np.log(10), 1),
            covariance_type=hp.choice(
                'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])),
    ])

    classifier = hp.choice('classifier', [
        scope.DecisionTreeClassifier(
            criterion=hp.choice('dtree_criterion', ['gini', 'entropy']),
            max_features=hp.uniform('dtree_max_features', 0, 1),
            max_depth=hp.quniform('dtree_max_depth', 1, 25, 1)),
        scope.SVC(
            C=hp.lognormal('svc_rbf_C', 0, 3),
            kernel='rbf',
            gamma=hp.lognormal('svc_rbf_gamma', 0, 2),
            tol=hp.lognormal('svc_rbf_tol', np.log(1e-3), 1)),
    ])

    sklearn_space = {'pre_processing': pre_processing,
                     'classifier': classifier}

    digits_cv_split_filenames = mmap_utils.persist_cv_splits(
        X, y, name='digits_10', n_cv_iter=10)
    mmap_utils.warm_mmap_on_cv_splits(client, digits_cv_split_filenames)

    trials = hyperselect.IPythonTrials(client)

    trials.fmin(
        partial(compute_evaluation,
                cv_split_filename=digits_cv_split_filenames[0]),
        sklearn_space,
        algo=hyperopt.tpe.suggest,
        max_evals=30,
        verbose=1,
    )
    trials.wait()
    print(trials.best_trial)
class DecisionTreeModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return DecisionTreeRegressor(random_state=RANDOM_STATE, presort=True, **args)

    hp_space = {
        "criterion": hp.choice("criterion", ["mse", "friedman_mse", "mae"]),
        "max_depth": hp.pchoice(
            "max_depth_enabled",
            [
                (0.7, None),
                (0.3, 1 + scope.int(hp.qlognormal("max_depth", np.log(30), 0.5, 3))),
            ],
        ),
        "splitter": hp.choice("splitter_str", ["best", "random"]),
        "max_features": hp.pchoice(
            "max_features_str",
            [
                (0.2, "sqrt"),  # most common choice.
                (0.1, "log2"),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform("max_features_str_frac", 0.0, 1.0)),
            ],
        ),
        "min_samples_split": scope.int(hp.quniform("min_samples_split_str", 2, 10, 1)),
        "min_samples_leaf": hp.choice(
            "min_samples_leaf_enabled",
            [
                1,
                scope.int(
                    hp.qloguniform("min_samples_leaf", np.log(1.5), np.log(50.5), 1)
                ),
            ],
        ),
    }
def parse_search_space(self, learner_space):
    '''
    search space is a dictionary, e.g.
    {'n_estimators': ('uniform', 1, 1000, 'discrete')}
    '''
    search_space = dict()
    for k, v in learner_space.items():
        if v[2] == 'samples':
            v = (v[0], v[1], min(100, self.X.shape[0] // len(self.kf) - 1), v[3])

        if v[3] == 'discrete':
            search_space[k] = hp.quniform(k, v[1], v[2], 1)
        elif v[0] == 'uniform':
            search_space[k] = hp.uniform(k, v[1], v[2])
        elif v[0] == 'loguniform':
            search_space[k] = hp.loguniform(k, v[1], v[2])
        elif v[0] == 'normal':
            search_space[k] = hp.normal(k, v[1], v[2])
        elif v[0] == 'lognormal':
            search_space[k] = hp.lognormal(k, v[1], v[2])
        elif v[0] == 'quniform':
            search_space[k] = hp.quniform(k, v[1], v[2], v[3])
        elif v[0] == 'qloguniform':
            search_space[k] = hp.qloguniform(k, v[1], v[2], v[3])
        elif v[0] == 'qnormal':
            search_space[k] = hp.qnormal(k, v[1], v[2], v[3])
        elif v[0] == 'qlognormal':
            search_space[k] = hp.qlognormal(k, v[1], v[2], v[3])
    return search_space
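# A hedged sketch of the input format this method expects, in the
# (dist, low, high, type) tuple convention its docstring describes.
# 'optimizer' is a hypothetical instance of whatever class owns the method;
# np is numpy. Values are illustrative only.
learner_space = {
    'n_estimators': ('uniform', 1, 1000, 'discrete'),
    'learning_rate': ('loguniform', np.log(1e-4), np.log(1e-1), 'continuous'),
}
search_space = optimizer.parse_search_space(learner_space)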
def build_dist_func_instance(hp_name, func, args, hp_size=None):
    '''
    args:
        hp_name: the name of the hyperparameter associated with this func
        func: name of hyperopt dist func
        args: list of float values
    processing:
        instantiate the named dist func with specified args
    return:
        instance of hyperopt dist func
    '''
    if func == "choice":
        dist = hp.choice(hp_name, args)

    elif func == "randint":
        max_value = 65535 if len(args) == 0 else args[0]
        # specify "size=None" to work around a hyperopt bug
        if hp_size:
            # let size default to () (error if we try to set it explicitly)
            dist = hp.randint(hp_name, max_value)
        else:
            dist = hp.randint(hp_name, max_value, size=None)

    elif func == "uniform":
        arg_check(func, args, count=2)
        dist = hp.uniform(hp_name, *args)

    elif func == "normal":
        arg_check(func, args, count=2)
        dist = hp.normal(hp_name, *args)

    elif func == "loguniform":
        arg_check(func, args, count=2)
        dist = hp.loguniform(hp_name, *args)

    elif func == "lognormal":
        arg_check(func, args, count=2)
        dist = hp.lognormal(hp_name, *args)

    elif func == "quniform":
        arg_check(func, args, count=3)
        dist = hp.quniform(hp_name, *args)

    elif func == "qnormal":
        arg_check(func, args, count=3)
        dist = hp.qnormal(hp_name, *args)

    elif func == "qloguniform":
        arg_check(func, args, count=3)
        dist = hp.qloguniform(hp_name, *args)

    elif func == "qlognormal":
        arg_check(func, args, count=3)
        dist = hp.qlognormal(hp_name, *args)

    return dist
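# A short usage sketch of the factory above. The parameter names and values
# are illustrative assumptions; arg_check is assumed to be the module's own
# arity validator. Each call returns a hyperopt stochastic expression.
lr = build_dist_func_instance("lr", "qlognormal", [np.log(1e-3), 0.5, 1e-4])
layers = build_dist_func_instance("layers", "choice", [1, 2, 3])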
def test_read_qlognormal(self):
    # 0 float
    # 1   hyperopt_param
    # 2     Literal{qlognormal}
    # 3     qlognormal
    # 4       Literal{0.0}
    # 5       Literal{1.0}
    # 6       Literal{0.5}
    qlognormal = hp.qlognormal("qlognormal", 0.0, 1.0, 0.5).inputs()[0].inputs()[1]
    ret = self.pyll_reader.read_qlognormal(qlognormal, "qlognormal")
    expected = configuration_space.NormalFloatHyperparameter(
        "qlognormal", 0.0, 1.0, q=0.5, base=np.e)
    self.assertEqual(expected, ret)

    qlognormal = hp.qlognormal("qlognormal", 1.0, 5.0, 1.0).inputs()[0].inputs()[1]
    ret = self.pyll_reader.read_qlognormal(qlognormal, "qlognormal")
    expected = configuration_space.NormalIntegerHyperparameter(
        "qlognormal", 1.0, 5.0, base=np.e)
    self.assertEqual(expected, ret)
def log_normal_from_bounds(label, left_bound, right_bound, quantization=None):
    log_left_bound = np.log(left_bound)
    log_right_bound = np.log(right_bound)
    log_mean = (log_left_bound + log_right_bound) / 2.0
    log_sigma = (log_right_bound - log_left_bound) / 4.0
    mean = np.exp(log_mean)
    hp_variable = (hp.lognormal(label, log_mean, log_sigma)
                   if quantization is None
                   else hp.qlognormal(label, log_mean, log_sigma, quantization))
    dist = stats.lognorm(log_sigma, scale=mean)
    return Parameter(label, mean, hp_variable, dist.logpdf, dist.cdf)
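# A hedged sketch of how the helper above behaves: it places the bounds two
# sigmas from the log-mean, so roughly 95% of samples land inside them.
# Sampling the returned hp variable directly illustrates this; access via
# .hp_variable assumes Parameter is a simple attribute container.
from hyperopt.pyll.stochastic import sample

# Illustrative only: a quantized log-normal spanning [16, 1024],
# snapped to multiples of 16 by qlognormal's round(exp(x) / q) * q.
param = log_normal_from_bounds('units', 16, 1024, quantization=16)
draws = [sample(param.hp_variable) for _ in range(5)]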
def define_search_space(objective, starting_params):
    prediction = starting_params["collaborative_params"][
        "prediction_network_params"]
    space = {
        'lr': hp.qlognormal("lr", np.log(prediction["lr"]),
                            0.5 * prediction["lr"], 0.05 * prediction["lr"]),
        'epochs': hp.quniform('epochs', prediction["epochs"] - 10,
                              prediction["epochs"] + 20, 5),
        'kernel_l2': hp.choice('kernel_l2', [
            0.0,
            hp.qloguniform('kernel_l2_choice', np.log(1e-9), np.log(1e-5), 5e-9)
        ]),
        'batch_size': hp.qloguniform('batch_size', np.log(1024), np.log(4096), 1024),
        'conv_depth': hp.quniform('conv_depth', 1, prediction["conv_depth"] + 2, 1),
        'gcn_layers': hp.quniform('gcn_layers', 1, prediction["gcn_layers"] + 1, 1),
        'ncf_layers': hp.quniform('ncf_layers', 1, prediction["ncf_layers"] + 1, 1),
        'ps_proportion': hp.choice('ps_proportion', [
            0.0,
            hp.qloguniform('ps_proportion_choice', np.log(0.1),
                           np.log(prediction["ps_proportion"] + 1.0), 0.05)
        ]),
        'ns_proportion': hp.quniform('ns_proportion', 0.0,
                                     prediction["ns_proportion"] + 2.0, 0.1),
        'nsh': hp.quniform('nsh', 0.0, prediction["nsh"] + 2.0, 0.1),
        # 'gaussian_noise': hp.qlognormal('gaussian_noise',
        #                                 np.log(prediction["gaussian_noise"]),
        #                                 0.5 * prediction["gaussian_noise"], 0.005),
        'gaussian_noise': hp.choice('gaussian_noise', [
            0.0,
            hp.qloguniform('gaussian_noise_choice', np.log(1e-3), np.log(0.5), 1e-3)
        ]),
        'margin': hp.choice('margin', [
            0.0,
            hp.qloguniform('margin_choice', np.log(1e-4), np.log(0.05), 5e-4)
        ]),
        'n_dims': hp.quniform('n_dims', starting_params["n_dims"] - 16,
                              starting_params["n_dims"] + 64, 16),
    }
    return space
def many_dists():
    a = hp.choice('a', [0, 1, 2])
    b = hp.randint('b', 10)
    c = hp.uniform('c', 4, 7)
    d = hp.loguniform('d', -2, 0)
    e = hp.quniform('e', 0, 10, 3)
    f = hp.qloguniform('f', 0, 3, 2)
    g = hp.normal('g', 4, 7)
    h = hp.lognormal('h', -2, 2)
    i = hp.qnormal('i', 0, 10, 2)
    j = hp.qlognormal('j', 0, 2, 1)
    k = hp.pchoice('k', [(.1, 0), (.9, 1)])
    z = a + b + c + d + e + f + g + h + i + j + k
    return {'loss': scope.float(scope.log(1e-12 + z ** 2)),
            'status': base.STATUS_OK}
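# Because the dict returned by many_dists() already evaluates to a complete
# {'loss', 'status'} result, one way to exercise the space is with an identity
# objective. A minimal sketch; max_evals is an arbitrary illustrative value.
from hyperopt import fmin, tpe, Trials

def passthrough(result):
    # many_dists() is itself the search space; each sampled point already
    # carries 'loss' and 'status', so the objective just forwards it.
    return result

trials = Trials()
best = fmin(passthrough, space=many_dists(), algo=tpe.suggest,
            max_evals=25, trials=trials)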
def many_dists():
    a = hp.choice("a", [0, 1, 2])
    b = hp.randint("b", 10)
    bb = hp.randint("bb", 12, 25)
    c = hp.uniform("c", 4, 7)
    d = hp.loguniform("d", -2, 0)
    e = hp.quniform("e", 0, 10, 3)
    f = hp.qloguniform("f", 0, 3, 2)
    g = hp.normal("g", 4, 7)
    h = hp.lognormal("h", -2, 2)
    i = hp.qnormal("i", 0, 10, 2)
    j = hp.qlognormal("j", 0, 2, 1)
    k = hp.pchoice("k", [(0.1, 0), (0.9, 1)])
    z = a + b + bb + c + d + e + f + g + h + i + j + k
    return {"loss": scope.float(scope.log(1e-12 + z ** 2)),
            "status": base.STATUS_OK}
def colkmeans(name,
              n_clusters=None,
              init=None,
              n_init=None,
              max_iter=None,
              tol=None,
              precompute_distances=True,
              verbose=0,
              random_state=None,
              copy_x=True,
              n_jobs=1):
    rval = scope.sklearn_ColumnKMeans(
        n_clusters=scope.int(
            hp.qloguniform(
                name + '.n_clusters',
                low=np.log(1.51),
                high=np.log(19.5),
                q=1.0)) if n_clusters is None else n_clusters,
        init=hp.choice(
            name + '.init',
            ['k-means++', 'random'],
        ) if init is None else init,
        n_init=hp.choice(
            name + '.n_init',
            [1, 2, 10, 20],
        ) if n_init is None else n_init,
        max_iter=scope.int(
            hp.qlognormal(
                name + '.max_iter',
                np.log(300),
                np.log(10),
                q=1,
            )) if max_iter is None else max_iter,
        tol=hp.lognormal(
            name + '.tol',
            np.log(0.0001),
            np.log(10),
        ) if tol is None else tol,
        precompute_distances=precompute_distances,
        verbose=verbose,
        random_state=random_state,
        copy_x=copy_x,
        n_jobs=n_jobs,
    )
    return rval
class DecisionTreeModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return DecisionTreeClassifier(random_state=RANDOM_STATE, presort=True, **args)

    hp_space = {
        'max_depth': hp.pchoice(
            'max_depth_enabled',
            [(0.7, None),
             (0.3, 1 + scope.int(hp.qlognormal('max_depth', np.log(30), 0.5, 3)))]),
        'splitter': hp.choice('splitter_str', ['best', 'random']),
        'max_features': hp.pchoice(
            'max_features_str',
            [
                (0.2, 'sqrt'),  # most common choice.
                (0.1, 'log2'),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform('max_features_str_frac', 0., 1.))
            ]),
        'min_samples_split': scope.int(hp.quniform('min_samples_split_str', 2, 10, 1)),
        'min_samples_leaf': hp.choice('min_samples_leaf_enabled', [
            1,
            scope.int(
                hp.qloguniform('min_samples_leaf', np.log(1.5), np.log(50.5), 1))
        ]),
        'class_weight': hp.pchoice('class_weight', [
            (0.5, None),
            (0.5, 'balanced'),
        ])
    }
class RandomForestsModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return RandomForestRegressor(random_state=RANDOM_STATE, n_jobs=-1, **args)

    hp_space = {
        "max_depth": hp.pchoice(
            "max_depth_enabled",
            [
                (0.7, None),
                (0.3, 1 + scope.int(hp.qlognormal("max_depth", np.log(30), 0.5, 3))),
            ],
        ),
        "n_estimators": scope.int(
            hp.qloguniform("n_estimators", np.log(9.5), np.log(300), 1)),
        "min_samples_leaf": hp.choice(
            "min_samples_leaf_enabled",
            [
                1,
                scope.int(
                    hp.qloguniform("min_samples_leaf", np.log(1.5), np.log(50.5), 1)),
            ],
        ),
        "max_features": hp.pchoice(
            "max_features_str",
            [
                (0.1, "sqrt"),  # most common choice.
                (0.2, "log2"),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform("max_features_str_frac", 0.0, 1.0)),
            ],
        ),
    }
class RandomForestsModel(TreeBasedModel):
    @staticmethod
    def build_estimator(args, train_data=None):
        return RandomForestClassifier(random_state=RANDOM_STATE, n_jobs=-1, **args)

    hp_space = {
        'max_depth': hp.pchoice(
            'max_depth_enabled',
            [(0.7, None),
             (0.3, 1 + scope.int(hp.qlognormal('max_depth', np.log(30), 0.5, 3)))]),
        'n_estimators': scope.int(
            hp.qloguniform('n_estimators', np.log(9.5), np.log(300), 1)),
        'min_samples_leaf': hp.choice('min_samples_leaf_enabled', [
            1,
            scope.int(
                hp.qloguniform('min_samples_leaf', np.log(1.5), np.log(50.5), 1))
        ]),
        'max_features': hp.pchoice(
            'max_features_str',
            [
                (0.2, 'sqrt'),  # most common choice.
                (0.1, 'log2'),  # less common choice.
                (0.1, None),  # all features, less common choice.
                (0.6, hp.uniform('max_features_str_frac', 0., 1.))
            ]),
        'class_weight': hp.pchoice('class_weight', [
            (0.5, None),
            (0.3, 'balanced'),
            (0.2, 'balanced_subsample')
        ])
    }
print(f"Broken", str(args)) return { 'status': 'ok', # or 'fail' if nan loss 'loss': np.inf } loss = np.average(trials) print(f"Finished with {loss}", str(args)) return { 'status': 'ok', # or 'fail' if nan loss 'loss': loss } space = { 'niterations': hp.qlognormal('niterations', np.log(10), 1.0, 1), 'npop': hp.qlognormal('npop', np.log(100), 1.0, 1), 'alpha': hp.lognormal('alpha', np.log(10.0), 1.0), 'fractionReplacedHof': hp.lognormal('fractionReplacedHof', np.log(0.1), 1.0), 'fractionReplaced': hp.lognormal('fractionReplaced', np.log(0.1), 1.0), 'perturbationFactor': hp.lognormal('perturbationFactor', np.log(1.0), 1.0), 'weightMutateConstant': hp.lognormal('weightMutateConstant', np.log(4.0), 1.0), 'weightMutateOperator': hp.lognormal('weightMutateOperator', np.log(0.5), 1.0), 'weightAddNode': hp.lognormal('weightAddNode', np.log(0.5), 1.0), 'weightInsertNode': hp.lognormal('weightInsertNode', np.log(0.5), 1.0), 'weightDeleteNode': hp.lognormal('weightDeleteNode', np.log(0.5), 1.0), 'weightSimplify': hp.lognormal('weightSimplify', np.log(0.05), 1.0), 'weightRandomize': hp.lognormal('weightRandomize', np.log(0.25), 1.0), } ################################################################################
def createHyperoptSpace(self):
    name = self.root

    if 'anyOf' in self.config or 'oneOf' in self.config:
        data = []
        if 'anyOf' in self.config:
            data = self.config['anyOf']
        else:
            data = self.config['oneOf']

        choices = hp.choice(name, [
            Hyperparameter(param, name + "." + str(index)).createHyperoptSpace()
            for index, param in enumerate(data)
        ])
        return choices
    elif self.config['type'] == 'object':
        space = {}
        for key in self.config['properties'].keys():
            config = self.config['properties'][key]
            space[key] = Hyperparameter(config, name + "." + key).createHyperoptSpace()
        return space
    elif self.config['type'] == 'number':
        mode = self.config.get('mode', 'uniform')
        scaling = self.config.get('scaling', 'linear')

        if mode == 'uniform':
            min = self.config.get('min', 0)
            max = self.config.get('max', 1)
            rounding = self.config.get('rounding', None)

            if scaling == 'linear':
                if rounding is not None:
                    return hp.quniform(name, min, max, rounding)
                else:
                    return hp.uniform(name, min, max)
            elif scaling == 'logarithmic':
                if rounding is not None:
                    return hp.qloguniform(name, math.log(min), math.log(max), rounding)
                else:
                    return hp.loguniform(name, math.log(min), math.log(max))
        if mode == 'normal':
            mean = self.config.get('mean', 0)
            stddev = self.config.get('stddev', 1)
            rounding = self.config.get('rounding', None)

            if scaling == 'linear':
                if rounding is not None:
                    return hp.qnormal(name, mean, stddev, rounding)
                else:
                    return hp.normal(name, mean, stddev)
            elif scaling == 'logarithmic':
                if rounding is not None:
                    return hp.qlognormal(name, math.log(mean), math.log(stddev), rounding)
                else:
                    return hp.lognormal(name, math.log(mean), math.log(stddev))
def get_space(Utype=True,
              UNBktype=helper_naive_type(),
              Ualpha=hp.lognormal('alpha_', 0, 1),
              Ufit_prior=hp.choice('bool_', [True, False]),
              Ubinarize=hp.choice('binarize_', [.0, hp.lognormal('threshold_', 0, 1)]),
              UC=hp.lognormal('svm_C', 0, 2),
              Uwidth=hp.lognormal('svm_rbf_width', 0, 1),
              USVMktype=helper_svm(),
              Ucriterion=hp.choice('dtree_criterion', ['entropy', 'gini']),
              Umax_depth=hp.choice('dtree_max_depth',
                                   [None, 1 + hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
              Umin_samples_split=1 + hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
              Uweights=hp.choice('weighting', ['uniform', 'distance']),
              Ualgo=hp.choice('algos', ['auto', 'brute', 'ball_tree', 'kd_tree']),
              Uleaf_sz=20 + hp.randint('size', 20),
              Up=hp.choice('distance', [1, 2]),
              Un_neighbors=hp.quniform('num', 3, 19, 1),
              Uradius=hp.uniform('rad', 0, 2),
              UNktype=helper_neighbors(),
              Uout_label=None,
              Upreprocess=True,
              Unorm=choice(['l1', 'l2']),
              Unaxis=1,
              Uw_mean=choice([True, False]),
              Uw_std=True,
              Usaxis=0,
              Ufeature_range=(0, 1),
              Un_components=None,
              Uwhiten=hp.choice('whiten_chose', [True, False])):
    give_me_bayes = get_bayes(UNBktype, Ualpha, Ufit_prior, Ubinarize, Upreprocess,
                              Unorm, Unaxis, Uw_mean, Uw_std, Usaxis,
                              Ufeature_range, Un_components, Uwhiten)
    give_me_svm = get_svm(UC, USVMktype, Uwidth, Upreprocess, Unorm, Unaxis,
                          Uw_mean, Uw_std, Usaxis, Ufeature_range,
                          Un_components, Uwhiten)
    give_me_dtree = get_dtree(Ucriterion, Umax_depth, Umin_samples_split,
                              Upreprocess, Unorm, Unaxis, Uw_mean, Uw_std,
                              Usaxis, Ufeature_range, Un_components, Uwhiten)
    give_me_neighbors = get_neighbors(UNktype, Uweights, Ualgo, Uleaf_sz, Up,
                                      Un_neighbors, Uradius, Uout_label,
                                      Upreprocess, Unorm, Unaxis, Uw_mean,
                                      Uw_std, Usaxis, Ufeature_range,
                                      Un_components, Uwhiten)
    if Utype == 'naive_bayes':
        res_space = give_me_bayes
    elif Utype == 'svm':
        res_space = give_me_svm
    elif Utype == 'dtree':
        res_space = give_me_dtree
    elif Utype == 'neighbors':
        res_space = give_me_neighbors
    else:
        return hp.choice('quick_fix',
                         [give_me_bayes, give_me_svm, give_me_dtree, give_me_neighbors])
    return hp.choice('quick_fix', [res_space])
            'var_losses': losses,
            'time': delta_t}

    trials = Trials()
    best = fmin(objective, space=parameter_space, algo=tpe.suggest,
                max_evals=max_evals, trials=trials)
    return best, trials


if __name__ == "__main__":
    farneback_space = {
        'pyr_scale': hp.uniform('pyr_scale', 0.1, 0.9),
        'levels': hp.qloguniform('levels', 0, np.log(11), q=1),
        'winsize': hp.qlognormal('winsize', np.log(100), 1 / 6 * np.log(100), 1),
        'iterations': hp.quniform('iterations', 2, 6, 1),
        'poly_n': hp.quniform('poly_n', 2, 14, 1),
        'poly_sigma': hp.uniform('poly_sigma', 0.1, 2.0),
        'scale': hp.qloguniform('scale', np.log(0.1), np.log(1e7), 1),
    }

    def flow_calculator(args):
        return FarnebackFlow(**args)

    snt_steps = {(0 * hour, 2 * hour): hour,
                 (0 * hour, 4 * hour): 2 * hour,
                 (0 * hour, 6 * hour): 3 * hour}
        print(count)
        print(cfg)
        print(loss)
        print()
        return loss


if __name__ == '__main__':
    space = {
        # Actual batch_size == batch_size * num_concepts
        # 'batch_size': hp.qlognormal('batch_size', 2.0, 0.3, 1),  # 4,
        # 'epochs': hp.qlognormal('epochs', 8.3, 0.3, 100),  # 4000,
        # How often to anneal temperature
        # More like a traditional epoch due to small dataset size
        # 'superepoch': hp.qlognormal('superepoch', 5.3, 0.2, 10),  # 200,
        'e_dense_size': hp.qlognormal('e_dense_size', 2.5, 0.4, 1),  # 20,
        'd_dense_size': hp.qlognormal('d_dense_size', 3., 0.5, 1),  # 20,
        # 'input_dim': 8,
        # 'num_concepts': 7,
        # 'sentence_len': 7,
        # 'vocab_size': 2,
        'temp_init': hp.lognormal('temp_init', 1.2, 0.4),  # 4,
        'temp_decay': hp.uniform('temp_decay', 0.8, 1),  # 0.9,
        # 'train_st': hp.choice('train_st', [
        #     ('st_false', 0),
        #     ('st_true', 1),
        # ]),
        # 'test_prop': 0.1,
        'dropout_rate': hp.uniform('dropout_rate', 0, 0.4),  # 0.3,
        # 'verbose': True,
        {
            'ktype': 'linear'
        },
        {
            'ktype': 'RBF',
            'width': hp.lognormal('rbf_width', 0, 1)
        },
    ]),
}

params_DT = {
    'criterion': hp.choice('criterion', ['gini', 'entropy']),
    'max_depth': hp.choice('max_depth',
                           [None, hp.qlognormal('max_depth_int', 3, 1, 1)]),
    'min_samples_split': uniform_float("min_samples_split", 0, 1),
    # "max_features": np.random.randint(1, len(X_train.columns), 20),
    # "min_samples_leaf": [2, 3, 4, 5, 6],
}

params_RF = {
    "n_estimators": uniform_int("n_estimators", 100, 1000),
    "max_depth": uniform_int("max_depth", 3, 15),
    "max_features": uniform_float("max_features", 0, 1),
    "criterion": hp.choice("criterion", ["gini", "entropy"])
}

##################################### TPE (Hyperopt library)
def main():
    args = argParser()
    datafile = args.datafile
    resultprefix = 'tmp/' + args.datafile.rpartition('/')[2].rpartition('.')[0]
    if args.resultprefix:
        resultprefix = args.resultprefix
    # mod: include clustering method in clustering filename.
    cluster_file = resultprefix + '.cluster'
    subsequence_file = resultprefix + '.subsequence'
    graph_file = resultprefix + '.pdf'

    rstate = None
    seed_desc = "UNSET"
    if args.seed:
        rstate = np.random.RandomState(args.seed)
        seed_desc = str(args.seed)

    plot = args.plot

    print('PARSER PARAMETERS: ')
    print('--------------------')
    print('\tinput file: %s' % datafile)
    print('\tresult prefix: %s' % resultprefix)
    print('\tgraph_file: %s' % graph_file)
    print('\tsaved cluster results: %s' % os.path.isfile(cluster_file))
    print('\tsaved subsequence results: %s' % os.path.isfile(subsequence_file))
    print('\tseed: %s' % seed_desc)
    print('\tparallel: %s' % ("Yes" if PARALLEL else "No"))
    print('\tplot: %s' % ("Yes" if plot else "No"))
    print("--------------------")

    # run clustering
    new_cluster = False
    if os.path.isfile(cluster_file):
        print("Loading previous clustering...")
        cluster_results = pickle.load(open(cluster_file, 'rb'))
        long_keys = ["original_pts", "clustered_pts"]
        printable_results = {k: v for k, v in cluster_results.items()
                             if k not in long_keys}
        print("Loaded clustering with parameters: " + str(printable_results))
    else:
        print("Generating clustering...")
        # get data
        input_data = lib.parsing.parseSwitchTrace(datafile)
        np_input_data = np.array(input_data)

        # clustering
        X = np_input_data
        Y = lib.clustering.runPipeline(bGmmConf, X)

        cluster_results = {}
        cluster_results['original_pts'] = input_data
        cluster_results['clustered_pts'] = pickle.dumps(Y)

        if not os.path.exists(os.path.dirname(cluster_file)):
            try:
                os.makedirs(os.path.dirname(cluster_file))
            except OSError as exc:  # Guard against race condition
                if exc.errno != errno.EEXIST:
                    print("problem here: ", cluster_file, " ",
                          os.path.dirname(cluster_file))
                    raise
        pickle.dump(cluster_results, open(cluster_file, 'wb'))
        new_cluster = True

    # run subsequencing
    if not new_cluster and os.path.isfile(subsequence_file):
        print("Loading previous subsequences...")
        subsequences = pickle.load(open(subsequence_file, 'rb'))
        long_keys = ["merged_freq", "coverage_sum"]
        printable_results = {k: v for k, v in subsequences.items()
                             if k not in long_keys}
        print("Loaded subsequences with parameters: " + str(printable_results))
    else:
        print("Generating subsequences...")
        space = {
            'min_frequency_thresh': hp.qlognormal('min_frequency_thresh', 4, 0.6, 1),
            'clustered_pts': cluster_results['clustered_pts']
        }
        if PARALLEL:
            trials = MongoTrials('mongo://localhost:45555/db/jobs',
                                 exp_key='tpprof1')
        else:
            trials = Trials()
        best = fmin(fn=subsequence_objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=SUBSEQUENCE_EVALS,
                    trials=trials,
                    rstate=rstate)
        best_trial = trials.trials[np.argmin([r['loss'] for r in trials.results])]
        subsequence_freq = pickle.loads(
            trials.trial_attachments(best_trial)['subsequence_freq'])
        subsequence_coverage = pickle.loads(
            trials.trial_attachments(best_trial)['subsequence_coverage'])
        merged_freq, coverage_sum = \
            lib.subsequencing.merge_stable(subsequence_freq, subsequence_coverage)
        best['merged_freq'] = merged_freq
        best['coverage_sum'] = coverage_sum
        subsequences = best
        pickle.dump(subsequences, open(subsequence_file, 'wb'))

    if plot:
        print("Drawing profile...")
        lib.drawing.plot(cluster_results['original_pts'],
                         pickle.loads(cluster_results['clustered_pts']),
                         subsequences['merged_freq'],
                         subsequences['coverage_sum'],
                         graph_file, plot)
    else:
        print('Drawing disabled')
    {
        'type': 'knn',
    },
    # {
    #     'type': 'svm',
    #     'C': hp.lognormal('svm_C', 0, 1),
    #     'kernel': hp.choice('svm_kernel', [
    #         {'ktype': 'linear'},
    #         {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
    #     ]),
    # },
    {
        'type': 'randomforest',
        'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
        'max_depth': hp.choice('dtree_max_depth',
                               [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
        'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
    },
])}

trials = Trials()


def objective(p):
    if p['classifier_type']['type'] == 'knn':
        clf_x = KNeighborsRegressor()
        clf_y = KNeighborsRegressor()
    elif p['classifier_type']['type'] == 'randomforest':
        clf_x = RandomForestRegressor(max_depth=p['classifier_type']['max_depth'],
                                      min_samples_split=p['classifier_type']['min_samples_split'])
        clf_y = RandomForestRegressor(max_depth=p['classifier_type']['max_depth'],
def wikiLearn():
    """
    Not entirely clear on all of this yet.
    """
    # 1. A simple function
    from hyperopt import fmin, tpe, hp
    best = fmin(fn=lambda x: x ** 2,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)

    # 2. Using a function with an OK status
    from hyperopt import fmin, tpe, hp, STATUS_OK

    def objective(x):
        return {'loss': x ** 2, 'status': STATUS_OK}

    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print(best)

    # 3. Returning a dict of results
    import pickle
    import time
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

    def objective(x):
        return {
            'loss': x ** 2,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {'type': None, 'value': [0, 1, 2]},
            # -- attachments are handled differently
            'attachments': {'time_module': pickle.dumps(time.time)}
        }

    trials = Trials()
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100,
                trials=trials)
    print(best)
    print(trials.trials)
    print(trials.results)
    print(trials.losses())
    print(trials.statuses())

    # Haven't figured out what attachments mean yet
    msg = trials.trial_attachments(trials.trials[5])['time_module']
    time_module = pickle.loads(msg)

    from hyperopt import hp
    space = hp.choice('a', [
        ('case 1', 1 + hp.lognormal('c1', 0, 1)),
        ('case 2', hp.uniform('c2', -10, 10))
    ])
    import hyperopt.pyll.stochastic
    print(hyperopt.pyll.stochastic.sample(space))

    # hp.choice(label, options)
    # hp.randint(label, upper)             # [0, upper]
    # hp.uniform(label, low, high)
    # hp.quniform(label, low, high, q)     # round(uniform(low, high) / q) * q
    # hp.loguniform(label, low, high)
    # hp.qloguniform(label, low, high, q)  # round(exp(uniform(low, high)) / q) * q
    # hp.normal(label, mu, sigma)
    # hp.qnormal(label, mu, sigma, q)      # round(normal(mu, sigma) / q) * q
    # hp.lognormal(label, mu, sigma)
    # hp.qlognormal(label, mu, sigma, q)   # round(exp(normal(mu, sigma)) / q) * q

    # 4. Usage with sklearn
    from hyperopt import hp
    space = hp.choice('classifier_type', [
        {
            'type': 'naive_bayes',
        },
        {
            'type': 'svm',
            'C': hp.lognormal('svm_C', 0, 1),
            'kernel': hp.choice('svm_kernel', [
                {'ktype': 'linear'},
                {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
            ]),
        },
        {
            'type': 'dtree',
            'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
            'max_depth': hp.choice('dtree_max_depth',
                                   [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
            'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
        },
    ])

    # 5. Still haven't fully figured out scope.define
    import hyperopt.pyll
    from hyperopt.pyll import scope

    @scope.define
    def foo(a, b=0):
        print('running foo', a, b)
        return a + b / 2

    # -- this will print 0; foo is called as usual.
    print(foo(0))

    # In describing search spaces you can use `foo` as you
    # would in normal Python. These two calls will not actually call foo;
    # they just record that foo should be called to evaluate the graph.
    space1 = scope.foo(hp.uniform('a', 0, 10))
    space2 = scope.foo(hp.uniform('a', 0, 10), hp.normal('b', 0, 1))

    # -- this will print a pyll.Apply node
    print(space1)

    # -- this will draw a sample by running foo()
    print(hyperopt.pyll.stochastic.sample(space1))
    Distribution looks like exp(normal(mu, sigma))
    """
    return hp.lognormal(name, mu, sigma)


def qlognormal(name, params):
    """
    Function to create hyperopt qlognormal variable

    Input
    ------------------
    name - Variable name
    params - Tuple (mu, sigma, q) of mean, standard deviation and q value.

    Distribution looks like round(exp(normal(mu, sigma)) / q) * q
    """
    # Python 3 removed tuple parameter unpacking, so unpack explicitly.
    mu, sigma, q = params
    return hp.qlognormal(name, mu, sigma, q)


def show_distributions_info():
    print("List of Distributions available")
    list_dist = [uniform, randint, choice, loguniform, quniform,
                 qloguniform, normal, lognormal, qlognormal]
    for dist in list_dist:
        print("Distribution name :", dist.__name__)
        print("Docstring")
        print("------------------------------------")
        print(dist.__doc__)


# Additional helper functions
def gen_metric(func, metric):
    if metric == 'auc':
        return func.auc()
def qlognormal(label, *args, **kwargs):
    return hp.qlognormal(label, *args, **kwargs)
def hyperopt(self):
    return hp.qlognormal(self.label.name, self.mu, self.sigma, self.q)
def createHyperoptSpace(self, lockedValues=None):
    name = self.root

    if lockedValues is None:
        lockedValues = {}

    if 'anyOf' in self.config or 'oneOf' in self.config:
        data = []
        if 'anyOf' in self.config:
            data = self.config['anyOf']
        else:
            data = self.config['oneOf']

        subSpaces = [
            Hyperparameter(param, self, name + "." + str(index)).createHyperoptSpace(lockedValues)
            for index, param in enumerate(data)
        ]
        for index, space in enumerate(subSpaces):
            space["$index"] = index

        choices = hp.choice(self.hyperoptVariableName, subSpaces)
        return choices
    elif 'enum' in self.config:
        if self.name in lockedValues:
            return lockedValues[self.name]

        choices = hp.choice(self.hyperoptVariableName, self.config['enum'])
        return choices
    elif 'constant' in self.config:
        if self.name in lockedValues:
            return lockedValues[self.name]

        return self.config['constant']
    elif self.config['type'] == 'object':
        space = {}
        for key in self.config['properties'].keys():
            config = self.config['properties'][key]
            space[key] = Hyperparameter(
                config, self, name + "." + key).createHyperoptSpace(lockedValues)
        return space
    elif self.config['type'] == 'number':
        if self.name in lockedValues:
            return lockedValues[self.name]

        mode = self.config.get('mode', 'uniform')
        scaling = self.config.get('scaling', 'linear')

        if mode == 'uniform':
            min = self.config.get('min', 0)
            max = self.config.get('max', 1)
            rounding = self.config.get('rounding', None)

            if scaling == 'linear':
                if rounding is not None:
                    return hp.quniform(self.hyperoptVariableName, min, max, rounding)
                else:
                    return hp.uniform(self.hyperoptVariableName, min, max)
            elif scaling == 'logarithmic':
                if rounding is not None:
                    return hp.qloguniform(self.hyperoptVariableName,
                                          math.log(min), math.log(max), rounding)
                else:
                    return hp.loguniform(self.hyperoptVariableName,
                                         math.log(min), math.log(max))
        if mode == 'randint':
            max = self.config.get('max', 1)
            return hp.randint(self.hyperoptVariableName, max)
        if mode == 'normal':
            mean = self.config.get('mean', 0)
            stddev = self.config.get('stddev', 1)
            rounding = self.config.get('rounding', None)

            if scaling == 'linear':
                if rounding is not None:
                    return hp.qnormal(self.hyperoptVariableName, mean, stddev, rounding)
                else:
                    return hp.normal(self.hyperoptVariableName, mean, stddev)
            elif scaling == 'logarithmic':
                if rounding is not None:
                    return hp.qlognormal(self.hyperoptVariableName,
                                         math.log(mean), math.log(stddev), rounding)
                else:
                    return hp.lognormal(self.hyperoptVariableName,
                                        math.log(mean), math.log(stddev))
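# A hedged sketch of a config this method could parse. The schema keys
# ('type', 'mode', 'scaling', 'min', 'max', 'rounding') come from the code
# above; the parameter names and values are illustrative assumptions.
config = {
    "type": "object",
    "properties": {
        "learning_rate": {"type": "number", "mode": "uniform",
                          "scaling": "logarithmic", "min": 1e-4, "max": 1e-1},
        "layers": {"type": "number", "mode": "uniform",
                   "min": 1, "max": 8, "rounding": 1},
    },
}
# Constructor signature assumed from the recursive calls above:
# Hyperparameter(config, parent, root).
space = Hyperparameter(config, None, "root").createHyperoptSpace()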
    assert(min)
    return hp.loguniform(name, np.log(min), np.log(max))


params_SVM = {
    "C": hp.lognormal("C", 0, 1),
    "kernel": hp.choice("kernel", [
        {"ktype": "linear"},
        {"ktype": "RBF", "width": hp.lognormal("rbf_width", 0, 1)},
    ]),
}

params_DT = {
    "criterion": hp.choice("criterion", ["gini", "entropy"]),
    "max_depth": hp.choice("max_depth",
                           [None, hp.qlognormal("max_depth_int", 3, 1, 1)]),
    "min_samples_split": uniform_float("min_samples_split", 0, 1),
    # "max_features": np.random.randint(1, len(X_train.columns), 20),
    # "min_samples_leaf": [2, 3, 4, 5, 6],
}

params_RF = {
    "n_estimators": uniform_int("n_estimators", 100, 1000),
    "max_depth": uniform_int("max_depth", 3, 15),
    "max_features": uniform_float("max_features", 0, 1),
    "criterion": hp.choice("criterion", ["gini", "entropy"])
}

params_XGB = {
    "max_depth": hp.quniform("max_depth", 4, 16, 1),
    "min_child_weight": hp.quniform("min_child", 1, 10, 1),
            'dtree_max_features', 0, 1),
        max_depth=hp.quniform(
            'dtree_max_depth', 0, 25, 1)),
    scope.SVC(
        C=hp.lognormal(
            'svc_rbf_C', 0, 3),
        kernel='rbf',
        gamma=hp.lognormal(
            'svc_rbf_gamma', 0, 2),
        tol=hp.lognormal(
            'svc_rbf_tol', np.log(1e-3), 1)),
])

pre_processing = hp.choice('preproc_algo', [
    scope.PCA(
        n_components=1 + hp.qlognormal(
            'pca_n_comp', np.log(10), np.log(10), 1),
        whiten=hp.choice(
            'pca_whiten', [False, True])),
    scope.GMM(
        n_components=1 + hp.qlognormal(
            'gmm_n_comp', np.log(100), np.log(10), 1),
        covariance_type=hp.choice(
            'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])),
])

sklearn_space = {'pre_processing': pre_processing,
                 'classifier': classifier}

from hyperopt.pyll.stochastic import sample
print(sample(sklearn_space))
print(sample(sklearn_space))
del df_varlist

space = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'seed': 9999,
    'tree_method': 'hist',
    'grow_policy': 'lossguide',
    'max_delta_step': hp.lognormal('max_delta_step', 0, 1),
    'min_child_weight': hp.lognormal('min_child_weight', 0, 1),
    'gamma': hp.lognormal('gamma', 0, 1),
    'lambda': hp.lognormal('lambda', 0, 1),
    'alpha': hp.lognormal('alpha', 0, 1),
    'eta': hp.loguniform('eta', log(2**-7), 0),
    'max_leaves': 128 - hp.qloguniform('max_leaves', log(4), log(128), 1),
    'max_bin': 2 + hp.qlognormal('max_bin', log(256 - 2), 1, 1),
    'subsample': 1.5 - hp.loguniform('subsample', log(0.5), 0),
    'colsample_bytree': 1.5 - hp.loguniform('colsample_bytree', log(0.5), 0),
    'colsample_bylevel': 1.5 - hp.loguniform('colsample_bylevel', log(0.5), 0)
}

best_prenatal = choose_model(df_2016_train[prenatal_vars],
                             df_2016_valid[prenatal_vars],
                             df_2016_hvalid[prenatal_vars],
                             'Prenatal')
best_NICU = choose_model(df_2016_NICU_train[prenatal_vars],
                         df_2016_NICU_valid[prenatal_vars],
                         df_2016_NICU_hvalid[prenatal_vars],
                         'NICU')
best_post_birth = choose_model(df_2016_train, df_2016_valid,
                               df_2016_hvalid, 'Postnatal')