def get_tune_space(self):
    """Hyper-parameter search space for SAD/SCD tuning.

    Onset/offset thresholds and alpha live in [0, 1]; the minimum
    change-detection duration is expressed in seconds.
    """
    return {
        'sad_onset': chocolate.uniform(0., 1.),
        'sad_offset': chocolate.uniform(0., 1.),
        'scd_alpha': chocolate.uniform(0., 1.),
        'scd_min_duration': chocolate.uniform(0., 5.),
    }
def test_conditional_space(self):
    """A conditional space round-trips through the connection exactly once."""
    dist_a = uniform(0.0, 2)
    dist_b = uniform(1, 4)
    dist_c = uniform(0.01, 1)
    dist_d = uniform(5, 10)
    subspaces = [
        {"k1": "a", "k2": "b", "a": dist_a, "b": dist_b},
        {"k1": "a", "k2": "c", "a": dist_c, "c": dist_d},
    ]
    space = Space(subspaces)
    # Nothing stored yet.
    self.assertEqual(self.conn.get_space(), None)
    self.conn.insert_space(space)
    # Reads back equal to what was written.
    self.assertEqual(space, self.conn.get_space())
    # A second insert is rejected.
    self.assertRaises(AssertionError, self.conn.insert_space, space)
def get_tune_space(self):
    """Return the tunable SAD/SCD hyper-parameter space."""
    # (low, high) bounds per parameter; all sampled uniformly.
    bounds = {
        'sad_onset': (0., 1.),
        'sad_offset': (0., 1.),
        'scd_alpha': (0., 1.),
        'scd_min_duration': (0., 5.),
    }
    return {name: chocolate.uniform(lo, hi) for name, (lo, hi) in bounds.items()}
def get_tune_space(self):
    """Extend the parent tuning space with clustering parameters."""
    space = super().get_tune_space()
    space.update({
        # FIXME: be smarter about this parameter
        'cls_damping': chocolate.uniform(0.5, 1.),
        'cls_preference': chocolate.uniform(-8.0, 0.0),
    })
    return space
def get_tune_space(self):
    """Tuning space: inherited parameters plus clustering ones."""
    extra = {
        # FIXME: be smarter about this parameter
        'cls_damping': chocolate.uniform(0.5, 1.),
        'cls_preference': chocolate.uniform(-8.0, 0.0),
    }
    base = super().get_tune_space()
    base.update(extra)
    return base
def main():
    """Run one quasi-random search step for gradient-boosted trees on Boston housing."""
    # NOTE(review): load_boston was removed from scikit-learn 1.2; this example
    # only runs against older scikit-learn versions — confirm the pinned version.
    X, y = load_boston(return_X_y=True)

    # Connect to sqlite database in current directory
    conn = choco.SQLiteConnection(url="sqlite:///gbt-boston.db")

    search_space = {
        "learning_rate": choco.uniform(0.001, 0.1),
        "n_estimators": choco.quantized_uniform(25, 525, 1),
        "max_depth": choco.quantized_uniform(2, 25, 1),
        "subsample": choco.uniform(0.7, 1.0),
    }

    sampler = choco.QuasiRandom(conn, search_space, random_state=110, skip=3)
    token, params = sampler.next()
    loss = score_gbt(X, y, params)
    sampler.update(token, loss)
def setUp(self):
    """Build the conditional test space shared by this test case.

    Note that `log_c` and `q_unif` are deliberately reused across both
    top-level subspaces, matching the original object sharing.
    """
    log_c = log(low=-3, high=5, base=10)
    log_gamma = log(low=-2, high=3, base=10)
    unif = uniform(low=-1, high=1)
    q_unif = quantized_uniform(low=1, high=20, step=1)
    svm_branch = {
        "C": log_c,
        "kernel": {"linear": None, "rbf": {"gamma": log_gamma}},
        "cond2": {"aa": None, "bb": {"abc": unif}},
    }
    knn_branch = {"n_neighbors": q_unif}
    self.space = Space([
        {"algo": {"svm": svm_branch, "knn": knn_branch}},
        {"cond3": 0, "p": log_c, "p2": q_unif},
    ])
def example_run_bayesian():
    """Run a Bayesian optimisation loop and save scores/points to disk.

    Returns True when the loop completes; writes "Score_list" and
    "Point_list" files in the working directory.
    """
    # Initialisation: one uniform dimension per (mean, variance) pair.
    mean = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    variances = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5]
    x1, y1, x2, y2 = 60, 342, 726, 725
    dimension = len(mean)
    number = 20

    space = {}
    for dim in range(dimension):
        space["{}".format(dim)] = choco.uniform(mean[dim] - variances[dim],
                                                mean[dim] + variances[dim])

    # SQLite file TEST.db in the working directory (sqlite3 is in the stdlib,
    # no extra install needed).
    conn = choco.SQLiteConnection("sqlite:///TEST.db")
    conn.lock()
    bay = choco.Bayes(conn, space, clear_db=True)

    (token, point_next) = bay.next()
    point = format_next(point_next)
    all_pos = []
    all_score = []
    for step in range(number):
        loss = extract_score(step, x1, y1, x2, y2, point)
        bay.update(token, loss)
        (token, point_next) = bay.next()
        point = format_next(point_next)
        print("\rProgress : {}%".format(100*step//number), end="")
        all_pos.append(point)
        all_score.append(1-loss)

    np.savetxt("Score_list", all_score)
    np.savetxt("Point_list", all_pos)
    return True
def test_pickle(self):
    """Connection survives a pickle round-trip: results, complementary data and space."""
    results = [{"abc": 0, "def": 2}, {"abc": 1}, {"def": 42, "abc": 67, "hij": 23}]
    complementary = [{"abc": 0, "def": 2}, {"abc": 1}, {"def": 42, "abc": 67, "hij": 23}]
    raw_space = {
        "a": uniform(1, 2),
        "b": {"c": {"c1": uniform(0, 5)}, "d": {"d1": uniform(0, 6)}},
    }
    for entry in results:
        self.conn.insert_result(entry)
    for entry in complementary:
        self.conn.insert_complementary(entry)
    self.conn.insert_space(Space(raw_space))
    # Round-trip through pickle and compare every stored table.
    restored = pickle.loads(pickle.dumps(self.conn))
    self.assertEqual(self.conn.results.equals(restored.results), True)
    self.assertEqual(self.conn.complementary.equals(restored.complementary), True)
    self.assertEqual(restored.space, self.conn.space)
def main(): conn = choco.SQLiteConnection("sqlite:///my_db.db") # results = conn.results_as_dataframe() space = {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)} # Refer to https://chocolate.readthedocs.io/tutorials/algo.html sampler = choco.QuasiRandom(conn, space, clear_db=True) #sampler = choco.MOCMAES(conn, space, mu=0.1, clear_db=True) # Token: {'_chocolate_id': 0} # Params: {'y': 1.4641226269602674, 'x': 2.5223111999723393} token, params = sampler.next() loss = himmelblau(**params) sampler.update(token, loss) print("Token: {}, loss: {}".format(token, loss)) """
def test_space(self):
    """A space can be stored exactly once and read back unchanged."""
    raw = {
        "a": uniform(1, 2),
        "b": {"c": {"c1": uniform(0, 5)}, "d": {"d1": uniform(0, 6)}},
    }
    space = Space(raw)
    # No space stored initially.
    self.assertEqual(self.conn.get_space(), None)
    self.conn.insert_space(space)
    # Stored space reads back equal.
    self.assertEqual(space, self.conn.get_space())
    # Second insert must raise.
    self.assertRaises(AssertionError, self.conn.insert_space, space)
def CMAES(data_manager, n, file_name, conn):
    """Tune a RandomForestClassifier with CMA-ES for `n` iterations.

    Logs elapsed time, reward (positive CV score) and parameters to
    `file_name` as CSV when the loop finishes.
    """
    data_cv, labels_cv = data_manager.data_cv['data_cv'], data_manager.data_cv['labels_cv']
    data_test, labels_test = data_manager.data_cv['data_test'], data_manager.data_cv['labels_test']

    def score_gbt(params):
        # Negated mean CV accuracy: the sampler minimises loss.
        rfc = RandomForestClassifier(**params)
        results = cross_val_score(rfc, data_cv, labels_cv, cv=2, n_jobs=1)
        return -np.mean(results)

    space = {
        'n_estimators': choco.uniform(10, 1000),  # 12
        'max_depth': choco.uniform(1, 35),  # 11
        'min_samples_split': choco.uniform(2, 100),  # 21
        'min_samples_leaf': choco.uniform(1, 100),  # 21
        'max_features': choco.uniform(0.1, 0.9),
    }
    sampler = choco.CMAES(conn, space)

    plot_data = {"time": [], "reward": [], "param": []}
    start_time = time.time()
    for _ in range(n):
        token, params = sampler.next()
        print(params)
        # CMA-ES samples floats; cast the integer-valued parameters.
        for key in ('n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf'):
            params[key] = int(params[key])
        loss = score_gbt(params)
        print(loss)
        sampler.update(token, loss)
        plot_data["time"].append(time.time() - start_time)
        plot_data["reward"].append(-loss)
        plot_data["param"].append(params)

    pd.DataFrame(data=plot_data).to_csv(file_name, index=False)
def init_choco_sampler(args):
    """Create a CMA-ES sampler over the training hyper-parameter space.

    Returns a (sampler, connection) tuple; the connection is the SQLite
    database named by args['sqlite_dbase'].
    """
    conn = choco.SQLiteConnection(args['sqlite_dbase'])
    space = {
        "lr": choco.log(low=-5, high=-3, base=10),
        "lr_decay": choco.uniform(high=1, low=0),
        "reg_scale": choco.uniform(low=0, high=1),
        "last_reg_scale": choco.uniform(low=0, high=1),
        "weight": choco.uniform(low=1, high=50),
        "weight_decay": choco.uniform(low=0, high=1),
        "contrast": choco.uniform(low=-100, high=100),
    }
    return (choco.CMAES(conn, space), conn)
def __init__(self, method: Optional[str] = 'distance',
             metric: Optional[str] = 'euclidean',
             normalize: Optional[bool] = False,
             max_iter: Optional[int] = 1000):
    """Store clustering options and derive the tunable distance threshold."""
    super().__init__()
    self.method = method
    self.metric = metric
    self.max_iter = max_iter
    self.normalize = normalize

    min_dist, max_dist = dist_range(metric=self.metric,
                                    normalize=self.normalize)
    if not np.isfinite(max_dist):
        # Arbitrary cap: an unbounded metric would make the search space
        # infinite, but clipping it might lead to suboptimal results.
        max_dist = 20
        warnings.warn(f'bounding distance threshold to {max_dist:g}: '
                      f'this might lead to suboptimal results.')
    self.threshold = chocolate.uniform(min_dist, max_dist)
def convert_param_to_choco(param):
    """Convert a single search parameter suitably for ``chocolate``.

    Maps BOOL/INT/STRING/FLOAT/FLOAT_EXP parameter descriptions to the
    corresponding chocolate distribution; raises ValueError otherwise.
    """
    from math import log10
    import chocolate as choco

    kind = param['type']
    if kind == 'BOOL':
        return choco.choice([False, True])
    if kind == 'INT':
        # high is exclusive for quantized_uniform, hence the +1.
        return choco.quantized_uniform(low=param['min'],
                                       high=param['max'] + 1,
                                       step=1)
    if kind == 'STRING':
        return choco.choice(param['options'])
    if kind == 'FLOAT':
        return choco.uniform(low=param['min'], high=param['max'])
    if kind == 'FLOAT_EXP':
        return choco.log(low=log10(param['min']),
                         high=log10(param['max']),
                         base=10)
    raise ValueError("Didn't understand space {}.".format(param))
def t_main_cmaes(data_manager, n, file_name, log_path, conn):
    """Tune an XGBClassifier with CMA-ES for `n` iterations.

    Logs reward/time summaries to TensorBoard (`log_path`), per-iteration
    records to `file_name` as CSV, and appends a top-5 summary row to
    `test_result_path`.
    """
    data_cv, labels_cv = data_manager.data_cv['data_cv'], data_manager.data_cv[
        'labels_cv']
    data_test, labels_test = data_manager.data_cv[
        'data_test'], data_manager.data_cv['labels_test']
    sess = tf.Session()
    summary_writter = tf.summary.FileWriter(log_path, sess.graph)

    def score_gbt(params):
        # Returns (negated mean CV accuracy, wall-clock time of the CV run).
        xgb = XGBClassifier(**params)
        start_t = time.time()
        results = cross_val_score(xgb, data_cv, labels_cv, cv=5, n_jobs=1)
        one_step_time = time.time() - start_t
        val = np.mean(results)
        return -val, one_step_time

    space = {
        'max_depth': choco.uniform(1, 25),
        'learning_rate': choco.uniform(0.001, 0.1),
        'n_estimators': choco.uniform(50, 1200),
        'gamma': choco.uniform(0.05, 0.9),
        'min_child_weight': choco.uniform(1, 9),
        'subsample': choco.uniform(0.5, 1.0),
        'colsample_bytree': choco.uniform(0.5, 1.0),
        'colsample_bylevel': choco.uniform(0.5, 1.0),
        'reg_alpha': choco.uniform(0.1, 0.9),
        'reg_lambda': choco.uniform(0.01, 0.1)  # 2
    }
    sampler = choco.CMAES(conn, space)

    plot_data = {"clock_time": [], "reward": [], "param": [], "run_time": []}
    start_time = time.time()
    max_reward = 0
    min_time = 0
    for i in range(n):
        token, params = sampler.next()
        print(params)
        # CMA-ES samples floats; these XGBoost parameters must be integers.
        params['max_depth'] = int(params['max_depth'])
        params['n_estimators'] = int(params['n_estimators'])
        params['min_child_weight'] = int(params['min_child_weight'])
        loss, run_time = score_gbt(params)
        print(loss)
        sampler.update(token, loss)
        end_time = time.time()
        one_time = end_time - start_time
        summarize(summary_writter, -loss, i, 'reward')
        summarize(summary_writter, one_time, i, 'time')
        update_top_5_config(-loss, params, one_time)
        # if -loss>max_reward:
        #     max_reward=-loss
        #     min_time=run_time
        # summarize(summary_writter, max_reward, i, 'final_max_acc')
        # summarize(summary_writter, min_time, i, 'final_max_acc_time')
        plot_data["clock_time"].append(one_time)
        plot_data["run_time"].append(run_time)
        plot_data["reward"].append(-loss)
        plot_data["param"].append(params)

    plot = pd.DataFrame(data=plot_data)
    plot.to_csv(file_name, index=False)

    # Evaluate the best (top-5) configurations on the held-out test set and
    # append one summary row to the shared result CSV.
    test_set_top_5_mean = test_set_result(data_manager, top_5_config["config"])
    test_result = {
        "method": ["cmaes"],
        "dataset": [data_manager.data_set_index],
        "top_5_mean_test_reward": [test_set_top_5_mean],
        "top_5_mean_val_reward": [np.mean(top_5_config["reward"])],
        "top_5_mean_time": [str(np.mean(top_5_config["runtime"])) + "s"]
    }
    test_result_df = pd.DataFrame(test_result)
    test_result_df.to_csv(test_result_path, mode="a", header=False)
    print("---------训练结束!----------")
def get_new_suggestions(self, study, trials=[], number=1):
    """
    Get the new suggested trials with Chocolate algorithm.
    """
    # NOTE(review): the mutable default `trials=[]` is shared across calls;
    # it is only read here, but callers should pass a fresh list.

    # 1. Construct search space
    # Example: {"x" : choco.uniform(-6, 6), "y" : choco.uniform(-6, 6)}
    chocolate_search_space = {}

    # study = Study.objects.get(name=study_name)
    study_configuration_json = json.loads(study.study_configuration)
    params = study_configuration_json["params"]

    for param in params:
        param_name = param["parameterName"]
        if param["type"] == "INTEGER":
            # TODO: Support int type of search space)
            pass
        elif param["type"] == "DOUBLE":
            chocolate_search_space[param_name] = choco.uniform(
                param["minValue"], param["maxValue"])
        elif param["type"] == "DISCRETE" or param["type"] == "CATEGORICAL":
            # Feasible points arrive as one comma-separated string.
            feasible_point_list = [
                value.strip() for value in param["feasiblePoints"].split(",")
            ]
            chocolate_search_space[param_name] = choco.choice(
                feasible_point_list)

    conn = choco.SQLiteConnection("sqlite:///my_db.db")
    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    if self.algorithm_name == "Grid":
        sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "Random":
        sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "QuasiRandom":
        sampler = choco.QuasiRandom(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "Bayes":
        sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "CMAES":
        sampler = choco.CMAES(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "MOCMAES":
        mu = 1
        sampler = choco.MOCMAES(conn, chocolate_search_space, mu=mu,
                                clear_db=True)

    # 2. Update with completed advisor trials
    # completed_advisor_trials = Trial.objects.filter(
    #     study_name=study_name, status="Completed")
    completed_advisor_trials = [i for i in trials if i.status == "Completed"]

    for index, advisor_trial in enumerate(completed_advisor_trials):
        parameter_values_json = json.loads(advisor_trial.parameter_values)
        loss = advisor_trial.objective_value
        # Chocolate minimises, so negate the objective for MAXIMIZE studies.
        if study_configuration_json["goal"] == "MAXIMIZE":
            loss = -1 * loss
        entry = {"_chocolate_id": index, "_loss": loss}
        entry.update(parameter_values_json)
        # Should not use sampler.update(token, loss)
        conn.insert_result(entry)

    # 3. Run algorithm and construct return advisor trials
    return_trial_list = []

    for i in range(number):
        # Example: {'_chocolate_id': 1}
        # Example: {u'hidden2': u'32', u'learning_rate': 0.07122424534644338, u'l1_normalization': 0.8402644688674471, u'optimizer': u'adam'}
        token, chocolate_params = sampler.next()

        parameter_values_json = {}
        for param in params:
            if (param["type"] == "INTEGER" or param["type"] == "DOUBLE"
                    or param["type"] == "CATEGORICAL"):
                parameter_values_json[
                    param["parameterName"]] = chocolate_params[
                        param["parameterName"]]
            elif param["type"] == "DISCRETE":
                parameter_values_json[param["parameterName"]] = int(
                    chocolate_params[param["parameterName"]])

        new_advisor_trial = Trial.create(study.name, "ChocolateTrial")
        new_advisor_trial.parameter_values = json.dumps(parameter_values_json)
        # new_advisor_trial.save()
        return_trial_list.append(new_advisor_trial)

    return return_trial_list
def main(argv):
    """Train a prototypical network once with a Bayes-sampled learning rate."""
    # parse arguments
    args = parse_all_args()

    ''''''
    # Chocolate Code
    # Define the conditional search space
    space = {"lr": choco.uniform(low=.001, high=.1)}
    # Establish a connection to a SQLite local database
    conn = choco.SQLiteConnection("sqlite:///hpTuning.db")
    # Construct the optimizer
    sampler = choco.Bayes(conn, space)
    # Sample the next point
    token, params = sampler.next()
    ''''''

    train_set = PrototypicalDataset(args.input_path, args.train_path,
                                    n_support=args.support, n_query=args.query)
    dev_set = PrototypicalDataset(args.input_path, args.dev_path,
                                  apply_enhancements=False,
                                  n_support=args.support, n_query=args.query)

    # Use the same minibatch size to make each dataset use the same episode size
    train_loader = torch.utils.data.DataLoader(train_set, shuffle=True,
                                               drop_last=False,
                                               batch_size=args.mb,
                                               num_workers=0, pin_memory=True,
                                               collate_fn=protoCollate)
    dev_loader = torch.utils.data.DataLoader(dev_set, shuffle=True,
                                             drop_last=False,
                                             batch_size=args.mb,
                                             num_workers=0, pin_memory=True,
                                             collate_fn=protoCollate)

    Filter_specs = parse_filter_specs(args.filter_specs)
    Pre_trained_filters = None
    if not args.pre_trained is None:
        Pre_trained_filters = torch.load(args.pre_trained)
    model = ConvNeuralNet(args.embed_dim, args.f1, train_set.image_shape,
                          Filter_specs=Filter_specs,
                          Pre_trained_filters=Pre_trained_filters)

    if (args.checkpoint_path):
        state = torch.load(args.checkpoint_path)
        model.load_state_dict(state)
        print("Loaded checkpoint %s" % (args.checkpoint_path))
        # torch saves the device the model was on, so we don't need to
        # re-load to CUDA if it was saved from CUDA
    else:
        if (torch.cuda.is_available()):
            model = model.cuda()

    train_out = AggregatePerformanceRecord("train", args.out_path,
                                           dbg=args.print_reports)
    dev_out = AggregatePerformanceRecord("dev", args.out_path,
                                         dbg=args.print_reports)
    # test_out = PerformanceRecord("test",args.out_path,dbg=args.print_reports)

    N = len(train_set)
    # Calculate the loss for the sampled point (minimized)
    # This would be your training code
    loss = train(model, train_loader, dev_loader, train_out, dev_out, N, args,
                 **params)
    # Add the loss to the database
    sampler.update(token, loss)

    # # Get test set performance
    # test_set = PrototypicalDataset(args.input_path, args.test_path, apply_enhancements=False, n_support=args.support, n_query=args.query)
    # test_loader = torch.utils.data.DataLoader(test_set, shuffle=True,
    #     drop_last=False, batch_size=args.mb, num_workers=0, pin_memory=True,
    #     collate_fn=protoCollate)
    # # we should make sure this fn works, but we should not run this on the actual test set even once before we are completely done training
    # # evaluate_test(model, test_loader, test_out, args)

    readTune()
def get_tune_space(self):
    """Hyper-parameter space for speech (and optional overlap) detection."""

    def _block(prefix):
        # Same family of post-processing parameters for each detected class.
        return {
            f'{prefix}_onset': chocolate.uniform(0., 1.),
            f'{prefix}_offset': chocolate.uniform(0., 1.),
            f'{prefix}_min_duration_on': chocolate.uniform(0., 2.),
            f'{prefix}_min_duration_off': chocolate.uniform(0., 2.),
            f'{prefix}_pad_onset': chocolate.uniform(-1., 1.),
            f'{prefix}_pad_offset': chocolate.uniform(-1., 1.),
        }

    space = _block('speech')
    if self.has_overlap_:
        space.update(_block('overlap'))
    return space
def get_tune_space(self):
    """Return tunable post-processing parameters for speech detection.

    Overlap parameters are included only when `self.has_overlap_` is set.
    """
    space = dict(
        speech_onset=chocolate.uniform(0., 1.),
        speech_offset=chocolate.uniform(0., 1.),
        speech_min_duration_on=chocolate.uniform(0., 2.),
        speech_min_duration_off=chocolate.uniform(0., 2.),
        speech_pad_onset=chocolate.uniform(-1., 1.),
        speech_pad_offset=chocolate.uniform(-1., 1.),
    )
    if self.has_overlap_:
        space['overlap_onset'] = chocolate.uniform(0., 1.)
        space['overlap_offset'] = chocolate.uniform(0., 1.)
        space['overlap_min_duration_on'] = chocolate.uniform(0., 2.)
        space['overlap_min_duration_off'] = chocolate.uniform(0., 2.)
        space['overlap_pad_onset'] = chocolate.uniform(-1., 1.)
        space['overlap_pad_offset'] = chocolate.uniform(-1., 1.)
    return space
def get_tune_space(self):
    """Parent tuning space plus the clustering threshold."""
    space = super().get_tune_space()
    space['cls_threshold'] = chocolate.uniform(0, 2.)
    return space
# NOTE(review): the lines below up to `return` are the tail of a scoring
# function whose `def` falls outside this chunk; indentation reconstructed.
    m.fit(trn_x, trn_y)
    y_pred = m.predict(tst_x)
    # Negated macro-F1 so that the (minimising) samplers maximise F1.
    return -1*skm.f1_score(tst_y, y_pred, average='macro')

# Conditional model search space: one sub-space per model family,
# discriminated by the 'model' key.
space = [
    # {'model': 'RandomForestClassifier',
    # "max_depth" : choco.quantized_uniform(2, 32, 2),
    # "min_samples_split": choco.quantized_uniform(2, 600, 2),
    # "n_estimators" : choco.quantized_uniform(125, 800, 25),},
    {'model': 'SVC',
     "gamma": 'auto',
     "C": choco.log(-3, 3, 10),
     "kernel": choco.choice(['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']),
     "tol": choco.log(-5, -2, 10),},
    {'model': 'XGBClassifier',
     "learning_rate" : choco.uniform(0.001, 0.1),
     "max_depth" : choco.quantized_uniform(2, 16, 2),
     "min_child_weight": choco.quantized_uniform(2, 10, 2),
     "subsample" : choco.quantized_uniform(0.7, 1.05, 0.05),
     "n_estimators" : choco.quantized_uniform(25, 525, 25),},
    {'model': 'LogisticRegression',
     "penalty" : choco.choice(['l1', 'l2']),
     "C" : choco.log(-2, 1, 10),},
]

# Map model names used in `space` to their estimator classes.
models = {
    'RandomForestClassifier': RandomForestClassifier,
    'SVC': SVC,
    'XGBClassifier': XGBClassifier,
    'LogisticRegression': LogisticRegression,
}
def get_tune_space(self):
    """Extend the inherited tuning space with `cls_threshold`."""
    extra = {'cls_threshold': chocolate.uniform(0, 2.)}
    base_space = super().get_tune_space()
    base_space.update(extra)
    return base_space