def example_run_bayesian():
    # Initialisation: per-dimension means and half-widths of the uniform
    # search intervals.
    mean = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    variances = [4, 5, 4, 5, 4, 5, 4, 5, 4, 5]
    x1, y1, x2, y2 = 60, 342, 726, 725
    dimension = len(mean)
    number = 20

    space = {}
    for x in range(dimension):
        space["{}".format(x)] = choco.uniform(mean[x] - variances[x],
                                              mean[x] + variances[x])

    # The database file TEST.db is created automatically on first connection
    # (sqlite3 ships with Python; no pip install is needed).
    conn = choco.SQLiteConnection("sqlite:///TEST.db")
    conn.lock()
    bay = choco.Bayes(conn, space, clear_db=True)

    (token, point_next) = bay.next()
    point = format_next(point_next)

    all_pos = []
    all_score = []
    for x in range(number):
        loss = extract_score(x, x1, y1, x2, y2, point)
        bay.update(token, loss)
        (token, point_next) = bay.next()
        point = format_next(point_next)
        print("\rProgress : {}%".format(100 * x // number), end="")
        all_pos.append(point)
        all_score.append(1 - loss)

    np.savetxt("Score_list", all_score)
    np.savetxt("Point_list", all_pos)
    return True
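# format_next() and extract_score() are not defined in this excerpt. A
# minimal sketch of format_next(), assuming it only reorders chocolate's
# string-keyed parameters ("0" .. "9") back into a positional list:
def format_next(point_next):
    # Sort the string keys numerically and return the values in order.
    return [point_next[str(i)] for i in range(len(point_next))]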
def main(argv=None):
    # Use chocolate to tune the hyperparameters.
    space = create_space()
    conn = choco.SQLiteConnection(url="sqlite:///db2.db")
    # Repeat each parameter set three times and reduce the losses by their mean.
    cv = choco.Repeat(repetitions=3, reduce=np.mean, rep_col="_repetition_id")
    sampler = choco.Bayes(conn, space, crossvalidation=cv)

    # train(x_train, y_train, vocab_processor, x_dev, y_dev)
    token, params = sampler.next()
    print(type(token))
    print(token)

    x_train, y_train, x_dev, y_dev = preprocess(
        params["eps"], params["dev_sample_percentage"])
    loss = train(x_train, y_train, x_dev, y_dev, params)
    print(loss)
    sampler.update(token, loss)

    results = conn.results_as_dataframe()
    print(results)
    results = pd.melt(results, id_vars=["_loss"],
                      value_name="value", var_name="variable")
    sns.lmplot(x="value", y="_loss", data=results,
               col="variable", col_wrap=3, sharex=False)
    plt.show()
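# create_space() is defined elsewhere in the source. A hypothetical sketch,
# assuming it covers only the two parameters that main() reads back; the
# distributions and bounds below are illustrative, not the author's values:
import chocolate as choco


def create_space():
    return {
        "eps": choco.log(low=-5, high=-1, base=10),
        "dev_sample_percentage": choco.uniform(0.05, 0.2),
    }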
def create_optimizer(self, algorithm_name):
    # Search space example: {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)}
    chocolate_search_space = {}

    for param in self.search_space.params:
        key = BaseChocolateService.encode(param.name)
        # Chocolate's quantized_uniform distribution uses the half-open
        # interval [low, high), so extend the upper bound by one step.
        if param.type == INTEGER:
            chocolate_search_space[key] = choco.quantized_uniform(
                int(param.min), int(param.max) + int(param.step),
                int(param.step))
        elif param.type == DOUBLE:
            chocolate_search_space[key] = choco.quantized_uniform(
                float(param.min), float(param.max) + float(param.step),
                float(param.step))
        # For CATEGORICAL and DISCRETE parameters, insert indexes into the
        # list of values into the DB rather than the values themselves.
        elif param.type == CATEGORICAL or param.type == DISCRETE:
            chocolate_search_space[key] = choco.choice(
                [idx for idx, _ in enumerate(param.list)])

    if algorithm_name in DEPRECATED_ALGORITHM_NAME:
        warnings.warn(
            "Algorithm name '{}' is deprecated. Please use '{}'.".format(
                algorithm_name, DEPRECATED_ALGORITHM_NAME[algorithm_name],
            ),
            DeprecationWarning,
        )
        algorithm_name = DEPRECATED_ALGORITHM_NAME[algorithm_name]

    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    if algorithm_name == "grid":
        self.chocolate_optimizer = choco.Grid(
            self.conn, chocolate_search_space, clear_db=True)
    # hyperopt-random is the default option in katib.
    elif algorithm_name == "random":
        self.chocolate_optimizer = choco.Random(
            self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "quasirandom":
        self.chocolate_optimizer = choco.QuasiRandom(
            self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "bayesianoptimization":
        self.chocolate_optimizer = choco.Bayes(
            self.conn, chocolate_search_space, clear_db=True)
    # elif self.algorithm_name == "chocolate-CMAES":
    #     self.chocolate_optimizer = choco.CMAES(
    #         self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "mocmaes":
        mu = 1
        self.chocolate_optimizer = choco.MOCMAES(
            self.conn, chocolate_search_space, mu=mu, clear_db=True)
    else:
        raise Exception(
            "Failed to create Chocolate optimizer for the algorithm: {}".format(
                algorithm_name))
def create_optimizer(self, algorithm_name):
    # Search space example: {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)}
    chocolate_search_space = {}

    for param in self.search_space.params:
        key = BaseChocolateService.encode(param.name)
        # Note: quantized_uniform samples from the half-open interval
        # [low, high), so param.max itself is never drawn here.
        if param.type == INTEGER:
            chocolate_search_space[key] = choco.quantized_uniform(
                int(param.min), int(param.max), int(param.step))
        elif param.type == DOUBLE:
            chocolate_search_space[key] = choco.quantized_uniform(
                float(param.min), float(param.max), float(param.step))
        elif param.type == CATEGORICAL:
            chocolate_search_space[key] = choco.choice(param.list)
        else:
            chocolate_search_space[key] = choco.choice(
                [float(e) for e in param.list])

    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    if algorithm_name == "grid":
        self.chocolate_optimizer = choco.Grid(
            self.conn, chocolate_search_space, clear_db=True)
    # hyperopt-random is the default option in katib.
    elif algorithm_name == "chocolate-random":
        self.chocolate_optimizer = choco.Random(
            self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "chocolate-quasirandom":
        self.chocolate_optimizer = choco.QuasiRandom(
            self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "chocolate-bayesian-optimization":
        self.chocolate_optimizer = choco.Bayes(
            self.conn, chocolate_search_space, clear_db=True)
    # elif self.algorithm_name == "chocolate-CMAES":
    #     self.chocolate_optimizer = choco.CMAES(
    #         self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "chocolate-mocmaes":
        mu = 1
        self.chocolate_optimizer = choco.MOCMAES(
            self.conn, chocolate_search_space, mu=mu, clear_db=True)
    else:
        raise Exception(
            "Failed to create Chocolate optimizer for the algorithm: {}".format(
                algorithm_name))
def func(data):
    trn_x, trn_y, tst_x, tst_y, dbid = data
    conn = choco.SQLiteConnection(url="sqlite:///hpo/hpo_%s.db" % str(dbid))
    sampler = choco.Random(conn, space)
    searcher = choco.Bayes(conn, space)
    print('START %s' % dbid)

    # Seed the shared database with random draws before handing over to Bayes.
    for _ in range(nseed):
        token, params = sampler.next()
        # print('START % 4d %s' % (i, params['model']))
        loss = f1_score_model(trn_x, trn_y, tst_x, tst_y, **params)
        sampler.update(token, loss)
        # print('DONE % 4d %s' % (i, params['model']))

    for _ in range(nruns):
        token, params = searcher.next()
        # print('START % 4d %s' % (i, params['model']))
        loss = f1_score_model(trn_x, trn_y, tst_x, tst_y, **params)
        searcher.update(token, loss)
def run_chocolate(X_train, X_test, Y_train, Y_test, y_scaler_nl,
                  run, space, iterations, clear_db):
    # Create and connect to an SQLite database in the current directory.
    conn = choco.SQLiteConnection(url="sqlite:///" + run + ".db")

    # Repeat each model run three times and take the average:
    # cv = choco.Repeat(repetitions=3, reduce=np.mean, rep_col="_repetition_id")

    # Search strategy: Bayes attempts to "learn" patterns from all previous runs.
    sampler = choco.Bayes(conn, space, clear_db=False)  # , crossvalidation=cv
    # sampler = choco.Grid(conn, space, clear_db=clear_db)

    # Run `iterations` times and see if we get a better answer.
    for i in range(iterations):
        # Examine the database and pick the next experiment.
        token, params = sampler.next()
        # Run the experiment.
        loss = _score(X_train, X_test, Y_train, Y_test, y_scaler_nl, params)
        # print("finished iteration", str(i), "loss", str(loss))
        # Add the new result to the database.
        sampler.update(token, loss)
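# _score() is defined elsewhere in the source. A hypothetical stand-in,
# assuming it fits a scikit-learn style model with the sampled params and
# returns a loss for chocolate to minimize; the estimator class and the use
# of y_scaler_nl to undo target scaling are illustrative assumptions only:
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor


def _score(X_train, X_test, Y_train, Y_test, y_scaler_nl, params):
    model = KNeighborsRegressor(**params)
    model.fit(X_train, Y_train)
    # Undo the non-linear target scaling before computing the error.
    pred = y_scaler_nl.inverse_transform(model.predict(X_test).reshape(-1, 1))
    truth = y_scaler_nl.inverse_transform(Y_test.reshape(-1, 1))
    return mean_squared_error(truth, pred)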
def get_new_suggestions(self, study, trials=[], number=1):
    """
    Get the new suggested trials with the Chocolate algorithm.
    """

    # 1. Construct the search space.
    # Example: {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)}
    chocolate_search_space = {}

    # study = Study.objects.get(name=study_name)
    study_configuration_json = json.loads(study.study_configuration)
    params = study_configuration_json["params"]

    for param in params:
        param_name = param["parameterName"]
        if param["type"] == "INTEGER":
            # TODO: Support the INTEGER type of search space.
            pass
        elif param["type"] == "DOUBLE":
            chocolate_search_space[param_name] = choco.uniform(
                param["minValue"], param["maxValue"])
        elif param["type"] == "DISCRETE" or param["type"] == "CATEGORICAL":
            feasible_point_list = [
                value.strip() for value in param["feasiblePoints"].split(",")
            ]
            chocolate_search_space[param_name] = choco.choice(
                feasible_point_list)

    conn = choco.SQLiteConnection("sqlite:///my_db.db")

    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    if self.algorithm_name == "Grid":
        sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "Random":
        sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "QuasiRandom":
        sampler = choco.QuasiRandom(conn, chocolate_search_space,
                                    clear_db=True)
    elif self.algorithm_name == "Bayes":
        sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "CMAES":
        sampler = choco.CMAES(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "MOCMAES":
        mu = 1
        sampler = choco.MOCMAES(conn, chocolate_search_space, mu=mu,
                                clear_db=True)

    # 2. Update with completed advisor trials.
    # completed_advisor_trials = Trial.objects.filter(
    #     study_name=study_name, status="Completed")
    completed_advisor_trials = [i for i in trials if i.status == "Completed"]

    for index, advisor_trial in enumerate(completed_advisor_trials):
        parameter_values_json = json.loads(advisor_trial.parameter_values)
        loss = advisor_trial.objective_value
        if study_configuration_json["goal"] == "MAXIMIZE":
            loss = -1 * loss
        entry = {"_chocolate_id": index, "_loss": loss}
        entry.update(parameter_values_json)
        # Should not use sampler.update(token, loss); insert the result directly.
        conn.insert_result(entry)

    # 3. Run the algorithm and construct the advisor trials to return.
    return_trial_list = []

    for i in range(number):
        # Example token: {'_chocolate_id': 1}
        # Example params: {u'hidden2': u'32', u'learning_rate': 0.07122424534644338,
        #                  u'l1_normalization': 0.8402644688674471, u'optimizer': u'adam'}
        token, chocolate_params = sampler.next()

        parameter_values_json = {}
        for param in params:
            if (param["type"] == "INTEGER" or param["type"] == "DOUBLE"
                    or param["type"] == "CATEGORICAL"):
                parameter_values_json[param["parameterName"]] = \
                    chocolate_params[param["parameterName"]]
            elif param["type"] == "DISCRETE":
                parameter_values_json[param["parameterName"]] = int(
                    chocolate_params[param["parameterName"]])

        new_advisor_trial = Trial.create(study.name, "ChocolateTrial")
        new_advisor_trial.parameter_values = json.dumps(parameter_values_json)
        # new_advisor_trial.save()
        return_trial_list.append(new_advisor_trial)

    return return_trial_list
def main(argv):
    # Parse arguments.
    args = parse_all_args()

    # --- Chocolate code ---
    # Define the search space.
    space = {
        "lr": choco.uniform(low=.001, high=.1)
    }

    # Establish a connection to a local SQLite database.
    conn = choco.SQLiteConnection("sqlite:///hpTuning.db")

    # Construct the optimizer.
    sampler = choco.Bayes(conn, space)

    # Sample the next point.
    token, params = sampler.next()
    # --- end Chocolate code ---

    train_set = PrototypicalDataset(args.input_path, args.train_path,
                                    n_support=args.support, n_query=args.query)
    dev_set = PrototypicalDataset(args.input_path, args.dev_path,
                                  apply_enhancements=False,
                                  n_support=args.support, n_query=args.query)

    # Use the same minibatch size so each dataset uses the same episode size.
    train_loader = torch.utils.data.DataLoader(
        train_set, shuffle=True, drop_last=False, batch_size=args.mb,
        num_workers=0, pin_memory=True, collate_fn=protoCollate)
    dev_loader = torch.utils.data.DataLoader(
        dev_set, shuffle=True, drop_last=False, batch_size=args.mb,
        num_workers=0, pin_memory=True, collate_fn=protoCollate)

    Filter_specs = parse_filter_specs(args.filter_specs)
    Pre_trained_filters = None
    if args.pre_trained is not None:
        Pre_trained_filters = torch.load(args.pre_trained)

    model = ConvNeuralNet(args.embed_dim, args.f1, train_set.image_shape,
                          Filter_specs=Filter_specs,
                          Pre_trained_filters=Pre_trained_filters)

    if args.checkpoint_path:
        state = torch.load(args.checkpoint_path)
        model.load_state_dict(state)
        print("Loaded checkpoint %s" % (args.checkpoint_path))
        # torch saves the device the model was on, so we don't need to
        # re-load to CUDA if it was saved from CUDA.
    else:
        if torch.cuda.is_available():
            model = model.cuda()

    train_out = AggregatePerformanceRecord("train", args.out_path,
                                           dbg=args.print_reports)
    dev_out = AggregatePerformanceRecord("dev", args.out_path,
                                         dbg=args.print_reports)
    # test_out = PerformanceRecord("test", args.out_path, dbg=args.print_reports)
    N = len(train_set)

    # Calculate the loss for the sampled point (minimized).
    # This would be your training code.
    loss = train(model, train_loader, dev_loader, train_out, dev_out, N,
                 args, **params)

    # Add the loss to the database.
    sampler.update(token, loss)

    # # Get test set performance
    # test_set = PrototypicalDataset(args.input_path, args.test_path,
    #                                apply_enhancements=False,
    #                                n_support=args.support, n_query=args.query)
    # test_loader = torch.utils.data.DataLoader(
    #     test_set, shuffle=True, drop_last=False, batch_size=args.mb,
    #     num_workers=0, pin_memory=True, collate_fn=protoCollate)
    # # We should make sure this fn works, but we should not run this on the
    # # actual test set even once before we are completely done training.
    # # evaluate_test(model, test_loader, test_out, args)

    readTune()
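# readTune() is called above but not defined in this excerpt. A minimal
# sketch, assuming it simply dumps the accumulated tuning results from the
# same SQLite database:
import chocolate as choco


def readTune():
    conn = choco.SQLiteConnection("sqlite:///hpTuning.db")
    print(conn.results_as_dataframe())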
def getSuggestions(self, search_space, trials, request_number):
    """
    Get the new suggested trials with the chocolate algorithm.
    """

    # Example: {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)}
    chocolate_search_space = {}

    for param in search_space.params:
        key = BaseChocolateService.encode(param.name)
        if param.type == INTEGER:
            chocolate_search_space[key] = choco.quantized_uniform(
                int(param.min), int(param.max), 1)
        elif param.type == DOUBLE:
            chocolate_search_space[key] = choco.quantized_uniform(
                float(param.min), float(param.max), float(param.step))
        elif param.type == CATEGORICAL:
            chocolate_search_space[key] = choco.choice(param.list)
        else:
            chocolate_search_space[key] = choco.choice(
                [float(e) for e in param.list])

    conn = choco.SQLiteConnection("sqlite:///my_db.db")

    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    if self.algorithm_name == "grid":
        sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
    # hyperopt-random is the default option in katib.
    elif self.algorithm_name == "chocolate-random":
        sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "chocolate-quasirandom":
        sampler = choco.QuasiRandom(conn, chocolate_search_space,
                                    clear_db=True)
    elif self.algorithm_name == "chocolate-bayesian-optimization":
        sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
    # elif self.algorithm_name == "chocolate-CMAES":
    #     sampler = choco.CMAES(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "chocolate-MOCMAES":
        mu = 1
        sampler = choco.MOCMAES(conn, chocolate_search_space, mu=mu,
                                clear_db=True)
    else:
        raise Exception(
            "Failed to create the algorithm: {}".format(self.algorithm_name))

    for index, trial in enumerate(trials):
        loss_for_choco = float(trial.target_metric.value)
        if search_space.goal == MAX_GOAL:
            loss_for_choco = -1 * loss_for_choco

        entry = {"_chocolate_id": index, "_loss": loss_for_choco}
        for param in search_space.params:
            param_assignment = None
            for assignment in trial.assignments:
                if param.name == assignment.name:
                    param_assignment = assignment.value
                    break
            if param.type == INTEGER:
                param_assignment = int(param_assignment)
            elif param.type == DOUBLE:
                param_assignment = float(param_assignment)
            entry.update({
                BaseChocolateService.encode(param.name): param_assignment
            })
        logger.info(entry)
        # Should not use sampler.update(token, loss), because we will create
        # a new BaseChocolateService instance for every request. Thus we need
        # to insert all previous trials every time.
        conn.insert_result(entry)

    list_of_assignments = []

    for i in range(request_number):
        try:
            token, chocolate_params = sampler.next()
            list_of_assignments.append(
                BaseChocolateService.convert(search_space, chocolate_params))
        except StopIteration:
            logger.info(
                "Chocolate db is exhausted, increase Search Space or decrease "
                "maxTrialCount!")

    return list_of_assignments
# The head of this helper is truncated in the excerpt; reconstructed here
# assuming sklearn's train_test_split, which matches the stratify/test_size
# arguments and the call to split(train_x, train_y) below.
def split(X, y):
    x_train, x_val, y_train, y_val = train_test_split(
        X, y, stratify=y, test_size=config.train_val_split_pct)
    return x_train, y_train, x_val, y_val


if __name__ == '__main__':
    num_cols = [
        'amount_tsh', 'gps_height', 'population', 'total_missing',
        'longitude', 'latitude'
    ]

    train_x = read_data(config.d_xtrain)
    train_y = read_data(config.d_ytrain)
    test_x = read_data(config.d_xtrain)  # NOTE: likely meant to read the test features here

    train_x = train_x[num_cols]
    test_x = test_x[num_cols]

    scaler = StandardScaler(with_mean=True, with_std=True)
    scaler.fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    x_train, y_train, x_val, y_val = split(train_x, train_y)

    conn = choco.MongoDBConnection("mongodb://localhost:27017/")
    sampler = choco.Bayes(conn, param_grid, clear_db=True)
    token, params = sampler.next()
    loss = model_train(x_train, y_train, x_val, y_val, params)
    sampler.update(token, loss)
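# param_grid is defined elsewhere in the source. A hypothetical example of
# the shape a chocolate search space takes; the parameter names and ranges
# below are illustrative only, not the author's values:
import chocolate as choco

param_grid = {
    "n_estimators": choco.quantized_uniform(50, 500, 10),
    "max_depth": choco.quantized_uniform(2, 16, 1),
    "learning_rate": choco.log(low=-4, high=0, base=10),
}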
        # Tail of the LatentAttention class excerpt: sentinel validation error.
        self.validation_error = 100000.0


if __name__ == '__main__':
    if len(sys.argv) == 1:
        # Original params from the article.
        model = LatentAttention(frac_train=0.99, n_z=20, batchsize=100,
                                learning_rate=0.001, max_epochs=10,
                                e_h1=16, e_h2=32, d_h1=32, d_h2=16,
                                run_id=-1)
        model.train()
        print("loss={}".format(float(model.validation_error)))
        exit(0)

    # Params from the optimizer.
    search_space = {
        "n_z": choco.quantized_uniform(5, 100, 1),
        # log(-20, -8, 2) samples the learning rate as 2**u for u in [-20, -8).
        "learning_rate": choco.log(-20, -8, 2),
        "max_epochs": choco.quantized_uniform(5, 200, 1),
        "e_h1": choco.quantized_uniform(16, 256, 1),
        "e_h2": choco.quantized_uniform(16, 256, 1),
        "d_h1": choco.quantized_uniform(16, 256, 1),
        "d_h2": choco.quantized_uniform(16, 256, 1),
    }

    connection = choco.SQLiteConnection("sqlite:///no_labels_results.sqlite3")
    sampler = choco.Bayes(connection, search_space)
    token, sample = sampler.next()
    print("Parameters: {} Token: {}".format(sample, token))

    run_id = token['_chocolate_id']
    model = LatentAttention(0.99, batchsize=150, run_id=run_id, **sample)
    model.train()
    sampler.update(token, float(model.validation_error))
    'XGBClassifier': XGBClassifier,
    'RandomForestClassifier': RandomForestClassifier,
    'GaussianNB': GaussianNB,
    'KNeighborsClassifier': KNeighborsClassifier,
}


if __name__ == "__main__":
    datafn = 'HOUR_00024.csv'
    dbid = datafn.split('_')[1].split('.')[0]
    # dbid = datetime.datetime.now().strftime('%m%d%y%H%M%S')
    # dbid = 1
    N_RUNS = 1024
    N_PROC = 8

    datafn = os.path.join(DATA_PATH, 'hour', datafn)
    ids_fn = os.path.join(RAW_PATH, 'd_ids_split.pickle')
    trn_x, trn_y, tst_x, tst_y = load_or_gen_data(datafn, ids_fn)

    conn = choco.SQLiteConnection(url="sqlite:///hpo/hpo_%s.db" % str(dbid))
    # searcher = choco.Random(conn, space)
    searcher = choco.Bayes(conn, space)

    f = getProcFunc(conn, searcher)
    with mp.Pool(processes=N_PROC) as pool:
        pool.map(f, range(N_RUNS))

    df = conn.results_as_dataframe()
    df.to_csv("hpo/hpo_%s.csv" % str(dbid))
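# The shared `space` used with choco.Bayes above is defined elsewhere. Since
# the related snippets dispatch on params['model'], it is presumably a
# conditional search space; chocolate expresses those as a list of dicts,
# one subspace per model. A hypothetical sketch with illustrative names and
# ranges only:
import chocolate as choco

space = [
    {"model": "XGBClassifier",
     "learning_rate": choco.log(low=-3, high=0, base=10),
     "n_estimators": choco.quantized_uniform(50, 500, 50)},
    {"model": "RandomForestClassifier",
     "n_estimators": choco.quantized_uniform(50, 500, 50),
     "max_depth": choco.quantized_uniform(2, 16, 1)},
    {"model": "KNeighborsClassifier",
     "n_neighbors": choco.quantized_uniform(1, 50, 1)},
]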