def create_optimizer(self, algorithm_name):
    # Search Space example: {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)}
    chocolate_search_space = {}

    for param in self.search_space.params:
        key = BaseChocolateService.encode(param.name)
        # Chocolate quantized_uniform distribution uses half-open interval: [low, high).
        if param.type == INTEGER:
            chocolate_search_space[key] = choco.quantized_uniform(
                int(param.min), int(param.max) + int(param.step), int(param.step))
        elif param.type == DOUBLE:
            chocolate_search_space[key] = choco.quantized_uniform(
                float(param.min), float(param.max) + float(param.step), float(param.step))
        # For Categorical and Discrete insert indexes to DB from list of values
        elif param.type == CATEGORICAL or param.type == DISCRETE:
            chocolate_search_space[key] = choco.choice(
                [idx for idx, _ in enumerate(param.list)])

    if algorithm_name in DEPRECATED_ALGORITHM_NAME:
        warnings.warn(
            "Algorithm name '{}' is deprecated. Please use '{}'.".format(
                algorithm_name, DEPRECATED_ALGORITHM_NAME[algorithm_name],
            ),
            DeprecationWarning,
        )
        algorithm_name = DEPRECATED_ALGORITHM_NAME[algorithm_name]

    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    if algorithm_name == "grid":
        self.chocolate_optimizer = choco.Grid(self.conn, chocolate_search_space, clear_db=True)
    # hyperopt-random is the default option in katib.
    elif algorithm_name == "random":
        self.chocolate_optimizer = choco.Random(self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "quasirandom":
        self.chocolate_optimizer = choco.QuasiRandom(
            self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "bayesianoptimization":
        self.chocolate_optimizer = choco.Bayes(self.conn, chocolate_search_space, clear_db=True)
    # elif self.algorithm_name == "chocolate-CMAES":
    #     self.chocolate_optimizer = choco.CMAES(self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "mocmaes":
        mu = 1
        self.chocolate_optimizer = choco.MOCMAES(
            self.conn, chocolate_search_space, mu=mu, clear_db=True)
    else:
        raise Exception(
            "Failed to create Chocolate optimizer for the algorithm: {}".format(algorithm_name))
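
# A minimal usage sketch (not Katib code) of the index-encoding idea above:
# categorical values are stored in the DB as indexes, so the caller maps the
# suggested index back to the original value. All names below are hypothetical.
import chocolate as choco

optimizer_values = ["adam", "sgd", "ftrl"]  # hypothetical categorical list
space = {"optimizer": choco.choice(list(range(len(optimizer_values))))}

conn = choco.SQLiteConnection("sqlite:///index_demo.db")
sampler = choco.Random(conn, space, clear_db=True)
token, params = sampler.next()

# params["optimizer"] is an index into optimizer_values, not a value.
print(optimizer_values[params["optimizer"]])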
import chocolate as choco
from sklearn.datasets import load_boston


def main():
    X, y = load_boston(return_X_y=True)

    # Connect to sqlite database in current directory
    conn = choco.SQLiteConnection(url="sqlite:///gbt-boston.db")

    s = {"learning_rate": choco.uniform(0.001, 0.1),
         "n_estimators": choco.quantized_uniform(25, 525, 1),
         "max_depth": choco.quantized_uniform(2, 25, 1),
         "subsample": choco.uniform(0.7, 1.0)}

    sampler = choco.QuasiRandom(conn, s, random_state=110, skip=3)

    # Draw one candidate, evaluate it, and report the loss back.
    token, params = sampler.next()
    loss = score_gbt(X, y, params)
    sampler.update(token, loss)
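
# A sketch of extending main() above into a full search loop; score_gbt is the
# evaluation helper the snippet assumes (it trains a GBT and returns a loss).
for _ in range(50):
    token, params = sampler.next()
    loss = score_gbt(X, y, params)
    sampler.update(token, loss)

# Every trial is persisted in gbt-boston.db, so the run can be inspected later:
print(conn.results_as_dataframe().sort_values("_loss").head())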
def create_optimizer(self, algorithm_name):
    # Search Space example: {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)}
    chocolate_search_space = {}

    for param in self.search_space.params:
        key = BaseChocolateService.encode(param.name)
        if param.type == INTEGER:
            chocolate_search_space[key] = choco.quantized_uniform(
                int(param.min), int(param.max), int(param.step))
        elif param.type == DOUBLE:
            chocolate_search_space[key] = choco.quantized_uniform(
                float(param.min), float(param.max), float(param.step))
        elif param.type == CATEGORICAL:
            chocolate_search_space[key] = choco.choice(param.list)
        else:
            chocolate_search_space[key] = choco.choice(
                [float(e) for e in param.list])

    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    if algorithm_name == "grid":
        self.chocolate_optimizer = choco.Grid(self.conn, chocolate_search_space, clear_db=True)
    # hyperopt-random is the default option in katib.
    elif algorithm_name == "chocolate-random":
        self.chocolate_optimizer = choco.Random(self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "chocolate-quasirandom":
        self.chocolate_optimizer = choco.QuasiRandom(
            self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "chocolate-bayesian-optimization":
        self.chocolate_optimizer = choco.Bayes(self.conn, chocolate_search_space, clear_db=True)
    # elif self.algorithm_name == "chocolate-CMAES":
    #     self.chocolate_optimizer = choco.CMAES(self.conn, chocolate_search_space, clear_db=True)
    elif algorithm_name == "chocolate-mocmaes":
        mu = 1
        self.chocolate_optimizer = choco.MOCMAES(
            self.conn, chocolate_search_space, mu=mu, clear_db=True)
    else:
        raise Exception(
            "Failed to create Chocolate optimizer for the algorithm: {}".format(algorithm_name))
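
# Note the contrast with the first variant above: chocolate's quantized_uniform
# samples from the half-open interval [low, high), so param.max itself is never
# suggested by this version. A small sketch of that behavior:
import chocolate as choco

conn = choco.SQLiteConnection("sqlite:///interval_demo.db")
sampler = choco.Random(conn, {"x": choco.quantized_uniform(0, 4, 1)}, clear_db=True)

seen = set()
for _ in range(100):
    token, params = sampler.next()
    seen.add(params["x"])
    sampler.update(token, 0.0)
print(sorted(seen))  # 4 never appears: the interval is [0, 4)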
def setUp(self):
    l1 = log(low=-3, high=5, base=10)
    l2 = log(low=-2, high=3, base=10)
    u = uniform(low=-1, high=1)
    qu = quantized_uniform(low=1, high=20, step=1)

    self.space = Space([
        {
            "algo": {
                "svm": {
                    "C": l1,
                    "kernel": {
                        "linear": None,
                        "rbf": {"gamma": l2}
                    },
                    "cond2": {
                        "aa": None,
                        "bb": {"abc": u}
                    }
                },
                "knn": {"n_neighbors": qu}
            }
        },
        {
            "cond3": 0,
            "p": l1,
            "p2": qu
        }
    ])
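
# The nested-dict structure in this test mirrors chocolate's conditional search
# space format; a sketch of sampling from such a space directly (the database
# file name is hypothetical):
import chocolate as choco

space = {"algo": {"svm": {"C": choco.log(low=-3, high=5, base=10),
                          "kernel": {"linear": None,
                                     "rbf": {"gamma": choco.log(low=-2, high=3, base=10)}}},
                  "knn": {"n_neighbors": choco.quantized_uniform(low=1, high=20, step=1)}}}

conn = choco.SQLiteConnection("sqlite:///cond_demo.db")
sampler = choco.Random(conn, space, clear_db=True)
token, params = sampler.next()
# params only contains the keys active on the chosen branch, e.g.
# {"algo": "svm", "kernel": "rbf", "C": ..., "gamma": ...}
print(params)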
def convert_param_to_choco(param):
    """Convert a single search parameter suitably for ``chocolate``."""
    from math import log10
    import chocolate as choco

    if param['type'] == 'BOOL':
        return choco.choice([False, True])
    if param['type'] == 'INT':
        return choco.quantized_uniform(
            low=param['min'], high=param['max'] + 1, step=1)
    if param['type'] == 'STRING':
        return choco.choice(param['options'])
    if param['type'] == 'FLOAT':
        return choco.uniform(low=param['min'], high=param['max'])
    if param['type'] == 'FLOAT_EXP':
        return choco.log(
            low=log10(param['min']), high=log10(param['max']), base=10)
    raise ValueError("Didn't understand space {}.".format(param))
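
# A hypothetical set of parameter specs in the format convert_param_to_choco
# expects, and how a full search-space dict could be assembled from them (the
# 'name' key is an assumption for illustration; the function itself ignores it):
specs = [
    {'name': 'use_bias', 'type': 'BOOL'},
    {'name': 'units', 'type': 'INT', 'min': 16, 'max': 256},
    {'name': 'lr', 'type': 'FLOAT_EXP', 'min': 1e-5, 'max': 1e-1},
]
space = {spec['name']: convert_param_to_choco(spec) for spec in specs}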
def create_space():
    space = {
        "learning_rate": choco.log(low=-5, high=-2, base=10),
        "dropout_keep_prob": choco.quantized_uniform(low=0.0, high=0.95, step=0.05),
        "num_filters": choco.quantized_uniform(low=50, high=200, step=10),
        "batch_size": choco.quantized_uniform(low=64, high=256, step=16),
        "num_epochs": choco.quantized_uniform(low=100, high=200, step=10),
        "l2_reg_lambda": choco.quantized_uniform(low=0.0, high=10.0, step=0.5),
        "eps": choco.quantized_uniform(low=1.0, high=10.0, step=0.02),
        "dev_sample_percentage": choco.quantized_uniform(low=0.1, high=0.3, step=0.01),
    }
    return space
def getSuggestions(self, search_space, trials, request_number):
    """
    Get the new suggested trials with chocolate algorithm.
    """
    # Example: {"x": choco.uniform(-6, 6), "y": choco.uniform(-6, 6)}
    chocolate_search_space = {}

    for param in search_space.params:
        key = BaseChocolateService.encode(param.name)
        if param.type == INTEGER:
            chocolate_search_space[key] = choco.quantized_uniform(
                int(param.min), int(param.max), 1)
        elif param.type == DOUBLE:
            chocolate_search_space[key] = choco.quantized_uniform(
                float(param.min), float(param.max), float(param.step))
        elif param.type == CATEGORICAL:
            chocolate_search_space[key] = choco.choice(param.list)
        else:
            chocolate_search_space[key] = choco.choice(
                [float(e) for e in param.list])

    conn = choco.SQLiteConnection("sqlite:///my_db.db")

    # Refer to https://chocolate.readthedocs.io/tutorials/algo.html
    if self.algorithm_name == "grid":
        sampler = choco.Grid(conn, chocolate_search_space, clear_db=True)
    # hyperopt-random is the default option in katib.
    elif self.algorithm_name == "chocolate-random":
        sampler = choco.Random(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "chocolate-quasirandom":
        sampler = choco.QuasiRandom(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "chocolate-bayesian-optimization":
        sampler = choco.Bayes(conn, chocolate_search_space, clear_db=True)
    # elif self.algorithm_name == "chocolate-CMAES":
    #     sampler = choco.CMAES(conn, chocolate_search_space, clear_db=True)
    elif self.algorithm_name == "chocolate-MOCMAES":
        mu = 1
        sampler = choco.MOCMAES(conn, chocolate_search_space, mu=mu, clear_db=True)
    else:
        raise Exception("Failed to create the algorithm: {}".format(self.algorithm_name))

    for index, trial in enumerate(trials):
        loss_for_choco = float(trial.target_metric.value)
        if search_space.goal == MAX_GOAL:
            loss_for_choco = -1 * loss_for_choco

        entry = {"_chocolate_id": index, "_loss": loss_for_choco}
        for param in search_space.params:
            param_assignment = None
            for assignment in trial.assignments:
                if param.name == assignment.name:
                    param_assignment = assignment.value
                    break
            if param.type == INTEGER:
                param_assignment = int(param_assignment)
            elif param.type == DOUBLE:
                param_assignment = float(param_assignment)
            entry.update({BaseChocolateService.encode(param.name): param_assignment})
        logger.info(entry)

        # Should not use sampler.update(token, loss), because we will create
        # a new BaseChocolateService instance for every request. Thus we need
        # to insert all previous trials every time.
        conn.insert_result(entry)

    list_of_assignments = []
    for i in range(request_number):
        try:
            token, chocolate_params = sampler.next()
            list_of_assignments.append(
                BaseChocolateService.convert(search_space, chocolate_params))
        except StopIteration:
            logger.info(
                "Chocolate db is exhausted, increase Search Space or decrease maxTrialCount!")
    return list_of_assignments
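
# A standalone sketch of the replay idea used above: rows written with
# conn.insert_result (carrying _chocolate_id and _loss, in the same entry
# format the service builds) are seen by a freshly constructed sampler as
# completed trials, so no sampler.update call is needed. Names are illustrative.
import chocolate as choco

space = {"x": choco.uniform(-6, 6)}
conn = choco.SQLiteConnection("sqlite:///replay_demo.db")
sampler = choco.Bayes(conn, space, clear_db=True)

# Replay two previously observed trials.
conn.insert_result({"_chocolate_id": 0, "x": -1.5, "_loss": 2.25})
conn.insert_result({"_chocolate_id": 1, "x": 2.0, "_loss": 4.0})

token, chocolate_params = sampler.next()  # conditioned on the replayed history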
def hyperparameter_job_(train_methyl_array, val_methyl_array, interest_col, n_bins,
                        custom_loss, torque, search_strategy, total_time, delay_time,
                        gpu, additional_command, additional_options, update, n_epochs,
                        job, survival, optimize_time, random_state, capsule_choice,
                        custom_capsule_file, retrain_top_job, batch_size,
                        output_top_job_params, limited_capsule_names_file,
                        min_capsule_len_low_bound, gsea_superset, tissue, number_sets,
                        use_set, gene_context, select_subtypes, custom_hyperparameters,
                        min_capsules, fit_spw, l1_l2):

    additional_params = dict(train_methyl_array=train_methyl_array,
                             val_methyl_array=val_methyl_array,
                             interest_col=interest_col,
                             n_bins=n_bins,
                             custom_loss=custom_loss,
                             job=job,
                             batch_size=batch_size,
                             number_sets=number_sets,
                             min_capsules=min_capsules)

    if n_epochs:
        additional_params['n_epochs'] = n_epochs
    if gsea_superset:
        additional_params['gsea_superset'] = gsea_superset
    if l1_l2:
        additional_params['l1_l2'] = l1_l2
    if tissue:
        additional_params['tissue'] = tissue
    if custom_capsule_file:
        additional_params['custom_capsule_file'] = custom_capsule_file
    if output_top_job_params:
        retrain_top_job = True
    if limited_capsule_names_file:
        additional_params['limited_capsule_names_file'] = limited_capsule_names_file

    if update and not (retrain_top_job and output_top_job_params):
        additional_params['capsule_choice'] = capsule_choice
        select_subtypes = list(filter(None, select_subtypes))
        if select_subtypes:
            additional_params['select_subtypes'] = select_subtypes
        if use_set:
            additional_params['use_set'] = use_set
        if gene_context:
            additional_params['gene_context'] = gene_context
        if fit_spw:
            additional_params['fit_spw'] = fit_spw
    else:
        select_subtypes = list(filter(None, select_subtypes))
        if select_subtypes:
            additional_params['select_subtypes'] = ' -ss '.join(list(filter(None, select_subtypes)))
        additional_params['capsule_choice'] = ' -cc '.join(list(filter(None, capsule_choice)))
        if use_set:
            additional_params['use_set'] = ''
        if gene_context:
            additional_params['gene_context'] = ''
        if fit_spw:
            additional_params['fit_spw'] = ''

    if not survival:
        additional_params['gamma2'] = 1e-2

    def score_loss(params):
        # job=np.random.randint(0,1000000)
        start_time = time.time()

        params['hidden_topology'] = ','.join(
            [str(int(params['el{}s'.format(j)])) for j in range(params['nehl'] + 1)])
        params['decoder_topology'] = ','.join(
            [str(int(params['dl{}s'.format(j)])) for j in range(params['ndhl'] + 1)])

        del_params = ['el{}s'.format(j) for j in range(params['nehl'] + 1)] + \
                     ['dl{}s'.format(j) for j in range(params['ndhl'] + 1)]
        del_params = set(del_params + [k for k in params if k.startswith('el') or k.startswith('dl')])
        for param in del_params:
            del params[param]
        del params['nehl'], params['ndhl']

        params.update(additional_params)
        print(params)

        command = '{} methylcaps-model model_capsnet {} || methylcaps-model report_loss -j {}'.format(
            'CUDA_VISIBLE_DEVICES=0' if gpu and not torque else '',
            ' '.join(['--{} {}'.format(k, v) for k, v in params.items() if v or k == 'use_set']),
            params['job'])  # ,'&' if not torque else ''

        if output_top_job_params and retrain_top_job:
            print('Top params command: ')
            print('{} --predict'.format(command.split('||')[0]))
            exit()
        elif output_top_job_params:
            print('Continuing training of random parameters, please specify retrain_top_job.')

        if update:
            val_loss = model_capsnet_(**params)
        else:
            val_loss = return_val_loss(command, torque, total_time, delay_time, job,
                                       gpu, additional_command, additional_options)

        end_time = time.time()
        if optimize_time:
            return val_loss, start_time - end_time
        else:
            return val_loss

    grid = dict(n_epochs=dict(low=10, high=50, step=10),
                bin_len=dict(low=500000, high=1000000, step=100000),
                min_capsule_len=dict(low=min_capsule_len_low_bound, high=500, step=25),
                primary_caps_out_len=dict(low=10, high=100, step=5),
                caps_out_len=dict(low=10, high=100, step=5),
                nehl=dict(low=10, high=300, step=10, n_layers=3),
                ndhl=dict(low=100, high=300, step=10, n_layers=3),
                learning_rate=dict(low=-5, high=-1, step=1, base=10),
                gamma=dict(low=-5, high=-1, step=1, base=10),
                gamma2=dict(low=-5, high=-1, step=1, base=10),
                overlap=dict(low=0., high=.5, step=.1),
                routing_iterations=dict(low=2, high=4, step=1))

    if os.path.exists(custom_hyperparameters):
        from ruamel.yaml import safe_load as load
        with open(custom_hyperparameters) as f:
            new_grid = load(f)
        print(new_grid)
        for k in new_grid:
            for k2 in new_grid[k]:
                grid[k][k2] = new_grid[k][k2]

    n_layers = dict(encoder=grid['nehl'].pop('n_layers'),
                    decoder=grid['ndhl'].pop('n_layers'))

    grid = dict(n_epochs=choco.quantized_uniform(**grid['n_epochs']),
                bin_len=choco.quantized_uniform(**grid['bin_len']),
                min_capsule_len=choco.quantized_uniform(**grid['min_capsule_len']),
                primary_caps_out_len=choco.quantized_uniform(**grid['primary_caps_out_len']),
                caps_out_len=choco.quantized_uniform(**grid['caps_out_len']),
                nehl={i: {'el{}s'.format(j): choco.quantized_uniform(**grid['nehl'])
                          for j in range(i + 1)} for i in range(n_layers['encoder'])},
                gamma=choco.quantized_log(**grid['gamma']),
                ndhl={i: {'dl{}s'.format(j): choco.quantized_uniform(**grid['ndhl'])
                          for j in range(i + 1)} for i in range(n_layers['decoder'])},
                learning_rate=choco.quantized_log(**grid['learning_rate']),
                routing_iterations=choco.quantized_uniform(**grid['routing_iterations']),
                overlap=choco.quantized_uniform(**grid['overlap']),
                gamma2=choco.quantized_log(**grid['gamma2']))
    # ADD BATCH SIZE

    if n_epochs:
        grid.pop('n_epochs')
    if not survival:
        grid.pop('gamma2')
    if 'genomic_binned' not in list(capsule_choice):
        for k in ['overlap', 'bin_len']:
            grid.pop(k)

    if retrain_top_job:
        conn = choco.SQLiteConnection('sqlite:///hyperparameter_scan.db')
        results = conn.results_as_dataframe()
        results = results[~results['_loss'].isnull()]
        params = dict(results.iloc[np.argmin(results['_loss'].values)])
        for k in ['bin_len', 'caps_out_len', 'min_capsule_len', 'ndhl', 'nehl',
                  'primary_caps_out_len', 'routing_iterations']:
            if k in params:
                params[k] = int(params[k])
        del params['_loss']
        top_loss = score_loss(params)
        pickle.dump(top_loss, open('top_loss.pkl', 'wb'))
    else:
        optimization_method = search_strategy  # 'bayes'
        optimization_methods = ['random', 'quasi', 'bayes']

        sampler_opts = {}
        if optimization_method in ['random']:
            sampler_opts['n_bootstrap'] = 10000
            # sampler_opts['random_state']=random_state
        elif optimization_method in ['quasi']:
            sampler_opts['seed'] = random_state
            sampler_opts['skip'] = 3
        elif optimization_method in ['bayes']:
            sampler_opts['n_bootstrap'] = 35
            sampler_opts['utility_function'] = 'ei'
            sampler_opts['xi'] = 0.1
            # sampler_opts['random_state']=42
        # print(optimization_method)

        optimizer = dict(random=choco.Bayes, quasi=choco.QuasiRandom,
                         bayes=choco.Bayes)[optimization_method]  # Random

        hyp_conn = choco.SQLiteConnection(url="sqlite:///hyperparameter_scan.db")
        sampler = optimizer(hyp_conn, grid, **sampler_opts)
        # print(sampler)
        if 0 and optimization_method in ['bayes']:
            sampler.random_state = np.random.RandomState(42)

        token, params = sampler.next()
        loss = score_loss(params)
        if (loss if not optimize_time else loss[0]) >= 0:
            sampler.update(token, loss)
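
# A minimal sketch of the variable-depth trick used in the grid above: the
# number of encoder layers is itself a conditional choice, and each depth i
# exposes its own per-layer size parameters el0s..el{i}s.
import chocolate as choco

layer_size = dict(low=10, high=300, step=10)
nehl = {i: {'el{}s'.format(j): choco.quantized_uniform(**layer_size)
            for j in range(i + 1)}
        for i in range(3)}
# nehl[0] -> {'el0s': ...}; nehl[2] -> {'el0s': ..., 'el1s': ..., 'el2s': ...}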
    y_pred = m.predict(tst_x)
    return -1 * skm.f1_score(tst_y, y_pred, average='macro')


space = [
    # {'model': 'RandomForestClassifier',
    #  "max_depth": choco.quantized_uniform(2, 32, 2),
    #  "min_samples_split": choco.quantized_uniform(2, 600, 2),
    #  "n_estimators": choco.quantized_uniform(125, 800, 25)},
    {'model': 'SVC',
     "gamma": 'auto',
     "C": choco.log(-3, 3, 10),
     "kernel": choco.choice(['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']),
     "tol": choco.log(-5, -2, 10)},
    {'model': 'XGBClassifier',
     "learning_rate": choco.uniform(0.001, 0.1),
     "max_depth": choco.quantized_uniform(2, 16, 2),
     "min_child_weight": choco.quantized_uniform(2, 10, 2),
     "subsample": choco.quantized_uniform(0.7, 1.05, 0.05),
     "n_estimators": choco.quantized_uniform(25, 525, 25)},
    {'model': 'LogisticRegression',
     "penalty": choco.choice(['l1', 'l2']),
     "C": choco.log(-2, 1, 10)},
]

models = {
    'RandomForestClassifier': RandomForestClassifier,
    'SVC': SVC,
    'XGBClassifier': XGBClassifier,
    'LogisticRegression': LogisticRegression,
}
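
# A sketch of how a sampled configuration could drive the lookup table above:
# the 'model' key selects the estimator class and the remaining keys become
# constructor kwargs (build_model is a hypothetical helper, not source code).
def build_model(params):
    params = dict(params)  # copy so the sampler's dict is not mutated
    model_cls = models[params.pop('model')]
    return model_cls(**params)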
print("Exception occurred.") self.validation_error = 100000.0 if __name__ == '__main__': if len(sys.argv) == 1: # original params from article model=LatentAttention(frac_train=0.99, n_z=20, batchsize=100, learning_rate=0.001, max_epochs=10, e_h1=16, e_h2=32, d_h1=32, d_h2=16, run_id=-1); model.train() print("loss={}".format(float(model.validation_error))) exit(0) # Params from optimizer search_space = { "n_z": choco.quantized_uniform(5, 100, 1), "learning_rate": choco.log(-20, -8, 2), "max_epochs": choco.quantized_uniform(5, 200, 1), "e_h1": choco.quantized_uniform(16, 256, 1), "e_h2": choco.quantized_uniform(16, 256, 1), "d_h1": choco.quantized_uniform(16, 256, 1), "d_h2": choco.quantized_uniform(16, 256, 1), } connection = choco.SQLiteConnection("sqlite:///no_labels_results.sqlite3") sampler = choco.Bayes(connection, search_space) token, sample = sampler.next() print("Parameters: {} Token: {}".format(sample, token)) run_id = token['_chocolate_id'] model = LatentAttention(0.99, batchsize=150, run_id=run_id, **sample) model.train() sampler.update(token, float(model.validation_error))
    return -1 * skm.f1_score(tst_y, y_pred, average='macro')


space = [
    {
        'model': 'SVC',
        "gamma": 'auto',
        "C": choco.log(-3, 3, 10),
        "kernel": choco.choice(['linear', 'poly', 'rbf', 'sigmoid', 'precomputed']),
        "tol": choco.log(-5, -2, 10),
    },
    {
        'model': 'XGBClassifier',
        "learning_rate": choco.uniform(0.001, 0.1),
        "max_depth": choco.quantized_uniform(2, 16, 2),
        "min_child_weight": choco.quantized_uniform(2, 10, 2),
        "subsample": choco.quantized_uniform(0.7, 1.05, 0.05),
        "n_estimators": choco.quantized_uniform(25, 525, 25),
    },
    {
        'model': 'RandomForestClassifier',
        "max_depth": choco.quantized_uniform(2, 10, 2),
        "min_samples_leaf": choco.quantized_uniform(2, 10, 2),
        "n_estimators": choco.quantized_uniform(25, 525, 25),
    },
    {
        'model': 'GaussianNB',
        "var_smoothing": choco.log(-12, -6, 10)
    },
    {