import os
import subprocess
import time

import dask
import numpy as np
import pandas as pd

# assemble_run_torque, MonitorJobs, replace_grid, and generate_topology are
# project helpers assumed to be defined elsewhere in this package.


def return_val_loss(command, torque, total_time, delay_time, job, gpu,
                    additional_command, additional_options):
    """Run command (via torque or locally) and block until the job monitor
    reports a validation loss; return that loss."""
    if torque:
        assemble_run_torque(command,
                            use_gpu=gpu,
                            additions=additional_command,
                            queue='gpuq' if gpu else 'normal',
                            time=np.ceil(total_time / 60.),
                            ngpu=1,
                            additional_options=additional_options)
    else:
        subprocess.call(command, shell=True)
    total_time *= 60.  # convert minutes to seconds for the monitor
    start_time = time.time()
    monitor = MonitorJobs(start_time, delay_time, total_time, job=job)
    monitor.run()
    while not monitor.stopped:
        time.sleep(delay_time)
    val_loss = monitor.return_val_loss()
    return val_loss
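# A minimal usage sketch (not from the original source): drive return_val_loss
# for one local run. The command string, job id, and timing values below are
# hypothetical placeholders; torque=False routes through subprocess instead of
# assemble_run_torque.
def _example_return_val_loss():
    return return_val_loss(
        command='methylcaps-hypjob hyperparameter_job --job 12345',  # hypothetical CLI call
        torque=False,
        total_time=60,  # minutes the monitor waits before giving up
        delay_time=30,  # seconds between validation-loss polls
        job=12345,  # hypothetical job id
        gpu=False,
        additional_command='',
        additional_options='')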
def coarse_scan(hyperparameter_input_csv,
                hyperparameter_output_log,
                generate_input,
                job_chunk_size,
                stratify_column,
                reset_all,
                torque,
                gpu,
                gpu_node,
                nohup,
                mlp=False,
                custom_jobs=None,
                model_complexity_factor=0.9,
                set_beta=-1.,
                n_jobs=4,
                categorical=True,
                add_softmax=False,
                additional_command="",
                cuda=True,
                new_grid=None,
                randomseed=42,
                additional_opts="",
                max_epochs=-1):
    """Perform a randomized hyperparameter grid search.

    Parameters
    ----------
    hyperparameter_input_csv : str
        CSV file containing hyperparameter inputs.
    hyperparameter_output_log : str
        CSV file containing prior runs.
    generate_input : bool
        Generate the hyperparameter input CSV.
    job_chunk_size : int
        Number of jobs to be launched at the same time.
    stratify_column : str or list of str
        Outcome column(s) to stratify/predict on.
    reset_all : bool
        Rerun all jobs previously scanned.
    torque : bool
        Run jobs using torque.
    gpu : int
        Which GPU to use; set to -1 to cycle over GPUs 0-3.
    gpu_node : int
        Which GPU node to use for torque submission; -1 is agnostic.
    nohup : bool
        Launch jobs using nohup.
    mlp : bool
        Run a prediction job (classification/regression) after the VAE.
    custom_jobs : list of pd.DataFrame
        Supply custom job parameters to be run.
    model_complexity_factor : float
        Degree of neural network model complexity for the hyperparameter
        search; lower values favor narrower networks, bounded between 0 and
        infinity.
    set_beta : float
        Fix beta (the KL-divergence weight) to this value instead of scanning
        over it; -1 scans the default beta grid.
    n_jobs : int
        Number of jobs to generate.
    categorical : bool
        Whether this is a classification task.
    add_softmax : bool
        Add a softmax layer at the end of the neural network.
    additional_command : str
        Additional commands for torque submission.
    cuda : bool
        Whether to use the GPU.
    new_grid : dict
        Optional replacement for the default hyperparameter grid.
    randomseed : int
        Seed for numpy's random number generator.
    additional_opts : str
        Additional options for torque submission.
    max_epochs : int
        Upper bound for the '--n_epochs' grid; -1 leaves it unchanged.
    """
    from itertools import cycle

    # avoid mutable default arguments
    if custom_jobs is None:
        custom_jobs = []
    if new_grid is None:
        new_grid = {}
    np.random.seed(randomseed)
    os.makedirs(os.path.dirname(hyperparameter_input_csv) or '.',
                exist_ok=True)
    generated_input = []
    if mlp:
        grid = {
            '--learning_rate_vae':
            [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1],
            '--learning_rate_mlp':
            [1e-5, 5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1],
            '--weight_decay': [1e-4],
            '--n_epochs': [25, 50, 75, 100, 200, 500, 700],
            '--scheduler': ['warm_restarts', 'null'],
            '--t_max': [10],
            '--eta_min': [1e-7, 1e-6],
            '--t_mult': [1., 1.2, 1.5, 2],
            '--batch_size': [50, 100, 256, 512],
            '--dropout_p': [0., 0.1, 0.2, 0.3, 0.5],
            '--n_workers': [4],
            '--loss_reduction': ['sum']
        }
        topology_grid = [0, 100, 200, 300, 500, 1000, 2000, 3000, 4096]
    else:
        grid = {
            '--n_latent': [100, 150, 200, 300, 500],
            '--learning_rate':
            [5e-5, 1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1],
            '--weight_decay': [1e-4],
            '--n_epochs': [25, 50, 75, 100, 200, 500, 700],
            '--kl_warm_up': [0, 20],
            '--beta': [0., 0.5, 1, 10, 50, 100, 200, 500]
            if set_beta == -1. else [set_beta],
            '--scheduler': ['warm_restarts', 'null'],
            '--t_max': [10],
            '--eta_min': [1e-7, 1e-6],
            '--t_mult': [1., 1.2, 1.5, 2],
            '--batch_size': [50, 100, 256, 512],
            '--n_workers': [4],
            '--loss_reduction': ['sum']
        }
        topology_grid = [0, 100, 200, 300, 500, 1000, 2000]
    if new_grid:
        grid, topology_grid = replace_grid(grid, new_grid, topology_grid)
    if max_epochs > 0:
        grid['--n_epochs'] = [
            epoch for epoch in grid['--n_epochs'] if epoch <= max_epochs
        ]
    # draw 40 candidate hidden-layer topologies for the search
    grid['--hidden_layer_topology' if mlp else
         '--hidden_layer_encoder_topology'] = [
             generate_topology(
                 topology_grid,
                 probability_decay_factor=model_complexity_factor)
             for i in range(40)
         ]
    if generate_input:
        for i in range(n_jobs):
            generated_input.append(['False'] +
                                   [np.random.choice(grid[k]) for k in grid])
        generated_input = [
            pd.DataFrame(generated_input,
                         columns=['--job_name'] + list(grid.keys()))
        ]
    if custom_jobs:
        custom_jobs[0].loc[:, '--job_name'] = 'False'
        generated_input = custom_jobs
    if gpu == -1:
        gpus = cycle(range(4))
    else:
        gpus = cycle([gpu])
    if os.path.exists(hyperparameter_input_csv):
        df = pd.read_csv(hyperparameter_input_csv)
        df = [df[[col for col in list(df) if not col.startswith('Unnamed')]]]
    else:
        df = []
    df = pd.concat(df + generated_input,
                   axis=0)[['--job_name'] + list(grid.keys())].fillna('')
    print(df)
    if reset_all:
        df.loc[:, '--job_name'] = 'False'
    # rows still marked 'False' have not been assigned a job yet
    df_final = df[df['--job_name'].astype(str).str.lower() ==
                  'false'].reset_index(drop=True)[list(grid.keys())]
    commands = []
    for i in range(df_final.shape[0]):
        job_id = str(np.random.randint(0, 100000000))
        # keep only hyperparameters actually drawn for this row
        hyperparameter_args = ' '.join(
            '{} {}'.format(k2, df_final.loc[i, k2]) for k2 in list(df_final)
            if df_final.loc[i, k2] != '' and pd.notna(df_final.loc[i, k2]))
        if not mlp:
            commands.append(
                'sh -c "methylnet-embed perform_embedding -bce {} -v -j {} -hl {} -sc {} {} && '
                'pymethyl-visualize transform_plot -i embeddings/vae_methyl_arr.pkl '
                '-o visualizations/{}_vae_embed.html -c {} -nn 10 "'.format(
                    "-c" if cuda else "", job_id, hyperparameter_output_log,
                    stratify_column, hyperparameter_args, job_id,
                    stratify_column))
        else:
            commands.append(
                'sh -c "methylnet-predict make_prediction {} {} {} {} -v {} -j {} -hl {} {} && {}"'
                .format(
                    "-c" if cuda else "", '-sft' if add_softmax else '',
                    '-cat' if categorical else '',
                    ''.join(' -ic {}'.format(col) for col in stratify_column),
                    '-do' if stratify_column[0] == 'disease_only' else '',
                    job_id, hyperparameter_output_log, hyperparameter_args,
                    '&&'.join(
                        ' pymethyl-visualize transform_plot -i predictions/vae_mlp_methyl_arr.pkl '
                        '-o visualizations/{}_{}_mlp_embed.html -c {} -nn 8 '
                        .format(job_id, col, col)
                        for col in stratify_column)))
        # claim the first unassigned row for this job id
        df.loc[np.arange(df.shape[0]) == np.where(
            df['--job_name'].astype(str).str.lower() == 'false')[0][0],
               '--job_name'] = job_id
    for i in range(len(commands)):
        commands[i] = '{} {} {} {}'.format(
            'CUDA_VISIBLE_DEVICES="{}"'.format(next(gpus))
            if not torque else '', 'nohup' if nohup else '', commands[i],
            '&' if nohup else '')
    os.makedirs('visualizations', exist_ok=True)
    df.to_csv(hyperparameter_input_csv)
    if torque:
        for command in commands:
            # gpu_node could pin a host via additional_opts, e.g.
            # '' if gpu_node == -1 else ' -l hostlist=g0{}'.format(gpu_node)
            assemble_run_torque(command,
                                use_gpu=cuda,
                                additions=additional_command,
                                queue='gpuq' if cuda else 'normal',
                                time=4,
                                ngpu=1,
                                additional_options=additional_opts)
    else:
        if len(commands) == 1:
            subprocess.call(commands[0], shell=True)
        else:
            # run commands in chunks of roughly job_chunk_size
            commands = np.array_split(
                commands, max(1, len(commands) // job_chunk_size))
            for command_list in commands:
                if nohup:
                    for command in command_list:
                        print(command)
                        subprocess.call(command, shell=True)
                else:
                    if job_chunk_size <= 1:
                        for command in command_list:
                            subprocess.call(command, shell=True)
                    else:
                        # dispatch each chunk across parallel worker processes
                        dask.compute(*[
                            dask.delayed(lambda x: subprocess.call(
                                x, shell=True))(command)
                            for command in command_list
                        ],
                                     scheduler='processes')
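# A hedged usage sketch (paths and the column name are assumptions, not values
# from the package): draw four random VAE configurations and run them locally,
# two at a time via dask.
def _example_coarse_scan():
    coarse_scan(
        hyperparameter_input_csv='hyperparameters/hyperparameter_input.csv',  # hypothetical path
        hyperparameter_output_log='hyperparameters/hyperparameter_log.csv',  # hypothetical path
        generate_input=True,  # draw n_jobs new rows from the random grid
        job_chunk_size=2,  # dask executes two commands per chunk
        stratify_column='disease',  # hypothetical outcome column
        reset_all=False,
        torque=False,  # run locally instead of submitting to torque
        gpu=-1,  # cycle CUDA_VISIBLE_DEVICES over GPUs 0-3
        gpu_node=-1,
        nohup=False,
        mlp=False,  # scan the VAE grid rather than the MLP grid
        n_jobs=4)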
def hyperparameter_scan(train_methyl_array, val_methyl_array, interest_col,
                        n_bins, custom_loss, torque, search_strategy,
                        total_time, delay_time, gpu, additional_command,
                        additional_options, n_jobs, n_workers, update,
                        random_seed, optimize_time, capsule_choice,
                        custom_capsule_file, retrain_top_job, batch_size,
                        output_top_job_params, limited_capsule_names_file,
                        n_epochs, min_capsule_len_low_bound, gsea_superset,
                        tissue, number_sets, use_set, gene_context,
                        select_subtypes, custom_hyperparameters, min_capsules,
                        fit_spw, l1_l2):
    """Launch n_jobs randomized hyperparameter jobs through the
    methylcaps-hypjob command-line interface."""
    np.random.seed(random_seed)
    opts = dict(train_methyl_array=train_methyl_array,
                val_methyl_array=val_methyl_array,
                interest_col=interest_col,
                n_bins=n_bins,
                custom_loss=custom_loss,
                search_strategy=search_strategy,
                total_time=total_time,
                delay_time=delay_time,
                random_state=random_seed,
                batch_size=batch_size,
                n_epochs=n_epochs,
                min_capsule_len_low_bound=min_capsule_len_low_bound,
                number_sets=number_sets,
                custom_hyperparameters=custom_hyperparameters,
                min_capsules=min_capsules)
    # boolean flags are forwarded as bare options (empty-string values)
    if torque and not update:
        opts['torque'] = ''
    if use_set:
        opts['use_set'] = ''
    if gene_context:
        opts['gene_context'] = ''
    if fit_spw:
        opts['fit_spw'] = ''
    if l1_l2:
        opts['l1_l2'] = l1_l2
    if gsea_superset:
        opts['gsea_superset'] = gsea_superset
    if tissue:
        opts['tissue'] = tissue
    if gpu:
        opts['gpu'] = ''
    if optimize_time:
        opts['optimize_time'] = ''
    if capsule_choice:
        opts['capsule_choice'] = ' -cc '.join(
            list(filter(None, capsule_choice)))
    select_subtypes = list(filter(None, select_subtypes))
    if select_subtypes:
        opts['select_subtypes'] = ' -ss '.join(select_subtypes)
    if limited_capsule_names_file:
        opts['limited_capsule_names_file'] = limited_capsule_names_file
    if retrain_top_job:
        # retraining the best prior job needs only a single run
        n_jobs = 1
        opts['retrain_top_job'] = ''
    if output_top_job_params:
        opts['output_top_job_params'] = ''
    if custom_capsule_file:
        opts['custom_capsule_file'] = custom_capsule_file
    additional_opts = dict(additional_command=additional_command,
                           additional_options=additional_options)
    for job in [np.random.randint(0, 10000000) for i in range(n_jobs)]:
        opts['job'] = job
        command = 'methylcaps-hypjob hyperparameter_job {} {}'.format(
            ' '.join('--{} {}'.format(k, v) for k, v in opts.items()),
            ' '.join('--{} "{}"'.format(k, v)
                     for k, v in additional_opts.items()))
        if update:
            command = '{} {}'.format(command, '-u')
        # background the command unless it is a torque update submission
        command = '{} {}'.format(command,
                                 '&' if not (torque and update) else '')
        if update:
            if torque:
                assemble_run_torque(
                    command,
                    use_gpu=gpu,
                    queue='gpuq' if gpu else 'normal',
                    time=int(np.ceil(total_time / 60.)),
                    ngpu=1,
                    additions=additional_opts['additional_command'],
                    additional_options=additional_opts['additional_options'])
            else:
                command = '{} {}'.format(
                    'CUDA_VISIBLE_DEVICES=0' if gpu else '', command)
                # run the GPU-prefixed update command locally
                subprocess.call(command, shell=True)
        else:
            subprocess.call(command, shell=True)
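# A hedged usage sketch (all file names, the outcome column, and the capsule
# choice are illustrative assumptions): queue four random MethylCapsNet
# hyperparameter jobs locally, each polled for up to an hour downstream.
def _example_hyperparameter_scan():
    hyperparameter_scan(
        train_methyl_array='train_val_test_sets/train_methyl_array.pkl',  # hypothetical path
        val_methyl_array='train_val_test_sets/val_methyl_array.pkl',  # hypothetical path
        interest_col='disease',  # hypothetical outcome column
        n_bins=0,
        custom_loss='none',
        torque=False,
        search_strategy='random',
        total_time=60,  # minutes per job
        delay_time=60,  # seconds between status checks
        gpu=False,
        additional_command='',
        additional_options='',
        n_jobs=4,
        n_workers=4,
        update=False,  # build and background the commands without -u
        random_seed=42,
        optimize_time=False,
        capsule_choice=('genomic_binned',),  # hypothetical capsule selection
        custom_capsule_file='',
        retrain_top_job=False,
        batch_size=16,
        output_top_job_params=False,
        limited_capsule_names_file='',
        n_epochs=10,
        min_capsule_len_low_bound=5,
        gsea_superset='',
        tissue='',
        number_sets=25,
        use_set=False,
        gene_context=False,
        select_subtypes=(),
        custom_hyperparameters='hyperparameters.yaml',  # hypothetical config
        min_capsules=5,
        fit_spw=False,
        l1_l2='')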