# Configure cluster-based hyperparameter search via SLURM.
cluster = SlurmCluster(
    hyperparam_optimizer=hyperparams,
    log_path=hyperparams.log_path,
    python_cmd='python3',
    test_tube_exp_name=hyperparams.test_tube_exp_name)

# Email results if your HPC supports it.
cluster.notify_job_status(
    email='*****@*****.**', on_done=True, on_fail=True)

# SLURM modules to load in the job environment.
cluster.load_modules(['python-3', 'anaconda3'])

# Commands added to the non-SLURM portion of the submit script
# (runs before the experiment starts).
cluster.add_command('source activate myCondaEnv')

# Custom SLURM directives can also be added; they show up as:
# #comment
# #SBATCH --cmd=value
# ############
# cluster.add_slurm_cmd(
#     cmd='cpus-per-task', value='1', comment='CPUs per task.')

# Job compute details — these apply PER set of hyperparameters.
cluster.per_experiment_nb_cpus = 20
cluster.per_experiment_nb_nodes = 10

# Launch the search: 24 trials in total, each trial getting
# 20 cpus x 10 nodes = 200 cpus for its hyperparameter set.
# job_display_name is what appears in the SLURM queue.
cluster.optimize_parallel_cluster_cpu(
    train,
    nb_trials=24,
    job_name='first_tt_job',
    job_display_name='short_name')