예제 #1
0
# Build the SlurmCluster object that will drive the hyperparameter search.
cluster = SlurmCluster(
    hyperparam_optimizer=hyperparams,
    log_path=hyperparams.log_path,
    python_cmd='python3',
    test_tube_exp_name=hyperparams.test_tube_exp_name,
)

# Request e-mail notifications when a job finishes or fails
# (only has an effect if the HPC system supports it).
cluster.notify_job_status(
    email='*****@*****.**',
    on_done=True,
    on_fail=True,
)

# Modules the code needs loaded in its environment.
cluster.load_modules(['python-3', 'anaconda3'])

# Extra command placed in the non-SLURM portion of the job script.
cluster.add_command('source activate myCondaEnv')

# Custom SLURM commands may be added as well; they show up as:
#   #comment
#   #SBATCH --cmd=value
#   ############
# For example:
#   cluster.add_slurm_cmd(cmd='cpus-per-task', value='1', comment='nb cpus per task')

# Compute resources; these apply PER set of hyperparameters.
cluster.per_experiment_nb_cpus = 20
cluster.per_experiment_nb_nodes = 10

# Launch `train` for 24 trials; per the settings above, each set of
# hyperparameters gets 200 CPUs. When job_display_name is given, it is the
# name displayed in the SLURM queue.
cluster.optimize_parallel_cluster_cpu(
    train,
    nb_trials=24,
    job_name='first_tt_job',
    job_display_name='short_name',
)
예제 #2
0
    cluster.notify_job_status(email='*****@*****.**',
                              on_done=True,
                              on_fail=True)

    # SLURM modules to load.
    cluster.load_modules(['python-3', 'anaconda3'])

    # Add commands to the non-SLURM portion.
    cluster.add_command('source activate myCondaEnv')

    # Add custom SLURM commands which show up as:
    # #comment
    # #SBATCH --cmd=value
    # ############
    # cluster.add_slurm_cmd(
    #    cmd='cpus-per-task', value='1', comment='CPUS per task.')

    # Set job compute details (these apply PER set of hyperparameters).
    cluster.per_experiment_nb_cpus = 20
    cluster.per_experiment_nb_nodes = 10

    # Each hyperparameter combination will use 200 cpus.
    cluster.optimize_parallel_cluster_cpu(
        # Function to execute:
        train,
        # Number of hyperparameter combinations to search:
        nb_trials=24,
        job_name='first_tt_job',
        # This is what will display in the slurm queue:
        job_display_name='short_name')