Example 1
    # Enable cluster training.
    cluster = SlurmCluster(
        hyperparam_optimizer=hyperparams,
        log_path=hyperparams.log_path,
        python_cmd='python3',
        # test_tube_exp_name=hyperparams.test_tube_exp_name
    )

    # Email results if your HPC supports it.
    cluster.notify_job_status(email='*****@*****.**',
                              on_done=True,
                              on_fail=True)

    # SLURM modules to load.
    cluster.load_modules(['python-3', 'anaconda3'])

    # Add commands to the non-SLURM portion.
    cluster.add_command('source activate transformers')

    # Add custom SLURM commands which show up as:
    # #comment
    # #SBATCH --cmd=value
    # ############
    # cluster.add_slurm_cmd(
    #    cmd='cpus-per-task', value='1', comment='CPUS per task.')

    # Set job compute details (this will apply PER set of hyperparameters).
    cluster.per_experiment_nb_gpus = 4
    cluster.per_experiment_nb_nodes = 2
    cluster.gpu_type = '1080ti'
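
The snippet stops after the compute settings. In test-tube's usual flow, the final step hands the cluster a training function and submits one SLURM job per hyperparameter set. Below is a minimal sketch of that step; the train function body, trial count, and job name are assumptions, not part of the original.

    # Sketch of the usual final step (not in the original snippet).
    def train(hparams, *args):
        # test-tube calls this once per trial with the sampled hyperparameters.
        ...

    # Submit one SLURM job per hyperparameter configuration; nb_trials and
    # job_name below are assumed values.
    cluster.optimize_parallel_cluster_gpu(
        train,
        nb_trials=24,
        job_name='transformers_grid',
    )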
Example 2
    parser.opt_range('--wd', default=1e-5, type=float, tunable=True, low=1e-7, high=1e-4, nb_samples=100, log_base=10)
    hyperparams = parser.parse_args()

    # Enable cluster training.
    cluster = SlurmCluster(
        hyperparam_optimizer=hyperparams,
        log_path=hyperparams.log_path,
        python_cmd='python3',
    )

    # Email results if your HPC supports it.
    # cluster.notify_job_status(email='*****@*****.**', on_done=True, on_fail=True)

    # SLURM module to load.
    cluster.load_modules([
        'daint-gpu',
    ])

    # Add commands to the non-SLURM portion.
    cluster.add_command('. /apps/daint/UES/6.0.UP04/sandboxes/sarafael/miniconda-ss2020/bin/activate')

    cluster.add_command('export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK')
    cluster.add_command('export NCCL_DEBUG=INFO')
    cluster.add_command('export PYTHONFAULTHANDLER=1')
    cluster.add_command('export NCCL_IB_HCA=ipogif0')
    cluster.add_command('export NCCL_IB_CUDA_SUPPORT=1')

    cluster.add_command('srun nproc')
    cluster.add_command('srun which python')

    # Add custom SLURM commands which show up as:
    # #comment
    # #SBATCH --cmd=value
    # ############
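
For context, the parser used at the top of this example is test-tube's HyperOptArgumentParser: opt_range registers --wd as a tunable argument sampled across 100 values, log-uniform (base 10) between 1e-7 and 1e-4. A minimal sketch of that setup follows; the random-search strategy and the --log_path default are assumptions, since only the opt_range call appears in the snippet.

    from test_tube import HyperOptArgumentParser

    # Hypothetical construction; only the opt_range call is shown in the
    # original snippet.
    parser = HyperOptArgumentParser(strategy='random_search')
    parser.add_argument('--log_path', default='/tmp/slurm_logs')
    parser.opt_range('--wd', default=1e-5, type=float, tunable=True,
                     low=1e-7, high=1e-4, nb_samples=100, log_base=10)
    hyperparams = parser.parse_args()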