예제 #1
0
 def test_mpi_multi_node_job_cluster(self):
     # Ask for 130 cores on the "knl" queue in MPI mode and verify the SLURM
     # directives that get generated. With 64 tasks per node asserted below,
     # 130 tasks span more than one node.
     knl_cluster = get_cluster(queue_type="knl",
                               mpi_mode=True,
                               cores=130,
                               **self.kwargs)
     for directive in ("#SBATCH --cpus-per-task=1",
                       "#SBATCH --ntasks-per-node=64"):
         self.assertIn(directive, knl_cluster.job_header)
     self.assertIn("#SBATCH -n 130", knl_cluster.job_script())
     # Every worker sizing attribute of the dummy job is expected to be 1
     for attribute in ("worker_cores",
                       "worker_processes",
                       "worker_process_threads"):
         self.assertEqual(getattr(knl_cluster._dummy_job, attribute), 1)
     # Each of the following requests must be refused with a ValueError
     rejected_requests = (
         # MPI mode with no core count given
         {"mpi_mode": True},
         # 2 cores combined with 37 CPUs per task
         {"cpus_per_task": 37, "cores": 2, "mpi_mode": True},
         # a single core combined with 13 tasks per node
         {"cores": 1, "ntasks_per_node": 13, "mpi_mode": True},
     )
     for request in rejected_requests:
         with self.assertRaises(ValueError):
             get_cluster(queue_type="knl", **request, **self.kwargs)
     controller.delete_cluster(knl_cluster.name)
예제 #2
0
 def test_gpu_cluster(self):
     # A "gpus" queue cluster should be named after the queue, and its job
     # header should select the gpus partition and ask for 4 GPUs via --gres.
     gpu_cluster = get_cluster(queue_type="gpus", **self.kwargs)
     self.assertEqual(gpu_cluster.name, "dask-worker-gpus")
     for directive in ("#SBATCH -p gpus", "#SBATCH --gres=gpu:4"):
         self.assertIn(directive, gpu_cluster.job_header)
     self.assertIsInstance(gpu_cluster.client, Client)
     # Unregister the cluster from the controller once the checks are done
     controller.delete_cluster(gpu_cluster.name)
예제 #3
0
 def test_knl_cluster(self):
     # A "knl" queue cluster should be named after the queue, and its job
     # header should target the booster partition with 64 CPUs per task.
     knl_cluster = get_cluster(queue_type="knl", **self.kwargs)
     self.assertEqual(knl_cluster.name, "dask-worker-knl")
     for directive in ("#SBATCH -p booster", "#SBATCH --cpus-per-task=64"):
         self.assertIn(directive, knl_cluster.job_header)
     self.assertIsInstance(knl_cluster.client, Client)
     # Unregister the cluster from the controller once the checks are done
     controller.delete_cluster(knl_cluster.name)
예제 #4
0
 def test_custom_cluster(self):
     # Selecting only the scheduler (no queue type) should give a cluster
     # that carries the name configured on this test case.
     slurm_cluster = get_cluster(scheduler=SLURM, **self.kwargs)
     self.assertEqual(slurm_cluster.name, self.cluster_name)
     self.assertIsInstance(slurm_cluster.client, Client)

     # A request for 128 cores is expected to be rejected
     with self.assertRaises(ValueError):
         get_cluster(SLURM, cores=128, **self.kwargs)

     controller.delete_cluster(slurm_cluster.name)
예제 #5
0
 def test_custom_cluster(self):
     # Selecting only the scheduler (no queue type) should give a cluster
     # that carries the name configured on this test case, while the SLURM
     # job name in the header is "dask-worker-batch".
     slurm_cluster = get_cluster(scheduler=SLURM, **self.kwargs)
     self.assertEqual(slurm_cluster.name, self.cluster_name)
     expected_job_name = "#SBATCH -J dask-worker-batch"
     self.assertIn(expected_job_name, slurm_cluster.job_header)
     self.assertIsInstance(slurm_cluster.client, Client)

     # A request for 128 cores is expected to be rejected
     with self.assertRaises(ValueError):
         get_cluster(SLURM, cores=128, **self.kwargs)

     controller.delete_cluster(slurm_cluster.name)
예제 #6
0
 def test_fork_mpi_job_cluster(self):
     # Check that an MPI cluster created with ``fork_mpi=True`` does not use
     # the MPI dask wrapper module in its command template.
     #
     # The keyword arguments are built on a copy: updating ``self.kwargs`` in
     # place would leave ``fork_mpi`` set for every later ``get_cluster`` call
     # that unpacks the same dictionary.
     kwargs = dict(self.kwargs, fork_mpi=True)
     cluster = get_cluster(queue_type="knl",
                           mpi_mode=True,
                           cores=64,
                           **kwargs)
     self.assertNotIn(MPI_DASK_WRAPPER_MODULE, cluster._command_template)
     # Unregister the cluster from the controller once the check is done
     controller.delete_cluster(cluster.name)
    def test_task_by_cluster(self):
        # Hand the same SLURM cluster object to both decorators, then run the
        # shared checks keyed on the name the cluster gave itself.
        slurm_cls = CustomSLURMCluster
        slurm_cluster = slurm_cls()

        @on_cluster(cluster=slurm_cluster)
        @task(cluster=slurm_cluster)
        def f():
            pass

        self._basic_tests(cluster_type=slurm_cls, id_=slurm_cluster.name)
        # Unregister the cluster from the controller once the checks are done
        controller.delete_cluster(slurm_cluster.name)
    def test_task_by_id_string(self):
        # The cluster is created with an explicit name; on_cluster receives
        # the cluster object while task refers to it only by that name.
        cluster_name = "test1"
        slurm_cls = CustomSLURMCluster
        named_cluster = slurm_cls(name=cluster_name)

        @on_cluster(cluster=named_cluster)
        @task(cluster_id=cluster_name)
        def f():
            pass

        self._basic_tests(cluster_type=slurm_cls, id_=cluster_name)
        # Unregister the cluster from the controller once the checks are done
        controller.delete_cluster(cluster_name)
    def test_task_by_cluster_and_cluster_id_local(self):
        # A LocalCluster is passed to both decorators together with an explicit
        # id, after which the shared checks are run against that id.
        _id = "test1"
        cluster_type = LocalCluster
        original_cluster = cluster_type()

        # Pass the ``_id`` variable (not the literal string "_id") so the task
        # decorator refers to the same id that on_cluster is given.
        @on_cluster(cluster=original_cluster, cluster_id=_id)
        @task(cluster=original_cluster, cluster_id=_id)
        def f():
            pass

        self._basic_tests(cluster_type=cluster_type, id_=_id)
        # Unregister the cluster from the controller once the checks are done
        controller.delete_cluster(id_=_id)
    def test_init_only_id_local(self):
        # Register a LocalCluster with the controller by hand, then apply
        # on_cluster with nothing but the id of that existing cluster.
        local_id = "test1"
        local_cluster = LocalCluster()
        controller.add_cluster(id_=local_id, cluster=local_cluster)

        @on_cluster(cluster_id=local_id)
        def f():
            pass

        # The controller must still hand back the very same cluster object
        found_cluster, found_client = controller.get_cluster(id_=local_id)
        self.assertEqual(local_cluster, found_cluster)
        self.assertIsInstance(found_client, Client)
        controller.delete_cluster(local_id)
예제 #11
0
 def test_cluster_pure_functions_mpi(self):
     # In MPI mode ``pure`` is expected to be False unless a ``pure`` keyword
     # argument overrides it. The empty override exercises the default.
     scenarios = (
         ({}, False),
         ({"pure": True}, True),
     )
     for override, expected_pure in scenarios:
         mpi_cluster = get_cluster(queue_type="knl",
                                   mpi_mode=True,
                                   cores=64,
                                   **override,
                                   **self.kwargs)
         self.assertEqual(mpi_cluster.pure, expected_pure)
         # Unregister before the next scenario creates its own cluster
         controller.delete_cluster(mpi_cluster.name)
    def test_init_only_id(self):
        # A named CustomSLURMCluster is expected to be known to the controller
        # as soon as it is created, so adding it again must be refused.
        cluster_name = "test1"
        named_cluster = CustomSLURMCluster(name=cluster_name)
        with self.assertRaises(ClusterException) as raised:
            controller.add_cluster(cluster=named_cluster)
        expected_message = 'Cluster "{}" already exists!'.format(cluster_name)
        self.assertEqual(expected_message, str(raised.exception))

        # on_cluster is given only the id of the existing cluster
        @on_cluster(cluster_id=cluster_name)
        def f():
            pass

        found_cluster, found_client = controller.get_cluster(id_=cluster_name)
        self.assertEqual(named_cluster, found_cluster)
        self.assertEqual(found_cluster.name, cluster_name)
        self.assertIsInstance(found_client, Client)
        controller.delete_cluster(cluster_name)
예제 #13
0
 def test_mpi_complex_job_cluster(self):
     # MPI job on the "gpus" queue described by nodes and tasks per node
     # rather than by a core count: 2 nodes with 4 tasks on each.
     gpu_cluster = get_cluster(queue_type="gpus",
                               mpi_mode=True,
                               nodes=2,
                               ntasks_per_node=4,
                               **self.kwargs)
     header_directives = (
         "#SBATCH --cpus-per-task=6",
         "#SBATCH --ntasks-per-node=4",
         "#SBATCH --nodes=2",
         "#SBATCH --gres=gpu:4",
     )
     for directive in header_directives:
         self.assertIn(directive, gpu_cluster.job_header)
     # Every worker sizing attribute of the dummy job is expected to be 1
     for attribute in ("worker_cores",
                       "worker_processes",
                       "worker_process_threads"):
         self.assertEqual(getattr(gpu_cluster._dummy_job, attribute), 1)
     # The script should request 8 tasks in total and export the OpenMP
     # thread count, binding and placement settings
     script_lines = (
         "#SBATCH -n 8",
         "export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK}",
         "export OMP_PROC_BIND=spread",
         "export OMP_PLACES=threads",
     )
     for script_line in script_lines:
         self.assertIn(script_line, gpu_cluster.job_script())
     controller.delete_cluster(gpu_cluster.name)
    def test_init_only_cluster_local(self):
        local_cluster = LocalCluster()

        # Without a cluster_id the decorator must refuse a LocalCluster
        with self.assertRaises(ClusterException) as raised:

            @on_cluster(cluster=local_cluster)
            def f():
                pass

        self.assertEqual('LocalCluster requires "cluster_id" argument.',
                         str(raised.exception))

        # With an id supplied the same cluster object is accepted and can be
        # fetched back from the controller under that id
        local_id = "test1"

        @on_cluster(cluster=local_cluster, cluster_id=local_id)
        def f():
            pass

        found_cluster, found_client = controller.get_cluster(local_id)
        self.assertEqual(local_cluster, found_cluster)
        self.assertIsInstance(found_client, Client)
        controller.delete_cluster(local_id)
    def test_init_both_cluster_and_id(self):
        explicit_id = "test1"

        # Case 1: the id passed to on_cluster is not the name the cluster
        # picked for itself, which is expected to trip an assertion
        slurm_cluster = CustomSLURMCluster()

        with self.assertRaises(AssertionError):

            @on_cluster(cluster_id=explicit_id, cluster=slurm_cluster)
            def f():
                pass

        controller.delete_cluster(slurm_cluster.name)

        # Case 2: the id passed is the cluster's own (default) name
        slurm_cluster = CustomSLURMCluster()

        @on_cluster(cluster_id=slurm_cluster.name, cluster=slurm_cluster)
        def f():
            pass

        found_cluster, found_client = controller.get_cluster(
            id_=slurm_cluster.name)
        self.assertEqual(slurm_cluster, found_cluster)
        self.assertEqual(found_cluster.name, slurm_cluster.name)
        self.assertIsInstance(found_client, Client)
        controller.delete_cluster(slurm_cluster.name)

        # Case 3: the cluster is created with the explicit id as its name
        slurm_cluster = CustomSLURMCluster(name=explicit_id)

        @on_cluster(cluster_id=explicit_id, cluster=slurm_cluster)
        def f():
            pass

        found_cluster, found_client = controller.get_cluster(id_=explicit_id)
        self.assertEqual(slurm_cluster, found_cluster)
        self.assertEqual(found_cluster.name, explicit_id)
        self.assertIsInstance(found_client, Client)
        controller.delete_cluster(explicit_id)
예제 #16
0
 def test_mpi_job_cluster(self):
     # Simplest MPI case: 64 cores on the "knl" queue
     mpi_cluster = get_cluster(queue_type="knl",
                               mpi_mode=True,
                               cores=64,
                               **self.kwargs)
     for directive in ("#SBATCH --cpus-per-task=1",
                       "#SBATCH --ntasks-per-node=64"):
         self.assertIn(directive, mpi_cluster.job_header)
     self.assertIn("#SBATCH -n 64", mpi_cluster.job_script())
     # The command template is expected to go through the MPI wrapper module
     self.assertIn(MPI_DASK_WRAPPER_MODULE, mpi_cluster._command_template)
     # Every worker sizing attribute of the cluster is expected to be 1
     for attribute in ("worker_cores",
                       "worker_processes",
                       "worker_process_threads"):
         self.assertEqual(getattr(mpi_cluster, attribute), 1)
     self.assertIsInstance(mpi_cluster.client, Client)
     controller.delete_cluster(mpi_cluster.name)

     # Asking for 64 nodes together with 64 cores must be rejected
     with self.assertRaises(ValueError):
         unexpected_cluster = get_cluster(queue_type="knl",
                                          mpi_mode=True,
                                          nodes=64,
                                          cores=64,
                                          **self.kwargs)
         # Only reached if the creation above did not raise
         controller.delete_cluster(unexpected_cluster.name)
    def test_task_by_cluster_and_cluster_id(self):
        cluster_name = "test1"
        slurm_cls = CustomSLURMCluster
        named_cluster = slurm_cls(name=cluster_name)

        # A cluster_id that differs from the cluster's name must be refused
        with self.assertRaises(ClusterException) as raised:

            @on_cluster(cluster=named_cluster)
            @task(cluster=named_cluster, cluster_id="bad name")
            def f():
                pass

        self.assertEqual("Cluster 'name' and cluster_id are different.",
                         str(raised.exception))

        # With a matching cluster_id the decoration goes through
        @on_cluster(cluster=named_cluster)
        @task(cluster=named_cluster, cluster_id=cluster_name)
        def f():
            pass

        self._basic_tests(cluster_type=slurm_cls, id_=named_cluster.name)
        controller.delete_cluster(named_cluster.name)
    def test_task_by_cluster_local(self):
        local_id = "test1"
        local_cls = LocalCluster
        local_cluster = local_cls()

        # task is given the LocalCluster without a cluster_id and must refuse
        with self.assertRaises(ClusterException) as raised:

            @on_cluster(cluster=local_cluster, cluster_id=local_id)
            @task(cluster=local_cluster)
            def f():
                pass

        self.assertEqual("'cluster_id' argument is required for LocalCluster.",
                         str(raised.exception))

        # Supplying the id to task as well makes the decoration succeed
        @on_cluster(cluster=local_cluster, cluster_id=local_id)
        @task(cluster=local_cluster, cluster_id=local_id)
        def f():
            pass

        self._basic_tests(cluster_type=local_cls, id_=local_id)
        controller.delete_cluster(local_id)
예제 #19
0
 def test_mpi_complex_job_cluster_fail(self):
     # Supplying ntasks_per_node makes cpus_per_task a derived value (here
     # 24/2 = 12). In MPI mode the core count is the total number of cores
     # to use and is expected to be divisible by cpus_per_task; 2 cores is
     # not, so the request has to fail with a ValueError.
     with self.assertRaises(ValueError):
         get_cluster(queue_type="gpus",
                     mpi_mode=True,
                     cores=2,
                     ntasks_per_node=2,
                     **self.kwargs)

     # The same layout is accepted when cpus_per_task is stated explicitly
     explicit_cluster = get_cluster(queue_type="gpus",
                                    mpi_mode=True,
                                    cores=2,
                                    ntasks_per_node=2,
                                    cpus_per_task=1,
                                    **self.kwargs)
     for directive in ("#SBATCH --cpus-per-task=1",
                       "#SBATCH --ntasks-per-node=2",
                       "#SBATCH -n 2"):
         self.assertIn(directive, explicit_cluster.job_header)
     # No node count was requested, so none should show up in the header
     self.assertNotIn("#SBATCH --nodes", explicit_cluster.job_header)
     self.assertIn("#SBATCH --gres=gpu:4", explicit_cluster.job_header)
     controller.delete_cluster(explicit_cluster.name)

     # Because the scheduler may do memory pinning, requesting nodes and
     # tasks per node like this is probably the better way to ask (even when
     # OpenMP is not going to be used)
     node_cluster = get_cluster(queue_type="gpus",
                                mpi_mode=True,
                                nodes=1,
                                ntasks_per_node=2,
                                **self.kwargs)
     for directive in ("#SBATCH --cpus-per-task=12",
                       "#SBATCH --ntasks-per-node=2",
                       "#SBATCH -n 2",
                       "#SBATCH --nodes=1",
                       "#SBATCH --gres=gpu:4"):
         self.assertIn(directive, node_cluster.job_header)
     controller.delete_cluster(node_cluster.name)
    def test_init_only_cluster(self):
        # First with a cluster that keeps its default name
        default_cluster = CustomSLURMCluster()

        @on_cluster(cluster=default_cluster)
        def f():
            pass

        found_cluster, found_client = controller.get_cluster(
            default_cluster.name)
        self.assertEqual(default_cluster, found_cluster)
        self.assertIsInstance(found_client, Client)
        controller.delete_cluster(found_cluster.name)

        # Then with a cluster that is created under an explicit name
        cluster_name = "test1"
        named_cluster = CustomSLURMCluster(name=cluster_name)

        @on_cluster(cluster=named_cluster)
        def f():
            pass

        found_cluster, found_client = controller.get_cluster(cluster_name)
        self.assertEqual(named_cluster, found_cluster)
        self.assertIsInstance(found_client, Client)
        controller.delete_cluster(found_cluster.name)
예제 #21
0
    def test_mpi_job_cluster(self):
        # Simplest MPI case: 64 cores on the "knl" queue
        mpi_cluster = get_cluster(queue_type="knl",
                                  mpi_mode=True,
                                  cores=64,
                                  **self.kwargs)
        for directive in ("#SBATCH --cpus-per-task=1",
                          "#SBATCH --ntasks-per-node=64"):
            self.assertIn(directive, mpi_cluster.job_header)
        self.assertIn("#SBATCH -n 64", mpi_cluster.job_script())
        self.assertIn(MPI_DASK_WRAPPER_MODULE,
                      mpi_cluster._dummy_job._command_template)
        # Every worker sizing attribute of the dummy job is expected to be 1
        for attribute in ("worker_cores",
                          "worker_processes",
                          "worker_process_threads"):
            self.assertEqual(getattr(mpi_cluster._dummy_job, attribute), 1)
        self.assertIsInstance(mpi_cluster.client, Client)
        controller.delete_cluster(mpi_cluster.name)

        # Next, inspect the command template produced for the other MPI
        # launchers. SRUN was already covered above (and it takes no
        # arguments), so it is dropped from the copy of the list.
        other_launchers = SUPPORTED_MPI_LAUNCHERS.copy()
        other_launchers.remove(SRUN)
        # Matched positionally with ``other_launchers`` by the zip below
        launcher_arguments = [
            "-n 64", "-np 64 --map-by ppr:64:node", "-n 64 -perhost 64"
        ]
        for mpi_launcher, arguments in zip(other_launchers,
                                           launcher_arguments):
            launcher_kwargs = self.kwargs.copy()
            launcher_kwargs["mpi_launcher"] = mpi_launcher
            launcher_cluster = get_cluster(queue_type="knl",
                                           mpi_mode=True,
                                           cores=64,
                                           **launcher_kwargs)
            expected_command = " ".join(
                [mpi_launcher["launcher"], arguments, sys.executable])
            self.assertIn(expected_command,
                          launcher_cluster._dummy_job._command_template)
            controller.delete_cluster(launcher_cluster.name)

        # Last, make sure the assumption that cores is the total number of
        # CPU elements of the job (in MPI mode) is enforced
        with self.assertRaises(ValueError):
            unexpected_cluster = get_cluster(queue_type="knl",
                                             mpi_mode=True,
                                             nodes=64,
                                             cores=64,
                                             **self.kwargs)
            # Only reached if the creation above did not raise
            controller.delete_cluster(unexpected_cluster.name)
예제 #22
0
 def test_cluster_pure_functions(self):
     # Outside MPI mode the cluster is expected to have no "pure" attribute
     knl_cluster = get_cluster(queue_type="knl", **self.kwargs)
     self.assertFalse(hasattr(knl_cluster, "pure"))
     # Unregister the cluster from the controller once the check is done
     controller.delete_cluster(knl_cluster.name)