Beispiel #1
0
    def setUp(self):
        """Register the ``hello`` app, build the experiment grid, warm up nodes.

        Sets:
            self.launcherInfo: the object returned by ``util.launcher_info()``.
            self.experiments: an ``itertools.product`` over
                ``(worker_count, ranks_per_node)`` pairs.  Worker counts are
                the powers of two not exceeding ``num_workers`` plus
                ``num_workers`` itself, ordered largest first.  This is a
                one-shot iterator: it can only be consumed once.
        """
        from itertools import takewhile, product

        # Resolve the script once; the same path is used for the app
        # registration and for the warm-up command below.
        hello = find_spec("tests.benchmarks.concurrent_insert.hello").origin
        create_app(name="hello", executable=hello)
        self.launcherInfo = util.launcher_info()

        max_workers = self.launcherInfo.num_workers
        worker_counts = list(
            takewhile(lambda x: x <= max_workers, (2**i for i in range(20))))
        if max_workers not in worker_counts:
            worker_counts.append(max_workers)
        # Largest allocation first.
        worker_counts.reverse()
        ranks_per_node = [32]
        self.experiments = product(worker_counts, ranks_per_node)

        # Load mpi4py/Balsam on compute nodes prior to experiments
        app_cmd = f"{sys.executable} {hello}"
        mpi_str = self.launcherInfo.mpi_cmd(
            self.launcherInfo.workerGroup.workers,
            app_cmd=app_cmd,
            envs={},
            num_ranks=max_workers,
            ranks_per_node=1,
            threads_per_rank=1,
            threads_per_core=1)
        # Only the side effect of running the command matters here; its
        # output and timing are intentionally discarded.
        util.cmdline(mpi_str)
Beispiel #2
0
    def test_Theta(self):
        '''MPI/OMP C binary for Theta: check thread/rank placement

        Skipped unless the launcher reports a 'CRAY' host type with at least
        two workers.  Points ``self.app`` at the ``omp.theta.x`` binary under
        ``self.app_path``, runs the launcher until every BalsamJob is
        JOB_FINISHED, then validates the output of job0, job1 and job2 with
        ``self.check_omp_exe_output``.
        '''
        launcherInfo = util.launcher_info()

        if launcherInfo.host_type != 'CRAY':
            self.skipTest('did not recognize Cray environment')
        if launcherInfo.num_workers < 2:
            self.skipTest('need at least two nodes reserved to run this test')

        binary_path = os.path.join(self.app_path, 'omp.theta.x')
        binary = glob.glob(binary_path)
        if not binary:
            # Without this guard a missing binary surfaces as a bare
            # IndexError, which hides what actually went wrong.
            self.fail('test binary not found: {}'.format(binary_path))
        self.app.executable = binary[0]
        self.app.save()

        def check():
            # Stop condition for the launcher: every job in the DB is done.
            jobs = BalsamJob.objects.all()
            return all(j.state == 'JOB_FINISHED' for j in jobs)

        util.run_launcher_until(check)

        jobs = (self.job0, self.job1, self.job2)
        for job in jobs:
            job.refresh_from_db()

        for job in jobs:
            self.assertEqual(job.state, 'JOB_FINISHED')

        # Check output of dummy MPI/OpenMP C program
        for job in jobs:
            self.check_omp_exe_output(job)
Beispiel #3
0
    def setUp(self):
        """Record the node count and register the two concurrent test apps.

        Sets ``self.num_nodes`` to the number of workers in the launcher's
        worker group, then registers the ``hello`` and ``mpi4py-insert`` apps.
        Each app's executable is the current Python interpreter followed by
        the path of the corresponding test module.
        """
        launcherInfo = util.launcher_info()
        self.num_nodes = len(launcherInfo.workerGroup.workers)

        interpreter = sys.executable
        hello_script = find_spec("tests.ft_apps.concurrent.hello").origin
        insert_script = find_spec("tests.ft_apps.concurrent.mpi_insert").origin

        # Keep script paths and full command lines under separate names.
        hello_cmd = f"{interpreter} {hello_script}"
        insert_cmd = f"{interpreter} {insert_script}"
        create_app(name="hello", executable=hello_cmd)
        create_app(name="mpi4py-insert", executable=insert_cmd)
Beispiel #4
0
 def test_mpi_can_run(self):
     '''The system-detected mpirun works

     Builds a 2-rank MPI command line for the mock MPI app on the first
     entry of the worker group, runs it, and expects output from both ranks.
     '''
     info = util.launcher_info()
     group = info.workerGroup
     build_mpi_cmd = info.mpi_cmd

     script = find_spec('tests.mock_mpi_app').origin
     command = f"{sys.executable}  {script}"
     launch_options = dict(
         app_cmd=command,
         envs={},
         num_ranks=2,
         ranks_per_node=2,
         threads_per_rank=1,
         threads_per_core=1,
     )
     mpi_str = build_mpi_cmd([group[0]], **launch_options)

     output, _ = util.cmdline(mpi_str)
     for marker in ('Rank 0', 'Rank 1'):
         self.assertIn(marker, output)
Beispiel #5
0
    def setUp(self):
        """Build the (num_nodes, rpn, jpn) experiment grid.

        Node counts are the powers of two from 1 to 4096 that do not exceed
        the launcher's ``num_workers``, followed by ``num_workers`` itself
        when it is not already the last entry.  ``self.experiments`` is an
        ``itertools.product`` iterator and can be consumed only once.

        NOTE(review): when ``num_workers`` is below 1 the node list is empty
        and the ``[-1]`` lookup raises IndexError -- confirm whether that
        case can occur.
        """
        from itertools import product

        info = util.launcher_info()
        self.launcherInfo = info
        max_workers = info.num_workers

        node_counts = []
        for exponent in range(13):
            count = 2**exponent
            if count <= max_workers:
                node_counts.append(count)
        if node_counts[-1] != max_workers:
            node_counts.append(max_workers)

        # presumably ranks-per-node and jobs-per-node -- TODO confirm
        rpn_values = [64]
        jpn_values = [64, 512]
        self.experiments = product(node_counts, rpn_values, jpn_values)
Beispiel #6
0
    def test_kill_during_execution_mpi(self):
        '''Parallel MPIRunner job is properly terminated

        Creates a "killer" job and a 2-ranks-per-node "slow" job, runs the
        launcher until the killer job reaches JOB_FINISHED, then checks that
        the slow job ended as USER_KILLED after both ranks produced output
        and without ever reaching RUN_DONE.  Skipped with fewer than two
        workers.
        '''
        workers = util.launcher_info().workerGroup.workers
        if len(workers) < 2:
            self.skipTest("Need at least 2 workers to run this test")

        killer = create_job(name="killer", app="killer")
        victim = create_job(
            name="slow_job",
            app="slow",
            ranks_per_node=2,
            args="30 parallel",
        )

        finished = util.run_launcher_until_state(killer, 'JOB_FINISHED')
        self.assertTrue(finished)

        victim.refresh_from_db()
        self.assertEqual(victim.state, "USER_KILLED")

        # Both ranks must have started before the job was killed.
        output = victim.read_file_in_workdir('slow_job.out')
        for expected_line in ("Rank 0 Sleeping for a long time",
                              "Rank 1 Sleeping for a long time"):
            self.assertIn(expected_line, output)

        # The job ran and was killed, but never completed normally.
        for state in ("RUNNING", "USER_KILLED"):
            self.assertIn(state, victim.state_history)
        self.assertNotIn("RUN_DONE", victim.state_history)