Esempio n. 1
0
File: soak.py Progetto: morsiee/daos
    def test_soak_3(self):
        """
        Test ID: DAOS-2192
        Test Description: this time try a dmg command combined with IOR run
        Use Cases:
        :avocado: tags=soak3
        """

        script1 = None
        script2 = None
        try:
            # retrieve IOR job parameters
            script1 = self.build_ior_script('job1')
            job_id1 = slurm_utils.run_slurm_script(script1)
            slurm_utils.register_for_job_results(job_id1, self, maxwait=3600)

            # now do the dmg job
            dmgcmds = dmg_utils.get_dmg_script("dmg1", self.params,
                                               self.basepath)

            s3_job2_name = self.params.get("name", '/run/job3/')
            s3_job2_nodes = self.params.get("nodes", '/run/job3/')
            output = os.path.join(self.tmpdir, s3_job2_name + "_results.out")
            script2 = slurm_utils.write_slurm_script(self.tmpdir, s3_job2_name,
                                                     output, s3_job2_nodes,
                                                     dmgcmds)
            job_id2 = slurm_utils.run_slurm_script(script2)
            slurm_utils.register_for_job_results(job_id2, self, maxwait=3600)

            # wait for all the jobs to finish
            while len(self.soak_results) < 2:
                time.sleep(10)

            for job, result in self.soak_results.iteritems():
                if result != "COMPLETED":
                    self.fail(
                        "Soak job: {} didn't complete as expected: {}".format(
                            job, result))

        except (DaosApiError, ior_utils.IorFailed) as error:
            self.fail("Soak Test 3 Failed\n {}".format(error))
        finally:
            try:
                os.remove(script1)
            except StandardError:
                pass
            try:
                os.remove(script2)
            except StandardError:
                pass
Esempio n. 2
0
    def test_soak_3(self):
        """
        Test ID: DAOS-2192
        Test Description: this time try a dmg command combined with IOR run
        Use Cases:
        :avocado: tags=soak3
        """

        script1 = None
        script2 = None
        try:
            # retrieve IOR job parameters
            script1 = self.build_ior_script('job1')
            job_id1 = slurm_utils.run_slurm_script(script1)
            slurm_utils.register_for_job_results(job_id1, self, maxwait=3600)

            # now do the dmg job
            dmgcmds = dmg_utils.get_dmg_script("dmg1", self.params,
                                               self.basepath)

            s3_job2_name = self.params.get("name", '/run/job3/')
            s3_job2_nodes = self.params.get("nodes", '/run/job3/')
            output = os.path.join(self.tmpdir, s3_job2_name + "_results.out")
            script2 = slurm_utils.write_slurm_script(self.tmpdir, s3_job2_name,
                                                     output, s3_job2_nodes,
                                                     dmgcmds)
            job_id2 = slurm_utils.run_slurm_script(script2)
            slurm_utils.register_for_job_results(job_id2, self, maxwait=3600)

            # wait for all the jobs to finish
            while len(self.soak_results) < 2:
                time.sleep(10)

            for job, result in self.soak_results.iteritems():
                if result != "COMPLETED":
                    self.fail("Soak job: {} didn't complete as expected: {}".
                              format(job, result))

        except (DaosApiError, ior_utils.IorFailed) as error:
            self.fail("Soak Test 3 Failed\n {}".format(error))
        finally:
            try:
                os.remove(script1)
            except StandardError:
                pass
            try:
                os.remove(script2)
            except StandardError:
                pass
Esempio n. 3
0
File: soak.py Progetto: morsiee/daos
    def test_soak_1(self):
        """
        Test ID: DAOS-2192
        Test Description: This test runs 2 DAOS API IOR jobs.
        :avocado: tags=soak1
        """

        try:
            # turn job parameters into slurm script
            script1 = self.build_ior_script('job1')

            # queue it up to run and register a callback to retrieve results
            job_id1 = slurm_utils.run_slurm_script(script1)
            slurm_utils.register_for_job_results(job_id1, self, maxwait=3600)

            # queue up a second job
            script2 = self.build_ior_script('job2')
            job_id2 = slurm_utils.run_slurm_script(script2)
            slurm_utils.register_for_job_results(job_id2, self, maxwait=3600)

            # wait for all the jobs to finish
            while len(self.soak_results) < 2:
                time.sleep(10)

            for job, result in self.soak_results.iteritems():
                if result != "COMPLETED":
                    self.fail(
                        "Soak job: {} didn't complete as expected: {}".format(
                            job, result))

        except (DaosApiError, ior_utils.IorFailed) as error:
            self.fail("<Soak Test 1 Failed>\n {}".format(error))
        finally:
            try:
                os.remove(script1)
            except StandardError:
                pass
            try:
                os.remove(script2)
            except StandardError:
                pass
Esempio n. 4
0
    def job_startup(self, job_cmdlist):
        """Submit job batch script.

        Args:
            job_cmdlist (list): list of jobs to execute
        Returns:
            job_id_list: IDs of each job submitted to slurm.

        """
        self.log.info("<<Job Startup - %s >> at %s", self.test_name,
                      time.ctime())
        job_id_list = []
        # before submitting the jobs to the queue, check the job timeout;
        if time.time() > self.end_time:
            self.log.info("<< SOAK test timeout in Job Startup>>")
            return job_id_list
        # job_cmdlist is a list of batch script files

        for script in job_cmdlist:
            try:
                job_id = slurm_utils.run_slurm_script(str(script))
            except slurm_utils.SlurmFailed as error:
                self.log.error(error)
                # Force the test to exit with failure
                job_id = None
            if job_id:
                self.log.info("<<Job %s started with %s >> at %s", job_id,
                              script, time.ctime())
                slurm_utils.register_for_job_results(job_id,
                                                     self,
                                                     maxwait=self.test_timeout)
                # keep a list of the job_id's
                job_id_list.append(int(job_id))
            else:
                # one of the jobs failed to queue; exit on first fail for now.
                err_msg = "Slurm failed to submit job for {}".format(script)
                job_id_list = []
                raise SoakTestError("<<FAILED:  Soak {}: {}>>".format(
                    self.test_name, err_msg))
        return job_id_list
Esempio n. 5
0
File: soak.py Progetto: morsiee/daos
    def test_soak_2(self):
        """
        Test ID: DAOS-2192
        Test Description: This test verifies that a dmg script can be submitted.
        :avocado: tags=soak2
        """

        script = None
        try:
            dmgcmds = dmg_utils.get_dmg_script("dmg1", self.params,
                                               self.basepath)

            s2_job1_name = self.params.get("name", '/run/job3/')
            s2_job1_nodes = self.params.get("nodes", '/run/job3/')

            output = os.path.join(self.tmpdir, s2_job1_name + "_results.out")

            script = slurm_utils.write_slurm_script(self.tmpdir, s2_job1_name,
                                                    output, s2_job1_nodes,
                                                    dmgcmds)
            job_id = slurm_utils.run_slurm_script(script)
            slurm_utils.register_for_job_results(job_id, self, maxwait=3600)

            # wait for all the jobs to finish
            while len(self.soak_results) < 1:
                time.sleep(10)

            for job, result in self.soak_results.iteritems():
                if result != "COMPLETED":
                    self.fail(
                        "Soak job: {} didn't complete as expected: {}".format(
                            job, result))

        except (DaosApiError, ior_utils.IorFailed) as error:
            self.fail("Soak Test 2 Failed/n {}".format(error))
        finally:
            try:
                os.remove(script)
            finally:
                pass
Esempio n. 6
0
    def test_soak_1(self):
        """
        Test ID: DAOS-2192
        Test Description: This test runs 2 DAOS API IOR jobs.
        :avocado: tags=soak1
        """

        try:
            # turn job parameters into slurm script
            script1 = self.build_ior_script('job1')

            # queue it up to run and register a callback to retrieve results
            job_id1 = slurm_utils.run_slurm_script(script1)
            slurm_utils.register_for_job_results(job_id1, self, maxwait=3600)

            # queue up a second job
            script2 = self.build_ior_script('job2')
            job_id2 = slurm_utils.run_slurm_script(script2)
            slurm_utils.register_for_job_results(job_id2, self, maxwait=3600)            

            # wait for all the jobs to finish
            while len(self.soak_results) < 2:
                time.sleep(10)

            for job, result in self.soak_results.iteritems():
                if result != "COMPLETED":
                    self.fail("Soak job: {} didn't complete as expected: {}".
                              format(job, result))

        except (DaosApiError, ior_utils.IorFailed) as error:
            self.fail("<Soak Test 1 Failed>\n {}".format(error))
        finally:
            try:
                os.remove(script1)
            except StandardError:
                pass
            try:
                os.remove(script2)
            except StandardError:
                pass
Esempio n. 7
0
    def test_soak_2(self):
        """
        Test ID: DAOS-2192
        Test Description: This test verifies that a dmg script can be submitted.
        :avocado: tags=soak2
        """

        script = None
        try:
            dmgcmds = dmg_utils.get_dmg_script("dmg1", self.params,
                                               self.basepath)

            s2_job1_name = self.params.get("name", '/run/job3/')
            s2_job1_nodes = self.params.get("nodes", '/run/job3/')

            output = os.path.join(self.tmpdir, s2_job1_name + "_results.out")

            script = slurm_utils.write_slurm_script(self.tmpdir, s2_job1_name,
                                                    output,
                                                    s2_job1_nodes, dmgcmds)
            job_id = slurm_utils.run_slurm_script(script)
            slurm_utils.register_for_job_results(job_id, self, maxwait=3600)

            # wait for all the jobs to finish
            while len(self.soak_results) < 1:
                time.sleep(10)

            for job, result in self.soak_results.iteritems():
                if result != "COMPLETED":
                    self.fail("Soak job: {} didn't complete as expected: {}".
                              format(job, result))

        except (DaosApiError, ior_utils.IorFailed) as error:
            self.fail("Soak Test 2 Failed/n {}".format(error))
        finally:
            try:
                os.remove(script)
            finally:
                pass
Esempio n. 8
0
File: soak.py Progetto: yulujia/daos
    def job_startup(self, scripts):
        """Submit job batch script.

        Args:
            scripts (list): list of slurm batch scripts to submit to queue
        Returns:
            job_id_list: IDs of each job submitted to slurm.

        """
        self.log.info("<<Job Startup - %s >> at %s", self.test_name,
                      time.ctime())
        job_id_list = []
        # scripts is a list of batch script files
        for script in scripts:
            try:
                job_id = slurm_utils.run_slurm_script(str(script))
            except slurm_utils.SlurmFailed as error:
                self.log.error(error)
                # Force the test to exit with failure
                job_id = None

            if job_id:
                print("<<Job {} started with {} >> at {}".format(
                    job_id, script, time.ctime()))
                slurm_utils.register_for_job_results(job_id,
                                                     self,
                                                     maxwait=self.test_timeout)
                # keep a list of the job_id's
                job_id_list.append(int(job_id))
            else:
                # one of the jobs failed to queue; exit on first fail for now.
                err_msg = "Slurm failed to submit job for {}".format(script)
                job_id_list = []
                raise SoakTestError("<<FAILED:  Soak {}: {}>>".format(
                    self.test_name, err_msg))
        return job_id_list