Example #1
0
    def test_jobMem(self):
        """Check evaluation of the job memory for every memkind

        Three jobs run the same program, built by TestProcMemTree.allocAndSpawnProg()
        from two allocDelayProg() programs (memory allocated directly in the process
        and in its child process), with memkind 0, 1 and 2 respectively.
        The values returned by Job._updateMem() are expected to grow strictly
        in that order.
        """
        wtime = _TEST_LATENCY * 6  # Work time; Note: should be larger than 3*latency
        jtimeout = wtime * 2  # Job timeout; Note: should be larger than 3*latency
        # Execution pool timeout; Note: *3 because nonstarted jobs exist here and are postponed twice
        ptimeout = (max(1, _TEST_LATENCY) + jtimeout) * 3
        assert _TEST_LATENCY * 3 < wtime < jtimeout < ptimeout, 'Testcase parameters validation failed'

        # The pool is created with max(_WPROCSMAX, 3) workers
        with ExecPool(max(_WPROCSMAX, 3), latency=_TEST_LATENCY) as pool:
            ownmem = 0.02  # Memory allocated directly in the process
            childmem = 0.07  # Memory allocated in the child process
            span = wtime / 3  # Duration in sec

            def memjob(name, kind):
                """Create the job that runs the alloc-and-spawn program with the given memkind"""
                return Job(name,
                           args=(PYEXEC, '-c',
                                 TestProcMemTree.allocAndSpawnProg(
                                     allocDelayProg(inBytes(ownmem), span),
                                     allocDelayProg(inBytes(childmem), span))),
                           timeout=jtimeout,
                           memkind=kind,
                           ondone=mock.MagicMock())

            jproc = memjob('jmem_proc', 0)
            jmaxsub = memjob('jmem_max-subproc', 1)
            jtree = memjob('jmem_tree', 2)
            memjobs = (jproc, jmaxsub, jtree)

            # A job that has not been started must raise on the memory update request;
            # the exception type depends on whether the RAM limitation is enabled
            experr = AttributeError if _LIMIT_WORKERS_RAM else NameError
            self.assertRaises(experr, jproc._updateMem)

            began = time.perf_counter()
            for jb in memjobs:
                pool.execute(jb)
            time.sleep(span * 1.9)
            procmem, maxsubmem, treemem = [jb._updateMem() for jb in memjobs]
            # Report and verify the memory consumption
            report = 'Memory consumption in Mb,  proc_mem: {pmem:.3g}, max_procInTree_mem: {xmem:.3g}, procTree_mem: {tmem:.3g}'
            print(report.format(pmem=procmem * 1000,
                                xmem=maxsubmem * 1000,
                                tmem=treemem * 1000))
            self.assertTrue(procmem < maxsubmem < treemem)
            # Give the jobs time to start and allocate the memory,
            # then verify that the pool completes before its timeout
            time.sleep(wtime / 3)
            self.assertTrue(pool.join(ptimeout))
            elapsed = time.perf_counter() - began  # Execution time
            # The job execution time can not exceed the whole execution time
            self.assertLessEqual(jtree.tstop - jtree.tstart, elapsed)
Example #2
0
    def test_psutilPTMem(self):
        """Report the psutil memory consumption of a process tree

        Spawns a Python process running the program built by
        TestProcMemTree.allocAndSpawnProg() and prints the memory (in Mb) reported
        by psutil for the root process, for each of its descendants, for the
        heaviest descendant (by virtual memory size) and for the whole tree.

        The test is informational: it makes no assertions on the reported values
        and returns early, after a warning to stderr, when psutil.Process() fails.
        The spawned process is always waited for, including the early return and
        any failure of a psutil call.
        """
        def memMb(uproc):
            """Memory of the psutil process in Mb as the tuple (mem, rmem, umem, urmem)

            mem and rmem are vms and rss of memory_info(); umem and urmem are vms
            and rss of memory_full_info(). Each psutil call is made once per process.
            """
            minfo = uproc.memory_info()
            finfo = uproc.memory_full_info()
            # NOTE(review): umem/urmem used to be commented as "Unique Set Size" but
            # they read the vms/rss fields; the USS itself is the `uss` field of
            # memory_full_info() - confirm the intent before changing the fields
            return tuple(inGigabytes(val) * 1000
                         for val in (minfo.vms, minfo.rss, finfo.vms, finfo.rss))

        amem = 0.02  # Direct allocating memory in the process
        camem = 0.07  # Allocating memory in the child process
        duration = 0.2  # Duration in sec
        proc = subprocess.Popen(
            args=(PYEXEC, '-c',
                  TestProcMemTree.allocAndSpawnProg(
                      allocDelayProg(inBytes(amem), duration),
                      allocDelayProg(inBytes(camem), duration))))
        try:
            time.sleep(duration * 2)
            try:
                up = psutil.Process(proc.pid)
            except psutil.Error as err:
                print('WARNING, psutil.Process() failed: ', err, file=sys.stderr)
                return
            mem, rmem, umem, urmem = memMb(up)

            # Totals over the whole process tree, started from the root process
            acmem, armem, aumem, aurmem = mem, rmem, umem, urmem
            # Consumption of the heaviest child
            cxmem = cxrmem = cxumem = cxurmem = 0
            cxpid = None
            cnum = 0  # The number of child processes
            for ucp in up.children(recursive=True):
                cnum += 1
                cmem, crmem, cumem, curmem = memMb(ucp)
                print(
                    'Memory in Mb of "{pname}" #{pid}: (mem: {mem:.2f}, rmem: {rmem:.2f}, umem: {umem:.2f}, urmem: {urmem:.2f})'
                    .format(pname=ucp.name(),
                            pid=ucp.pid,
                            mem=cmem,
                            rmem=crmem,
                            umem=cumem,
                            urmem=curmem))
                # Identify consumption by the heaviest child (by absolute mem)
                if cxmem < cmem:
                    cxmem, cxrmem, cxumem, cxurmem = cmem, crmem, cumem, curmem
                    cxpid = ucp.pid
                acmem += cmem
                armem += crmem
                aumem += cumem
                aurmem += curmem
        finally:
            # Always reap the spawned process to avoid leaving it behind
            # on the early return or on an exception raised by psutil
            proc.wait()

        amem *= 1000  # Mb
        camem *= 1000  # Mb

        print(
            'Memory in Mb:\n  allocated for the proc #{pid}: {amem}, child: {camem}, total: {tamem}'
            '\n  psutil proc #{pid} (mem: {mem:.2f}, rmem: {rmem:.2f}, umem: {umem:.2f}, urmem: {urmem:.2f})'
            '\n  psutil proc #{pid} tree ({cnum} subprocs) heaviest child #{cxpid}'
            ' (mem: {cxmem:.2f}, rmem: {cxrmem:.2f}, umem: {cxumem:.2f}, urmem: {cxurmem:.2f})'
            '\n  psutil proc #{pid} tree (mem: {acmem:.2f}, rmem: {armem:.2f}, umem: {aumem:.2f}, urmem: {aurmem:.2f})'
            .format(pid=proc.pid,
                    amem=amem,
                    camem=camem,
                    tamem=amem + camem,
                    mem=mem,
                    rmem=rmem,
                    umem=umem,
                    urmem=urmem,
                    cnum=cnum,
                    cxpid=cxpid,
                    cxmem=cxmem,
                    cxrmem=cxrmem,
                    cxumem=cxumem,
                    cxurmem=cxurmem,
                    acmem=acmem,
                    armem=armem,
                    aumem=aumem,
                    aurmem=aurmem))
Example #3
0
    def test_jobMemlimGroupSimple(self):
        """Check memory violations caused by a group of workers that have no chained jobs

        The number of worker processes is expected to be reduced when their total
        memory consumption exceeds the dedicated limit and
            1) either there are no nonstarted jobs
            2) or the nonstarted jobs were already rescheduled by the related worker
               (absence of chained constraints)
        """
        wtime = _TEST_LATENCY * 10  # Work time of a job; Note: should be larger than 3*latency
        jtimeout = wtime * 2  # Job timeout; Note: should be larger than 3*latency
        # Execution pool timeout; Note: *3 because nonstarted jobs exist here and are postponed twice
        ptimeout = (max(1, _TEST_LATENCY) + jtimeout) * 3
        assert _TEST_LATENCY * 3 < wtime < jtimeout < ptimeout, 'Testcase parameters validation failed'

        # Note: a dedicated execution pool is required to set memlimit there
        poolmem = 0.15  # Memory limit of the execution pool, Gb
        # Small amount of memory for a job; Note: actual Python app consumes ~51 Mb for the allocated ~25 Mb
        smallmem = inBytes(0.025)

        def allocArgs(membytes):
            """Arguments of the Python process running allocDelayProg(membytes, wtime)"""
            return (PYEXEC, '-c', allocDelayProg(membytes, wtime))

        # The pool is created with max(_WPROCSMAX, 3) workers
        with ExecPool(max(_WPROCSMAX, 3), latency=_TEST_LATENCY, memlimit=poolmem) as pool:
            began = time.perf_counter()
            small1 = Job('jgroup_mem_small_1', args=allocArgs(smallmem),
                         size=9, timeout=jtimeout, onstart=mock.MagicMock())
            small2 = Job('jgroup_mem_small_2', args=allocArgs(smallmem),
                         size=9, timeout=jtimeout)
            small3 = Job('jgroup_mem_small_3', args=allocArgs(smallmem * 1.25),
                         size=5, timeout=jtimeout,
                         onstart=mock.MagicMock(), ondone=mock.MagicMock())
            postponed1 = Job('jgroup_mem_small_postponed_1', args=allocArgs(smallmem * 0.85),
                             size=4, timeout=jtimeout, onstart=mock.MagicMock())
            # The timeout of this job is shorter than the work time of its program
            timedout = Job('jgroup_mem_small_postponed_2_to', args=allocArgs(smallmem),
                           timeout=wtime / 2, ondone=mock.MagicMock())

            for jb in (small1, small2, small3, postponed1, timedout):
                pool.execute(jb)

            # Wait for the jobs starting and memory allocation
            time.sleep(wtime / 3)
            # The execution pool should be completed before its timeout
            self.assertTrue(pool.join(ptimeout))
            # All jobs should be completed by now
            elapsed = time.perf_counter() - began  # Execution time

            # Verify timings: graceful completion of all jobs except the last one
            self.assertLess(elapsed, ptimeout)
            for jb in (small1, small2, small3, postponed1):
                self.assertGreaterEqual(jb.tstop - jb.tstart, wtime)
                self.assertFalse(jb.proc.returncode)
            self.assertLess(timedout.tstop - timedout.tstart, wtime)
            self.assertTrue(timedout.proc.returncode)
            # Check the last completed job
            self.assertTrue(small3.tstop <= began + elapsed)

            # Verify calls of the handlers
            small1.onstart.assert_called_once_with(small1)
            small3.onstart.assert_called_once_with(small3)
            small3.ondone.assert_called_once_with(small3)
            postponed1.onstart.assert_called_with(postponed1)
            self.assertTrue(1 <= postponed1.onstart.call_count <= 2)
            timedout.ondone.assert_not_called()
Example #4
0
    def test_jobMemlimGroupChained(self):
        """Check memory violations caused by a group of workers that have chained jobs

        The worker processes are expected to be rescheduled when their total memory
        consumption exceeds the dedicated limit and there are some nonstarted jobs
        of smaller size and the same category that
            1) were not rescheduled by the non-heavier worker.
            2) were rescheduled by the non-heavier worker.
        """
        # Note: one of the jobs uses timeout=wtime/2, so the latency multiplier should be at least 3*2 = 6
        wtime = _TEST_LATENCY * 10  # Work time of a job; Note: should be larger than 3*latency
        jtimeout = wtime * 2  # Job timeout; Note: should be larger than 3*latency
        # Execution pool timeout; Note: *4 is used here since nonstarted jobs exist and are postponed
        ptimeout = (max(1, _TEST_LATENCY) + jtimeout) * 4
        assert _TEST_LATENCY * 3 < wtime / 2 and wtime < jtimeout < ptimeout, 'Testcase parameters validation failed'

        # Note: a dedicated execution pool is required to set memlimit there
        poolmem = 0.15  # Memory limit of the execution pool, Gb
        # Small amount of memory for a job; Note: actual Python app consumes ~51 Mb for the allocated ~25 Mb
        smallmem = inBytes(0.025)

        def allocArgs(membytes, delay):
            """Arguments of the Python process running allocDelayProg(membytes, delay)"""
            return (PYEXEC, '-c', allocDelayProg(membytes, delay))

        # The pool is created with max(_WPROCSMAX, 4) workers
        with ExecPool(max(_WPROCSMAX, 4), latency=_TEST_LATENCY, memlimit=poolmem) as pool:
            began = time.perf_counter()

            small1 = Job('jcgroup_mem_small_1', args=allocArgs(smallmem, wtime),
                         size=5, timeout=jtimeout)
            shorttime = wtime / 3  # Work time of the short job
            small2 = Job('jcgroup_mem_small_2s', args=allocArgs(smallmem, shorttime),
                         size=5, timeout=jtimeout, onstart=mock.MagicMock())
            small3 = Job('jcgroup_mem_small_3g', args=allocArgs(smallmem * 1.5, wtime),
                         category="cat_sa", size=5, timeout=jtimeout,
                         onstart=mock.MagicMock(), ondone=mock.MagicMock())
            postponed1 = Job('jcgroup_mem_small_postponed_1m',
                             args=allocArgs(smallmem * 1.2, wtime * 1.25),
                             category="cat_toch", size=6, timeout=jtimeout,
                             onstart=mock.MagicMock())
            # The timeout of this job is shorter than the work time of its program
            postponed2 = Job('jcgroup_mem_small_postponed_2_to',
                             args=allocArgs(smallmem * 0.8, wtime),
                             category="cat_toch", size=4, timeout=wtime / 2,
                             ondone=mock.MagicMock())
            postponed3 = Job('jcgroup_mem_small_postponed_3', args=allocArgs(smallmem, wtime),
                             size=9, timeout=wtime, onstart=mock.MagicMock())

            for jb in (small1, small2, small3, postponed1, postponed2, postponed3):
                pool.execute(jb)

            # Wait for the jobs starting and memory allocation
            time.sleep(wtime / 4)
            # The execution pool should be completed before its timeout
            self.assertTrue(pool.join(ptimeout))
            elapsed = time.perf_counter() - began  # Execution time

            # Verify timings: graceful completion of the jobs that are not postponed
            self.assertLess(elapsed, ptimeout)
            for jb, mintime in ((small1, wtime), (small2, shorttime), (small3, wtime)):
                self.assertGreaterEqual(jb.tstop - jb.tstart, mintime)
                self.assertFalse(jb.proc.returncode)
            if postponed1.tstop > postponed2.tstop + _TEST_LATENCY:
                # Canceled by chained timeout
                self.assertLessEqual(postponed1.tstop - postponed1.tstart,
                                     wtime * 1.25 + _TEST_LATENCY * 3)
                self.assertTrue(postponed1.proc.returncode)
            self.assertLessEqual(postponed2.tstop - postponed2.tstart, wtime)
            self.assertTrue(postponed2.proc.returncode)
            # Execution time a bit exceeds the timeout.
            # Note: postponed3 may complete gracefully or may be terminated by timeout
            # depending on the workers revision time. Most likely the completion is graceful
            self.assertGreaterEqual(postponed3.tstop - postponed3.tstart, wtime)

            # Verify calls of the handlers
            small2.onstart.assert_called_with(small2)
            small3.onstart.assert_called_with(small3)
            self.assertTrue(2 <= small3.onstart.call_count <= 3)
            small3.ondone.assert_called_once_with(small3)
            postponed1.onstart.assert_called_with(postponed1)
            self.assertTrue(1 <= postponed1.onstart.call_count <= 2)
            postponed2.ondone.assert_not_called()
            postponed3.onstart.assert_called_with(postponed3)
            self.assertTrue(1 <= postponed3.onstart.call_count <= 2)
Example #5
0
    def test_jobMemlimSimple(self):
        """Verify memory violations caused by the single worker:

        1. Absence of side effects on the remaining jobs after bad_alloc
            (exception of the external app) caused termination of the worker process
        2. Termination of the worker process that exceeds the limit of the dedicated memory
        3. Termination of the worker process that exceeds the limit of the dedicated memory
            or had bad_alloc, and termination of all related non-smaller jobs
        """
        worktime = _TEST_LATENCY * 5  # Note: should be larger than 3*latency; 400 ms can be insufficient for the Python 3
        timeout = worktime * 2  # Note: should be larger than 3*latency
        #etimeout = max(1, _TEST_LATENCY) + (worktime * 2) // 1  # Job work time
        etimeout = (
            max(1, _TEST_LATENCY) + timeout
        ) * 3  # Execution pool timeout; Note: *3 because nonstarted jobs exist here
        assert _TEST_LATENCY * 3 < worktime < timeout and timeout < etimeout, 'Testcase parameters validation failed'

        # Note: a dedicated execution pool is required to set memlimit there
        epoolMem = 0.2  # Execution pool mem limit, Gb
        msmall = 256  # Small amount of memory for a job, bytes
        # The pool is created with max(_WPROCSMAX, 3) workers
        with ExecPool(max(_WPROCSMAX, 3),
                      latency=_TEST_LATENCY,
                      memlimit=epoolMem) as xpool:
            tstart = time.perf_counter()

            # Small job of the same category and size as the bad_alloc job below
            jmsDb = Job('jmem_small_ba',
                        args=(PYEXEC, '-c', allocDelayProg(msmall, worktime)),
                        category='cat1',
                        size=9,
                        timeout=timeout)
            # Job requesting twice _RAM_SIZE, which is expected to end with bad_alloc
            jmb = Job('jmem_badalloc',
                      args=(PYEXEC, '-c',
                            allocDelayProg(inBytes(_RAM_SIZE * 2), worktime)),
                      category='cat1',
                      size=9,
                      timeout=timeout)

            jmvsize = 5  # Size of the task violating memory constraints
            # Job requesting twice the memory limit of the execution pool
            jmv = Job('jmem_violate',
                      args=(PYEXEC, '-c',
                            allocDelayProg(inBytes(epoolMem * 2), worktime)),
                      category='cat2',
                      size=jmvsize,
                      timeout=timeout)
            # Job of the violating category having a smaller size than the violator
            jmsDvs = Job('jmem_small_v1',
                         args=(PYEXEC, '-c', allocDelayProg(msmall, worktime)),
                         category='cat2',
                         size=jmvsize - 1,
                         timeout=timeout)
            # Independent job of another category
            jms1 = Job('jmem_small_1',
                       args=(PYEXEC, '-c', allocDelayProg(None, worktime)),
                       category='cat3',
                       size=7,
                       timeout=timeout)
            # Job of the violating category having the same size as the violator
            jmsDvl1 = Job('jmem_large_v',
                          args=(PYEXEC, '-c', allocDelayProg(msmall,
                                                             worktime)),
                          category='cat2',
                          size=jmvsize,
                          timeout=timeout)
            # Independent job without any category
            jms2 = Job('jmem_small_2',
                       args=(PYEXEC, '-c', allocDelayProg(msmall, worktime)),
                       size=7,
                       timeout=timeout)
            # Job of the violating category having twice the size of the violator
            # NOTE(review): the name 'jmem_small_v1' duplicates the name of jmsDvs and looks like
            # a copy-paste leftover (jmsDvl1 is named 'jmem_large_v') - confirm whether job names should be unique
            jmsDvl2 = Job('jmem_small_v1',
                          args=(PYEXEC, '-c', allocDelayProg(None, worktime)),
                          category='cat2',
                          size=jmvsize * 2,
                          timeout=timeout)

            xpool.execute(jmsDb)
            xpool.execute(jmb)

            xpool.execute(jmv)
            xpool.execute(jmsDvs)
            xpool.execute(jms1)
            xpool.execute(jmsDvl1)
            xpool.execute(jms2)
            xpool.execute(jmsDvl2)

            time.sleep(worktime /
                       3)  # Wait for the Job starting and memory allocation
            # Verify exec pool completion before the timeout
            self.assertTrue(xpool.join(etimeout))
            etime = time.perf_counter() - tstart  # Execution time

            # Verify timings
            self.assertLess(etime, etimeout)
            self.assertGreaterEqual(
                jmsDb.tstop - jmsDb.tstart, worktime
            )  # Note: internal errors in the external processes should not affect related jobs
            self.assertTrue(jmb.proc.returncode
                            )  # bad_alloc causes a non-zero termination code
            self.assertLess(
                jmb.tstop - jmb.tstart, worktime
            )  # Early termination caused by the bad_alloc (internal error in the external process)

            self.assertLess(
                jmv.tstop - jmv.tstart, worktime
            )  # Early termination by the memory constraints violation
            self.assertGreaterEqual(
                jmsDvs.tstop - jmsDvs.tstart, worktime
            )  # Smaller size of the job chained to the violating origin should not cause termination
            self.assertGreaterEqual(
                jms1.tstop - jms1.tstart,
                worktime)  # Independent job should have graceful completion
            self.assertFalse(jms1.proc.returncode
                             )  # Error code is 0 on the graceful completion
            if _CHAINED_CONSTRAINTS:
                self.assertIsNone(
                    jmsDvl1.tstart
                )  # Postponed job should be terminated before being started by the chained relation on the memory-violating origin
                self.assertIsNone(
                    jmsDvl2.tstart
                )  # Postponed job should be terminated before being started by the chained relation on the memory-violating origin
            #self.assertLess(jmsDvl1.tstop - jmsDvl1.tstart, worktime)  # Early termination by the chained relation to the mem violated origin
            self.assertGreaterEqual(
                jms2.tstop - jms2.tstart,
                worktime)  # Independent job should have graceful completion