def test_jobMem(self):
	"""Test job virtual memory evaluation"""
	worktime = _TEST_LATENCY * 6  # Note: should be larger than 3*latency
	timeout = worktime * 2  # Note: should be larger than 3*latency
	#etimeout = max(1, _TEST_LATENCY) + (worktime * 2) // 1  # Job work time
	etimeout = (max(1, _TEST_LATENCY) + timeout) * 3  # Execution pool timeout; Note: *3 because nonstarted jobs exist here and are postponed twice
	assert _TEST_LATENCY * 3 < worktime < timeout < etimeout, 'Testcase parameters validation failed'

	# Start at least 3 simultaneous workers
	with ExecPool(max(_WPROCSMAX, 3), latency=_TEST_LATENCY) as xpool:
		amem = 0.02  # Memory allocated directly in the process, Gb
		camem = 0.07  # Memory allocated in the child process, Gb
		duration = worktime / 3  # Duration in sec
		job = Job('jmem_proc', args=(PYEXEC, '-c', TestProcMemTree.allocAndSpawnProg(
			allocDelayProg(inBytes(amem), duration), allocDelayProg(inBytes(camem), duration)))
			, timeout=timeout, memkind=0, ondone=mock.MagicMock())
		jobx = Job('jmem_max-subproc', args=(PYEXEC, '-c', TestProcMemTree.allocAndSpawnProg(
			allocDelayProg(inBytes(amem), duration), allocDelayProg(inBytes(camem), duration)))
			, timeout=timeout, memkind=1, ondone=mock.MagicMock())
		jobtr = Job('jmem_tree', args=(PYEXEC, '-c', TestProcMemTree.allocAndSpawnProg(
			allocDelayProg(inBytes(amem), duration), allocDelayProg(inBytes(camem), duration)))
			, timeout=timeout, memkind=2, ondone=mock.MagicMock())

		# Verify that a non-started job raises an exception on the memory update request
		if _LIMIT_WORKERS_RAM:
			self.assertRaises(AttributeError, job._updateMem)
		else:
			self.assertRaises(NameError, job._updateMem)

		tstart = time.perf_counter()
		xpool.execute(job)
		xpool.execute(jobx)
		xpool.execute(jobtr)
		time.sleep(duration * 1.9)
		pmem = job._updateMem()
		xmem = jobx._updateMem()
		tmem = jobtr._updateMem()
		# Verify memory consumption
		print('Memory consumption in Mb, proc_mem: {pmem:.3g}, max_procInTree_mem: {xmem:.3g}, procTree_mem: {tmem:.3g}'
			.format(pmem=pmem * 1000, xmem=xmem * 1000, tmem=tmem * 1000))
		self.assertTrue(pmem < xmem < tmem)

		# Verify exec pool completion before the timeout
		time.sleep(worktime / 3)  # Wait for the jobs to complete their memory allocation
		self.assertTrue(xpool.join(etimeout))
		etime = time.perf_counter() - tstart  # Execution time
		# Verify jobs execution time
		self.assertLessEqual(jobtr.tstop - jobtr.tstart, etime)
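# The helpers used above and below (inBytes, inGigabytes, allocDelayProg) are defined
# elsewhere in the test suite. A minimal sketch of what they could look like is given
# here purely for reference; the binary Gb conversion and the generated program text
# are assumptions, not the suite's actual implementation.
def inBytes(gb):
	"""Convert a size in gigabytes to bytes"""
	return int(gb * 2**30)

def inGigabytes(nbytes):
	"""Convert a size in bytes to gigabytes"""
	return nbytes / 2**30

def allocDelayProg(size, duration):
	"""Return the text of a Python program that allocates roughly `size` bytes
	(skipped if size is None) and then sleeps for `duration` sec (sketch)
	"""
	return """import time
import array

size = {size}
if size is not None:
	# Allocate an array-backed buffer of approximately the requested size in bytes
	buffer = array.array('b', [0]) * max(int(size), 1)
time.sleep({duration})
""".format(size=size, duration=duration)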
def test_psutilPTMem(self):
	"""Test psutil evaluation of the process tree memory consumption"""
	amem = 0.02  # Memory allocated directly in the process, Gb
	camem = 0.07  # Memory allocated in the child process, Gb
	duration = 0.2  # Duration in sec
	proc = subprocess.Popen(args=(PYEXEC, '-c', TestProcMemTree.allocAndSpawnProg(
		allocDelayProg(inBytes(amem), duration), allocDelayProg(inBytes(camem), duration))))
	time.sleep(duration * 2)
	#proc.wait()  # Wait for the process termination
	try:
		up = psutil.Process(proc.pid)
	except psutil.Error as err:
		print('WARNING, psutil.Process() failed: ', err, file=sys.stderr)
		return
	mem = inGigabytes(up.memory_info().vms) * 1000  # Mb; Virtual Memory Size
	rmem = inGigabytes(up.memory_info().rss) * 1000  # Mb; Resident Set Size
	umem = inGigabytes(up.memory_full_info().vms) * 1000  # Mb; VMS from the full memory info
	urmem = inGigabytes(up.memory_full_info().rss) * 1000  # Mb; RSS from the full memory info

	acmem = mem  # Accumulated VMS of the process tree, Mb
	armem = rmem  # Accumulated RSS of the process tree, Mb
	aumem = umem
	aurmem = urmem
	cxmem = 0  # VMS of the heaviest child, Mb
	cxrmem = 0
	cxumem = 0
	cxurmem = 0
	cxpid = None  # PID of the heaviest child
	cnum = 0  # The number of child processes
	for ucp in up.children(recursive=True):
		cnum += 1
		cmem = inGigabytes(ucp.memory_info().vms) * 1000  # Mb; Virtual Memory Size
		crmem = inGigabytes(ucp.memory_info().rss) * 1000  # Mb; Resident Set Size
		cumem = inGigabytes(ucp.memory_full_info().vms) * 1000  # Mb; VMS from the full memory info
		curmem = inGigabytes(ucp.memory_full_info().rss) * 1000  # Mb; RSS from the full memory info
		print('Memory in Mb of "{pname}" #{pid}: (mem: {mem:.2f}, rmem: {rmem:.2f}, umem: {umem:.2f}, urmem: {urmem:.2f})'
			.format(pname=ucp.name(), pid=ucp.pid, mem=cmem, rmem=crmem, umem=cumem, urmem=curmem))
		# Identify consumption of the heaviest child (by absolute VMS)
		if cxmem < cmem:
			cxmem = cmem
			cxrmem = crmem
			cxumem = cumem
			cxurmem = curmem
			cxpid = ucp.pid
		acmem += cmem
		armem += crmem
		aumem += cumem
		aurmem += curmem

	amem *= 1000  # Mb
	camem *= 1000  # Mb
	proc.wait()  # Wait for the process termination
	print('Memory in Mb:\n allocated for the proc #{pid}: {amem}, child: {camem}, total: {tamem}'
		'\n psutil proc #{pid} (mem: {mem:.2f}, rmem: {rmem:.2f}, umem: {umem:.2f}, urmem: {urmem:.2f})'
		'\n psutil proc #{pid} tree ({cnum} subprocs) heaviest child #{cxpid}'
		' (mem: {cxmem:.2f}, rmem: {cxrmem:.2f}, umem: {cxumem:.2f}, urmem: {cxurmem:.2f})'
		'\n psutil proc #{pid} tree (mem: {acmem:.2f}, rmem: {armem:.2f}, umem: {aumem:.2f}, urmem: {aurmem:.2f})'
		.format(pid=proc.pid, amem=amem, camem=camem, tamem=amem + camem
		, mem=mem, rmem=rmem, umem=umem, urmem=urmem
		, cnum=cnum, cxpid=cxpid, cxmem=cxmem, cxrmem=cxrmem, cxumem=cxumem, cxurmem=cxurmem
		, acmem=acmem, armem=armem, aumem=aumem, aurmem=aurmem))
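# TestProcMemTree.allocAndSpawnProg() is called above but not shown in this section.
# Presumably it is a @staticmethod of TestProcMemTree that composes a program text
# performing the first allocation in the current process and the second one in a
# spawned child. A sketch under that assumption (as a plain function for brevity):
def allocAndSpawnProg(progAlloc, progSpawn):
	"""Return the text of a Python program that executes the progAlloc program text
	in the current process and the progSpawn program text in a child process (sketch)
	"""
	return """import subprocess
import sys

# Spawn the child first so that the parent and child allocations overlap in time
child = subprocess.Popen((sys.executable, '-c', {progSpawn!r}))
exec({progAlloc!r})
child.wait()
""".format(progAlloc=progAlloc, progSpawn=progSpawn)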
def test_jobMemlimGroupSimple(self):
	"""Verify memory violations caused by a group of workers but without chained jobs

	Reduction of the number of worker processes when their total memory consumption
	exceeds the dedicated limit and there are
	1) either no nonstarted jobs,
	2) or the nonstarted jobs were already rescheduled by the related worker
	(absence of chained constraints)
	"""
	worktime = _TEST_LATENCY * 10  # Note: should be larger than 3*latency
	timeout = worktime * 2  # Note: should be larger than 3*latency
	#etimeout = max(1, _TEST_LATENCY) + (worktime * 2) // 1  # Job work time
	etimeout = (max(1, _TEST_LATENCY) + timeout) * 3  # Execution pool timeout; Note: *3 because nonstarted jobs exist here and are postponed twice
	assert _TEST_LATENCY * 3 < worktime < timeout < etimeout, 'Testcase parameters validation failed'

	# Note: we need another execution pool to set the memlimit there
	epoolMem = 0.15  # Execution pool mem limit, Gb
	msmall = inBytes(0.025)  # Small amount of memory for a job
	# Note: the actual Python app consumes ~51 Mb for the allocated ~25 Mb
	# Start at least 3 simultaneous workers
	with ExecPool(max(_WPROCSMAX, 3), latency=_TEST_LATENCY, memlimit=epoolMem) as xpool:
		tstart = time.perf_counter()

		jgms1 = Job('jgroup_mem_small_1', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, size=9, timeout=timeout, onstart=mock.MagicMock())
		jgms2 = Job('jgroup_mem_small_2', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, size=9, timeout=timeout)
		jgms3 = Job('jgroup_mem_small_3', args=(PYEXEC, '-c', allocDelayProg(msmall * 1.25, worktime))
			, size=5, timeout=timeout, onstart=mock.MagicMock(), ondone=mock.MagicMock())
		jgmsp1 = Job('jgroup_mem_small_postponed_1', args=(PYEXEC, '-c', allocDelayProg(msmall * 0.85, worktime))
			, size=4, timeout=timeout, onstart=mock.MagicMock())
		jgmsp2 = Job('jgroup_mem_small_postponed_2_to', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, timeout=worktime / 2, ondone=mock.MagicMock())

		xpool.execute(jgms1)
		xpool.execute(jgms2)
		xpool.execute(jgms3)
		xpool.execute(jgmsp1)
		xpool.execute(jgmsp2)

		time.sleep(worktime / 3)  # Wait for the jobs to start and allocate memory
		# Verify exec pool completion before the timeout
		self.assertTrue(xpool.join(etimeout))  # All jobs should be completed
		etime = time.perf_counter() - tstart  # Execution time

		# Verify timings, graceful completion of all jobs except the last one
		self.assertLess(etime, etimeout)
		self.assertGreaterEqual(jgms1.tstop - jgms1.tstart, worktime)
		self.assertFalse(jgms1.proc.returncode)
		self.assertGreaterEqual(jgms2.tstop - jgms2.tstart, worktime)
		self.assertFalse(jgms2.proc.returncode)
		self.assertGreaterEqual(jgms3.tstop - jgms3.tstart, worktime)
		self.assertFalse(jgms3.proc.returncode)
		self.assertGreaterEqual(jgmsp1.tstop - jgmsp1.tstart, worktime)
		self.assertFalse(jgmsp1.proc.returncode)
		self.assertLess(jgmsp2.tstop - jgmsp2.tstart, worktime)
		self.assertTrue(jgmsp2.proc.returncode)
		# Check the last completed job
		self.assertTrue(jgms3.tstop <= tstart + etime)

		# Verify handlers calls
		jgms1.onstart.assert_called_once_with(jgms1)
		jgms3.onstart.assert_called_once_with(jgms3)
		jgms3.ondone.assert_called_once_with(jgms3)
		jgmsp1.onstart.assert_called_with(jgmsp1)
		self.assertTrue(1 <= jgmsp1.onstart.call_count <= 2)
		jgmsp2.ondone.assert_not_called()
def test_jobMemlimGroupChained(self):
	"""Verify memory violations caused by a group of workers having chained jobs

	Rescheduling of the worker processes when their total memory consumption
	exceeds the dedicated limit and there are some nonstarted jobs of smaller
	size and the same category that
	1) were not rescheduled by the non-heavier worker,
	2) were rescheduled by the non-heavier worker.
	"""
	# Note: for one of the tests timeout=worktime/2 is used, so use a multiplier of at least 3*2 = 6
	worktime = _TEST_LATENCY * 10  # Note: should be larger than 3*latency
	timeout = worktime * 2  # Note: should be larger than 3*latency
	#etimeout = max(1, _TEST_LATENCY) + (worktime * 2) // 1  # Job work time
	etimeout = (max(1, _TEST_LATENCY) + timeout) * 4  # Execution pool timeout; Note: *4 because nonstarted jobs exist here and are postponed twice
	assert _TEST_LATENCY * 3 < worktime / 2 and worktime < timeout < etimeout, 'Testcase parameters validation failed'

	# Note: we need another execution pool to set the memlimit there
	epoolMem = 0.15  # Execution pool mem limit, Gb
	msmall = inBytes(0.025)  # Small amount of memory for a job
	# Note: the actual Python app consumes ~51 Mb for the allocated ~25 Mb
	# Start at least 4 simultaneous workers
	with ExecPool(max(_WPROCSMAX, 4), latency=_TEST_LATENCY, memlimit=epoolMem) as xpool:
		tstart = time.perf_counter()

		jgms1 = Job('jcgroup_mem_small_1', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, size=5, timeout=timeout)
		tjms2 = worktime / 3
		jgms2 = Job('jcgroup_mem_small_2s', args=(PYEXEC, '-c', allocDelayProg(msmall, tjms2))
			, size=5, timeout=timeout, onstart=mock.MagicMock())
		jgms3 = Job('jcgroup_mem_small_3g', args=(PYEXEC, '-c', allocDelayProg(msmall * 1.5, worktime))
			, category="cat_sa", size=5, timeout=timeout, onstart=mock.MagicMock(), ondone=mock.MagicMock())
		jgmsp1 = Job('jcgroup_mem_small_postponed_1m', args=(PYEXEC, '-c', allocDelayProg(msmall * 1.2, worktime * 1.25))
			, category="cat_toch", size=6, timeout=timeout, onstart=mock.MagicMock())
		jgmsp2 = Job('jcgroup_mem_small_postponed_2_to', args=(PYEXEC, '-c', allocDelayProg(msmall * 0.8, worktime))
			, category="cat_toch", size=4, timeout=worktime / 2, ondone=mock.MagicMock())
		jgmsp3 = Job('jcgroup_mem_small_postponed_3', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, size=9, timeout=worktime, onstart=mock.MagicMock())

		xpool.execute(jgms1)
		xpool.execute(jgms2)
		xpool.execute(jgms3)
		xpool.execute(jgmsp1)
		xpool.execute(jgmsp2)
		xpool.execute(jgmsp3)

		time.sleep(worktime / 4)  # Wait for the jobs to start and allocate memory
		# Verify exec pool completion before the timeout
		self.assertTrue(xpool.join(etimeout))
		etime = time.perf_counter() - tstart  # Execution time

		# Verify timings, graceful completion of all jobs except the last one
		self.assertLess(etime, etimeout)
		self.assertGreaterEqual(jgms1.tstop - jgms1.tstart, worktime)
		self.assertFalse(jgms1.proc.returncode)
		self.assertGreaterEqual(jgms2.tstop - jgms2.tstart, tjms2)
		self.assertFalse(jgms2.proc.returncode)
		self.assertGreaterEqual(jgms3.tstop - jgms3.tstart, worktime)
		self.assertFalse(jgms3.proc.returncode)
		if jgmsp1.tstop > jgmsp2.tstop + _TEST_LATENCY:
			self.assertLessEqual(jgmsp1.tstop - jgmsp1.tstart, worktime * 1.25 + _TEST_LATENCY * 3)  # Canceled by the chained timeout
			self.assertTrue(jgmsp1.proc.returncode)
		self.assertLessEqual(jgmsp2.tstop - jgmsp2.tstart, worktime)
		self.assertTrue(jgmsp2.proc.returncode)
		self.assertGreaterEqual(jgmsp3.tstop - jgmsp3.tstart, worktime)  # Execution time slightly exceeds the timeout
		# Note: jgmsp3 may complete gracefully or may be terminated by the timeout
		# depending on the workers revision time; most likely the completion is graceful
		## Check the last completed job
		#self.assertTrue(jgms3.tstop < jgmsp1.tstop < tstart + etime)
		# Note: the heavier job is rescheduled after the more lightweight one

		# Verify handlers calls
		jgms2.onstart.assert_called_with(jgms2)
		jgms3.onstart.assert_called_with(jgms3)
		self.assertTrue(2 <= jgms3.onstart.call_count <= 3)
		jgms3.ondone.assert_called_once_with(jgms3)
		jgmsp1.onstart.assert_called_with(jgmsp1)
		self.assertTrue(1 <= jgmsp1.onstart.call_count <= 2)
		jgmsp2.ondone.assert_not_called()
		jgmsp3.onstart.assert_called_with(jgmsp3)
		self.assertTrue(1 <= jgmsp3.onstart.call_count <= 2)
def test_jobMemlimSimple(self):
	"""Verify memory violations caused by the single worker:

	1. Absence of side effects on the remaining jobs after bad_alloc
	(exception in the external app) caused termination of the worker process
	2. Termination of the worker process that exceeds the limit of the dedicated memory
	3. Termination of the worker process that exceeds the limit of the dedicated memory
	or had bad_alloc, and termination of all related non-smaller jobs
	"""
	worktime = _TEST_LATENCY * 5  # Note: should be larger than 3*latency; 400 ms can be insufficient for Python 3
	timeout = worktime * 2  # Note: should be larger than 3*latency
	#etimeout = max(1, _TEST_LATENCY) + (worktime * 2) // 1  # Job work time
	etimeout = (max(1, _TEST_LATENCY) + timeout) * 3  # Execution pool timeout; Note: *3 because nonstarted jobs exist here
	assert _TEST_LATENCY * 3 < worktime < timeout < etimeout, 'Testcase parameters validation failed'

	# Note: we need another execution pool to set the memlimit there
	epoolMem = 0.2  # Execution pool mem limit, Gb
	msmall = 256  # Small amount of memory for a job, bytes
	# Start at least 3 simultaneous workers
	with ExecPool(max(_WPROCSMAX, 3), latency=_TEST_LATENCY, memlimit=epoolMem) as xpool:
		tstart = time.perf_counter()

		jmsDb = Job('jmem_small_ba', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, category='cat1', size=9, timeout=timeout)
		jmb = Job('jmem_badalloc', args=(PYEXEC, '-c', allocDelayProg(inBytes(_RAM_SIZE * 2), worktime))
			, category='cat1', size=9, timeout=timeout)

		jmvsize = 5  # Size of the task violating the memory constraints
		jmv = Job('jmem_violate', args=(PYEXEC, '-c', allocDelayProg(inBytes(epoolMem * 2), worktime))
			, category='cat2', size=jmvsize, timeout=timeout)
		jmsDvs = Job('jmem_small_v1', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, category='cat2', size=jmvsize - 1, timeout=timeout)
		jms1 = Job('jmem_small_1', args=(PYEXEC, '-c', allocDelayProg(None, worktime))
			, category='cat3', size=7, timeout=timeout)
		jmsDvl1 = Job('jmem_large_v', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, category='cat2', size=jmvsize, timeout=timeout)
		jms2 = Job('jmem_small_2', args=(PYEXEC, '-c', allocDelayProg(msmall, worktime))
			, size=7, timeout=timeout)
		jmsDvl2 = Job('jmem_small_v1', args=(PYEXEC, '-c', allocDelayProg(None, worktime))
			, category='cat2', size=jmvsize * 2, timeout=timeout)

		xpool.execute(jmsDb)
		xpool.execute(jmb)
		xpool.execute(jmv)
		xpool.execute(jmsDvs)
		xpool.execute(jms1)
		xpool.execute(jmsDvl1)
		xpool.execute(jms2)
		xpool.execute(jmsDvl2)

		time.sleep(worktime / 3)  # Wait for the jobs to start and allocate memory
		# Verify exec pool completion before the timeout
		self.assertTrue(xpool.join(etimeout))
		etime = time.perf_counter() - tstart  # Execution time

		# Verify timings
		self.assertLess(etime, etimeout)
		self.assertGreaterEqual(jmsDb.tstop - jmsDb.tstart, worktime)  # Note: internal errors in the external processes should not affect related jobs
		self.assertTrue(jmb.proc.returncode)  # bad_alloc causes a non-zero termination code
		self.assertLess(jmb.tstop - jmb.tstart, worktime)  # Early termination caused by the bad_alloc (internal error in the external process)
		self.assertLess(jmv.tstop - jmv.tstart, worktime)  # Early termination by the memory constraints violation
		self.assertGreaterEqual(jmsDvs.tstop - jmsDvs.tstart, worktime)  # A smaller size of the chained job related to the violating origin should not cause termination
		self.assertGreaterEqual(jms1.tstop - jms1.tstart, worktime)  # Independent job should have graceful completion
		self.assertFalse(jms1.proc.returncode)  # The error code is 0 on graceful completion
		if _CHAINED_CONSTRAINTS:
			self.assertIsNone(jmsDvl1.tstart)  # Postponed job should be terminated before being started by the chained relation on the memory-violating origin
			self.assertIsNone(jmsDvl2.tstart)  # Postponed job should be terminated before being started by the chained relation on the memory-violating origin
		#self.assertLess(jmsDvl1.tstop - jmsDvl1.tstart, worktime)  # Early termination by the chained relation to the memory-violating origin
		self.assertGreaterEqual(jms2.tstop - jms2.tstart, worktime)  # Independent job should have graceful completion
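# The module-level names referenced throughout these tests (PYEXEC, _TEST_LATENCY,
# _WPROCSMAX, _RAM_SIZE, _LIMIT_WORKERS_RAM, _CHAINED_CONSTRAINTS) are defined
# elsewhere in the suite. A plausible sketch is given below purely for reference;
# the concrete values and feature-detection logic here are assumptions.
import sys
import psutil

PYEXEC = sys.executable  # Python interpreter used to run the generated test programs
_TEST_LATENCY = 0.1  # Execution pool revision latency for the tests, sec (assumed value)
_WPROCSMAX = psutil.cpu_count()  # Maximal number of worker processes (assumed to be the number of CPUs)
_RAM_SIZE = psutil.virtual_memory().total / 2**30  # Total physical RAM, Gb
_LIMIT_WORKERS_RAM = True  # Whether RAM limitation of the workers is supported (assumed)
_CHAINED_CONSTRAINTS = True  # Whether chained job constraints are supported (assumed)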