forked from tobixen/thrash-protect
/
thrash-protect.py
executable file
·426 lines (372 loc) · 14.9 KB
/
thrash-protect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
#!/usr/bin/python
"""
Simple-Stupid user-space program protecting a linux host from thrashing.
See the README for details.
Project home: https://github.com/tobixen/thrash-protect
This is a rapid prototype implementation; a rewrite in C is being considered.
This was written for python3 (there exists a python24-branch, but
it won't be maintained).  python3 is not available on a lot of
servers, and the compatibility shims right below this docstring seem
to be the only snags when running on python 2.5.
"""
from __future__ import with_statement
## Compatibility shims: probe for the fine-grained exception names by
## referencing them.  Where the interpreter does not define them, bind
## the names to the broader classes so that the ``except`` clauses
## further down in this file still work.
try:
    ProcessLookupError
except NameError:
    ## no such builtin here - settle for the generic OS error
    ProcessLookupError = OSError

try:
    FileNotFoundError
except NameError:
    ## no such builtin here - settle for the generic I/O error
    FileNotFoundError = IOError
__version__ = "0.8.1"
__author__ = "Tobias Brox"
__copyright__ = "Copyright 2013, Tobias Brox"
__license__ = "GPL"
__maintainer__ = "Tobias Brox"
__email__ = "tobias@redpill-linpro.com"
__status__ = "Development"
__product__ = "thrash-protect"
#########################
## Configuration section
#########################
import os
import time
import glob
import signal
import logging
import random # for the test_mode
def getenv(name, default=None, cvt=None):
    """
    Get an environment value and pass it through the `cvt` function.

    name    -- name of the environment variable to look up
    default -- returned as-is (not converted) when the variable is unset
    cvt     -- optional callable applied to the raw string value

    Only the environment lookup is guarded: an exception raised by
    `cvt` itself (including a KeyError) propagates to the caller
    instead of being mistaken for "variable not set".
    """
    try:
        val = os.environ[name]
    except KeyError:
        return default
    return cvt(val) if cvt else val
def intenv(name, default=None):
    """
    Look up environment variable `name` and convert it with int().

    `default` is handed on to getenv() and comes back unconverted when
    the variable is not set.
    """
    return getenv(name, default, cvt=int)
def floatenv(name, default=None):
    """
    Look up environment variable `name` and convert it with float().

    `default` is handed on to getenv() and comes back unconverted when
    the variable is not set.
    """
    return getenv(name, default, cvt=float)
## Sleep interval, in seconds
interval = floatenv('THRASH_PROTECT_INTERVAL', 0.5)

## Number of acceptable page swaps during the above interval
swap_page_threshold = intenv('THRASH_PROTECT_SWAP_PAGE_THRESHOLD', 512)

## After X number of major pagefaults, we should initiate a process scanning
pgmajfault_scan_threshold = intenv('THRASH_PROTECT_PGMAJFAULT_SCAN_THRESHOLD',
                                   swap_page_threshold)

## process name whitelist (whitespace separated); the built-in default
## list is used when the variable is unset or empty
cmd_whitelist = os.getenv('THRASH_PROTECT_CMD_WHITELIST', '').split()
_default_whitelist = ['sshd', 'bash', 'xinit', 'X', 'spectrwm', 'screen',
                      'SCREEN', 'mutt', 'ssh', 'xterm', 'rxvt', 'urxvt']
cmd_whitelist = cmd_whitelist if cmd_whitelist else _default_whitelist

## process name blacklist (whitespace separated).  split() without an
## argument is used, like for the whitelist, so that an unset variable
## gives an empty list rather than a list holding one empty string.
cmd_blacklist = os.getenv('THRASH_PROTECT_CMD_BLACKLIST', '').split()

## Scores of blacklisted commands are multiplied by this number
blacklist_score_multiplier = intenv('THRASH_PROTECT_BLACKLIST_SCORE_MULTIPLIER',
                                    16)

## Scores of whitelisted commands are divided by this number.  It has
## its own environment variable; the default is derived from the
## blacklist multiplier.
whitelist_score_divider = intenv('THRASH_PROTECT_WHITELIST_SCORE_DIVIDER',
                                 blacklist_score_multiplier * 4)

## Unfreezing processes: Ratio of POP compared to GET (integer)
unfreeze_pop_ratio = int(os.getenv('THRASH_PROTECT_UNFREEZE_POP_RATIO', '5'))

## test_mode - if test_mode and not random.getrandbits(test_mode),
## then pretend we're thrashed
test_mode = int(os.getenv('THRASH_PROTECT_TEST_MODE', '0'))

## Poor mans logging.  Should eventually set up the logging module
#debug = print
debug = lambda foo: None
def get_pagefaults(vmstat_path='/proc/vmstat'):
    """
    Return the ``pgmajfault`` counter found in the vmstat file.

    vmstat_path -- file to read; defaults to /proc/vmstat

    The value is taken from the second whitespace-separated field of
    the line, so every digit of the counter is included regardless of
    its width.  Returns 0 if the file has no ``pgmajfault`` line, so
    that callers can always do arithmetic on the result.
    """
    with open(vmstat_path) as vmstat:
        for line in vmstat:
            fields = line.split()
            if len(fields) >= 2 and fields[0] == 'pgmajfault':
                return int(fields[1])
    return 0
def get_swapcount(vmstat_path='/proc/vmstat'):
    """
    Return a tuple with the value of every ``pswp*`` counter in the
    vmstat file, in the order the lines appear in the file.

    vmstat_path -- file to read; defaults to /proc/vmstat

    The file is closed before returning; this function is called on
    every iteration of the main loop.
    """
    with open(vmstat_path) as vmstat:
        return tuple(int(line.split(' ', 1)[1])
                     for line in vmstat
                     if line.startswith('pswp'))
def check_swap_threshold(curr, prev):
    """
    Decide whether the swap counters indicate thrashing and maintain
    the global busy_runs counter accordingly.

    curr, prev -- two-element sequences of swap counters (current and
                  previous sample)

    Returns True if we have bidirectional traffic to swap, or if we
    have a big one-directional flow of data.  busy_runs is incremented
    on True; on False it is decremented unless it is already zero.  In
    test_mode a random draw may force a True result.
    """
    global busy_runs

    ## test mode: pretend we're thrashed whenever the random bits are all zero
    if test_mode and not random.getrandbits(test_mode):
        busy_runs += 1
        return True

    ## the small term keeps a zero delta in one direction from cancelling
    ## out a huge delta in the other direction
    epsilon = 1.0 / swap_page_threshold
    first_delta = curr[0] - prev[0] + epsilon
    second_delta = curr[1] - prev[1] + epsilon
    thrashing = first_delta * second_delta > 1.0

    ## Increase or decrease the busy-counter
    if thrashing:
        busy_runs += 1
    elif busy_runs:
        busy_runs -= 1
    return thrashing
def scan_processes():
    """
    Find a process to freeze by cycling through the scan methods.

    The global scan_method_count decides which method is tried first
    and is bumped after every attempt.  At most one attempt per method
    is made; the first truthy result is returned.  Returns None when
    every method came up empty.
    """
    global scan_method_count
    debug("scan_processes")

    ## sorted from cheap to expensive.  Also, it is surely smart to be quick
    ## on refreezing a recently unfrozen process if host starts thrashing again.
    scan_methods = (find_last_unfrozen_process,
                    scan_processes_oom_score,
                    scan_processes_pagefaults)
    method_total = len(scan_methods)

    ## one round through the methods, so that we fall back on the next
    ## method if the current one fails to find anything.
    for _unused in scan_methods:
        slot = scan_method_count % method_total
        debug("scan method: %s" % slot)
        candidate = scan_methods[slot]()
        scan_method_count += 1
        if candidate:
            return candidate

    debug("found nothing to stop!? :-(")
def scan_processes_oom_score(proc_root='/proc'):
    """
    Return the pid of the process with the highest weighted oom_score,
    or None if no candidate was found.

    proc_root -- directory holding the per-process directories;
                 defaults to /proc

    For every numerically named directory the ``oom_score`` and
    ``stat`` files are read.  Processes whose state contains 'T' are
    left alone, and so is this process itself.  The score is divided by
    whitelist_score_divider for whitelisted commands and multiplied by
    blacklist_score_multiplier for blacklisted ones.

    The stat line is parsed around the parentheses enclosing the
    command name, so a command name containing spaces does not shift
    the state field.  Processes that disappear or give unparsable data
    while being inspected are skipped.
    """
    _max = 0
    worstpid = None
    own_pid = os.getpid()
    for fn in glob.glob(os.path.join(proc_root, '*', 'oom_score')):
        proc_dir = os.path.dirname(fn)
        try:
            pid = int(os.path.basename(proc_dir))
        except ValueError:
            ## directory name is not a pid
            continue
        ## ignore self
        if pid == own_pid:
            continue
        try:
            with open(fn, 'r') as oom_score_file:
                oom_score = int(oom_score_file.readline())
            with open(os.path.join(proc_dir, 'stat'), 'r') as stat_file:
                stat_line = stat_file.readline()
            ## "pid (comm) state ..." - comm may itself contain spaces
            ## and parentheses, so locate it by the outermost parentheses
            comm_end = stat_line.rindex(')')
            comm = stat_line[stat_line.index('(') + 1:comm_end]
            state = stat_line[comm_end + 1:].split()[0]
        except (FileNotFoundError, ProcessLookupError, ValueError, IndexError):
            ## process went away, or the files were empty/garbled
            continue
        ## command name as matched against the lists: first word of comm,
        ## cut at the first '/' or ')'
        cmd = comm.split(' ')[0].split('/')[0].split(')')[0]
        if 'T' in state:
            debug("oom_score: %s, cmd: %s, pid: %s, state: %s - no touch" %
                  (oom_score, cmd, pid, state))
            continue
        if oom_score > 0:
            debug("oom_score: %s, cmd: %s, pid: %s" % (oom_score, cmd, pid))
            if cmd in cmd_whitelist:
                ## float() to avoid truncating integer division on python 2
                oom_score /= float(whitelist_score_divider)
            if cmd in cmd_blacklist:
                oom_score *= blacklist_score_multiplier
            if oom_score > _max:
                _max = oom_score
                worstpid = pid
    debug("oom scan completed - selected pid: %s" % worstpid)
    return worstpid
def find_last_unfrozen_process():
    """
    Cheapest scan method: propose the most recently resumed process.

    If a process was just resumed and the system starts thrashing
    again, it would probably be smart to freeze that process again.
    Returns last_unfrozen_pid, or None if that pid is currently in
    frozen_pids.
    """
    candidate = last_unfrozen_pid
    debug("last unfrozen_pid is %s" % candidate)
    if candidate not in frozen_pids:
        debug("last unfrozen process return - selected pid: %s" % candidate)
        return candidate
    debug("last unfrozen_pid is already frozen")
    return None
def scan_processes_pagefaults(proc_root='/proc'):
    """
    Return the pid of the process with the largest weighted increase in
    major page faults since the previous scan, or None.

    proc_root -- directory holding the per-process directories;
                 defaults to /proc

    Side effects: last_scan_pagefaults is refreshed from
    get_pagefaults(), and pagefault_by_pid is updated with the counters
    seen in this scan.  Entries for pids that were not seen in this
    scan are dropped, so the table does not grow without bound and a
    recycled pid does not inherit a stale counter.

    The stat line is parsed around the parentheses enclosing the
    command name, so a command name containing spaces does not shift
    the page fault column.  Processes that disappear or give unparsable
    data while being inspected are skipped.
    """
    ## TODO: consider using oom_score instead of major page faults?
    global last_scan_pagefaults
    last_scan_pagefaults = get_pagefaults()
    _max = 0
    worstpid = None
    own_pid = os.getpid()
    seen_pids = set()
    for fn in glob.glob(os.path.join(proc_root, '*', 'stat')):
        try:
            pid = int(os.path.basename(os.path.dirname(fn)))
        except ValueError:
            ## directory name is not a pid
            continue
        try:
            with open(fn, 'r') as stat_file:
                stat_line = stat_file.readline()
            ## "pid (comm) state ..." - comm may itself contain spaces
            ## and parentheses, so locate it by the outermost parentheses
            comm_end = stat_line.rindex(')')
            comm = stat_line[stat_line.index('(') + 1:comm_end]
            ## majflt is the 12th field of the line, i.e. the 10th
            ## field after the command name
            majflt = int(stat_line[comm_end + 1:].split()[9])
        except (FileNotFoundError, ProcessLookupError, ValueError, IndexError):
            ## process went away, or the file was empty/garbled
            continue
        seen_pids.add(pid)
        ## command name as matched against the lists: first word of comm,
        ## cut at the first '/' or ')'
        cmd = comm.split(' ')[0].split('/')[0].split(')')[0]
        if majflt > 0:
            prev = pagefault_by_pid.get(pid, 0)
            pagefault_by_pid[pid] = majflt
            diff = majflt - prev
            if test_mode:
                diff += random.getrandbits(3)
            if not diff:
                continue
            if cmd in cmd_blacklist:
                diff *= blacklist_score_multiplier
            if cmd in cmd_whitelist:
                ## float() to avoid truncating integer division on python 2
                diff /= float(whitelist_score_divider)
            if diff > _max:
                ## ignore self
                if pid == own_pid:
                    continue
                _max = diff
                worstpid = pid
                debug("pagefault score: %s, cmd: %s, pid: %s" % (diff, cmd, pid))
    ## garbage collection: forget pids that were not seen in this scan
    for stale_pid in [p for p in pagefault_by_pid if p not in seen_pids]:
        del pagefault_by_pid[stale_pid]
    debug("pagefault scan completed - selected pid: %s" % worstpid)
    ## give a bit of protection against whitelisted and innocent processes
    ## being stopped
    ## (TODO: hardcoded constants)
    if _max > 4.0 / (busy_runs + 1.0):
        return worstpid
    return None
## hard coded logic as for now. One state file and one log file.
## state file can be monitored, i.e. through nagios. todo: support
## smtp etc.
def log_frozen(pid):
    """
    Record that `pid` was frozen.

    Appends a timestamped line, including the current frozen_pids
    list, to the log file, and rewrites the state file with the
    space-separated frozen pids.
    """
    with open("/var/log/thrash-protect.log", 'a') as logfile:
        entry = "%s - frozen pid %s - frozen list: %s\n" % (
            time.time(), pid, frozen_pids)
        logfile.write(entry)
    with open("/tmp/thrash-protect-frozen-pid-list", "w") as statefile:
        statefile.write(" ".join(map(str, frozen_pids)))
def log_unfrozen(pid):
    """
    Record that `pid` was resumed.

    Appends a timestamped line to the log file.  The state file is
    rewritten with the remaining frozen pids, or removed when no
    frozen pids are left (a state file that is already missing is
    fine).
    """
    with open("/var/log/thrash-protect.log", 'a') as logfile:
        logfile.write("%s - unfrozen pid %s\n" % (time.time(), pid))

    if not frozen_pids:
        ## nothing frozen anymore - the state file should go away
        try:
            os.unlink("/tmp/thrash-protect-frozen-pid-list")
        except FileNotFoundError:
            pass
        return

    with open("/tmp/thrash-protect-frozen-pid-list", "w") as statefile:
        statefile.write(" ".join(map(str, frozen_pids)) + "\n")
def freeze_something():
    """
    Pick a process through scan_processes() and send it SIGSTOP.

    Does nothing if no process was picked or if the picked process is
    already gone.  Otherwise the pid is added to frozen_pids (unless it
    is there already), the freeze is logged and num_freezes is bumped.
    """
    global num_freezes
    victim = scan_processes()
    if not victim:
        ## the scan came up with nothing
        return
    try:
        os.kill(victim, signal.SIGSTOP)
    except ProcessLookupError:
        ## process disappeared. ignore failure
        return
    if victim not in frozen_pids:
        frozen_pids.append(victim)
    ## Logging after freezing, as logging itself may be resource
    ## and timeconsuming.  Perhaps we should even fork it out.
    debug("going to freeze %s" % victim)
    log_frozen(victim)
    num_freezes += 1
def unfreeze_something():
    """
    Resume one of the frozen processes, if there are any.

    The pid is taken from the end of frozen_pids when num_unfreezes is
    not a multiple of unfreeze_pop_ratio, otherwise from the front.
    SIGCONT is sent to the pid and then to the pids equal to its
    process group id and session id.  The pid is remembered in
    last_unfrozen_pid, the event is logged and num_unfreezes is bumped.
    """
    global frozen_pids
    global num_unfreezes
    global last_unfrozen_pid

    if not frozen_pids:
        return

    ## queue or stack?  Seems like both approaches are problematic
    if num_unfreezes % unfreeze_pop_ratio:
        pid_to_unfreeze = frozen_pids.pop()
    else:
        pid_to_unfreeze, frozen_pids = frozen_pids[0], frozen_pids[1:]

    try:
        debug("going to unfreeze %s" % pid_to_unfreeze)
        os.kill(pid_to_unfreeze, signal.SIGCONT)
        ## Sometimes the parent process also gets suspended.
        ## TODO: we're doing some simple assumptions here:
        ## 1) this problem only applies to process group id or session id
        ##    (we probably need to walk through all the parents - or maybe
        ##    just the ppid?)
        ## 2) it is harmless to CONT the pgid and sid.  This may not
        ##    always be so.
        ## To correct this, we may need to traverse parents
        ## (peeking into /proc/<pid>/status recursively) prior to freezing
        ## the proc.  All parents that aren't already frozen should be
        ## added to the unfreeze stack.
        for lookup in (os.getpgid, os.getsid):
            os.kill(lookup(pid_to_unfreeze), signal.SIGCONT)
    except ProcessLookupError:
        ## ignore failure
        pass

    last_unfrozen_pid = pid_to_unfreeze
    log_unfrozen(pid_to_unfreeze)
    num_unfreezes += 1
def thrash_protect():
    """
    Main loop: sample the swap and page fault counters, freeze a
    process when thrashing is detected, unfreeze one when things have
    been quiet, and sleep in between.  Never returns.

    Before entering the loop a best-effort mlockall() is attempted.
    The call is made unconditionally (not inside an ``assert``, which
    would be skipped when running with ``python -O``); any failure is
    logged as a warning and otherwise ignored.
    """
    global last_observed_swapcount
    global busy_runs
    global last_time
    global scan_method_count

    ## A best-effort attempt on running mlockall().
    ## NOTE(review): 3 is presumably MCL_CURRENT | MCL_FUTURE; the
    ## numeric values are platform specific - confirm for the target.
    try:
        import ctypes
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
        if libc.mlockall(ctypes.c_int(3)) != 0:
            raise OSError("mlockall() returned non-zero")
    except Exception:
        logging.warning(
            "failed to do mlockall() - this makes the program vulnerable of "
            "being swapped out in an extreme thrashing event", exc_info=True)

    while True:
        current_swapcount = get_swapcount()
        current_pagefaults = get_pagefaults()
        busy = check_swap_threshold(current_swapcount, last_observed_swapcount)

        ## If we're thrashing, then freeze something.
        if busy:
            freeze_something()
        elif not busy_runs and current_swapcount == last_observed_swapcount:
            ## If no swapping has been observed for a while then
            ## unfreeze something.
            scan_method_count = 0
            unfreeze_something()

        diff_pagefaults = current_pagefaults - last_scan_pagefaults
        if diff_pagefaults > pgmajfault_scan_threshold:
            ## If we've had a lot of major page faults, refresh our state
            ## on major page faults.
            scan_processes_pagefaults()

        last_observed_swapcount = current_swapcount

        ## If the script is significantly delayed it's most likely due to
        ## thrashing, and we should increase the busy counter and sleep less.
        delay = time.time() - last_time
        debug("delay in processing: %s" % delay)

        ## if delay is significant, bump busy_runs.  TODO: hard-coded
        ## constants ... should be moved to configuration
        if delay > interval / 16.0:
            busy_runs += 1
        last_time = time.time()
        debug("interval: %s busy_runs: %s time: %s frozen pids: %s" % (
            interval, busy_runs, time.time(), frozen_pids))

        ## If we haven't been busy for a while, or if this run apparently was
        ## non-busy, then sleep a bit.
        if not busy_runs or not busy:
            ## TODO: bitshifting would probably be better;
            ## sleep_interval_microseconds = interval_microseconds >> busy_runs
            sleep_interval = interval / (busy_runs + 1.0)
            debug("going to sleep %s" % sleep_interval)
            time.sleep(sleep_interval)
            ## time spent beyond the requested sleep
            delay = time.time() - last_time - sleep_interval
            last_time = time.time()
            debug("slept: %s + delay %s" % (sleep_interval, delay))
            ## if delay is significant, bump busy_runs.
            ## TODO: hard-coded constants
            if delay > interval / 16.0:
                busy_runs += 1
if __name__ == '__main__':
    ## Globals - the state shared between the functions above.

    ## swap and page fault bookkeeping
    last_observed_swapcount = get_swapcount()
    last_scan_pagefaults = 0
    pagefault_by_pid = {}

    ## freeze/unfreeze bookkeeping
    frozen_pids = []
    last_unfrozen_pid = None
    num_freezes = 0
    num_unfreezes = 0

    ## scheduling of scans and sleeps
    scan_method_count = 0
    busy_runs = 0
    last_time = time.time()

    ## Command line handling; only --version is offered.
    try:
        import argparse
    except ImportError:
        ## argparse is only available from 2.7 and up
        args = None
    else:
        p = argparse.ArgumentParser(
            description="protect a linux host from thrashing")
        p.add_argument('--version', action='version',
                       version='%(prog)s ' + __version__)
        args = p.parse_args()

    thrash_protect()