-
Notifications
You must be signed in to change notification settings - Fork 0
/
SimDisk.py
executable file
·452 lines (379 loc) · 15.9 KB
/
SimDisk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
#!/usr/bin/python
#
# Ceph - scalable distributed file system
#
# Copyright (C) Inktank
#
# This is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 2.1, as published by the Free Software
# Foundation. See file COPYING.
#
"""
This is intended to be able to simulate disk drives with a range of
smarts. It is probably pretty accurate with respect to random access
performance (which is dominated by physics) but only a crude approximation
for small-block sequential access (which is dominated by caching).
Fortunately, small-block streaming is not (for us) a typical load.
"""
import math
from units import *
class Disk:
    """ Performance Modeling Disk Simulation.

    A rotating drive described by a handful of characterizing parameters
    (the class attributes below, which instances and subclasses may
    override) plus the geometry that __init__ infers from the rotational
    speed and media transfer rate.  All times are in microseconds (us).
    """

    # fundamental drive characterizing parameters
    # (the defaults represent enterprise state-of-the-art)
    settle_read = 800       # us: optimistic read settle-down
    write_delta = 600       # us: penalty for full settle-down
    max_seek = 13000        # us: full stroke seek time
    avg_seek = 5500         # us: full stroke/3
    nr_requests = 128       # max concurrent queued operations
    do_writeback = True     # drive does write-back (vs writethrough)
    do_readahead = True     # drive does read-ahead caching
    sched_rotate = True     # latency optimization scheduling

    # pseudo-magic numbers to approximate complex behavior
    # FIX: it would be nice if these could be expressed in terms
    #      of max_sectors_kb and readahead_kb
    cache_multiplier = 96   # ideal read-ahead
    cache_max_tracks = 4    # max amount to cache
    cache_max_depth = 5     # max depth multiplier

    def __init__(self, rpm=7200, size=2 * TERA,
                 bw=150 * MEG, heads=10):
        """ Instantiate a disk simulation.
            rpm -- rotational speed (revolutions per minute)
            size -- capacity of the drive
            bw -- media transfer speed (per second)
            heads -- number of heads (tracks per cylinder)
        """
        self.desc = "%dRPM Disk" % rpm
        self.rpm = rpm
        self.size = size
        self.media_speed = bw
        self.heads = heads

        # infer the geometry: a track is what passes under a head in one
        # revolution, a cylinder is one track per head
        revs_per_sec = rpm / 60
        self.trk_size = bw / revs_per_sec
        self.cyl_size = self.trk_size * heads
        self.cylinders = size / self.cyl_size

    def cylinders_in(self, bytes):
        """ determine how many cylinders a byte range spans """
        spanned = bytes / self.cyl_size
        return 1 + spanned

    # Real seek time is quite complex, involving acceleration,
    # deceleration, and settle-down.  It is approximated here by
    # taking the lesser of two estimates:
    #
    #   Long seeks are easy to do well because they are an
    #   affine-linear function of the distance.  The cruising speed
    #   is computed from the difference between the average and
    #   maximum seek times, and then extrapolated back from the
    #   maximum seek time.
    #
    #   Short seeks are much harder.  But a single cylinder seek
    #   takes about the same amount of time as a read settle-down.
    #   Half of the read settle-down time is (arbitrarily) attributed
    #   to head motion, and multiplied by the number of cylinders.
    #
    def seekTime(self, cyls, read=True):
        """ Time (us) to perform a seek across # cylinders.
            cyls -- number of cylinders to be crossed
            read -- False adds the write settle-down penalty
        """
        # no head motion costs nothing (not even the write penalty)
        if cyls < 1:
            return 0

        if cyls >= self.cylinders:
            # anything at or beyond a full stroke costs a full stroke
            travel = self.max_seek
        else:
            # equilibrium long seek rate, extrapolated back from max_seek
            span_us = self.max_seek - self.avg_seek
            span_cyls = 2 * self.cylinders / 3
            marginal = float(span_us) / span_cyls    # us per cylinder
            by_cruise = self.max_seek - ((self.cylinders - cyls) * marginal)

            # crude short seek rate derived from the read settle-down time
            by_settle = self.settle_read + ((cyls - 1) * self.settle_read / 2)

            # choose the lesser of these two numbers
            travel = min(by_settle, by_cruise)

        if not read:
            travel = travel + self.write_delta
        return travel

    # Transfer time is also pretty easy.  The main trick here is that
    # it includes the cost of operations that spill over into the
    # next cylinder.
    def xferTime(self, bytes, read=True):
        """ Time (us) to perform a read or write of # bytes.
            bytes -- size of the transfer
            read -- False adds the write settle-down penalty per spill
        """
        # to the extent that cache hits are the result of read-ahead, even
        # cached data reads are ultimately limited to media speed
        media_us = bytes * SECOND / self.media_speed

        # settle-down cost of moving on to the next cylinder
        if read:
            settle = self.settle_read
        else:
            settle = self.settle_read + self.write_delta

        # expected (fractional) number of spills into the next cylinder
        # (track skew makes track overflow a non-issue)
        spills = float(bytes) / self.cyl_size
        return media_us + spills * settle

    #
    # VOODOO ALERT
    #
    # There are thousands of lines of code in a disk controller
    # to implement read-ahead and write-back caching ... which
    # dominate small sequential I/O and small random writes.
    #
    # This is a painfully poor approximation of that stuff, but it
    # kind of matches some observed behavior and is only intended
    # to put a box around the expected performance.
    #
    def cache_size(self, size, read, depth=1):
        """ Estimate a non-aggressive read-ahead cache size
            size -- size of each request
            read -- True for read-ahead, False for write-back
            depth -- number of queued requests
            returns 0 if caching is disabled or the request is too big
        """
        # make sure that the relevant kind of caching is enabled
        enabled = self.do_readahead if read else self.do_writeback
        if not enabled:
            return 0

        # 1. don't try to get more than a track ahead
        if size > self.trk_size:
            return 0

        # 2. we cache up to a maximum multiplier
        # 3. and are willing to go farther if we see more requests
        wanted = size * self.cache_multiplier * min(depth, self.cache_max_depth)

        # 4. but only up to a total maximum amount
        ceiling = self.cache_max_tracks * self.trk_size
        return min(wanted, ceiling)

    # this method tries to simulate the interplay of
    # queue depth, read-ahead, and write back to figure
    # out how often we can avoid rotational latency waits
    def latency(self, size, read=True, seq=True, depth=1):
        """ Time (us) a request is likely to incur awaiting rotation
            size -- size of each request
            read -- True for reads, False for writes
            seq -- True for sequential, False for random access
            depth -- number of queued requests
        """
        # start out with the average rotational latency (half a rotation)
        if self.rpm > 0:
            wait = (SECOND / (self.rpm / 60)) / 2
        else:
            wait = 0

        # figure out how many of these operations fit in the cache
        cached = self.cache_size(size, read, depth)
        ops = (cached / size) if cached > size else 1

        # sequential is about caching AND seek/latency optimization
        if seq:
            if ops > 1:
                return wait / ops       # 1 op in N spills out of the cache
            if depth > 1:
                return wait / depth     # latency optimize queued requests
            return wait

        # random is mostly seek/latency optimization
        if not self.sched_rotate:
            return wait
        if read or depth > ops:
            return wait / depth         # best among parallel requests
        if ops > 1:
            return wait / ops           # best among cached writes
        if cached > 0:
            return wait / 2             # mere writeback is two request queueing
        return wait

    # this method ties all the rest together into a simulation
    # of the average time to do a standard throughput test
    # (for random I/O we ignore coincidental same-cylinder hits)
    def avgTime(self, bsize, file_size, read=True, seq=True, depth=1):
        """ average operation time (us) for a specified test.
            bsize -- size of each request
            file_size -- size of the file the requests are spread over
            read -- True for reads, False for writes
            seq -- True for sequential, False for random access
            depth -- number of queued requests
        """
        # transfer time includes intra-transfer-seeks
        elapsed = self.xferTime(bsize, read)

        # requests can't queue deeper than the drive supports
        queued = min(depth, self.nr_requests)
        elapsed = elapsed + self.latency(bsize, read, seq, queued)

        # only random access pays for head motion between requests
        if not seq:
            reach = self.cylinders_in(file_size) / (queued + 2)
            elapsed = elapsed + self.seekTime(reach, read)
        return elapsed

    # convenience functions to plug in operation (and optionally seq)
    def avgRead(self, bsize, file_size, seq=False, depth=1):
        """ average time (us) for a specified read test. """
        return self.avgTime(bsize, file_size,
                            read=True, seq=seq, depth=depth)

    def avgWrite(self, bsize, file_size, seq=False, depth=1):
        """ average time (us) for a specified write test. """
        return self.avgTime(bsize, file_size,
                            read=False, seq=seq, depth=depth)
#
# To save people the trouble of figuring out which parameters
# to cripple to create a dumb disk, I supply one
#
class DumbDisk(Disk):
    """ A Disk with caching, scheduling and queueing all turned off. """

    def __init__(self, rpm=7200, size=2 * TERA,
                 bw=150 * MEG, heads=10):
        """ Instantiate a dumb disk simulation.
            (same parameters as Disk)
        """
        # geometry is inferred exactly as for a smart disk
        Disk.__init__(self, rpm, size, bw, heads)
        self.desc = "%dRPM Dumb Disk" % rpm

        # no write-back, no read-ahead, no latency scheduling, no queueing
        self.do_writeback = self.do_readahead = self.sched_rotate = False
        self.nr_requests = 1

        # slower settle-down and seek times (us)
        self.settle_read = self.write_delta = 1000
        self.max_seek = 20000
        self.avg_seek = 8000
#
# SSD's are much simpler than disks
#
# FIX - the max IOPS modeling is way too simplistic
# it is depth dependent
# d=1 5000 * 4K/s
# d=8 32000 * 4K/s
# d=32 48000 * 4K/s (hit the b/w limit)
class SSD(Disk):
    """ Performance Modeling SSD simulation. """

    def __init__(self, size, bw=200 * MEG, iops=20000, streams=1):
        """ Instantiate an SSD simulation.
            size -- capacity of the device
            bw -- media transfer speed (per second)
            iops -- max operations per second for a single stream
            streams -- max concurrent streams
        """
        # Disk.__init__ is not called: it would try to infer a geometry
        # from the rotational speed, which an SSD does not have.
        self.desc = "SSD"
        self.size = size
        self.media_speed = bw
        self.max_iops = iops            # single stream
        self.nr_requests = streams

        # magic numbers to model more complex behavior
        self.write_penalty = 1.05       # allocation overhead

        # tell a consistent story about the device:
        # nothing rotates, nothing seeks, nothing settles ...
        self.rpm = 0
        self.settle_read = self.write_delta = 0     # is there a write cost?
        self.max_seek = self.avg_seek = 0

        # ... and the whole device is a single one-track cylinder
        self.heads = self.cylinders = 1
        self.cyl_size = self.size / self.cylinders
        self.trk_size = self.cyl_size / self.heads

    def avgTime(self, bsize, file_size, read=True, seq=True, depth=1):
        """ average operation time (us) for a specified test.
            (file_size and seq are accepted but not used by this model)
        """
        # media transfer time, inflated by the write penalty for writes
        xfer = self.xferTime(bsize, read)
        if not read:
            xfer *= self.write_penalty

        # IOPS limitations ... which depend on the number of streams
        if depth < self.nr_requests:
            streams = depth
        else:
            streams = self.nr_requests
        per_op = SECOND / self.max_iops
        return per_op / streams + xfer
#
# helper function to instantiate a disk simulation from a dict
#
def makedisk(dict):
    """ instantiate the disk described by a configuration dict
        device -- type of device to create: 'ssd', 'dumb', or anything
                  else for a standard disk (default disk)
        size -- usable space (default 2TB, or 20GB for an SSD)
        rpm -- rotational speed (default 7200 RPM)
        speed -- max transfer speed (default 150MB/s, or 200MB/s for an SSD)
        iops -- max iops (SSD only, default 20000)
        heads -- number of heads (default 10)
        streams -- max concurrent streams (SSD only, default 1)

        returns the newly created SSD, DumbDisk or Disk
    """
    spinning_defaults = {   # default parameters for spinning disks
        'device': 'disk',
        'size': 2 * TERA,
        'speed': 150 * MEG,
        'rpm': 7200,
        'heads': 10,
    }

    flash_defaults = {      # default parameters for SSDs
        'device': 'ssd',
        'size': 20 * GIG,
        'speed': 200 * MEG,
        'iops': 20000,
        'streams': 1,
    }

    # figure out what type of device this is
    kind = dict.get('device', 'disk')
    defaults = flash_defaults if kind == 'ssd' else spinning_defaults

    def parm(key):
        """ the configured value, falling back to the device default """
        return dict.get(key, defaults[key])

    if kind == 'ssd':
        return SSD(parm('size'), parm('speed'),
                   iops=parm('iops'), streams=parm('streams'))

    # everything that is not an SSD is some flavor of spinning disk
    model = DumbDisk if kind == "dumb" else Disk
    return model(parm('rpm'), parm('size'), parm('speed'),
                 heads=parm('heads'))
#
# test methods
#
from Report import Report
#
# dump out the parameters and selected transfer/seek times
#
def diskparms(disk):
    """ compute & display basic performance data for a simulated disk
        disk -- device to be tested

        Prints the configured parameters, the derived geometry, and
        tables of transfer and seek times to stdout; returns nothing.
    """
    print(" basic disk parameters:")
    print("\tdrive size\t%d GB" % gig(disk.size))
    print("\trpm \t%d" % disk.rpm)
    print("\txfer rate \t%d MB/s" % meg(disk.media_speed))
    print("\tseek time \t%d-%dus, avg %dus" %
          (disk.settle_read, disk.max_seek, disk.avg_seek))
    print("\twrite back\t%s" % ("True" if disk.do_writeback else "False"))
    print("\tread ahead\t%s" % ("True" if disk.do_readahead else "False"))
    print("\tmax depth \t%d" % disk.nr_requests)

    print("\n computed performance parameters:")
    # Time (us) for one rotation.  This is a time, so it is derived from
    # SECOND (as Disk.latency does) rather than from the size unit MEG.
    # A device that does not rotate (rpm == 0) reports 0.
    rot = 0 if disk.rpm == 0 else (SECOND / (disk.rpm / 60))
    print("\trotation \t%dus" % (rot))
    print("\ttrack size \t%d bytes" % disk.trk_size)
    print("\theads \t%d" % disk.heads)
    print("\tcylinders \t%d" % disk.cylinders)

    print("\n data transfer times:")
    print("\t size time iops")
    for bs in (4096, 128 * 1024, 4096 * 1024):
        t = disk.xferTime(bs)
        # xferTime is in us, so operations per second is SECOND / t
        r = SECOND / t
        print("\t%6dK %7dus %7d" % (kb(bs), t, r))

    print("\n seek times:")
    print("\t cyls read write")
    # sample seek distances by powers of ten, stopping with the first
    # one at or beyond the full width of the device
    cyls = 1
    while cyls < disk.cylinders * 10:
        print("\t%7d %7dus %7dus" %
              (cyls, disk.seekTime(cyls), disk.seekTime(cyls, read=False)))
        cyls *= 10
    print("")
#
# a basic throughput series, driven by a dict
#
def tptest(disk, dict, descr="Estimated Throughput"):
    """
    run a standard set of throughputs against a specified device
        disk -- device to be tested
        dict -- test parameters (any missing key takes its default)
            FioRsize ... size of test file (default 16 * GIG)
            FioRdepth ... list of request depths (default [1, 32])
            FioRbs ... list of block sizes (default 4K, 128K, 4096K)
        descr -- label printed at the top of each table

    For each depth a table is printed with, for each block size, the
    bandwidth, IOPS and latency of sequential/random reads/writes.
    Returns nothing.
    """
    sz = dict.get('FioRsize', 16 * GIG)
    depths = dict.get('FioRdepth', [1, 32])
    bsizes = dict.get('FioRbs', [4096, 128 * 1024, 4096 * 1024])

    # (read, seq) settings for each report column, in column order
    columns = ((True, True), (False, True), (True, False), (False, False))
    r = Report(("seq read", "seq write", "rnd read", "rnd write"))

    for depth in depths:
        print("%s (%s), depth=%d" % (descr, disk.desc, depth))
        r.printHeading()
        for bs in bsizes:
            # run the simulations (average time per operation, in us)
            times = tuple(disk.avgTime(bs, sz, read=rd, seq=sq, depth=depth)
                          for rd, sq in columns)

            # compute the corresponding bandwidths
            r.printBW(bs, tuple(bs * SECOND / t for t in times))

            # compute the corresponding IOPS
            r.printIOPS(0, tuple(SECOND / t for t in times))

            # print out the latencies
            r.printLatency(0, times)
        print("")
#
# run a standard test series
#
if __name__ == '__main__':
    # show the parameters and estimated throughput of each default device
    for kind in ('disk', 'ssd'):
        device = makedisk({'device': kind})
        print("\nDefault %s simulation" % (kind))
        diskparms(device)
        tptest(device, {})