forked from tadeck/twicecache
-
Notifications
You must be signed in to change notification settings - Fork 0
/
storage.py
580 lines (492 loc) · 21.8 KB
/
storage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
from twisted.internet import reactor, protocol, defer
from twisted.enterprise import adbapi
from twisted.protocols.memcache import MemCacheProtocol, DEFAULT_PORT
from twisted.python import log
import traceback, urllib, time
import cache, http, mail
import random # for ab testing
import cPickle as pickle
class DataStore:
# Status codes
uncacheable_status = [500, 502, 503, 504, 304, 307]
uncacheable_methods = ['POST', 'PUT', 'DELETE']
short_status = [404]
def __init__(self, config):
self.config = config
# Mecache Backend
servers = config.get('backend_memcache').split(',')
log.msg('Creating connections to backend_memcache servers %s...' % ','.join(servers))
try:
import mc
except:
log.msg('Failed to import memcache helper library!')
log.msg(traceback.format_exc())
return
try:
self.proto = mc.Mc(servers)
log.msg('backend_memcache OK')
except:
log.msg('Failed ot create memcache object!')
log.msg(traceback.format_exc())
# Viewdb Backend
servers = config.get('backend_viewdb').split(',')
log.msg('Creating connections to backend_viewdb servers %s...' % ','.join(servers))
try:
import mc
except:
log.msg('Failed to import memcache helper library!')
log.msg(traceback.format_exc())
return
try:
self.viewdb = mc.Mc(servers)
log.msg('backend_viewdb OK')
except:
log.msg('Failed ot create memcache object!')
log.msg(traceback.format_exc())
# Database Backend
try:
self.db = adbapi.ConnectionPool("pyPgSQL.PgSQL",
database=config['backend_dbname'],
host=config['backend_dbhost'],
user=config['backend_dbuser'],
password=config['backend_dbpass'],
cp_noisy=True,
cp_reconnect=True,
cp_min=5,
cp_max=20,
)
log.msg("Connected to db.")
except ImportError:
mail.error("Could not import PyPgSQL!\n%s" % traceback.format_exc())
except:
mail.error("Unable to connect to backend database.\n%s" % traceback.format_exc())
# AB Testing Groups
self.abTestingGroups = {}
self.loadAbTestingGroups()
# HTTP Backend
try:
self.backend_host, self.backend_port = self.config['backend_webserver'].split(':')
self.backend_port = int(self.backend_port)
except:
self.backend_host = self.config['backend_webserver']
self.backend_port = 80
# Cache Backend
log.msg('Initializing cache...')
cache_type = config['cache_type'].capitalize() + 'Cache'
self.cache = getattr(cache, cache_type)(config)
# Memorize variants of a uri
self.uri_lookup = {}
# Request pileup queue
self.pending_requests = {}
# Init status
def dbConnected(self, db):
log.msg('Database connection success.')
self.db = db
# Main methods
def get(self, keys, request, force=False):
if not isinstance(keys, list): keys = [keys]
if force:
d = self.handleMisses(dict(zip(keys, [None for key in keys])), request)
else:
d = defer.maybeDeferred(self.cache.get, keys)
d.addCallback(self.handleMisses, request)
d.addErrback(self.getError)
return d
def delete(self, keys):
if not isinstance(keys, list): keys = [keys]
self.cache.delete(keys)
def flush(self):
"Flush entire cache"
self.cache.flush()
def handleMisses(self, dictionary, request):
"Process hits, check for validity, and fetch misses / invalids"
missing_deferreds = []
missing_elements = []
for key, value in dictionary.items():
fetch = False
if value is None:
log.msg('MISS [%s]' % key)
fetch = True
elif not getattr(self, 'valid_' + self.elementType(key))(request, self.elementId(key), value):
log.msg('INVALID [%s]' % key)
fetch = True
else:
log.msg('HIT [%s]' % key)
if fetch:
d = defer.maybeDeferred(getattr(self, 'fetch_' + self.elementType(key)), request, self.elementId(key))
d.addErrback(self.fetchError, key)
missing_deferreds.append(d)
missing_elements.append(key)
# Wait for all items to be fetched
if missing_deferreds:
deferredList = defer.DeferredList(missing_deferreds)
deferredList.addCallback(self.returnElements, dictionary, missing_elements)
return deferredList
else:
return defer.succeed(dictionary)
def returnElements(self, results, dictionary, missing_elements):
if not isinstance(results, list): results = [results]
uncached_elements = dict([(key, results.pop(0)[1]) for key in missing_elements])
dictionary.update(uncached_elements)
return dictionary
def fetchError(self, result, key):
log.msg('Error calling fetch_%s for key %s' % (self.elementType(key), self.elementId(key)))
log.msg(result.getErrorMessage().replace('\n', ' '))
result.printTraceback()
return {}
def getError(self, dictionary):
log.msg('uh oh! %s' % dictionary)
traceback.print_exc()
# Hashing
def elementHash(self, request, element_type, element_id = None):
"Hash function for elements"
return getattr(self, 'hash_' + element_type.lower())(request, element_id)
# elementType's are not allowed to have _ in them because we use those in composing ids
def elementType(self, key):
return key.split('_')[0]
def elementId(self, key):
return '_'.join(key.split('_')[1:])
# Expirations
def hash_expiration(self, request, id):
return 'expiration_' + request.uri.rstrip("?")
def fetch_expiration(self, request, id):
return False
def valid_expiration(self, request, id, value):
return True
# Page
def hash_page(self, request, id=None, cookies = [], abdependency = [], abvalue = {}):
# Hash the request key
key = 'page_' + (request.getHeader('x-real-host') or request.getHeader('host') or '') + request.uri.rstrip("?")
# Internationalization salt
if self.config.get('hash_lang_header'):
header = request.getHeader('accept-language') or self.config.get('hash_lang_default', 'en-us')
if header:
try:
lang = header.replace(' ', '').split(';')[0].split(',')[0].lower()
#log.msg('lang: %s' % lang)
key += '//' + lang
except:
traceback.print_exc()
if abdependency:
ab_identifier = ','.join(':'.join([key, abvalue.get(key, '')]) for key in abdependency)
key += '//' + ab_identifier
if cookies:
# Grab the cookies we care about from the request
found_cookies = []
for cookie in cookies:
val = request.getCookie(cookie)
if val:
found_cookies.append('%s=%s' % (cookie, val))
# Update key based on cookies we care about
if found_cookies:
key += '//' + ','.join(found_cookies)
log.msg("HASHED PAGE %s" % key)
return key
def fetch_page(self, request, id, ignoreResult=False):
# Prevent idental request pileup
key = self.hash_page(request)
if key in self.pending_requests and ignoreResult:
log.msg('PENDING: Request is already pending for %s' % request.uri)
return True
# Tell backend that we are Twice and strip cache-control headers
request.setHeader(self.config.get('twice_header'), 'true')
request.removeHeader('cache-control')
# Make the request
sender = http.HTTPRequestSender(request)
#sender.noisy = False
reactor.connectTCP(self.backend_host, self.backend_port, sender)
# Defer the result
d = sender.deferred.addCallback(self.extract_page, request).addErrback(self.page_failed, request)
self.pending_requests[key] = d
return d
def valid_page(self, request, id, value):
"Determine whether the page can be served from the cache"
# Force refetch of very stale (3x cache_control value) pages
now = time.time()
if now > value['rendered_on'] + value['cache_control'] * 3:
log.msg('STALE-HARD [%s]' % id)
return False
# Sevre semi-stale pages but refresh in the background
elif now > value['rendered_on'] + value['cache_control']:
log.msg('STALE-SOFT [%s]' % id)
# Extend the valid cache length by 30s so we can fetch it
response = value['response']
cookies = sorted((response.getHeader(self.config.get('cookies_header')) or '').split(','))
key = self.hash_page(request, cookies = cookies)
cache_control = response.getCacheControlHeader(self.config.get('cache_header')) or 0
if response.status in self.uncacheable_status:
log.msg('NO-CACHE (Status is %s) [%s]' % (response.status, key))
cache = False
elif response.status in self.short_status:
log.msg('SHORT-CACHE (Status is %s) [%s]' % (response.status, key))
cache = True
cache_control = 30
elif cache_control and cache_control > 0:
log.msg('CACHE [%s] (for %ss)' % (key, cache_control))
cache = True
else:
log.msg('NO-CACHE (No cache data) [%s]' % key)
cache = False
value['rendered_on'] += 30
if cache:
self.cache.set({key : value}, 60) # Give 60s to refresh the page
# Now fetch a fresh copy in the background
self.fetch_page(request, id, ignoreResult=True)
return True
# Do not serve cached versions of pages if the method is not cacheable
elif request.method.upper() in self.uncacheable_methods:
log.msg('PASS-THROUGH [%s]' % request.method.upper())
return False
# Valid page
else:
return True
def page_failed(self, response, request):
# Release pending lock
key = self.hash_page(request)
if key in self.pending_requests:
del self.pending_requests[key]
log.msg('ERROR: Could not retrieve [%s]' % request.uri.rstrip("?h"))
response.printBriefTraceback()
# TODO: Return something meaningful!
return ''
def extract_page(self, response, request):
# Release pending lock
key = self.hash_page(request)
if key in self.pending_requests:
del self.pending_requests[key]
# Extract uniqueness info
cookies = sorted((response.getHeader(self.config.get('cookies_header')) or '').split(','))
# Extract AB test dependencies
abvalue_string = request.getHeader(self.config.get('abvalue_header'))
if abvalue_string:
abvalue = dict(value.split(':') for value in abvalue_string.split(','))
else:
abvalue = {}
abdependency = sorted((response.getHeader(self.config.get('abdependency_header')) or '').split(','))
key = self.hash_page(request, cookies = cookies, abdependency = abdependency, abvalue = abvalue)
# Store uri variant
if key not in self.uri_lookup.setdefault(request.uri.rstrip("?"), []):
log.msg('Added new variant for %s: %s' % (request.uri.rstrip("?"), key))
self.uri_lookup[request.uri.rstrip("?")].append(key)
# Override for non GET's
if request.method.upper() in self.uncacheable_methods:
log.msg('NO-CACHE (Method is %s) [%s]' % (request.method, key))
cache = False
cache_control = 0
else:
# Cache logic
cache_control = response.getCacheControlHeader(self.config.get('cache_header')) or 0
if response.status in self.uncacheable_status:
log.msg('NO-CACHE (Status is %s) [%s]' % (response.status, key))
cache = False
elif response.status in self.short_status:
log.msg('SHORT-CACHE (Status is %s) [%s]' % (response.status, key))
cache = True
cache_control = 30
elif cache_control and cache_control > 0:
log.msg('CACHE [%s] (for %ss)' % (key, cache_control))
cache = True
else:
log.msg('NO-CACHE (No cache data) [%s]' % key)
cache = False
# Actual return value
value = {
'response' : response,
'rendered_on' : time.time(),
'cache_control' : cache_control,
}
if cache:
# save page
response.cookies = []
self.cache.set({key : value}, cache_control * 10) # 10x cache control length
if abdependency:
# save abdependency
abdependency_key = self.hash_abdependency(request)
self.cache.set({abdependency_key: abdependency}, cache_control * 10)
return value
# Memcache
def hash_memcache(self, request, id):
return 'memcache_' + id
def fetch_memcache(self, request, id):
#log.msg('Looking up memcache %s' % id)
return self.proto.get(id).addCallback(self.extract_memcache, request, id)
def extract_memcache(self, result, request, id):
# Un-comment for the twisted memcached library
#value = result and result[1]
value = result
key = self.hash_memcache(request, id)
self.cache.set({key: value}, 30) # 30 seconds
return value
def valid_memcache(self, request, id, value):
return True
def incr_memcache(self, key):
log.msg('Incrementing memcache %s' % key)
return self.proto.increment(key)
def decr_memcache(self, key):
log.msg('Decrementing memcache %s' % key)
return self.proto.decrement(key)
def delete_memcache(self, key):
self.cache.delete(key)
return self.proto.delete(key)
def set_memcache(self, key, val):
log.msg('Setting memcache %s' % key)
return self.proto.set(key, val)
# Viewdb
def hash_viewdb(self, request, id):
return 'viewdb_' + id
def fetch_viewdb(self, request, key):
#log.msg("Looking up viewdb %s" % key)
return self.viewdb.get(key).addCallback(self.extract_viewdb, request, key)
def extract_viewdb(self, result, request, id):
# Un-comment for the twisted memcached library
#value = result and result[1]
value = result
key = self.hash_viewdb(request, id)
self.cache.set({key: value}, 30) # 30 seconds
#log.msg('Set twice cache key %s as %s' % (key, value))
return value
def valid_viewdb(self, request, id, value):
return True
def incr_viewdb(self, key):
log.msg('Incrementing viewdb %s' % key)
self.viewdb.add(key, "0")
return self.viewdb.increment(key)
def set_viewdb(self, key, val):
log.msg('Setting viewdb %s' % key)
return self.viewdb.add(key, val)
# viewdb for unread message counts per user
def fetch_unread(self, request, id):
id = self._read_session(request)
key = "unread%s" % id
#log.msg("Looking up unread messages count for session user %s from viewdb" % key)
return self.viewdb.get(key).addCallback(self.extract_unread, request, key)
def hash_unread(self, request, id):
id = self._read_session(request)
return 'unread_%s' % id
def extract_unread(self, result, request, id):
log.msg("Extracting unread count from %s" % repr(result))
if len(result) > 1:
value = result and result[1] or "0"
else:
value = result or "0"
output = {'count' : value}
key = self.hash_unread(request, id)
self.cache.set({key: output}, 60) # one minute
#log.msg("Set twice cache key %s as %s" % ('count', value))
return output
def valid_unread(self, request, id, value):
return True
# geo location
def hash_geo(self, request, id):
return 'ip'
# ip address
def hash_ip(self, request, id):
return 'ip'
# User Session
def hash_session(self, request, id):
id = self._read_session(request)
if id:
return 'session_' + id
else:
return ''
def fetch_session(self, request, id):
id = self._read_session(request)
return self.db.runInteraction(self._session, id).addCallback(self.extract_session, request, id)
def extract_session(self, result, request, id):
if len(result) and len(result[0]):
session = dict(zip(result[0][0].keys(), result[0][0].values()))
session.update(dict(result[1]))
output = session
else:
output = {}
key = self.hash_session(request, id)
self.cache.set({key : output}, 86400) # 24 hours
return output
def valid_session(self, request, id, value):
return True
def _read_session(self, request):
return urllib.unquote(request.getCookie(self.config['session_cookie']) or '')
def _session(self, txn, id):
#log.msg('Looking up session %s' % id)
users_query = "select * from users where id = %s" % id
log.msg('Running query: %s' % users_query)
txn.execute(users_query)
users_result = txn.fetchall()
return [users_result]
# AB testing calls
def loadAbTestingGroups(self):
query = "select test_name, values_list from ab_testing_groups;"
self.db.runQuery(query).addCallback(self.addAbTestingGroups)
reactor.callLater(60.0, self.loadAbTestingGroups)
def addAbTestingGroups(self, rows):
self.abTestingGroups = {}
for row in rows:
try:
test_name, values_list = row[0], row[1]
values = [value.split(":") for value in values_list.split(",")]
self.abTestingGroups[test_name] = values
except:
log.msg("Error parsing ab test row %s" % repr(row))
log.msg("Finished adding ab testing groups from %s" % repr(rows))
def hash_abdependency(self, request, ignored=None):
return "abdependency_%s" % request.uri.rstrip("?")
def fetch_abdependency(self, request, ignored=None):
return None
def valid_abdependency(self, request, id, value):
return True
# ab tests are hashed by the current session and the test_name
def hash_abvalue(self, request, ignored=None):
return "abvalue_%s" % self.read_ab_cookie(request)
def fetch_abvalue(self, request, ignored=None):
log.msg('Looking up ab group: %s' % self.hash_abvalue(request))
if not hasattr(self, 'viewdb'):
log.msg("Not connected to the viewdb")
return self.extract_abvalue(None, request)
# we permanently save your ab group to the memcachedb box so that it's never lost
return self.viewdb.get(self.hash_abvalue(request)).addCallback(self.extract_abvalue, request)
def valid_abvalue(self, request, id, value):
return True
def extract_abvalue(self, result, request):
log.msg("Extracting abvalue from %s" % repr(result))
if result and result[1]:
try:
output = pickle.loads(result[1])
except:
log.msg("Exceptin depickling result")
traceback.print_exc()
output = {}
else:
output = {}
updated = False # whether we've assigned any new ab groups and need to resave to memcache
for test_name, value_list in self.abTestingGroups.iteritems():
if test_name not in output:
output[test_name] = self.pick_ab(value_list)
updated = True # picked a new ab value, need to resave
key = self.hash_abvalue(request)
self.cache.set({key: output}, 300) # 5 minutes
if updated and hasattr(self, 'viewdb'):
self.viewdb.set(key, pickle.dumps(output)) # 30 days
log.msg("EXTRACTED ABVALUE %s" % repr(output))
return output
def read_ab_cookie(self, request):
ab_id = request.getCookie(self.config['ab_cookie'])
if not ab_id:
ab_id = self.gen_ab_id()
log.msg("Generating new ab cookie: %s" % ab_id)
request.addCookie(self.config['ab_cookie'], ab_id)
request.addCookie(self.config['new_ab_cookie'], 'True')
return ab_id
def new_ab_cookie(self, request):
return request.getCookie(self.config['new_ab_cookie']) == 'True'
def gen_ab_id(self):
return ''.join(random.sample('abcdefghijklmnopqrstuvwxyz123456790', 25))
# choose a random item by weights for a test
def pick_ab(self, value_list):
n = sum(float(weight) for value, weight in value_list) * random.uniform(0, 1)
test_value = ""
for value, weight in value_list:
test_value = value
if n < float(weight):
return test_value
n = n - float(weight)
return test_value