def dump_now(cls):
    """Flush one batch of queued hits from Hail into Whale.

    Each call bumps the 'hail_number' counter on the driver returned by
    ``cls.hail_driver()`` and drains the set named ``hail_<n>``, where ``n``
    is the counter value *before* the bump.  Every member of that set is a
    key whose value is a JSON list ``[class_name, pk, dimensions, metrics,
    at]``; each decodable entry is forwarded to ``Whale.count_now``.
    Afterwards the hit keys and the set itself are deleted.

    Returns None.  Entries that cannot be decoded are printed and skipped
    (they are still deleted along with the rest of the batch).
    """
    # Get the incoming hits from Hail
    r = cls.hail_driver()
    set_number_name = 'hail_number'
    r.setnx(set_number_name, 0)
    # incr() hands back the new value, so subtract one to address the batch
    # that was current before this call.
    set_number = r.incr(set_number_name) - 1
    set_name = 'hail_%s' % set_number
    try:
        keys_from_hail = r.smembers(set_name)
    except Exception:
        # Best effort: if the batch cannot be read, give up quietly, but do
        # not swallow KeyboardInterrupt/SystemExit like a bare except would.
        return
    if not keys_from_hail:
        r.delete(set_name)
        return

    def get_keys_from_json(k):
        """Decode the hit stored under key k, or return False on failure."""
        try:
            class_name, pk, dimensions, metrics, at = json.loads(r[k])
        except Exception as e:
            print(e)
            return False
        # `at` is passed through exactly as it was stored.
        return (pk, dimensions, metrics, at)

    # Decode everything first, then count, matching the original ordering.
    keys_to_update = [get_keys_from_json(k) for k in keys_from_hail]
    for packed in keys_to_update:
        if packed:
            pk, dimensions, metrics, at = packed
            Whale.count_now(pk, dimensions, metrics, at=at)

    # Delete the hits.  An explicit loop is used because map() is lazy on
    # Python 3 and would silently skip the deletes there.
    for key in keys_from_hail:
        r.delete(key)
    r.delete(set_name)
def dump_now(cls):
    """Flush one batch of queued hits from Hail into Whale.

    The driver from ``cls.hail_driver()`` keeps a 'hail_number' counter.
    This call increments it and processes the set ``hail_<previous value>``.
    Members of that set are keys holding JSON lists of the form
    ``[class_name, pk, dimensions, metrics, at]``; every entry that decodes
    is passed on to ``Whale.count_now``.  The hit keys and the set are then
    removed from the driver.

    Returns None.  Undecodable entries are printed, skipped, and deleted
    together with the rest of the batch.
    """
    # Get the incoming hits from Hail
    r = cls.hail_driver()
    set_number_name = 'hail_number'
    r.setnx(set_number_name, 0)
    # incr() returns the post-increment value; the batch to drain is the
    # one numbered just before it.
    set_number = r.incr(set_number_name) - 1
    set_name = 'hail_%s' % set_number
    try:
        keys_from_hail = r.smembers(set_name)
    except Exception:
        # Best effort on read failure; unlike a bare except this still lets
        # KeyboardInterrupt/SystemExit propagate.
        return
    if not keys_from_hail:
        r.delete(set_name)
        return

    def get_keys_from_json(k):
        """Return (pk, dimensions, metrics, at) for key k, or False."""
        try:
            class_name, pk, dimensions, metrics, at = json.loads(r[k])
        except Exception as e:
            print(e)
            return False
        return (pk, dimensions, metrics, at)

    # Decode the whole batch before counting anything.
    keys_to_update = [get_keys_from_json(k) for k in keys_from_hail]
    for packed in keys_to_update:
        if not packed:
            continue
        pk, dimensions, metrics, at = packed
        Whale.count_now(pk, dimensions, metrics, at=at)

    # Delete the hits with a real loop: relying on map() for side effects
    # does nothing on Python 3, where map() is lazy.
    for key in keys_from_hail:
        r.delete(key)
    r.delete(set_name)
def dump_now(cls):
    """Flush one batch of queued hits from Hail into a Whale instance.

    Increments the 'hail_number' counter on the driver returned by
    ``cls.driver()`` and drains the set ``hail_<previous value>``.  Each
    member is a key holding a JSON list ``[class_name, categories,
    dimensions, metrics, t]``; ``t`` is converted with
    ``datetime.datetime.fromtimestamp(float(t))`` and the entry is handed
    to ``whale.count_now``.  The hit keys and the set are deleted afterwards.

    Returns None.  Entries that fail to decode are printed and skipped.
    """
    # Get the incoming hits from Hail
    from whale import Whale
    whale = Whale()
    r = cls.driver()
    _s_n_n = 'hail_number'
    r.setnx(_s_n_n, 0)
    # incr() returns the new value; the batch to drain is the previous one.
    set_number = r.incr(_s_n_n) - 1
    set_name = 'hail_%s' % set_number
    try:
        keys_from_hail = r.smembers(set_name)
    except Exception:
        # Best effort on read failure, without hiding KeyboardInterrupt.
        return
    # Truthiness instead of `len(...) is 0`: identity comparison against an
    # int literal only works by accident of small-int caching.
    if not keys_from_hail:
        r.delete(set_name)
        return

    def get_keys_from_json(k):
        """Return (categories, dimensions, metrics, at) for k, or False."""
        try:
            class_name, categories, dimensions, metrics, t = json.loads(r[k])
            at = datetime.datetime.fromtimestamp(float(t))
        except Exception as e:
            print(e)
            return False
        return (categories, dimensions, metrics, at)

    keys_to_update = [get_keys_from_json(k) for k in keys_from_hail]
    for packed in keys_to_update:
        if not packed:
            continue
        categories, dimensions, metrics, at = packed
        whale.count_now(categories, dimensions, metrics, at=at)

    # Delete the hits.  The keys to remove are the set members themselves
    # (keys_from_hail), not the decoded tuples in keys_to_update, which were
    # previously passed to delete() and left every hit key behind.
    for key in keys_from_hail:
        r.delete(key)
    r.delete(set_name)
class TestHailWHale(unittest.TestCase):
    """Tests that exercise Whale's counting and sub-dimension helpers.

    setUp builds a fresh Hail and a fresh Whale instance for every test.
    """

    def setUp(self):
        # Imported here rather than at module level, so importing this test
        # module does not itself require hail/whale to be importable.
        from hail import Hail
        from whale import Whale
        self.hail = Hail()
        self.whale = Whale()

    def testGetSubdimensions(self):
        """get_subdimensions("test") should list the counted keys."""
        # NOTE(review): the fixed "test" name is shared with
        # testGetAllSubdimensions, so counts presumably accumulate across
        # tests and runs -- confirm against the backing store.
        self.whale.count_now("test", {"a": 1, "b": 2})
        subs = self.whale.get_subdimensions("test")
        assert ["a"] in subs
        assert ["b"] in subs

    def testGetAllSubdimensions(self):
        """all_subdimensions("test") should include keys and key/value paths."""
        self.whale.count_now("test", {"a": 1, "b": 2})
        subs = self.whale.all_subdimensions("test")
        assert ["a"] in subs
        # The value 1 is expected back as the string "1".
        assert ["a", "1"] in subs
        assert ["b"] in subs
        assert ["b", "2"] in subs

    def testCrunch(self):
        """Count three values under a unique prefix and crunch them."""
        # Unique key for every test
        t = str(time.time())
        self.whale.count_now("test_crunch", [t, "a"], {"value": 5})
        self.whale.count_now("test_crunch", [t, "b"], {"value": 1})
        self.whale.count_now("test_crunch", [t, "c"], {"value": 15})
        # NOTE(review): the result is never asserted on here, so this test
        # only checks that crunch() does not raise.
        data = self.whale.crunch("test_crunch", [t], "value")
def count_now():
    """Count one hit for the current request and return 'OK'.

    The timestamp comes from the request's 'at' value (read through ``g``).
    When present it is parsed with ``dateutil.parser.parse`` and its tzinfo
    is dropped via ``replace(tzinfo=None)``, which keeps the wall-clock
    fields unchanged rather than converting between zones.  When absent,
    ``times.now()`` is used.  All other arguments to ``Whale.count_now``
    come from ``default_params()``.
    """
    whale = Whale()
    at = g('at', False)
    if at:
        from dateutil.parser import parse
        # Reuse the value already fetched instead of asking g() a second time.
        at = parse(at).replace(tzinfo=None)
    else:
        at = times.now()
    whale.count_now(at=at, **default_params())
    return 'OK'
def tracker():
    """Count a hit for the current request and return the visitor's uid.

    The request's default_params() are counted in Whale at times.now(),
    logged against a uid through Hail.spy_log, and the uid is written to a
    'uid' cookie and returned as a string.

    If the request has no 'pk' parameter but does have 'pixel', the pk is
    recovered by base64-decoding and AES-decrypting the pixel value.
    """
    # NOTE(review): Period is imported but not used in this function.
    from periods import Period
    import random
    params = default_params()
    # LOLOL THIS SHOULD REALLY CHANGE
    # NOTE(review): the key is the SHA-256 digest of a hard-coded string; it
    # is used below both as the AES key and as the third argument to
    # get_cookie/set_cookie (presumably the cookie-signing secret -- confirm
    # against the web framework).
    key = hashlib.sha256('hailwhale_weak_key').digest()
    if 'pk' not in req.GET and 'pixel' in req.GET:
        from Crypto.Cipher import AES
        from base64 import b64encode, b64decode
        from urllib import quote_plus
        mode = AES.MODE_CBC
        # NOTE(review): no IV is passed to AES.new; confirm which IV the
        # Crypto library uses in CBC mode when none is given.
        encryptor = AES.new(key, mode)
        text = g('pixel')
        INTERRUPT = u'\u0001'
        PAD = u'\u0000'
        # Since you need to pad your data before encryption,
        # create a padding function as well
        # Similarly, create a function to strip off the padding after decryption
        def AddPadding(data, interrupt, pad, block_size):
            # Append the interrupt marker, then enough pad characters to
            # reach the next multiple of block_size (none if already there).
            new_data = ''.join([data, interrupt])
            new_data_len = len(new_data)
            remaining_len = block_size - new_data_len
            to_pad_len = remaining_len % block_size
            pad_string = pad * to_pad_len
            return ''.join([new_data, pad_string])
        def StripPadding(data, interrupt, pad):
            # rstrip removes *all* trailing pad characters, then *all*
            # trailing interrupt characters.
            return data.rstrip(pad).rstrip(interrupt)
        # NOTE(review): hw_encoded is defined but never called here.
        def hw_encoded(t):
            return quote_plus(
                b64encode(encryptor.encrypt(AddPadding(t, INTERRUPT, PAD, 32))))
        def hw_decoded(t):
            return StripPadding(encryptor.decrypt(b64decode(t)), INTERRUPT, PAD)
        params['pk'] = hw_decoded(text)
    # NOTE(review): pk is not used afterwards; assuming params is a dict,
    # this lookup raises KeyError when no 'pk' is present.
    pk = params['pk']
    whale = Whale()
    hail = Hail()
    # NOTE(review): val is assigned but never read.
    val = whale.count_now(at=times.now(), **params)
    #val = whale.count_now(**params)
    uid = g('uid')
    if not uid or uid == '_new':
        # No usable uid in the request: fall back to the 'uid' cookie, and
        # to a random integer in [10**6, 10**9) if that is missing too.
        default = random.randrange(10**6, 10**9)
        uid = str(req.get_cookie('uid', str(default), key))
    hail.spy_log(uid, params)
    response.set_cookie('uid', uid, key)
    return str(uid)
def count_now():
    """Count one hit described by default_params() and return 'OK'.

    The 'at' entry of ``default_params()`` is parsed with
    ``dateutil.parser.parse`` when it is truthy; otherwise ``times.now()``
    is used.  The 'pk', 'metrics' and 'dimensions' entries are passed to
    ``Whale.count_now`` as keyword arguments (None when missing).
    """
    whale = Whale()
    vals = default_params()
    at = vals.get("at")
    if at:
        from dateutil.parser import parse
        at = parse(at)
    else:
        at = times.now()
    whale.count_now(at=at,
                    pk=vals.get("pk"),
                    metrics=vals.get("metrics"),
                    dimensions=vals.get("dimensions"))
    return 'OK'
def count_now():
    """Count one hit built from default_params() and return 'OK'.

    A truthy 'at' entry is parsed with ``dateutil.parser.parse``; a missing
    or empty one is replaced by ``times.now()``.  'pk', 'metrics' and
    'dimensions' are looked up with ``.get`` and forwarded to
    ``Whale.count_now`` by keyword, so absent entries arrive as None.
    """
    whale = Whale()
    vals = default_params()
    at = vals.get("at")
    if not at:
        at = times.now()
    else:
        from dateutil.parser import parse
        at = parse(at)
    whale.count_now(
        at=at,
        pk=vals.get("pk"),
        metrics=vals.get("metrics"),
        dimensions=vals.get("dimensions"),
    )
    return 'OK'
def tracker():
    """Count a hit for the current request and return the visitor's uid.

    Counts default_params() in Whale at times.now(), logs them against a uid
    with Hail.spy_log, writes the uid to a 'uid' cookie and returns it as a
    string.

    When the request lacks a 'pk' parameter but carries 'pixel', the pk is
    obtained by base64-decoding and AES-decrypting the pixel value.
    """
    # NOTE(review): Period is imported but not used in this function.
    from periods import Period
    import random
    params = default_params()
    # LOLOL THIS SHOULD REALLY CHANGE
    # NOTE(review): hard-coded key material.  The digest doubles as the AES
    # key and as the third argument to get_cookie/set_cookie (presumably the
    # cookie secret -- confirm against the web framework).
    key = hashlib.sha256('hailwhale_weak_key').digest()
    if 'pk' not in req.GET and 'pixel' in req.GET:
        from Crypto.Cipher import AES
        from base64 import b64encode, b64decode
        from urllib import quote_plus
        mode = AES.MODE_CBC
        # NOTE(review): CBC mode is requested without an explicit IV; check
        # what the Crypto library does in that case.
        encryptor = AES.new(key, mode)
        text = g('pixel')
        INTERRUPT = u'\u0001'
        PAD = u'\u0000'
        # Since you need to pad your data before encryption,
        # create a padding function as well
        # Similarly, create a function to strip off the padding after decryption
        def AddPadding(data, interrupt, pad, block_size):
            # data + interrupt, then pad characters up to the next multiple
            # of block_size (zero pad characters if already aligned).
            new_data = ''.join([data, interrupt])
            new_data_len = len(new_data)
            remaining_len = block_size - new_data_len
            to_pad_len = remaining_len % block_size
            pad_string = pad * to_pad_len
            return ''.join([new_data, pad_string])
        def StripPadding(data, interrupt, pad):
            # Strips every trailing pad character, then every trailing
            # interrupt character.
            return data.rstrip(pad).rstrip(interrupt)
        # NOTE(review): hw_encoded is defined but never called here.
        def hw_encoded(t):
            return quote_plus(b64encode(encryptor.encrypt(AddPadding(t, INTERRUPT, PAD, 32))))
        def hw_decoded(t):
            return StripPadding(encryptor.decrypt(b64decode(t)), INTERRUPT, PAD)
        params['pk'] = hw_decoded(text)
    # NOTE(review): pk is unused below; assuming params is a dict, this
    # raises KeyError when neither 'pk' nor 'pixel' produced a value.
    pk = params['pk']
    whale = Whale()
    hail = Hail()
    # NOTE(review): val is assigned but never read.
    val = whale.count_now(at=times.now(), **params)
    #val = whale.count_now(**params)
    uid = g('uid')
    if not uid or uid == '_new':
        # Fall back to the 'uid' cookie; if that is absent as well, use a
        # random integer in [10**6, 10**9) as the new uid.
        default = random.randrange(10**6,10**9)
        uid = str(req.get_cookie('uid', str(default), key))
    hail.spy_log(uid, params)
    response.set_cookie('uid', uid, key)
    return str(uid)
class TestHailWhale(unittest.TestCase):
    """Tests for Whale counting, plotpoints, ranking and decision helpers.

    setUp builds a fresh Hail and Whale instance for each test.  Most tests
    derive a unique key from time.time() so that counts from other tests or
    earlier runs do not affect their assertions.
    """

    def setUp(self):
        # Imported lazily so that importing this module does not itself
        # require hail/whale to be importable.
        from hail import Hail
        from whale import Whale
        self.hail = Hail()
        self.whale = Whale()

    def testGetSubdimensions(self):
        """get_subdimensions should report the keys that were counted."""
        t = 'subs_%s' % str(time.time())
        self.whale.count_now(t, {'a': 1, 'b': 2})
        subs = self.whale.get_subdimensions(t)
        assert('a' in subs)
        assert('b' in subs)

    def testGetAllSubdimensions(self):
        """all_subdimensions should report keys and [key, value] paths."""
        t = 'all_subs_%s' % str(time.time())
        self.whale.count_now(t, {'a': 1, 'b': 2})
        subs = self.whale.all_subdimensions(t)
        assert('a' in subs)
        # Values are expected back as strings ('1', not 1).
        assert(['a', '1'] in subs)
        assert('b' in subs)
        assert(['b', '2'] in subs)

    def testPlotpoints(self):
        """Five identical counts should sum in the latest plotpoint."""
        t = str(time.time())
        for i in range(5):
            self.whale.count_now('test_plotpoints', t, {'hits': 1, 'values': 5})
        plotpoints = self.whale.plotpoints('test_plotpoints', t, ['hits', 'values'], points_type=list)
        # [-1][1] reads the second element of the last point in the list.
        self.assertEqual(plotpoints[t]['hits'][-1][1], 5)
        self.assertEqual(plotpoints[t]['values'][-1][1], 25)

    def testPlotpointsDepth(self):
        """plotpoints should honour the depth and limit arguments."""
        t = str(time.time())
        self.whale.count_now('test_depth', {t: 'a'})
        self.whale.count_now('test_depth', {t: 'b'})
        self.whale.count_now('test_depth', {t: 'b'})
        self.whale.count_now('test_depth', {t: {'c': 'child'}})
        # Test 1 level deep
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list, depth=1)
        self.assertEqual(plotpoints[maybe_dumps([t, 'a'])]['hits'][-1][1], 1)
        self.assertEqual(plotpoints[maybe_dumps([t, 'b'])]['hits'][-1][1], 2)
        self.assertEqual(plotpoints[maybe_dumps([t, 'c'])]['hits'][-1][1], 1)
        self.assertEqual(False, maybe_dumps([t, 'c', 'child']) in plotpoints)
        # Test 2 levels deep
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list, depth=2)
        self.assertEqual(True, maybe_dumps([t, 'c', 'child']) in plotpoints)
        self.assertEqual(plotpoints[maybe_dumps([t, 'c', 'child'])]['hits'][-1][1], 1)
        # Test ranking and limiting
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list, depth=1, limit=2)
        self.assertEqual(plotpoints[maybe_dumps([t, 'b'])]['hits'][-1][1], 2)
        self.assertEqual(True, maybe_dumps([t, 'a']) not in plotpoints)
        self.assertEqual(True, maybe_dumps([t, 'c']) not in plotpoints)

    def testRatioPlotpoints(self):
        """A 'value/hit' metric should plot the ratio of the two metrics."""
        t = str(time.time())
        for i in range(5):
            self.whale.count_now('test_ratio', t, {'hit': 1, 'value': 5})
        plotpoints = self.whale.plotpoints('test_ratio', t, ['hit', 'value', 'value/hit'], points_type=list)
        self.assertEqual(plotpoints[t]['hit'][-1][1], 5)
        self.assertEqual(plotpoints[t]['value'][-1][1], 25)
        self.assertEqual(plotpoints[t]['value/hit'][-1][1], 5)

    def testRankSubdimensionsScalar(self):
        """rank_subdimensions_scalar should flag the high-value paths."""
        t = str(time.time())
        self.whale.count_now('test_rank', [t, 'a', 'asub1'], {'value': 1})
        self.whale.count_now('test_rank', [t, 'a', 'asub2'], {'value': 30})
        self.whale.count_now('test_rank', [t, 'b'], {'value': 80})
        self.whale.count_now('test_rank', [t, 'c'], {'value': 10})
        ranked = self.whale.rank_subdimensions_scalar('test_rank', t, 'value')
        self.assertEqual(ranked[maybe_dumps([t, 'a'])]['important'], False)
        self.assertEqual(ranked[maybe_dumps([t, 'a', 'asub1'])]['important'], False)
        self.assertEqual(ranked[maybe_dumps([t, 'a', 'asub2'])]['important'], True)
        self.assertEqual(ranked[maybe_dumps([t, 'b'])]['important'], True)
        self.assertEqual(ranked[maybe_dumps([t, 'c'])]['important'], False)

    def testRankSubdimensionsRatio(self):
        """rank_subdimensions_ratio should flag ratios far from the overall."""
        t = str(time.time())
        pk = 'test_ratio_rank'
        # OVERALL STATS: 529,994 value, 50,000 visitors, 10.6 value per visitor
        # Not important, too close to overall
        self.whale.count_now(pk, [t, 'a', 'asub1'], {'value': 54989, 'visitors': 4999})  # 11 value per visitor
        # Important, high relative ratio
        self.whale.count_now(pk, [t, 'a', 'asub2'], {'value': 375000, 'visitors': 25000})  # 15 value per visitor
        # Important, low relative ratio
        self.whale.count_now(pk, [t, 'b'], {'value': 100000, 'visitors': 20000})  # 5 value per visitor
        # Not important, not enough visitors
        self.whale.count_now(pk, [t, 'c'], {'value': 5, 'visitors': 1})  # 5 value per visitor
        # NOTE(review): this call uses the literal 'test_rank_ratio' while the
        # data above was counted under pk = 'test_ratio_rank'.  If that is a
        # typo, the `not in one_level` assertion below passes trivially.
        one_level = self.whale.rank_subdimensions_ratio('test_rank_ratio', 'value', 'visitors', t, recursive=False)
        all_levels = self.whale.rank_subdimensions_ratio(pk, 'value', 'visitors', t)
        self.assertEqual(True, maybe_dumps([t, 'a', 'asub1']) not in one_level)
        self.assertEqual(all_levels[maybe_dumps([t, 'a', 'asub1'])]['important'], False)
        self.assertEqual(all_levels[maybe_dumps([t, 'a', 'asub2'])]['important'], True)
        self.assertEqual(all_levels[maybe_dumps([t, 'b'])]['important'], True)
        self.assertEqual(all_levels[maybe_dumps([t, 'c'])]['important'], False)

    def testBasicDecision(self):
        """decide should pick the option with the clearly best ratio."""
        pk = 'test_basic_decision'
        decision = str(time.time())
        # Make a decision, any decision, from no information whatsoever
        good, bad, test = self.whale.weighted_reasons(pk, 'random', [1,2,3])
        #_print_reasons(good, bad, test)
        any_one = self.whale.decide_from_reasons(good, bad, test)
        self.assertEqual(True, any_one in [1, 2, 3])
        # OK, now how about something somewhat informed?
        # This will be easy. Slogan A makes us huge profit. Products B and C suck.
        # D looks promising but isn't yet significant
        opts = ['a', 'b', 'c', 'd']
        self.whale.count_now([pk, decision, 'a'], None, dict(dollars=5000, visitors=1000))
        self.whale.count_now([pk, decision, 'b'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'c'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'd'], None, dict(dollars=50, visitors=10))
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts, formula='dollars/visitors')
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in bad.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        which_one = self.whale.decide(pk, decision, opts, formula='dollars/visitors', bad_idea_threshold=0, test_idea_threshold=0)
        self.assertEqual(which_one, 'a')

    def testInformedDecision(self):
        """known_data should shift the decision towards the matching option."""
        pk = 'test_informed_decision'
        decision = str(time.time())
        # A is the clear winner, except when country=UK, in which case B wins
        opts = ['a', 'b', 'c', 'd']
        self.whale.count_now([pk, decision, 'a'], None, dict(dollars=50000, visitors=10000))
        self.whale.count_now([pk, decision, 'b'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'b'], {'country': 'uk'}, dict(dollars=10000, visitors=2000))
        self.whale.count_now([pk, decision, 'c'], None, dict(dollars=0, visitors=7500))
        self.whale.count_now([pk, decision, 'd'], None, dict(dollars=5, visitors=1))
        # Here's a visitor with no info -- 'A' should win by far.
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts, formula='dollars/visitors')
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in bad.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        # How about when we know the country is "UK"?
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts, formula='dollars/visitors', known_data={'country': 'uk'})
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in good.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        # NOTE(review): only 'a' and 'b' are pre-seeded, so a decision of
        # 'c' or 'd' would raise KeyError here rather than fail an assertion.
        chosen = {'a': 0, 'b': 0}
        for k in range(100):
            choose = self.whale.decide(pk, decision, opts, formula='dollars/visitors', known_data={'country': 'uk'}, bad_idea_threshold=0, test_idea_threshold=0)
            chosen[choose] += 1
        # Probabilistic check: see the failure message for how to read it.
        self.assertEqual(True, chosen['b'] > 70, """A decision made 100 times between weights .15 vs .85 should have around 85 votes for 'b', we got %s, which is unlikely enough to fail a test, but not definitely indicative of a problem. If this test passes again on the next run, ignore the failure.""" % chosen)

    def testTrickyDecision(self):
        """decide should pick the best language for each geo."""
        pk = 'test_tricky_decision'
        decision = str(time.time())
        opts = ['en', 'sp', 'pt']

        def count(geo, lang, dollars, visitors):
            # Record the outcome of having chosen `lang` for visitors in `geo`.
            self.whale.count_decided_now(pk, decision, lang, geo, {'dollars': dollars, 'visitors': visitors})

        def justify(geo):
            # Compute the reasons (handy when debugging), then make the
            # actual decision for this geo.
            #print
            #print 'Picking reasons for ', geo
            good, bad, test = self.whale.weighted_reasons(pk, decision, opts, 'dollars/visitors', geo)
            #print good.keys(), bad.keys(), test.keys()
            #_print_reasons(good, bad, test)
            return self.whale.decide(pk, decision, opts, 'dollars/visitors', geo, bad_idea_threshold=0, test_idea_threshold=0)
        k = 1000
        m = k * k
        # Sure, these results seem predictable to a human
        # But what will our philosopher whale friend make of it?
        count('us', 'en', 1.5 * m, 300 * k)  # $5/visitor, alright!
        count('us', 'sp', 1 * k, 10 * k)  # $.10/visitor, well that is not surprising
        count('us', 'pt', 300, 5 * k)  # $.06/visitor, :(
        count('mx', 'en', 100 * k, 100 * k)  # $1/visitor, this almost works
        count('mx', 'sp', 200 * k, 100 * k)  # $2/visitor aww yah!
        count('mx', 'pt', 200, 10 * k)  # $.02/visitor lol
        count('br', 'en', 300 * k, 100 * k)  # $3/visitor is good
        count('br', 'sp', 150 * k, 50 * k)  # $3/visitor as well
        count('br', 'pt', 500 * k, 50 * k)  # $10 JACKPOT
        self.assertEqual('en', justify('us'))
        self.assertEqual(True, justify('mx') in ['sp', 'en'])
        self.assertEqual('pt', justify('br'))

    def testWhaleCacheWrapper(self):
        """cached_plotpoints should stay stale until unmemoize is requested."""
        t = str(time.time())
        count = lambda: self.whale.count_now('test_cached', t)
        cached_sum = lambda clear=False: sum(self.whale.cached_plotpoints('test_cached', t, period='fivemin', unmemoize=clear)[t]['hits'].values())
        # Set hits to 1
        count()
        self.assertEqual(cached_sum(), 1)
        # Should stay 1 for a while
        for i in range(3):
            count()
            self.assertEqual(cached_sum(), 1)
        # With the cache bypassed, all four counts should be visible.
        self.assertEqual(cached_sum(clear=True), 4)
def count_now():
    """Count one hit at the current UTC time and return 'OK'.

    Everything except the timestamp is taken from default_params() and
    passed to Whale.count_now as keyword arguments.
    """
    from datetime import datetime

    counter = Whale()
    timestamp = datetime.utcnow()
    params = default_params()
    counter.count_now(at=timestamp, **params)
    return 'OK'
def count_now():
    """Count one hit at datetime.now() and return 'OK'.

    Everything except the timestamp is taken from default_params() and
    passed to Whale.count_now as keyword arguments.
    """
    counter = Whale()
    timestamp = datetime.now()
    params = default_params()
    counter.count_now(at=timestamp, **params)
    return 'OK'
class TestHailWhale(unittest.TestCase):
    """Tests for Whale counting, plotpoints, ranking and decision helpers.

    setUp builds a fresh Hail and Whale instance for each test.  Most tests
    derive a unique key from time.time() so that counts from other tests or
    earlier runs do not affect their assertions.
    """

    def setUp(self):
        # Imported lazily so that importing this module does not itself
        # require hail/whale to be importable.
        from hail import Hail
        from whale import Whale
        self.hail = Hail()
        self.whale = Whale()

    def testGetSubdimensions(self):
        """get_subdimensions should report the keys that were counted."""
        t = 'subs_%s' % str(time.time())
        self.whale.count_now(t, {'a': 1, 'b': 2})
        subs = self.whale.get_subdimensions(t)
        assert('a' in subs)
        assert('b' in subs)

    def testGetAllSubdimensions(self):
        """all_subdimensions should report keys and [key, value] paths."""
        t = 'all_subs_%s' % str(time.time())
        self.whale.count_now(t, {'a': 1, 'b': 2})
        subs = self.whale.all_subdimensions(t)
        assert('a' in subs)
        # Values are expected back as strings ('1', not 1).
        assert(['a', '1'] in subs)
        assert('b' in subs)
        assert(['b', '2'] in subs)

    def testPlotpoints(self):
        """Five identical counts should sum in the latest plotpoint."""
        t = str(time.time())
        for i in range(5):
            self.whale.count_now('test_plotpoints', t, {'hits': 1, 'values': 5})
        plotpoints = self.whale.plotpoints('test_plotpoints', t, ['hits', 'values'], points_type=list)
        # [-1][1] reads the second element of the last point in the list.
        self.assertEqual(plotpoints[t]['hits'][-1][1], 5)
        self.assertEqual(plotpoints[t]['values'][-1][1], 25)

    def testPlotpointsDepth(self):
        """plotpoints should honour the depth and limit arguments."""
        t = str(time.time())
        self.whale.count_now('test_depth', {t: 'a'})
        self.whale.count_now('test_depth', {t: 'b'})
        self.whale.count_now('test_depth', {t: 'b'})
        self.whale.count_now('test_depth', {t: {'c': 'child'}})
        # Test 1 level deep
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list, depth=1)
        self.assertEqual(plotpoints[maybe_dumps([t, 'a'])]['hits'][-1][1], 1)
        self.assertEqual(plotpoints[maybe_dumps([t, 'b'])]['hits'][-1][1], 2)
        self.assertEqual(plotpoints[maybe_dumps([t, 'c'])]['hits'][-1][1], 1)
        self.assertEqual(False, maybe_dumps([t, 'c', 'child']) in plotpoints)
        # Test 2 levels deep
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list, depth=2)
        self.assertEqual(True, maybe_dumps([t, 'c', 'child']) in plotpoints)
        self.assertEqual(plotpoints[maybe_dumps([t, 'c', 'child'])]['hits'][-1][1], 1)
        # Test ranking and limiting i.e assign rank on the basis of value and then extract top limit candidate
        plotpoints = self.whale.plotpoints('test_depth', t, points_type=list,depth=1, limit=2)
        # NOTE(review): the assertion on the 'b' hit count is disabled below;
        # only the absence of 'a' and 'c' is checked for the limited query.
        #self.assertEqual(plotpoints[maybe_dumps([t, 'b'])]['hits'][-1][1], 2)
        self.assertEqual(True, maybe_dumps([t, 'a']) not in plotpoints)
        self.assertEqual(True, maybe_dumps([t, 'c']) not in plotpoints)

    def testRatioPlotpoints(self):
        """A 'value/hit' metric should plot the ratio of the two metrics."""
        t = str(time.time())
        for i in range(5):
            self.whale.count_now('test_ratio', t, {'hit': 1, 'value': 5})
        plotpoints = self.whale.plotpoints('test_ratio', t, ['hit', 'value', 'value/hit'], points_type=list)
        self.assertEqual(plotpoints[t]['hit'][-1][1], 5)
        self.assertEqual(plotpoints[t]['value'][-1][1], 25)
        self.assertEqual(plotpoints[t]['value/hit'][-1][1], 5)

    def testRankSubdimensionsScalar(self):
        """rank_subdimensions_scalar should flag the high-value paths."""
        t = str(time.time())
        self.whale.count_now('test_rank', [t, 'a', 'asub1'], {'value': 1})
        self.whale.count_now('test_rank', [t, 'a', 'asub2'], {'value': 30})
        self.whale.count_now('test_rank', [t, 'b'], {'value': 80})
        self.whale.count_now('test_rank', [t, 'c'], {'value': 10})
        ranked = self.whale.rank_subdimensions_scalar('test_rank', t, 'value')
        self.assertEqual(ranked[maybe_dumps([t, 'a'])]['important'], False)
        self.assertEqual(ranked[maybe_dumps([t, 'a', 'asub1'])]['important'], False)
        self.assertEqual(ranked[maybe_dumps([t, 'a', 'asub2'])]['important'], True)
        self.assertEqual(ranked[maybe_dumps([t, 'b'])]['important'], True)
        self.assertEqual(ranked[maybe_dumps([t, 'c'])]['important'], False)

    def testRankSubdimensionsRatio(self):
        """rank_subdimensions_ratio should flag ratios far from the overall."""
        t = str(time.time())
        pk = 'test_ratio_rank'
        # OVERALL STATS: 529,994 value, 50,000 visitors, 10.6 value per visitor
        # Not important, too close to overall
        self.whale.count_now(pk, [t, 'a', 'asub1'], {'value': 54989, 'visitors': 4999})  # 11 value per visitor
        # Important, high relative ratio
        self.whale.count_now(pk, [t, 'a', 'asub2'], {'value': 375000, 'visitors': 25000})  # 15 value per visitor
        # Important, low relative ratio
        self.whale.count_now(pk, [t, 'b'], {'value': 100000, 'visitors': 20000})  # 5 value per visitor
        # Not important, not enough visitors
        self.whale.count_now(pk, [t, 'c'], {'value': 5, 'visitors': 1})  # 5 value per visitor
        # NOTE(review): this call uses the literal 'test_rank_ratio' while the
        # data above was counted under pk = 'test_ratio_rank'.  If that is a
        # typo, the `not in one_level` assertion below passes trivially.
        one_level = self.whale.rank_subdimensions_ratio('test_rank_ratio', 'value', 'visitors', t, recursive=False)
        all_levels = self.whale.rank_subdimensions_ratio(pk, 'value', 'visitors', t)
        self.assertEqual(True, maybe_dumps([t, 'a', 'asub1']) not in one_level)
        self.assertEqual(all_levels[maybe_dumps([t, 'a', 'asub1'])]['important'], False)
        self.assertEqual(all_levels[maybe_dumps([t, 'a', 'asub2'])]['important'], True)
        self.assertEqual(all_levels[maybe_dumps([t, 'b'])]['important'], True)
        self.assertEqual(all_levels[maybe_dumps([t, 'c'])]['important'], False)

    def testBasicDecision(self):
        """decide should pick the option with the clearly best ratio."""
        pk = 'test_basic_decision'
        decision = str(time.time())
        # Make a decision, any decision, from no information whatsoever
        good, bad, test = self.whale.weighted_reasons(pk, 'random', [1,2,3])
        #_print_reasons(good, bad, test)
        any_one = self.whale.decide_from_reasons(good, bad, test)
        self.assertEqual(True, any_one in [1, 2, 3])
        # OK, now how about something somewhat informed?
        # This will be easy. Slogan A makes us huge profit. Products B and C suck.
        # D looks promising but isn't yet significant
        opts = ['a', 'b', 'c', 'd']
        self.whale.count_now([pk, decision, 'a'], None, dict(dollars=5000, visitors=1000))
        self.whale.count_now([pk, decision, 'b'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'c'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'd'], None, dict(dollars=50, visitors=10))
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts, formula='dollars/visitors')
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in bad.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        which_one = self.whale.decide(pk, decision, opts, formula='dollars/visitors', bad_idea_threshold=0, test_idea_threshold=0)
        self.assertEqual(which_one, 'a')

    def testInformedDecision(self):
        """known_data should shift the decision towards the matching option."""
        pk = 'test_informed_decision'
        decision = str(time.time())
        # A is the clear winner, except when country=UK, in which case B wins
        opts = ['a', 'b', 'c', 'd']
        self.whale.count_now([pk, decision, 'a'], None, dict(dollars=50000, visitors=10000))
        self.whale.count_now([pk, decision, 'b'], None, dict(dollars=0, visitors=2000))
        self.whale.count_now([pk, decision, 'b'], {'country': 'uk'}, dict(dollars=10000, visitors=2000))
        self.whale.count_now([pk, decision, 'c'], None, dict(dollars=0, visitors=7500))
        self.whale.count_now([pk, decision, 'd'], None, dict(dollars=5, visitors=1))
        # Here's a visitor with no info -- 'A' should win by far.
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts, formula='dollars/visitors')
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in bad.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        # How about when we know the country is "UK"?
        good, bad, test = self.whale.weighted_reasons(pk, decision, opts, formula='dollars/visitors', known_data={'country': 'uk'})
        #_print_reasons(good, bad, test)
        self.assertEqual(True, 'a' in good.keys())
        self.assertEqual(True, 'b' in good.keys())
        self.assertEqual(True, 'c' in bad.keys())
        self.assertEqual(True, 'd' in test.keys())
        # NOTE(review): only 'a' and 'b' are pre-seeded, so a decision of
        # 'c' or 'd' would raise KeyError here rather than fail an assertion.
        chosen = {'a': 0, 'b': 0}
        for k in range(100):
            choose = self.whale.decide(pk, decision, opts, formula='dollars/visitors', known_data={'country': 'uk'}, bad_idea_threshold=0, test_idea_threshold=0)
            chosen[choose] += 1
        # Probabilistic check: see the failure message for how to read it.
        self.assertEqual(True, chosen['b'] > 70, """A decision made 100 times between weights .15 vs .85 should have around 85 votes for 'b', we got %s, which is unlikely enough to fail a test, but not definitely indicative of a problem. If this test passes again on the next run, ignore the failure.""" % chosen)

    def testTrickyDecision(self):
        """decide should pick the best language for each geo."""
        pk = 'test_tricky_decision'
        decision = str(time.time())
        opts = ['en', 'sp', 'pt']

        def count(geo, lang, dollars, visitors):
            # Record the outcome of having chosen `lang` for visitors in `geo`.
            self.whale.count_decided_now(pk, decision, lang, geo, {'dollars': dollars, 'visitors': visitors})

        def justify(geo):
            # Compute the reasons (handy when debugging), then make the
            # actual decision for this geo.
            #print
            #print 'Picking reasons for ', geo
            good, bad, test = self.whale.weighted_reasons(pk, decision, opts, 'dollars/visitors', geo)
            #print good.keys(), bad.keys(), test.keys()
            #_print_reasons(good, bad, test)
            return self.whale.decide(pk, decision, opts, 'dollars/visitors', geo, bad_idea_threshold=0, test_idea_threshold=0)
        k = 1000
        m = k * k
        # Sure, these results seem predictable to a human
        # But what will our philosopher whale friend make of it?
        count('us', 'en', 1.5 * m, 300 * k)  # $5/visitor, alright!
        count('us', 'sp', 1 * k, 10 * k)  # $.10/visitor, well that is not surprising
        count('us', 'pt', 300, 5 * k)  # $.06/visitor, :(
        count('mx', 'en', 100 * k, 100 * k)  # $1/visitor, this almost works
        count('mx', 'sp', 200 * k, 100 * k)  # $2/visitor aww yah!
        count('mx', 'pt', 200, 10 * k)  # $.02/visitor lol
        count('br', 'en', 300 * k, 100 * k)  # $3/visitor is good
        count('br', 'sp', 150 * k, 50 * k)  # $3/visitor as well
        count('br', 'pt', 500 * k, 50 * k)  # $10 JACKPOT
        self.assertEqual('en', justify('us'))
        self.assertEqual(True, justify('mx') in ['sp', 'en'])
        self.assertEqual('pt', justify('br'))

    def testWhaleCacheWrapper(self):
        """cached_plotpoints should stay stale until unmemoize is requested."""
        t = str(time.time())
        count = lambda: self.whale.count_now('test_cached', t)
        cached_sum = lambda clear=False: sum(self.whale.cached_plotpoints('test_cached', t, period='fivemin', unmemoize=clear)[t]['hits'].values())
        # Set hits to 1
        count()
        self.assertEqual(cached_sum(), 1)
        # Should stay 1 for a while
        for i in range(3):
            count()
            self.assertEqual(cached_sum(), 1)
        # With the cache bypassed, all four counts should be visible.
        self.assertEqual(cached_sum(clear=True), 4)