def test_reftime(self):
    # An explicitly passed reftime must be exposed unchanged via
    # `.reftime`.
    explicit_ref = time.time()
    pinned = TimeFilter(rules={"days": 1}, reftime=explicit_ref)
    assert pinned.reftime == explicit_ref

    # Let the clock advance; a filter constructed without `reftime` is
    # then expected to report a strictly later reference time.
    time.sleep(SHORTTIME)
    defaulted = TimeFilter(rules={"days": 1})
    assert defaulted.reftime > explicit_ref
def test_multicat_rules_yield_union_of_singlecat_rules(self):
    # Property check over random rule sets: filtering with all categories
    # at once must accept exactly the union of what each category accepts
    # when it is the only non-zero rule.
    rounds = 10
    max_count = 16
    categories = ("years", "months", "weeks", "days", "hours", "recent")

    for _round in range(rounds):
        # One random count in [1, max_count] per category.
        rules = {cat: randint(1, max_count) for cat in categories}

        single_results = set()
        # `.items()` instead of the Python-2-only `.iteritems()`: it yields
        # the same pairs and keeps the test runnable on Python 3.
        for category, timecount in rules.items():
            # Same key set as `rules`, with every other category zeroed.
            single_rule = dict.fromkeys(rules, 0)
            single_rule[category] = timecount
            accepted, _rejected = TimeFilter(
                single_rule, self.now).filter(self.fses)
            assert len(accepted) == timecount
            single_results.update(accepted)

        multi_result, _rejected = TimeFilter(
            rules, self.now).filter(self.fses)
        # The length check guards against duplicates in `multi_result`
        # that the set comparison alone would hide.
        assert len(multi_result) == len(single_results)
        assert set(multi_result) == single_results
def test_periodic_filtering_is_stable(self):
    start = datetime(2016, 1, 1, 23, 0)

    # Three items spaced 57 minutes apart, old enough not to fall into the
    # 'recent' category. The times are chosen so that each item initially
    # falls into a separate bucket when the filter runs at `start`.
    newest = start - timedelta(hours=3, minutes=20)
    items = [
        FilterItem(moddate=newest - k * timedelta(minutes=57))
        for k in range(3)
    ]

    # Within the test period everything should be retained.
    rules = {"hours": 50}

    # Precondition: filtered at `start`, each item falls into a separate
    # bucket. In other words, `items` is what a 50-hours rule run at
    # `start` could have left behind, even though the individual items
    # are less than 60 minutes apart.
    kept, _ = TimeFilter(rules, start).filter(items)
    assert len(kept) == len(items)

    # Stability: the oldest item is far from being dropped (way newer than
    # 50 hours), so a run at every minute of the following hour should
    # leave exactly what the first run left.
    for minute in range(60):
        current = start + timedelta(minutes=minute)
        kept, _ = TimeFilter(rules, current).filter(items)
        assert len(kept) == len(items)
def test_requesting_less_than_available_retrieves_most_recent(self):
    # For every category fixture in `self.fses10`, requesting 5 items must
    # accept 5, reject 5, and every accepted item must be newer than every
    # rejected one.
    #
    # `.items()` replaces the Python-2-only `.iteritems()`, and generator
    # expressions replace the per-iteration `map`/`lambda` helper.
    for category, fses in self.fses10.items():
        accepted, rejected = TimeFilter(
            {category: 5}, self.reftime).filter(fses)
        assert len(accepted) == 5
        assert len(rejected) == 5
        oldest_accepted = min(fse.moddate for fse in accepted)
        newest_rejected = max(fse.moddate for fse in rejected)
        assert oldest_accepted > newest_rejected
def test_requesting_one_retrieves_most_recent(self):
    # For every category fixture in `self.fses10`, requesting a single
    # item must accept exactly the newest one and reject the other nine.
    #
    # `.items()` replaces the Python-2-only `.iteritems()`, and a generator
    # expression replaces the per-iteration `map`/`lambda` helper.
    for category, fses in self.fses10.items():
        accepted, rejected = TimeFilter(
            {category: 1}, self.reftime).filter(fses)
        assert len(accepted) == 1
        assert accepted[0].moddate == max(fse.moddate for fse in fses)
        assert len(rejected) == 9
        assert accepted[0] not in rejected
def test_hours_one_accepted_one_rejected(self):
    # Two items of age 1.5 h and 1.6 h compete for the single slot granted
    # by {"hours": 1}; the younger one must win.
    hour = 60 * 60
    timefilter = TimeFilter(rules={"hours": 1})
    younger = FilterItem(modtime=time.time() - hour * 1.5)
    older = FilterItem(modtime=time.time() - hour * 1.6)

    accepted, rejected = timefilter.filter(objs=[younger, older])
    # Materialize the rejected items so they can be indexed and counted.
    rejected = list(rejected)

    assert accepted[0] == younger
    assert len(accepted) == 1
    assert rejected[0] == older
    assert len(rejected) == 1
def test_minimal_functionality_and_types(self):
    # Smoke test: one 1.5-hour-old item, filtered with {"hours": 1}
    # against self.reftime, must be accepted, and both results must be
    # iterable.

    # The `collections.Iterable` alias was removed in Python 3.10; the ABC
    # lives in `collections.abc`. Fall back to the old location on
    # Python 2, where `collections.abc` does not exist.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    # Create filter with reftime self.reftime.
    f = TimeFilter(rules={"hours": 1}, reftime=self.reftime)
    # Create mock that is 1.5 hours old. It must end up in the accepted
    # list: it counts as 1 hour old, and one item should be kept from the
    # 1-hour-old category.
    fse = FilterItem(moddate=self.reftime - timedelta(hours=1.5))
    a, r = f.filter([fse])
    # http://stackoverflow.com/a/1952655/145400
    assert isinstance(a, Iterable)
    assert isinstance(r, Iterable)
    assert a[0] == fse
    assert len(r) == 0
def test_minimal_functionality_and_types(self):
    # Smoke test: one 1.5-hour-old item, filtered with {"hours": 1} and the
    # default reference time, must be accepted, and both results must be
    # iterable.

    # The `collections.Iterable` alias was removed in Python 3.10; the ABC
    # lives in `collections.abc`. Fall back to the old location on
    # Python 2, where `collections.abc` does not exist.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    # Create filter with reftime NOW (if not specified otherwise)
    # and simple rules.
    f = TimeFilter(rules={"hours": 1})
    # Create mock that is 1.5 hours old. It must end up in the accepted
    # list: it counts as 1 hour old, and one item should be kept from the
    # 1-hour-old category.
    fse = FilterItem(modtime=time.time() - 60 * 60 * 1.5)
    a, r = f.filter(objs=[fse])
    # http://stackoverflow.com/a/1952655/145400
    assert isinstance(a, Iterable)
    assert isinstance(r, Iterable)
    assert a[0] == fse
    # Rejected list `r` is expected to be an iterator, so convert to
    # list before evaluating length.
    assert len(list(r)) == 0
def test_request_less_than_available_distant(self):
    # Only distant items exist (at the beginning of the rule period);
    # a 10-years rule must still accept both and reject nothing.
    present = [self.yearsago[age] for age in (10, 9)]
    accepted, rejected = TimeFilter(
        {"years": 10}, self.reftime).filter(present)
    assert set(accepted) == set(present)
    assert len(rejected) == 0
def test_10_days_2_weeks(self):
    # Further defines category 'overlap' behavior for
    # {"days": 10, "weeks": 2}: week 0 is included in the 10 days, week 1
    # is only partially included, and week 2 (14 days and older) is not
    # included in the 10 days.
    #
    # With 15 FSEs aged 1 to 15 days, the first 10 must be accepted by the
    # 10-days rule. The 2-weeks rule selects the 7th and the 14th FSE; the
    # 7th is already among the first 10, so FSEs 1-10 and 14 are accepted
    # (11 in total) and FSEs 11, 12, 13 and 15 are rejected.
    now = datetime(2016, 1, 3)
    fses = [
        FilterItem(moddate=now - timedelta(days=age))
        for age in range(1, 16)
    ]

    accepted, rejected = TimeFilter(
        {"days": 10, "weeks": 2}, now).filter(fses)
    rejected = list(rejected)

    assert len(accepted) == 11
    # The 10 youngest FSEs must be accepted. Their order within the
    # accepted list is predictable with the current implementation, but
    # the specification does not guarantee it, so it is not tested.
    for young in fses[:10]:
        assert young in accepted
    # The 14th FSE is accepted as well.
    assert fses[13] in accepted

    # FSEs 11, 12, 13 and 15 are the rejected ones.
    assert len(rejected) == 4
    for index in (10, 11, 12, 14):
        assert fses[index] in rejected
def main():
    # Profile a single TimeFilter.filter() run over a large, shuffled data
    # set and print the top 20 profile entries sorted by 'time'.
    t0 = time.time()
    now = datetime.now()
    fses = list(fsegen(ref=now, N_per_cat=5 * 10**4, max_timecount=9))
    shuffle(fses)
    n = 8
    rules = {
        "years": n,
        "months": n,
        "weeks": n,
        "days": n,
        "hours": n,
        "recent": n,
    }
    sduration = time.time() - t0
    log.info("Setup duration: %.3f s", sduration)
    log.info("Profiling...")

    pr = cProfile.Profile()
    pr.enable()
    # Only the call is of interest; the results are intentionally unused.
    # The tuple unpacking is kept so the profiled work stays the same.
    _accepted, _rejected = TimeFilter(rules, now).filter(fses)
    pr.disable()

    s = StringIO.StringIO()
    ps = pstats.Stats(pr, stream=s).sort_stats('time')
    ps.print_stats(20)
    # The parenthesized single-argument form prints identically under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print(s.getvalue())
def test_request_less_than_available_close(self):
    # Only close items exist (at the end of the rule period); a 10-years
    # rule must still accept both and reject nothing.
    present = [self.yearsago[age] for age in (1, 2)]
    accepted, rejected = TimeFilter(
        {"years": 10}, self.reftime).filter(present)
    assert set(accepted) == set(present)
    assert len(rejected) == 0
def test_10_days_2_weeks(self):
    # Further defines category 'overlap' behavior for
    # {"days": 10, "weeks": 2}: week 0 is included in the 10 days, week 1
    # is only partially included, and week 2 (14 days and older) is not
    # included in the 10 days.
    #
    # With 15 FSEs aged 1 to 15 days:
    #  * FSEs 1-10 must be accepted by the 10-days rule.
    #  * FSEs 11, 12, 13 (11, 12, 13 days old) are categorized as 1 week
    #    old (age A with 7 days <= A < 14 days). The 2-weeks rule keeps
    #    the most recent 1-week-old item not claimed by a younger
    #    category, which is the 11th FSE.
    #  * The 2-weeks rule also keeps the most recent 2-week-old item
    #    (never claimed by a younger category), which is the 14th FSE.
    # In total FSEs 1-11 and 14 are accepted (12 items); the remaining 3
    # (FSEs 12, 13, 15) are rejected.
    day = 60 * 60 * 24
    now = time.time()
    # Each item is one extra second older than a whole number of days.
    fses = [
        FilterItem(modtime=now - (day * age + 1))
        for age in range(1, 16)
    ]

    accepted, rejected = TimeFilter(
        {"days": 10, "weeks": 2}, now).filter(fses)
    rejected = list(rejected)

    assert len(accepted) == 12
    # The first 11 FSEs must be accepted. Their order within the accepted
    # list is predictable with the current implementation, but the
    # specification does not guarantee it, so it is not tested.
    for young in fses[:11]:
        assert young in accepted
    # The 14th FSE is accepted as well.
    assert fses[13] in accepted

    # FSEs 12, 13 and 15 are rejected.
    assert len(rejected) == 3
    for index in (11, 12, 14):
        assert fses[index] in rejected
def test_1_recent_1_years(self):
    # One 'recent' plus one 'years' slot must yield exactly two accepted
    # items from the shared fixture.
    accepted, _ = TimeFilter(
        {"years": 1, "recent": 1}, self.now).filter(self.fses)
    assert len(accepted) == 2
def test_10_days_order(self):
    # With 15 FSEs aged 1 to 15 days, the 10-days rule must accept the 10
    # youngest and reject the 5 oldest.
    #
    # The focus is the internal ordering used when deciding to accept or
    # reject: the newest items are expected to be kept, the oldest to be
    # dropped. To test robustness against input order, the input list is
    # reshuffled before each of a number of repeated filter runs.
    #
    # Only membership is checked. No particular order inside the accepted
    # list is asserted, because no guarantee is made about it.
    day = 60 * 60 * 24
    now = time.time()
    fses = [
        FilterItem(modtime=now - (day * age + 1))
        for age in range(1, 16)
    ]
    youngest, oldest = fses[:10], fses[10:]
    rules = {"days": 10}

    # Shuffle a copy so `fses` keeps its age order for the checks below.
    shuffled = list(fses)
    for _ in range(100):
        shuffle(shuffled)
        accepted, rejected = TimeFilter(rules, now).filter(shuffled)
        rejected = list(rejected)
        assert len(accepted) == 10
        assert len(rejected) == 5
        for fse in youngest:
            assert fse in accepted
        for fse in oldest:
            assert fse in rejected
def test_requesting_newer_than_available_retrieves_none(self):
    # The "recent" category is excluded: it will always accept the newest
    # N items.
    categories = ("hours", "days", "weeks", "months", "years")

    # Iterating the category tuple directly avoids both the intermediate
    # dict and the Python-2-only `.iteritems()` call.
    for category in categories:
        # Keep only items 6-10 of the category (drop the 5 newest), in
        # newest-first order to increase the chance of discovering order
        # dependencies in the filter.
        newest_first = sorted(
            self.fses10[category], key=lambda x: x.moddate, reverse=True)
        fses = newest_first[5:]

        # Ask for the first 5 items of the category; none are present.
        a, r = TimeFilter({category: 5}, self.reftime).filter(fses)
        assert len(a) == 0
        assert set(r) == set(fses)
def test_1_recent_1_years(self):
    # One 'recent' plus one 'years' slot: two items are accepted and all
    # remaining fixture items (self.N per category, 6 categories) are
    # rejected.
    accepted, rejected = TimeFilter(
        {"years": 1, "recent": 1}, self.now).filter(self.fses9)
    assert len(accepted) == 2
    # `rejected` is materialized before counting.
    assert len(list(rejected)) == self.N*6 - 2
def test_create_recent_allow_old(self):
    # 15 items only a few seconds old (2 to 16 s) must all be rejected
    # when only a 'years' slot is requested.
    now = time.time()
    fses = [FilterItem(modtime=now - seconds) for seconds in range(2, 17)]

    accepted, rejected = TimeFilter({"years": 1}, now).filter(fses)
    rejected = list(rejected)

    assert len(accepted) == 0
    assert len(rejected) == 15
def test_create_recent_dont_request_recent(self):
    # Create a few young (recent) items, 2 to 16 seconds old, then
    # explicitly request zero recent ones: everything must be rejected.
    now = time.time()
    fses = [FilterItem(modtime=now - seconds) for seconds in range(2, 17)]

    accepted, rejected = TimeFilter(
        {"years": 1, "recent": 0}, now).filter(fses)
    rejected = list(rejected)

    assert len(accepted) == 0
    assert len(rejected) == 15
def test_overlapping_rules_dont_accept_additional_items(self):
    # 28 items, 1 to 28 hours older than the reference time.
    ref_time = datetime(2016, 1, 1)
    items = [
        FilterItem(moddate=ref_time - timedelta(hours=age))
        for age in range(1, 29)
    ]
    first_24 = set(items[:24])

    # First rule set: 24 hours alone, which overlaps one day; the first 24
    # items are expected to be accepted.
    # Second rule set: the same combined with an overlapping 1-day rule.
    # The result shouldn't change, because the most recent 1-day-old item
    # is the same as the most recent 24-hour-old item.
    for rules in ({"hours": 24}, {"hours": 24, "days": 1}):
        accepted, _ = TimeFilter(rules, ref_time).filter(items)
        assert len(accepted) == 24
        assert set(accepted) == first_24
def test_requesting_newer_categories_than_available_retrieves_none(self):
    # Categories ordered from oldest to newest. For each category but the
    # first, the filter is given only the items of all categories listed
    # before it, and must reject every one of them.
    categories = ("years", "months", "weeks", "days", "hours", "recent")
    per_category = [self.fses4[cat] for cat in categories]

    for n, category in enumerate(categories[1:], 1):
        # Flatten the fixture lists of the first n categories.
        older_fses = [
            fse for group in per_category[:n] for fse in group
        ]
        accepted, rejected = TimeFilter(
            {category: 4}, self.reftime).filter(older_fses)
        assert len(accepted) == 0
        assert set(rejected) == set(older_fses)
def test_create_old_allow_recent(self):
    # Create a few old items, between 1 and 15 years of age, then request
    # only one recent item. This once uncovered a mean bug where items
    # that should have been rejected ended up in the recent category.
    year = 60 * 60 * 24 * 365
    now = time.time()
    fses = [
        FilterItem(modtime=now - (year * age + 1))
        for age in range(1, 16)
    ]

    accepted, rejected = TimeFilter({"recent": 1}, now).filter(fses)
    rejected = list(rejected)

    assert len(accepted) == 0
    assert len(rejected) == 15
def test_2_years_2_allowed(self):
    # Produce one item slightly older than 2 years and one slightly older
    # than 1 year, then apply a {"years": 2} rule: both must be kept.
    year = 60 * 60 * 24 * 365
    two_years_ago = time.time() - (year * 2 + 1)
    one_year_ago = time.time() - (year * 1 + 1)
    older = FilterItem(modtime=two_years_ago)
    younger = FilterItem(modtime=one_year_ago)

    accepted, rejected = TimeFilter(
        rules={"years": 2}).filter(objs=[older, younger])
    rejected = list(rejected)

    # All should be accepted.
    assert len(accepted) == 2
    assert len(rejected) == 0
def test_singlecat_rules(self):
    # Run a single-category filter for each category in turn: exactly n
    # items must be accepted and all remaining fixture items rejected.
    n = 8
    for category in ("years", "months", "weeks", "days", "hours", "recent"):
        accepted, rejected = TimeFilter(
            {category: n}, self.now).filter(self.fses)
        assert len(accepted) == n
        assert len(list(rejected)) == len(self.fses) - n
def test_realistic_scheme(self):
    # A retention scheme touching every category.
    scheme = dict(
        years=4, months=11, weeks=6, days=10, hours=48, recent=5)
    accepted, _ = TimeFilter(scheme, self.now).filter(self.fses)
    # 4+11+6+10+48+5 = 84 requested in total. One reducing overlap between
    # days and weeks plus two more between hours and days leave 81
    # accepted items.
    assert len(accepted) == 81
def test_two_recent(self):
    # Two recent items compete for the single {"recent": 1} slot; the
    # younger one must be accepted and the older one rejected.
    older = FilterItem(modtime=time.time())
    time.sleep(SHORTTIME)
    # `younger` is stamped a little later than `older`.
    younger = FilterItem(modtime=time.time())
    # Sleep again so the filter's reference time, taken at construction
    # below, is newer than `younger.modtime`.
    time.sleep(SHORTTIME)

    accepted, rejected = TimeFilter(
        rules={"recent": 1}).filter(objs=[older, younger])
    rejected = list(rejected)

    assert accepted[0] == younger
    assert len(accepted) == 1
    assert rejected[0] == older
    assert len(rejected) == 1
def test_fixed_rules_week_month_overlap(self):
    # Request 8 items from every category and pin down how overlapping
    # categories share items. See test_random_times_mass_singlecat_rules
    # for the likelihood discussion.
    #
    # Specification encoded by this test. 'Younger' categories can steal
    # items from older ones. Weeks/months is the only category pair with a
    # 'reducing overlap' here; all other pairs either do not overlap at all
    # or overlap without reduction.
    #
    # recent / 8 hours:
    #   The 'recent' category cannot steal anything.
    #   -> 8 items expected for the hours category.
    #   8 hours are 0 days, so hours do not overlap with days and cannot
    #   steal from the days category.
    #   -> 8 items expected for the days category.
    # 8 days:
    #   Days 7 and 8 could be categorized as 1 week, but are categorized
    #   within the days rule (7 and 8 days are requested there).
    #   Non-reducing overlap: items 9 to 13 days old are categorized as
    #   1 week, which is requested, and 9-day-old items actually exist in
    #   the data set. No category younger than weeks steals them, so they
    #   end up in the 1-week list.
    #   -> 8 items expected from the weeks category.
    # 8 weeks:
    #   All 1-month-olds are stolen by the 8-weeks rule. Items aged
    #   8 weeks (8*7 = 56 days) could be categorized as 1 month, but are
    #   categorized within the weeks rule (8 weeks is requested there).
    #   Reducing overlap: the 9-week-old items in the data set are not
    #   requested by the weeks rule; they are 9*7 = 63 days old, i.e.
    #   2 months (2*30 = 60 days). No category younger than months
    #   affects them, so they end up in the 2-months list.
    #   -> There is no 1-month list at all: items of that age are
    #      *entirely* consumed by the weeks rule. The oldest item still
    #      classified as 8 weeks old is already 2 months old (just under
    #      9 weeks is just under 63 days, which exceeds 60 days).
    #   -> The months rule returns only 7 items (not 8 like the others).
    # 8 months:
    #   No overlap with years (every requested month is 0 years).
    categories = ("years", "months", "weeks", "days", "hours", "recent")
    rules = dict.fromkeys(categories, 8)
    # 8 items for every category except months, which yields 7.
    expected_accepted = 6*8 - 1

    accepted, rejected = TimeFilter(rules, self.now).filter(self.fses9)

    assert len(accepted) == expected_accepted
    assert len(list(rejected)) == self.N*6 - expected_accepted
def test_realistic_scheme(self):
    # A retention scheme touching every category.
    scheme = dict(
        years=4, months=12, weeks=6, days=10, hours=48, recent=5)
    accepted, rejected = TimeFilter(scheme, self.now).filter(self.fses62)
    # 4+12+6+10+48+5 = 85 requested in total. One reducing overlap between
    # hours and days leaves 84 accepted items.
    expected_accepted = 84
    assert len(accepted) == expected_accepted
    # Everything else in the fixture (self.N items per category, 6
    # categories) is rejected.
    assert len(list(rejected)) == self.N*6 - expected_accepted
def test_2_recent_10_allowed(self):
    # Request to keep more recent items (10) than are available (2).
    first = FilterItem(modtime=time.time())
    time.sleep(SHORTTIME)
    second = FilterItem(modtime=time.time())
    time.sleep(SHORTTIME)

    accepted, rejected = TimeFilter(
        rules={"recent": 10}).filter(objs=[first, second])
    rejected = list(rejected)

    # Both must be accepted. Within the `recent` category the items must
    # be sorted by modtime, with the newest element last.
    assert accepted[0] == first
    assert accepted[1] == second
    assert len(accepted) == 2
    assert len(rejected) == 0
def test_10_days_overlap(self):
    # Category 'overlap' must be possible (10 days > 1 week). With 15 FSEs
    # aged 1 to 15 days, the 10-days rule must accept the first 10 and
    # reject the last 5.
    day = 60 * 60 * 24
    now = time.time()
    fses = [
        FilterItem(modtime=now - (day * age + 1))
        for age in range(1, 16)
    ]

    accepted, rejected = TimeFilter({"days": 10}, now).filter(fses)
    rejected = list(rejected)

    assert len(accepted) == 10
    assert len(rejected) == 5
    for young in fses[:10]:
        assert young in accepted
    for old in fses[10:]:
        assert old in rejected
def test_invalid_object(self):
    # An element without the expected attribute must make filter() raise,
    # e.g. AttributeError: 'NoneType' object has no attribute 'modtime'.
    timefilter = TimeFilter(rules={"days": 1})
    invalid_items = [None]
    with raises(AttributeError):
        timefilter.filter(invalid_items)
def test_not_iterable(self):
    # Passing a non-iterable must make filter() raise,
    # e.g. TypeError: 'NoneType' object is not iterable.
    timefilter = TimeFilter(rules={"days": 1})
    not_iterable = None
    with raises(TypeError):
        timefilter.filter(not_iterable)