예제 #1
0
 def test_reftime(self):
     """Check ``TimeFilter.reftime`` with and without an explicit value."""
     stamp = time.time()
     # An explicitly passed reference time must be exposed unchanged.
     explicit = TimeFilter(rules={"days": 1}, reftime=stamp)
     assert explicit.reftime == stamp
     # A filter built a little later without `reftime` is expected to carry
     # a reference time newer than the earlier timestamp.
     time.sleep(SHORTTIME)
     implicit = TimeFilter(rules={"days": 1})
     assert implicit.reftime > stamp
예제 #2
0
    def test_multicat_rules_yield_union_of_singlecat_rules(self):
        """Combined rules must accept the union of per-category results.

        For several randomly drawn rule sets, every category is first
        filtered on its own (all other counts set to 0). Filtering with the
        complete rule set must then accept exactly the union of those
        single-category results.
        """
        rounds = 10
        max_count = 16
        categories = ("years", "months", "weeks", "days", "hours", "recent")

        for _ in range(rounds):
            # One random count between 1 and max_count per category.
            rules = {cat: randint(1, max_count) for cat in categories}

            union = set()
            # `items()` instead of the Python-2-only `iteritems()`: it
            # iterates the same pairs and also exists on Python 3.
            for category, count in rules.items():
                single_rule = dict.fromkeys(rules, 0)
                single_rule[category] = count
                accepted, _ = TimeFilter(
                    single_rule, self.now).filter(self.fses)
                assert len(accepted) == count
                union.update(accepted)

            accepted_all, _ = TimeFilter(rules, self.now).filter(self.fses)
            assert len(accepted_all) == len(union)
            assert set(accepted_all) == union
예제 #3
0
    def test_periodic_filtering_is_stable(self):
        """Filtering repeatedly over one hour must keep all retained items."""
        start = datetime(2016, 1, 1, 23, 0)

        # Three items spaced 57 minutes apart, the newest one 3h20m before
        # `start`. They are meant to be old enough not to fall into the
        # 'recent' category, and timed so that each one initially lands in
        # its own bucket when the filter runs at `start`.
        newest = start - timedelta(hours=3, minutes=20)
        items = [
            FilterItem(moddate=newest - timedelta(minutes=57 * step))
            for step in range(3)
        ]

        # Within the test period everything should be retained.
        rules = {"hours": 50}

        # Precondition: filtered at `start`, each item falls into a separate
        # bucket. In other words, `items` is what the 50-hours rule could
        # have left behind at `start`, even though the individual items are
        # less than 60 minutes apart.
        accepted, _ = TimeFilter(rules, start).filter(items)
        assert len(accepted) == len(items)

        # Stability: the oldest item is far from being dropped (way newer
        # than 50 hours), so every run during the following hour should
        # leave exactly what the first run left.
        for minute in range(60):
            current = start + timedelta(minutes=minute)
            accepted, _ = TimeFilter(rules, current).filter(items)
            assert len(accepted) == len(items)
예제 #4
0
 def test_requesting_less_than_available_retrieves_most_recent(self):
     """Requesting 5 items per category must keep the most recent ones.

     For every category in ``self.fses10`` the test expects 5 accepted and
     5 rejected items, with each accepted item newer than every rejected
     one.
     """
     # `items()` instead of the Python-2-only `iteritems()`; it yields the
     # same pairs and is available on Python 3 as well.
     for category, fses in self.fses10.items():
         accepted, rejected = TimeFilter(
             {category: 5}, self.reftime).filter(fses)
         assert len(accepted) == 5
         assert len(rejected) == 5
         oldest_accepted = min(fse.moddate for fse in accepted)
         newest_rejected = max(fse.moddate for fse in rejected)
         assert oldest_accepted > newest_rejected
예제 #5
0
 def test_requesting_one_retrieves_most_recent(self):
     """Requesting one item per category must keep only the newest one.

     For every category in ``self.fses10`` the single accepted item must
     carry the maximum `moddate` of the input, and the remaining 9 items
     must be rejected.
     """
     # `items()` instead of the Python-2-only `iteritems()`; it yields the
     # same pairs and is available on Python 3 as well.
     for category, fses in self.fses10.items():
         accepted, rejected = TimeFilter(
             {category: 1}, self.reftime).filter(fses)
         assert len(accepted) == 1
         assert accepted[0].moddate == max(fse.moddate for fse in fses)
         assert len(rejected) == 9
         assert accepted[0] not in rejected
예제 #6
0
 def test_hours_one_accepted_one_rejected(self):
     """With ``{"hours": 1}``, keep the younger of two items aged 1.5h/1.6h."""
     hour = 60 * 60
     time_filter = TimeFilter(rules={"hours": 1})
     younger = FilterItem(modtime=time.time() - hour * 1.5)
     older = FilterItem(modtime=time.time() - hour * 1.6)
     accepted, rejected = time_filter.filter(objs=[younger, older])
     # Turn the rejected items into a list so they can be indexed/counted.
     rejected = list(rejected)
     # The younger one must be accepted.
     assert accepted[0] == younger
     assert len(accepted) == 1
     assert rejected[0] == older
     assert len(rejected) == 1
예제 #7
0
 def test_minimal_functionality_and_types(self):
     """Filter a single 1.5-hour-old item; check result types and content."""
     # `collections.Iterable` was removed in Python 3.10. Prefer the
     # `collections.abc` location and fall back where it does not exist
     # (Python 2).
     try:
         from collections.abc import Iterable
     except ImportError:
         from collections import Iterable

     # Create filter with reftime self.reftime
     f = TimeFilter(rules={"hours": 1}, reftime=self.reftime)
     # Create mock that is 1.5 hours old. Must end up in accepted list,
     # since it's 1 hour old and one item should be kept from the 1-hour-
     # old-category
     fse = FilterItem(moddate=self.reftime - timedelta(hours=1.5))
     a, r = f.filter([fse])
     # http://stackoverflow.com/a/1952655/145400
     assert isinstance(a, Iterable)
     assert isinstance(r, Iterable)
     assert a[0] == fse
     assert len(r) == 0
예제 #8
0
 def test_minimal_functionality_and_types(self):
     """Filter a single 1.5-hour-old item; check result types and content."""
     # `collections.Iterable` was removed in Python 3.10. Prefer the
     # `collections.abc` location and fall back where it does not exist
     # (Python 2).
     try:
         from collections.abc import Iterable
     except ImportError:
         from collections import Iterable

     # Create filter with reftime NOW (if not specified otherwise)
     # and simple rules.
     f = TimeFilter(rules={"hours": 1})
     # Create mock that is 1.5 hours old. Must end up in accepted list,
     # since it's 1 hour old and one item should be kept from the 1-hour-
     # old-category
     fse = FilterItem(modtime=time.time() - 60 * 60 * 1.5)
     a, r = f.filter(objs=[fse])
     # http://stackoverflow.com/a/1952655/145400
     assert isinstance(a, Iterable)
     assert isinstance(r, Iterable)
     assert a[0] == fse
     # Rejected list `r` is expected to be an iterator, so convert to
     # list before evaluating length.
     assert len(list(r)) == 0
예제 #9
0
 def test_request_less_than_available_distant(self):
     """A 10-years rule must accept both items when only two are given."""
     # Only distant items are present (at the beginning of the rule
     # period).
     distant = [self.yearsago[10], self.yearsago[9]]
     accepted, rejected = TimeFilter(
         {"years": 10}, self.reftime).filter(distant)
     assert set(accepted) == set(distant)
     assert len(rejected) == 0
예제 #10
0
 def test_10_days_2_weeks(self):
     """Pin the overlap behaviour of ``{"days": 10, "weeks": 2}``.

     Week 0 is included in the 10 days, week 1 is only partially included,
     and week 2 (14 days and older) is not included at all. With 15 FSEs
     aged 1 to 15 days, the first 10 must be accepted by the 10-day rule.
     By the 2-weeks rule the 7th and 14th FSEs must be accepted; the 7th is
     already among the first 10, so FSEs 1-10 and 14 are the accepted ones.
     """
     now = datetime(2016, 1, 3)
     fses = [
         FilterItem(moddate=now - timedelta(days=age))
         for age in range(1, 16)
     ]
     rules = {"days": 10, "weeks": 2}
     accepted, rejected = TimeFilter(rules, now).filter(fses)
     rejected = list(rejected)
     assert len(accepted) == 11
     # The first 10 FSEs must be accepted. Only membership is checked:
     # the order inside the accepted list is predictable with the current
     # implementation but not guaranteed by the specification.
     for fse in fses[:10]:
         assert fse in accepted
     # The 14th FSE must be accepted as well.
     assert fses[13] in accepted
     # FSEs 11, 12, 13 and 15 must be rejected.
     assert len(rejected) == 4
     for index in (10, 11, 12, 14):
         assert fses[index] in rejected
예제 #11
0
def main():
    """Profile one ``TimeFilter.filter()`` run and print the statistics.

    Items are produced by ``fsegen`` and shuffled. A single filter run
    requesting 8 items in each of the six categories is then profiled with
    ``cProfile``, and the 20 entries with the largest internal time are
    printed to standard output.
    """
    t0 = time.time()
    now = datetime.now()
    fses = list(fsegen(ref=now, N_per_cat=5 * 10**4, max_timecount=9))
    shuffle(fses)
    n = 8
    rules = {
        "years": n,
        "months": n,
        "weeks": n,
        "days": n,
        "hours": n,
        "recent": n
    }
    sduration = time.time() - t0
    log.info("Setup duration: %.3f s", sduration)
    log.info("Profiling...")
    pr = cProfile.Profile()
    pr.enable()
    # Only the profile is of interest here; the filter result is discarded.
    TimeFilter(rules, now).filter(fses)
    pr.disable()
    s = StringIO.StringIO()
    pstats.Stats(pr, stream=s).sort_stats('time').print_stats(20)
    # Call form of print: with a single argument it prints the same text
    # on Python 2, and unlike the print statement it is valid Python 3
    # syntax, so the module stays parseable there.
    print(s.getvalue())
예제 #12
0
 def test_request_less_than_available_close(self):
     """A 10-years rule must accept both items when only two are given."""
     # Only close items are present (at the end of the rule period).
     close = [self.yearsago[1], self.yearsago[2]]
     accepted, rejected = TimeFilter(
         {"years": 10}, self.reftime).filter(close)
     assert set(accepted) == set(close)
     assert len(rejected) == 0
예제 #13
0
 def test_10_days_2_weeks(self):
     """Pin the overlap behaviour of ``{"days": 10, "weeks": 2}``.

     Week 0 is included in the 10 days, week 1 is only partially included,
     and week 2 (14 days and older) is not included at all. The input is
     15 FSEs aged 1 to 15 days:

     * FSEs 1-10 must be accepted by the 10-day rule.
     * FSEs 11, 12 and 13 count as 1 week old (7 days <= age < 14 days).
       The 2-weeks rule accepts the most recent 1-week-old item that is
       not claimed by a younger category, which is FSE 11.
     * The 2-weeks rule also accepts the most recent 2-week-old item,
       which is FSE 14.

     In total FSEs 1-11 and 14 are accepted (12 items) and FSEs 12, 13 and
     15 are rejected (3 items).
     """
     day = 60 * 60 * 24
     now = time.time()
     fses = [
         FilterItem(modtime=now - (day * age + 1))
         for age in range(1, 16)
     ]
     rules = {"days": 10, "weeks": 2}
     accepted, rejected = TimeFilter(rules, now).filter(fses)
     rejected = list(rejected)
     assert len(accepted) == 12
     # The first 11 FSEs must be accepted. Only membership is checked:
     # the order inside the accepted list is predictable with the current
     # implementation but not guaranteed by the specification.
     for fse in fses[:11]:
         assert fse in accepted
     # The 14th FSE must be accepted as well.
     assert fses[13] in accepted
     # FSEs 12, 13 and 15 must be rejected.
     assert len(rejected) == 3
     for index in (11, 12, 14):
         assert fses[index] in rejected
예제 #14
0
 def test_1_recent_1_years(self):
     """One 'recent' plus one 'years' request must accept two items."""
     rules = {"years": 1, "recent": 1}
     accepted, _ = TimeFilter(rules, self.now).filter(self.fses)
     assert len(accepted) == 2
예제 #15
0
 def test_10_days_order(self):
     """The 10-day rule must keep the 10 youngest items for any input order.

     The input is 15 FSEs aged 1 to 15 days; the youngest 10 must be
     accepted and the oldest 5 rejected. The focus is on the internal
     ordering used for the accept/reject decision, so the list is shuffled
     before each of 100 filter runs to test robustness against input
     order. Only membership is checked, because no guarantee is made about
     the order of items inside the accepted list.
     """
     day = 60 * 60 * 24
     now = time.time()
     fses = [
         FilterItem(modtime=now - (day * age + 1))
         for age in range(1, 16)
     ]
     youngest, oldest = fses[:10], fses[10:]
     rules = {"days": 10}
     shuffled = list(fses)
     for _ in range(100):
         shuffle(shuffled)
         accepted, rejected = TimeFilter(rules, now).filter(shuffled)
         rejected = list(rejected)
         assert len(accepted) == 10
         assert len(rejected) == 5
         for fse in youngest:
             assert fse in accepted
         for fse in oldest:
             assert fse in rejected
예제 #16
0
    def test_requesting_newer_than_available_retrieves_none(self):
        """Requesting 5 items when only items 6-10 exist must accept none."""
        # The "recent" category is excluded because it will always accept
        # the newest N items.
        categories = ("hours", "days", "weeks", "months", "years")
        # Keep items 6-10 per category, newest first, to increase the
        # chance of discovering order dependencies in the filter.
        fses10to6 = {}
        for cat in categories:
            newest_first = sorted(self.fses10[cat],
                                  key=lambda x: x.moddate,
                                  reverse=True)
            fses10to6[cat] = newest_first[5:]

        # Now ask for the first 5 items of each category.
        # `items()` instead of the Python-2-only `iteritems()`; it yields
        # the same pairs and is available on Python 3 as well.
        for category, fses in fses10to6.items():
            f = TimeFilter({category: 5}, self.reftime)
            accepted, rejected = f.filter(fses)
            assert len(accepted) == 0
            assert set(rejected) == set(fses)
예제 #17
0
 def test_1_recent_1_years(self):
     """One 'recent' plus one 'years' request accepts 2, rejects the rest."""
     rules = {"years": 1, "recent": 1}
     accepted, rejected = TimeFilter(rules, self.now).filter(self.fses9)
     assert len(accepted) == 2
     # Everything not accepted must show up among the rejected items.
     assert len(list(rejected)) == self.N*6 - 2
예제 #18
0
 def test_create_recent_allow_old(self):
     """A years-only rule must reject items that are only seconds old."""
     now = time.time()
     # 15 items, aged 2 to 16 seconds.
     fses = [FilterItem(modtime=now - age) for age in range(2, 17)]
     accepted, rejected = TimeFilter({"years": 1}, now).filter(fses)
     rejected = list(rejected)
     assert len(accepted) == 0
     assert len(rejected) == 15
예제 #19
0
 def test_create_recent_dont_request_recent(self):
     """Young items must be rejected when zero 'recent' items are requested."""
     now = time.time()
     # Create a few young (recent) items, aged 2 to 16 seconds ...
     fses = [FilterItem(modtime=now - age) for age in range(2, 17)]
     # ... and then explicitly request none of them.
     rules = {"years": 1, "recent": 0}
     accepted, rejected = TimeFilter(rules, now).filter(fses)
     rejected = list(rejected)
     assert len(accepted) == 0
     assert len(rejected) == 15
예제 #20
0
    def test_overlapping_rules_dont_accept_additional_items(self):
        """Adding an overlapping 1-day rule must not change a 24-hours result."""
        ref_time = datetime(2016, 1, 1)
        # 28 items, aged 1 to 28 hours.
        items = [
            FilterItem(moddate=ref_time - timedelta(hours=age))
            for age in range(1, 29)
        ]

        # First rule on its own: 24 hours, overlapping one day. The first
        # 24 items are expected to be accepted.
        accepted, _ = TimeFilter({"hours": 24}, ref_time).filter(items)
        assert len(accepted) == 24
        assert set(accepted) == set(items[:24])

        # Combined with an overlapping 1-day rule the result shouldn't
        # change: the most recent 1-day-old item is the same as the most
        # recent 24-hour-old item.
        combined = {"hours": 24, "days": 1}
        accepted, _ = TimeFilter(combined, ref_time).filter(items)
        assert len(accepted) == 24
        assert set(accepted) == set(items[:24])
예제 #21
0
    def test_requesting_newer_categories_than_available_retrieves_none(self):
        """Items from earlier-listed categories only must all be rejected.

        For each category after the first, the input consists solely of the
        ``self.fses4`` items of the categories listed before it; requesting
        4 items of that category must then accept nothing.
        """
        categories = ("years", "months", "weeks", "days", "hours", "recent")
        per_category = [self.fses4[cat] for cat in categories]

        for n in range(1, len(categories)):
            # Flatten the item lists of the first n categories.
            older_fses = list(chain.from_iterable(per_category[:n]))

            requested = {categories[n]: 4}
            accepted, rejected = TimeFilter(
                requested, self.reftime).filter(older_fses)
            assert len(accepted) == 0
            assert set(rejected) == set(older_fses)
예제 #22
0
 def test_create_old_allow_recent(self):
     """Requesting one 'recent' item must reject items that are years old.

     Regression test: this once uncovered a bug where items that should
     have been rejected ended up in the recent category.
     """
     year = 60 * 60 * 24 * 365
     now = time.time()
     # A few old items, between 1 and 15 years old.
     fses = [
         FilterItem(modtime=now - (year * age + 1))
         for age in range(1, 16)
     ]
     # Only request one recent item.
     accepted, rejected = TimeFilter({"recent": 1}, now).filter(fses)
     rejected = list(rejected)
     assert len(accepted) == 0
     assert len(rejected) == 15
예제 #23
0
 def test_2_years_2_allowed(self):
     """A 2-years rule must accept one 2-year-old and one 1-year-old item."""
     year = 60 * 60 * 24 * 365
     # One item just over 2 years old and one just over 1 year old; the
     # rule below asks to keep 2 years.
     two_years_ago = time.time() - (year * 2 + 1)
     one_year_ago = time.time() - (year * 1 + 1)
     older = FilterItem(modtime=two_years_ago)
     younger = FilterItem(modtime=one_year_ago)
     accepted, rejected = TimeFilter(
         rules={"years": 2}).filter(objs=[older, younger])
     rejected = list(rejected)
     # All should be accepted.
     assert len(accepted) == 2
     assert len(rejected) == 0
예제 #24
0
 def test_singlecat_rules(self):
     """Each single-category rule must accept exactly `n` items."""
     n = 8
     categories = ("years", "months", "weeks", "days", "hours", "recent")
     # Run one single-category filter per category on the same items.
     for category in categories:
         accepted, rejected = TimeFilter(
             {category: n}, self.now).filter(self.fses)
         assert len(accepted) == n
         assert len(list(rejected)) == len(self.fses) - n
예제 #25
0
 def test_realistic_scheme(self):
     """A realistic multi-category rule set must accept 81 items."""
     rules = {
         "years": 4,
         "months": 11,
         "weeks": 6,
         "days": 10,
         "hours": 48,
         "recent": 5,
     }
     accepted, _ = TimeFilter(rules, self.now).filter(self.fses)
     # The counts sum to 4+11+6+10+48+5 = 84. One reducing overlap between
     # days and weeks and two more between hours and days leave 81
     # accepted items.
     assert len(accepted) == 81
예제 #26
0
 def test_two_recent(self):
     """With ``{"recent": 1}``, only the younger of two items is accepted."""
     older = FilterItem(modtime=time.time())
     time.sleep(SHORTTIME)
     # Created after the pause, so a little younger than `older`.
     younger = FilterItem(modtime=time.time())
     # Pause again so the filter's reference time is newer than
     # `younger.modtime`.
     time.sleep(SHORTTIME)
     recent_filter = TimeFilter(rules={"recent": 1})
     accepted, rejected = recent_filter.filter(objs=[older, younger])
     rejected = list(rejected)
     # The younger one must be accepted.
     assert accepted[0] == younger
     assert len(accepted) == 1
     assert rejected[0] == older
     assert len(rejected) == 1
예제 #27
0
 def test_fixed_rules_week_month_overlap(self):
     """Requesting 8 items per category must accept 6*8-1 items in total."""
     n = 8
     categories = ("years", "months", "weeks", "days", "hours", "recent")
     rules = dict.fromkeys(categories, n)
     # See test_random_times_mass_singlecat_rules for likelihood discussion.
     # The rules request 8 accepted items for each time category. Exactly
     # one pair of categories has a 'reducing overlap' here: weeks and
     # months. All other pairs do not overlap at all, or overlap without
     # reduction. Explanation/specification:
     #
     # recent / hours:
     #   'Younger' categories can steal from older ones. The 'recent'
     #   category cannot steal anything.
     #   -> 8 items expected for the hours category.
     # hours / days:
     #   8 hours have no overlap with days (8 hours are 0 days), so the
     #   hours category cannot steal from the days category.
     #   -> 8 items expected for the days category.
     # days / weeks:
     #   Items 7 and 8 days old could be categorized as 1 week, but are
     #   categorized within the days dict (7 and 8 days are requested by
     #   the days rule). Non-reducing overlap: items 9 to 13 days old are
     #   categorized as 1 week, which is requested, and 9-day-old items
     #   actually are in the data set. They are not stolen by categories
     #   younger than weeks and end up in the 1-week list.
     #   -> 8 items expected for the weeks category.
     # weeks / months:
     #   Items 8 weeks old (8*7 days = 56 days) could be categorized as
     #   1 month, but are categorized within the weeks dict (8 weeks are
     #   requested by the weeks rule). Reducing overlap: 9-week-old items,
     #   which the weeks rule does not request, are 9*7 days = 63 days
     #   old, i.e. 2 months (2*30 days = 60 days). They are not affected by
     #   categories younger than months and end up in the 2-months list.
     #   In other words, there is no 1-month list: items of that age are
     #   *entirely* consumed by the weeks rule. The oldest item classified
     #   as 8 weeks old is already 2 months old:
     #   8.99~ weeks == 62.00~ days > 60 days == 2 months.
     #   -> the months rule returns only 7 items (not 8 like the others).
     # months / years:
     #   No overlap (0 years for all requested months).
     accepted, rejected = TimeFilter(rules, self.now).filter(self.fses9)
     # 8 items for every category except months, which yields 7.
     expected_accepted = 6*8 - 1
     assert len(accepted) == expected_accepted
     assert len(list(rejected)) == self.N*6 - expected_accepted
예제 #28
0
 def test_realistic_scheme(self):
     """A realistic multi-category rule set must accept 84 items."""
     rules = {
         "years": 4,
         "months": 12,
         "weeks": 6,
         "days": 10,
         "hours": 48,
         "recent": 5,
     }
     accepted, rejected = TimeFilter(rules, self.now).filter(self.fses62)
     # The counts sum to 4+12+6+10+48+5 = 85. One reducing overlap between
     # hours and days leaves 84 accepted items.
     expected_accepted = 84
     assert len(accepted) == expected_accepted
     assert len(list(rejected)) == self.N*6 - expected_accepted
예제 #29
0
 def test_2_recent_10_allowed(self):
     """Requesting 10 'recent' items with only 2 present keeps both."""
     # Request to keep more than available.
     first = FilterItem(modtime=time.time())
     time.sleep(SHORTTIME)
     second = FilterItem(modtime=time.time())
     time.sleep(SHORTTIME)
     generous_filter = TimeFilter(rules={"recent": 10})
     accepted, rejected = generous_filter.filter(objs=[first, second])
     rejected = list(rejected)
     # All should be accepted. Within the `recent` category, items must be
     # sorted by modtime, with the newest element being the last element.
     assert accepted[0] == first
     assert accepted[1] == second
     assert len(accepted) == 2
     assert len(rejected) == 0
예제 #30
0
 def test_10_days_overlap(self):
     """A 10-day rule must reach past one week (category overlap).

     With 15 FSEs aged 1 to 15 days, the first 10 must be accepted by the
     10-day rule and the last 5 must be rejected.
     """
     day = 60 * 60 * 24
     now = time.time()
     fses = [
         FilterItem(modtime=now - (day * age + 1))
         for age in range(1, 16)
     ]
     accepted, rejected = TimeFilter({"days": 10}, now).filter(fses)
     rejected = list(rejected)
     assert len(accepted) == 10
     assert len(rejected) == 5
     for fse in fses[:10]:
         assert fse in accepted
     for fse in fses[10:]:
         assert fse in rejected
예제 #31
0
 def test_invalid_object(self):
     """Filtering a list that contains ``None`` must raise AttributeError."""
     day_filter = TimeFilter(rules={"days": 1})
     # Expected failure:
     # AttributeError: 'NoneType' object has no attribute 'modtime'
     with raises(AttributeError):
         day_filter.filter([None])
예제 #32
0
 def test_not_iterable(self):
     """Passing ``None`` instead of an iterable must raise TypeError."""
     day_filter = TimeFilter(rules={"days": 1})
     # Expected failure:
     # TypeError: 'NoneType' object is not iterable
     with raises(TypeError):
         day_filter.filter(None)