def classify_saves(saves, period_start):
    """For each (utc, id) in saves, yield (True, (utc, id)) if the save
    should be kept and (False, (utc, id)) if the save should be removed.
    The ids are binary hashes.

    saves is presumably ordered by decreasing utc (newest first); the
    per-period grouping below only merges adjacent saves, and the last
    save of each group is treated as the oldest one.

    period_start maps 'all', 'dailies', 'monthlies' and 'yearlies' to
    the utc value at which the respective retention range begins.
    """

    def retain_oldest_in_region(region):
        # Drop every save except the last one seen in the region.
        prev = None
        for save in region:
            if prev is not None:
                yield False, prev
            prev = save
        if prev is not None:
            yield True, prev

    # The grouping keys include the year so that adjacent saves which
    # merely share a day-of-year (or month number) across different
    # years are not merged into a single group.
    def day_of(save):
        tm = localtime(save[0])
        return tm.tm_year, tm.tm_yday

    def month_of(save):
        tm = localtime(save[0])
        return tm.tm_year, tm.tm_mon

    def year_of(save):
        return localtime(save[0]).tm_year

    # Everything at or after the 'all' threshold is kept unconditionally.
    matches, rest = partition(lambda s: s[0] >= period_start['all'], saves)
    for save in matches:
        yield True, save

    tm_ranges = ((period_start['dailies'], day_of),
                 (period_start['monthlies'], month_of),
                 (period_start['yearlies'], year_of))

    # Split the remaining saves into the successive period ranges
    # (dailies, monthlies, yearlies).  Within each range, group the
    # saves by the period scale and keep one save per group.
    for pstart, time_region_id in tm_ranges:
        # Bind pstart now so the predicate does not depend on when
        # partition() actually evaluates it.
        matches, rest = partition(lambda s, pstart=pstart: s[0] >= pstart,
                                  rest)
        for _, region_saves in groupby(matches, time_region_id):
            for action in retain_oldest_in_region(region_saves):
                yield action

    # Finally, drop any saves older than all of the specified periods.
    for save in rest:
        yield False, save
def classify_saves(saves, period_start):
    """For each (utc, id) in saves, yield (True, (utc, id)) if the save
    should be kept and (False, (utc, id)) if the save should be removed.
    The ids are binary hashes.

    saves must be sorted by decreasing utc (newest first); the
    per-period grouping below only merges adjacent saves, and the first
    save of each group is treated as the newest one.

    period_start maps 'all', 'dailies', 'monthlies' and 'yearlies' to
    the utc value at which the respective retention range begins.
    """

    def retain_newest_in_region(region):
        # region is a list; keep its first save and drop the others.
        for save in region[0:1]:
            yield True, save
        for save in region[1:]:
            yield False, save

    # The grouping keys include the year so that adjacent saves which
    # merely share a day-of-year (or month number) across different
    # years are not merged into a single group.
    def day_of(save):
        tm = localtime(save[0])
        return tm.tm_year, tm.tm_yday

    def month_of(save):
        tm = localtime(save[0])
        return tm.tm_year, tm.tm_mon

    def year_of(save):
        return localtime(save[0]).tm_year

    # Everything at or after the 'all' threshold is kept unconditionally.
    matches, rest = partition(lambda s: s[0] >= period_start['all'], saves)
    for save in matches:
        yield True, save

    tm_ranges = ((period_start['dailies'], day_of),
                 (period_start['monthlies'], month_of),
                 (period_start['yearlies'], year_of))

    # Break the decreasing utc sorted saves up into the respective
    # period ranges (dailies, monthlies, ...).  Within each range,
    # group the saves by the period scale (days, months, ...), and
    # then yield a "keep" action (True, save) for the newest save in
    # each group, and a "drop" action (False, save) for the rest.
    for pstart, time_region_id in tm_ranges:
        # Bind pstart now so the predicate does not depend on when
        # partition() actually evaluates it.
        matches, rest = partition(lambda s, pstart=pstart: s[0] >= pstart,
                                  rest)
        for _, region_saves in groupby(matches, time_region_id):
            for action in retain_newest_in_region(list(region_saves)):
                yield action

    # Finally, drop any saves older than the specified periods
    for save in rest:
        yield False, save