def test_incr_and_curr_stats(self):
    """
    Check that ichnaea.content.tasks.{incr_stat,get_curr_stat} operate on
    the most-recent stat row associated with a given day, and that each
    day's running total is carried forward to every later day.
    """
    from ichnaea.content.tasks import incr_stat, get_curr_stat
    session = self.db_master_session
    today = datetime.utcnow().date()
    yesterday = today - timedelta(1)
    two_days = today - timedelta(2)

    for stat_name in ('deleted_cell', 'deleted_wifi'):
        # Increment on the oldest day; the total must be visible on that
        # day and be copied forward to all subsequent days.
        oldest_total = 0
        for step in range(1, 10):
            oldest_total += step
            incr_stat(session, stat_name, step, two_days)
        self.assertEqual(
            get_curr_stat(session, stat_name, two_days), oldest_total)
        self.assertEqual(
            get_curr_stat(session, stat_name, yesterday), oldest_total)
        self.assertEqual(
            get_curr_stat(session, stat_name, today), oldest_total)
        self.assertEqual(get_curr_stat(session, stat_name), oldest_total)

        # Increment yesterday: the two-days-ago total is untouched, while
        # yesterday and today see the new running total.
        middle_total = oldest_total
        for step in range(1, 10):
            middle_total += step
            incr_stat(session, stat_name, step, yesterday)
        self.assertEqual(
            get_curr_stat(session, stat_name, two_days), oldest_total)
        self.assertEqual(
            get_curr_stat(session, stat_name, yesterday), middle_total)
        self.assertEqual(
            get_curr_stat(session, stat_name, today), middle_total)
        self.assertEqual(get_curr_stat(session, stat_name), middle_total)

        # Increment today: only today (and the dateless default) move.
        newest_total = middle_total
        for step in range(1, 10):
            newest_total += step
            incr_stat(session, stat_name, step, today)
        self.assertEqual(
            get_curr_stat(session, stat_name, two_days), oldest_total)
        self.assertEqual(
            get_curr_stat(session, stat_name, yesterday), middle_total)
        self.assertEqual(
            get_curr_stat(session, stat_name, today), newest_total)
        self.assertEqual(get_curr_stat(session, stat_name), newest_total)
def trim_excessive_data(session, unique_model, measure_model, join_measure,
                        delstat, max_measures, min_age_days, batch):
    """
    Delete measurements of type `measure_model` when, for any given key,
    there are more than `max_measures` measurements.

    Measurements younger than `min_age_days` are never deleted, and at
    most `batch` keys are trimmed per call.  As side effects, the
    deleted-measurements stat named `delstat` is incremented and the
    `total_measures` counter of the associated `unique_model` row is
    decremented.

    Returns the number of measurements deleted for the last key that was
    trimmed, or 0 when no key qualified.
    """
    from ichnaea.content.tasks import incr_stat

    # Only work with rows older than the age threshold, so we are
    # definitely not interfering with periodic recent-stat calculations
    # on freshly arriving data.
    cutoff = datetime.utcnow() - timedelta(days=min_age_days)
    old_enough = measure_model.created < cutoff

    # Fast first pass: pull keys whose cached total exceeds the limit.
    # The set is refined by date-window below.
    candidates = session.query(unique_model).filter(
        unique_model.total_measures > max_measures).limit(batch).all()

    # Second pass: confirm each candidate by explicitly counting its
    # measurements inside the expiration date-window.
    overfull = []
    for candidate in candidates:
        row = session.query(func.count(measure_model.id)).filter(
            *join_measure(candidate)).filter(old_enough).first()
        assert row is not None
        windowed = int(row[0])
        if windowed > max_measures:
            overfull.append((candidate, windowed))

    if not overfull:
        return 0

    # For each confirmed over-measured key, find the oldest (time, id)
    # pair to keep and trim everything before it.
    for candidate, windowed in overfull:
        skip = windowed - max_measures
        keep_time, keep_id = session.query(
            measure_model.time, measure_model.id).filter(
            *join_measure(candidate)).filter(old_enough).order_by(
            measure_model.time, measure_model.id).slice(
            skip, windowed).first()
        # Delete measures "below" that cutoff pair, restricted to the
        # date window.  NOTE(review): `time <= keep_time AND id < keep_id`
        # is not a strict lexicographic (time, id) comparison; it relies
        # on ids growing with time — confirm against the schema.
        deleted = session.query(measure_model).filter(
            *join_measure(candidate)).filter(old_enough).filter(
            measure_model.time <= keep_time).filter(
            measure_model.id < keep_id).delete()
        # Keep the cached counters consistent with the removed rows.
        assert candidate.total_measures >= 0
        candidate.total_measures -= deleted
        # The unprocessed-measure backlog cannot exceed what still has
        # underlying measures; forget the surplus.
        if candidate.new_measures > candidate.total_measures:
            candidate.new_measures = candidate.total_measures
        incr_stat(session, delstat, deleted)
    session.commit()
    return deleted
def trim_excessive_data(session, unique_model, measure_model, join_measure,
                        delstat, max_measures, min_age_days, batch):
    """
    Delete measurements of type `measure_model` when, for any given key,
    more than `max_measures` measurements exist.

    Measurements younger than `min_age_days` are left untouched, and at
    most `batch` keys are processed per call.  Side effects: increments
    the deleted-measurements stat named `delstat` and decrements the
    `total_measures` counter on the matching `unique_model` row.

    Returns the deletion count for the last trimmed key (0 if nothing
    qualified).
    """
    from ichnaea.content.tasks import incr_stat

    # Restrict all work to rows older than the threshold so periodic
    # recent-stat calculations on incoming data are never disturbed.
    now = datetime.utcnow()
    in_window = measure_model.created < (now - timedelta(days=min_age_days))

    def windowed_count(key_row):
        # Exact measurement count for one key, within the date window.
        result = session.query(func.count(measure_model.id)).filter(
            *join_measure(key_row)).filter(in_window).first()
        assert result is not None
        return int(result[0])

    # Cheap initial query on the cached totals; refined per key below.
    suspects = session.query(unique_model).filter(
        unique_model.total_measures > max_measures).limit(batch).all()

    # Keep only keys whose in-window count actually exceeds the limit.
    confirmed = [(key_row, count)
                 for key_row, count in
                 ((s, windowed_count(s)) for s in suspects)
                 if count > max_measures]

    if len(confirmed) == 0:
        return 0

    for key_row, count in confirmed:
        # Oldest (time, id) pair to keep: skip everything below the
        # surplus offset in (time, id) order.
        offset = count - max_measures
        boundary_time, boundary_id = session.query(
            measure_model.time, measure_model.id).filter(
            *join_measure(key_row)).filter(in_window).order_by(
            measure_model.time, measure_model.id).slice(
            offset, count).first()
        # Remove rows before the boundary, still inside the date window.
        n = session.query(measure_model).filter(
            *join_measure(key_row)).filter(in_window).filter(
            measure_model.time <= boundary_time).filter(
            measure_model.id < boundary_id).delete()
        # Adjust cached counters to reflect the deletions.
        assert key_row.total_measures >= 0
        key_row.total_measures -= n
        # Drop any unprocessed-measure backlog that no longer has
        # underlying measures behind it.
        if key_row.new_measures > key_row.total_measures:
            key_row.new_measures = key_row.total_measures
        incr_stat(session, delstat, n)
    session.commit()
    return n