def __repr__(self):
    """Return a debug representation of this record.

    The string has the form
    ``<ClassName id=<id> channel=<channel> timeslot=<YYYYmmdd.HH> <level>>``
    where channel and time slot come from ``self.unpacked`` and the time
    slot is expanded with ``decode_timeslot``.
    """
    channel, slot = self.unpacked
    slot_start, slot_level = decode_timeslot(slot)
    fields = (
        self.__class__.__name__,
        self.id,
        channel,
        slot_start.strftime('%Y%m%d.%H'),
        slot_level,
    )
    return '<%s id=%s channel=%s timeslot=%s %s>' % fields
def purge_stats(channel):
    """Run the junk, month and day purge passes for ``channel``.

    Each pass (``discard_junk``, ``purge_months``, ``purge_days``) is timed
    and summarised in the log.  Afterwards the channel document's ``ld``
    field is set to ``now()``.

    Returns a dict with the keys ``last_purged``, ``purge_months``,
    ``purge_days``, ``month_topic_stats``, ``month_trend_stats``,
    ``day_topic_stats``, ``day_trend_stats``, ``discard_junk_stats`` and
    ``timedeltas`` (one elapsed ``timedelta`` per pass).

    NOTE(review): ``last_purged`` is read from the ``channel`` object as
    passed in; this function does not reload it after the update -- confirm
    that this is the value callers expect.
    """
    def run_timed(step):
        # Execute one purge step against the channel and measure it.
        # Returns (step result, elapsed timedelta).
        began = datetime.now()
        outcome = step(channel)
        return outcome, datetime.now() - began

    def decoded(slots):
        # Expand raw time slots into their decoded form for reporting.
        return [decode_timeslot(slot) for slot in slots]

    # Pass 1: discard junk records.
    discard_junk_stats, discard_junk_timedelta = run_timed(discard_junk)
    LOGGER.info(
        "purging summary:: channel: %s; applying discard junk; records number: %s timedelta: %s",
        channel.title,
        discard_junk_stats,
        discard_junk_timedelta)

    # Pass 2: month-level purge.
    month_result, purge_months_timedelta = run_timed(purge_months)
    purged_months, month_topic_stats, month_trend_stats = month_result
    LOGGER.info(
        "purging summary:: channel: %s; level: month; purge_months: %s; topics_stats: %s; trends_stats: %s; timedelta: %s",
        channel.title,
        decoded(purged_months),
        month_topic_stats,
        month_trend_stats,
        purge_months_timedelta)

    # Pass 3: day-level purge.
    day_result, purge_days_timedelta = run_timed(purge_days)
    purged_days, day_topic_stats, day_trend_stats = day_result
    LOGGER.info(
        "purging summary:: channel: %s; level: day; purge_days: %s; topics_stats: %s; trends_stats: %s; timedelta: %s",
        channel.title,
        decoded(purged_days),
        day_topic_stats,
        day_trend_stats,
        purge_days_timedelta)

    # Record the time of this purge on the stored channel document.
    Channel.objects.coll.update(
        {"_id": channel.id},
        {'$set': {'ld': now()}})

    timings = {
        "discard_junk_timedelta": discard_junk_timedelta,
        "purge_months_timedelta": purge_months_timedelta,
        "purge_days_timedelta": purge_days_timedelta,
    }
    return {
        "last_purged": channel.last_purged,
        "purge_months": decoded(purged_months),
        "purge_days": decoded(purged_days),
        "month_topic_stats": month_topic_stats,
        "month_trend_stats": month_trend_stats,
        "day_topic_stats": day_topic_stats,
        "day_trend_stats": day_trend_stats,
        "discard_junk_stats": discard_junk_stats,
        "timedeltas": timings,
    }
def test_purge_none(self):
    """Exercise purge_stats three times: once unmocked, then on mocked days 6 and 8.

    The first run uses the real clock.  The next two replace
    ``solariat_bottle.utils.purging.localtime`` with a stand-in exposing only
    ``tm_mday`` (presumably the attribute the purge code consults to decide
    which levels are due -- confirm against the purging module).

    The stand-in is removed again in a ``finally`` block so that the patched
    module attribute cannot leak into tests that run afterwards.
    """
    import solariat_bottle.utils.purging
    purging = solariat_bottle.utils.purging

    TWO_DAYS_AGO = now() - timedelta(days=2)
    self._make_laptops_and_icecream(TWO_DAYS_AGO)

    # --- Run 1: real clock ---------------------------------------------
    stats = purge_stats(self.channel)
    last_purged = stats["last_purged"]
    days = stats["purge_days"]
    months = stats["purge_months"]
    self.channel.reload()
    self.assertEqual(
        datetime_to_timeslot(self.channel.last_purged, 'hour'),
        datetime_to_timeslot(last_purged, 'hour'))

    # Should have purged over 15 days of time slots since we never purged before
    self.assertEqual(len(days), 15)

    # Months purged depends on how far in we are to the month when we run the test
    self.assertTrue(len(months) in [2, 3])

    # Remember what the module exposed before patching so it can be put back.
    # A sentinel distinguishes "attribute absent" from any real value.
    missing = object()
    original_localtime = getattr(purging, 'localtime', missing)
    try:
        # --- Run 2: pretend it is the 6th of the month ------------------
        class MockLocaltime(object):
            tm_mday = 6
        purging.localtime = MockLocaltime

        stats = purge_stats(self.channel)
        days = stats["purge_days"]
        months = stats["purge_months"]
        self.assertEqual(len(days), 1)
        self.assertEqual(days[0], decode_timeslot(Timeslot(level='day').timeslot))
        self.assertEqual(len(months), 0)

        # --- Run 3: pretend it is the 8th of the month ------------------
        class MockLocaltime(object):
            tm_mday = 8
        purging.localtime = MockLocaltime

        stats = purge_stats(self.channel)
        days = stats["purge_days"]
        months = stats["purge_months"]
        self.assertEqual(len(days), 1)
        self.assertEqual(len(months), 1)
        self.assertEqual(months[0], decode_timeslot(Timeslot(level='month').timeslot))
    finally:
        # Undo the monkeypatch regardless of assertion outcome.
        if original_localtime is missing:
            del purging.localtime
        else:
            purging.localtime = original_localtime
def print_db_records():
    """Debug helper: print every ChannelTopicTrends record as ``topic | timeslot``.

    Output starts with a ``Trends:`` heading and ends with two blank lines.

    The prints use the single-argument parenthesised form, which produces the
    same output under the Python 2 print statement and the Python 3 print
    function.  The decoded time slot is converted with ``!s`` before the
    alignment spec is applied: it is not a plain string (other callers
    unpack it as a pair), and applying a string format spec directly to
    such an object raises ``TypeError`` on Python 3.4+.
    """
    # The topics listing is disabled; kept here for ad-hoc debugging.
    # print("Topics:")
    # for row in ChannelHotTopics.objects():
    #     print(u"{0: ^14s} | {1!s: ^4s}".format(row.topic, decode_timeslot(row.time_slot)))
    # print("")
    print("Trends:")
    for row in ChannelTopicTrends.objects():
        print(u"{0: ^14s} | {1!s: ^4s}".format(row.topic, decode_timeslot(row.time_slot)))
    print("")
    print("")
def purge_corresponding_trends(channel, timeslot):
    """Sweep trend records in the finer-grained slots covered by ``timeslot``.

    A month slot is expanded into day slots and a day slot into hour slots;
    any other level raises ``KeyError``.  The topics found for ``timeslot``
    by ``trends_find_topics`` are passed to ``mark_and_sweep_trends`` for
    every expanded slot, and the per-slot results are summed element-wise.

    Returns a 3-tuple of the accumulated counters (``(0, 0, 0)`` when there
    are no slots to process).
    """
    slot_start, slot_level = decode_timeslot(timeslot)
    finer_level = {"month": "day", "day": "hour"}[slot_level]

    span = relativedelta(months=1) if slot_level == "month" else relativedelta(days=1)
    slot_end = slot_start + span

    # The last generated slot is dropped -- presumably because gen_timeslots
    # includes the range end, which belongs to the next period (confirm).
    sub_slots = list(gen_timeslots(slot_start, slot_end, level=finer_level))
    sub_slots = sub_slots[:-1]

    topics = trends_find_topics(timeslot, channel)

    totals = [0, 0, 0]
    slot_count = len(sub_slots)
    for index, sub_slot in enumerate(sub_slots):
        LOGGER.info(
            'timeslot info: channel: %s; current timeslot "%s"; %sth timeslot of %s timeslots',
            channel.title,
            decode_timeslot(sub_slot),
            index,
            slot_count)
        swept = mark_and_sweep_trends(channel, sub_slot, topics)
        totals = [running + delta for running, delta in zip(totals, swept)]

    return tuple(totals)
def trends_find_topics(time_slot, channel_or_tag):
    """Return the ``tc`` values of ChannelHotTopics records for one slot.

    Records are selected by the channel number obtained from
    ``get_channel_num(channel_or_tag)`` and by ``time_slot``.

    Two INFO lines are logged: the elapsed time of the lookup and the number
    of topics found together with the decoded time slot.

    Returns a list with one ``tc`` entry per matching record.
    """
    channel_num = get_channel_num(channel_or_tag)

    t0 = datetime.now()
    records = ChannelHotTopics.objects.coll.find({
        F('channel_num'): channel_num,
        F('time_slot'): time_slot,
        # F('gc_counter'): MARKED_TO_KEEP  (filter left disabled)
    })
    # Consume the result inside the timed window.
    # NOTE(review): if ``find`` returns a lazy cursor (as PyMongo does), the
    # fetch only happens on iteration, so timing ``find`` alone would not
    # cover the actual query -- confirm the collection type.
    topics = [record["tc"] for record in records]
    elapsed = datetime.now() - t0

    # Arguments are passed to the logger for deferred formatting; the function
    # name is a literal rather than an inspect.stack() walk on every call.
    LOGGER.info(
        "purging Q:: channel: %s; collection: ChannelHotTopics; func: %s; timedelta: %s",
        channel_or_tag.title, 'trends_find_topics', elapsed)
    LOGGER.info("FIND TOPICS RES: %s %s", len(topics), decode_timeslot(time_slot))
    return topics
def level(self):
    """Return the level part (second element) of the decoded ``time_slot``."""
    decoded = decode_timeslot(self.time_slot)
    return decoded[1]
def level(self):
    """Return the level part of this record's ``time_slot``.

    ``decode_timeslot`` yields a two-element result; only the second
    element (the level) is used here.
    """
    decoded = decode_timeslot(self.time_slot)
    _start, slot_level = decoded
    return slot_level