Beispiel #1
0
    def __repr__(self):
        """Return a debug representation of this record.

        The string contains the class name, ``self.id``, the channel number
        taken from ``self.unpacked`` and the decoded time slot rendered as
        ``YYYYMMDD.HH`` followed by its level.
        """
        channel_number, packed_slot = self.unpacked
        slot_start, slot_level = decode_timeslot(packed_slot)

        fields = (
            self.__class__.__name__,
            self.id,
            channel_number,
            slot_start.strftime('%Y%m%d.%H'),
            slot_level,
        )
        return '<%s id=%s channel=%s timeslot=%s %s>' % fields
Beispiel #2
0
def _run_timed(step, channel):
    """Call ``step(channel)`` and return ``(result, elapsed)``.

    ``elapsed`` is the ``timedelta`` between two ``datetime.now()`` readings
    taken immediately before and after the call.
    """
    started_at = datetime.now()
    result = step(channel)
    return result, datetime.now() - started_at


def purge_stats(channel):
    """Task to purge topics for current time.

    Runs ``discard_junk``, ``purge_months`` and ``purge_days`` for ``channel``
    (in that order), logs a summary line after each step, then stamps the
    channel document's ``ld`` field with ``now()``.

    :param channel: channel object; ``channel.title``, ``channel.id`` and
        ``channel.last_purged`` are read here.
    :returns: dict with the per-step statistics, the decoded purged month and
        day timeslots, ``channel.last_purged`` and a ``"timedeltas"`` sub-dict
        holding the duration of each step.
    """
    discard_junk_stats, discard_junk_timedelta = _run_timed(
        discard_junk, channel)
    LOGGER.info(
        "purging summary:: channel: %s; applying discard junk; records number: %s timedelta: %s",
        channel.title, discard_junk_stats, discard_junk_timedelta)

    month_result, purge_months_timedelta = _run_timed(purge_months, channel)
    purged_months, month_topic_stats, month_trend_stats = month_result
    # Decode once; the same list is used for the log line and the result.
    decoded_months = [decode_timeslot(x) for x in purged_months]
    LOGGER.info(
        "purging summary:: channel: %s; level: month; purge_months: %s; topics_stats: %s; trends_stats: %s; timedelta: %s",
        channel.title, decoded_months,
        month_topic_stats, month_trend_stats, purge_months_timedelta)

    day_result, purge_days_timedelta = _run_timed(purge_days, channel)
    purged_days, day_topic_stats, day_trend_stats = day_result
    decoded_days = [decode_timeslot(x) for x in purged_days]
    LOGGER.info(
        "purging summary:: channel: %s; level: day; purge_days: %s; topics_stats: %s; trends_stats: %s; timedelta: %s",
        channel.title, decoded_days,
        day_topic_stats, day_trend_stats, purge_days_timedelta)

    # Update the channel's last-purged field directly in the collection.
    Channel.objects.coll.update({"_id": channel.id}, {'$set': {
        'ld': now()
    }})  # updating last_purged field

    # NOTE(review): the update above writes straight to the collection, so
    # ``channel.last_purged`` below is whatever the in-memory object already
    # held unless one of the purge steps refreshed it -- confirm intended.
    return {
        "last_purged": channel.last_purged,
        "purge_months": decoded_months,
        "purge_days": decoded_days,
        "month_topic_stats": month_topic_stats,
        "month_trend_stats": month_trend_stats,
        "day_topic_stats": day_topic_stats,
        "day_trend_stats": day_trend_stats,
        "discard_junk_stats": discard_junk_stats,
        "timedeltas": {
            "discard_junk_timedelta": discard_junk_timedelta,
            "purge_months_timedelta": purge_months_timedelta,
            "purge_days_timedelta": purge_days_timedelta,
        },
    }
Beispiel #3
0
    def test_purge_none(self):
        """Purge a freshly populated channel, then re-purge under a mocked clock.

        The first purge runs against data created two days ago. The next two
        purges replace ``solariat_bottle.utils.purging.localtime`` with stubs
        exposing ``tm_mday`` 6 and 8 and check how many day/month slots are
        reported as purged. The original ``localtime`` is restored afterwards
        so the stub does not leak into other tests.
        """
        two_days_ago = now() - timedelta(days=2)
        self._make_laptops_and_icecream(two_days_ago)
        stats = purge_stats(self.channel)
        last_purged = stats["last_purged"]
        days = stats["purge_days"]
        months = stats["purge_months"]
        self.channel.reload()
        self.assertEqual(
            datetime_to_timeslot(self.channel.last_purged, 'hour'),
            datetime_to_timeslot(last_purged, 'hour'))

        # Should have purged 15 day-level time slots since we never purged before
        self.assertEqual(len(days), 15)
        # Months purged depends on how far in we are to the month when we run the test
        self.assertIn(len(months), [2, 3])

        import solariat_bottle.utils.purging as purging

        class MockLocaltimeDay6(object):
            tm_mday = 6

        class MockLocaltimeDay8(object):
            tm_mday = 8

        # Remember the real function so the patch can be undone even if an
        # assertion below fails.
        original_localtime = purging.localtime
        try:
            purging.localtime = MockLocaltimeDay6
            stats = purge_stats(self.channel)
            days = stats["purge_days"]
            months = stats["purge_months"]
            self.assertEqual(len(days), 1)
            self.assertEqual(days[0],
                             decode_timeslot(Timeslot(level='day').timeslot))
            self.assertEqual(len(months), 0)

            purging.localtime = MockLocaltimeDay8
            stats = purge_stats(self.channel)
            days = stats["purge_days"]
            months = stats["purge_months"]
            self.assertEqual(len(days), 1)
            self.assertEqual(len(months), 1)
            self.assertEqual(months[0],
                             decode_timeslot(Timeslot(level='month').timeslot))
        finally:
            purging.localtime = original_localtime
Beispiel #4
0
def print_db_records():
    # print "Topics:"
    # for row in ChannelHotTopics.objects():
    #     print "{0: ^14s} | {1: ^4s}".format(row.topic, decode_timeslot(row.time_slot))
    # print
    print "Trends:"
    for row in ChannelTopicTrends.objects():
        print u"{0: ^14s} | {1: ^4s}".format(row.topic,
                                             decode_timeslot(row.time_slot))
    print
    print
Beispiel #5
0
def purge_corresponding_trends(channel, timeslot):
    """Sweep trends in every finer-grained slot covered by ``timeslot``.

    A month slot is expanded into day slots and a day slot into hour slots;
    any other level raises ``KeyError`` from the level lookup. The topics
    found for ``timeslot`` are passed to ``mark_and_sweep_trends`` for each
    sub-slot, and the per-slot results are summed position by position.

    :param channel: channel whose trends are purged (``channel.title`` is
        used for logging).
    :param timeslot: encoded month- or day-level time slot.
    :returns: 3-tuple with the element-wise totals of the sweep results.
    """
    slot_start, slot_level = decode_timeslot(timeslot)
    finer_level = {"month": "day", "day": "hour"}[slot_level]

    if slot_level == "month":
        span = relativedelta(months=1)
    else:
        span = relativedelta(days=1)
    slot_end = slot_start + span

    # The last generated slot is dropped, as in the original slicing.
    generated = list(gen_timeslots(slot_start, slot_end, level=finer_level))
    pending = generated[:-1]
    topics = trends_find_topics(timeslot, channel)

    slot_count = len(pending)
    totals = [0, 0, 0]
    for position, sub_slot in enumerate(pending):
        LOGGER.info(
            'timeslot info: channel: %s; current timeslot "%s"; %sth timeslot of %s timeslots',
            channel.title, decode_timeslot(sub_slot), position, slot_count)
        swept = mark_and_sweep_trends(channel, sub_slot, topics)
        totals = [running + delta for running, delta in zip(totals, swept)]
    return tuple(totals)
Beispiel #6
0
def trends_find_topics(time_slot, channel_or_tag):
    """Return the ``"tc"`` values of ChannelHotTopics records for one slot.

    Queries the ChannelHotTopics collection for records matching the channel
    number of ``channel_or_tag`` and the given ``time_slot``, and logs how
    long the lookup took and how many topics were found.

    :param time_slot: encoded time slot to match.
    :param channel_or_tag: object accepted by ``get_channel_num``; its
        ``title`` is used in the log line.
    :returns: list with the ``"tc"`` value of every matching record.
    """
    channel_num = get_channel_num(channel_or_tag)
    t0 = datetime.now()
    records = ChannelHotTopics.objects.coll.find({
        F('channel_num'): channel_num,
        F('time_slot'): time_slot,
        # F('gc_counter'): MARKED_TO_KEEP
    })
    # Consume the result before reading the clock so the logged duration
    # includes fetching the records, not only building the query.
    topics = [x["tc"] for x in records]
    # Arguments are passed lazily, matching the other LOGGER.info calls; the
    # function name is a constant, so no stack inspection is needed.
    LOGGER.info(
        "purging Q:: channel: %s; collection: ChannelHotTopics; func: %s; timedelta: %s",
        channel_or_tag.title, 'trends_find_topics', datetime.now() - t0)
    LOGGER.info("FIND TOPICS RES: %s %s", len(topics),
                decode_timeslot(time_slot))
    return topics
Beispiel #7
0
 def level(self):
     """Return the level component of this record's decoded time slot."""
     decoded = decode_timeslot(self.time_slot)
     return decoded[1]
 def level(self):
     """Return the level part of ``decode_timeslot(self.time_slot)``."""
     _slot_start, slot_level = decode_timeslot(self.time_slot)
     return slot_level