def test_set_day_offset_none(self):
    """Applying a start-day offset to a range created without a start keeps both start values ``None``."""
    time_range = TimeRange()

    # Precondition: a default-constructed range has no start at all.
    self.assertIsNone(time_range.start)
    self.assertIsNone(time_range.start_org)

    time_range.set_start_day_offset(5)

    # Nothing to shift, so neither the effective nor the original start appears.
    self.assertIsNone(time_range.start)
    self.assertIsNone(time_range.start_org)
def test_tzinfo(self):
    """
    Check a bounded range created with an explicit timezone.

    With ``start``, ``end`` and ``tzinfo_`` all given, the range is expected to keep both boundaries,
    expose a timezone with the same UTC offset as the one passed in, be finite (48 hours long)
    and return itself as its only period.
    """
    start_dt = datetime(2020, 2, 14, 12, 1, 1, tzinfo=pytz.UTC)
    end_dt = datetime(2020, 2, 16, 12, 1, 1, tzinfo=pytz.UTC)
    tz = pytz.timezone("America/New_York")

    tr = TimeRange(start=start_dt, end=end_dt, tzinfo_=tz)

    # Boundaries: `places=0` compares the difference rounded to whole seconds.
    self.assertAlmostEqual(0, abs((start_dt - tr.start).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((start_dt - tr.start_org).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((tr.end - end_dt).total_seconds()), places=0)

    # Read the clock once so both offsets are computed for the very same instant.
    now_naive = datetime.now()
    self.assertEqual(tz.utcoffset(now_naive), tr.tzinfo_.utcoffset(now_naive))

    # State flags
    self.assertTrue(tr.expandable)
    self.assertFalse(tr.expanded)
    self.assertFalse(tr.is_inf)

    # Lengths (in hours)
    self.assertAlmostEqual(48, tr.hr_length_org, places=0)
    self.assertAlmostEqual(48, tr.hr_length, places=0)

    self.assertEqual(
        f"{start_dt.strftime('%m-%d')} ~ {end_dt.strftime('%m-%d')}",
        tr.expr_period_short)
    self.assertAlmostEqual(
        tr.end_time_seconds, time_to_seconds(localtime(end_dt, tz).time()), places=0)

    prd = tr.get_periods()
    self.assertListEqual([tr], prd)
def test_mult(self):
    """
    Check a bounded range expanded with ``range_mult=2``.

    The effective start is expected to lie one original range length (2 days) before the given start,
    while ``start_org`` and ``end`` stay as given. The range is then expected to split into two
    consecutive periods of the original length.
    """
    start_mult_expected = datetime(2020, 4, 12, 1, 1, 1, tzinfo=pytz.UTC)
    start_dt = datetime(2020, 4, 14, 1, 1, 1, tzinfo=pytz.UTC)
    end_dt = datetime(2020, 4, 16, 1, 1, 1, tzinfo=pytz.UTC)

    tr = TimeRange(start=start_dt, end=end_dt, range_mult=2)

    # Boundaries: `places=0` compares the difference rounded to whole seconds.
    self.assertAlmostEqual(0, abs((start_mult_expected - tr.start).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((start_dt - tr.start_org).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((tr.end - end_dt).total_seconds()), places=0)

    self.assertIsNone(tr.tzinfo_)

    # State flags
    self.assertTrue(tr.expandable)
    self.assertTrue(tr.expanded)
    self.assertFalse(tr.is_inf)

    # Lengths (in hours): original vs. expanded
    self.assertAlmostEqual(48, tr.hr_length_org, places=0)
    self.assertAlmostEqual(96, tr.hr_length, places=0)

    self.assertEqual(
        f"{start_mult_expected.strftime('%m-%d')} ~ {end_dt.strftime('%m-%d')}",
        tr.expr_period_short)
    self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(end_dt.time()), places=0)

    # Periods: the expanded part first, then the originally requested range.
    prd = tr.get_periods()
    tr2 = TimeRange(start=start_mult_expected, end=start_dt)
    tr3 = TimeRange(start=start_dt, end=end_dt)
    self.assertListEqual([tr2, tr3], prd)
def test_set_day_offset_neg(self):
    """A negative start-day offset moves ``start`` back by that many days and leaves ``start_org`` alone."""
    original_start = datetime(2020, 2, 14, 1, 1, 1, tzinfo=pytz.UTC)
    time_range = TimeRange(start=original_start)

    # Before the offset both start values equal the given start.
    self.assertEqual(original_start, time_range.start)
    self.assertEqual(original_start, time_range.start_org)

    time_range.set_start_day_offset(-5)

    # Only the effective start is shifted.
    self.assertEqual(original_start, time_range.start_org)
    self.assertEqual(original_start - timedelta(days=5), time_range.start)
def mean_message_count(
        self, channel_oids: Union[ObjectId, List[ObjectId]], *,
        hours_within: Optional[int] = None, start: Optional[datetime] = None, end: Optional[datetime] = None,
        tzinfo_: PytzInfo = UTC.to_tzinfo(), max_mean_days: int = 5) -> MeanMessageResultGenerator:
    """
    Count the messages per date in the given channel(s) and wrap them in a mean-message result generator.

    The time range built from ``hours_within``, ``start`` and ``end`` has its start moved back by
    ``max_mean_days`` days before it is attached to the query filter.

    :param channel_oids: channel OID(s) used to build the base filter
    :param hours_within: hour range passed to the time range as ``range_hr``
    :param start: start of the time range
    :param end: end of the time range
    :param tzinfo_: timezone used for the time range and for formatting the dates in the pipeline
    :param max_mean_days: count of days to move the start back; also forwarded to the result
    :return: result generator built from the aggregated per-date counts
    """
    msg_filter = self._channel_oids_filter_(channel_oids)

    time_range = TimeRange(range_hr=hours_within, start=start, end=end, tzinfo_=tzinfo_)
    time_range.set_start_day_offset(-max_mean_days)

    self._attach_time_range_(msg_filter, trange=time_range)

    # Group key: the date string derived from `_id`, formatted in the given timezone.
    date_expr = {
        "$dateToString": {
            "date": "$_id",
            "format": MeanMessageResultGenerator.FMT_DATE,
            "timezone": tzinfo_.tzidentifier
        }
    }
    stage_match = {"$match": msg_filter}
    stage_group = {
        "$group": {
            "_id": {MeanMessageResultGenerator.KEY_DATE: date_expr},
            MeanMessageResultGenerator.KEY_COUNT: {"$sum": 1}
        }
    }
    stage_sort = {"$sort": {"_id": pymongo.ASCENDING}}

    aggregated = list(self.aggregate([stage_match, stage_group, stage_sort]))
    # The days collected are measured from the original start, not from the shifted one.
    days_collected = HourlyResult.data_days_collected(
        self, msg_filter, hr_range=hours_within, start=time_range.start_org, end=end)

    return MeanMessageResultGenerator(
        aggregated, days_collected, tzinfo_, trange=time_range, max_mean_days=max_mean_days)
def date_list(days_collected, tzinfo, *,
              start: Optional[datetime] = None, end: Optional[datetime] = None,
              trange: Optional[TimeRange] = None) -> List[date]:
    """
    Return every date from the start date to the end date (both inclusive) of the time range.

    ``start`` and ``end`` are disregarded if ``trange`` is specified. Otherwise, a time range is built
    from ``days_collected`` (converted to hours), ``start``, ``end`` and ``tzinfo``.

    :raises ValueError: if the length of the time range is infinity
    """
    if not trange:
        trange = TimeRange(range_hr=days_collected * 24, start=start, end=end, tzinfo_=tzinfo)

    if trange.is_inf:
        raise ValueError("TimeRange length is infinity.")

    first_day = trange.start.date()
    day_span = (trange.end.date() - first_day).days

    return [first_day + timedelta(days=offset) for offset in range(day_span + 1)]
def _attach_time_range_(filter_: dict, *,
                        hours_within: Optional[int] = None,
                        start: Optional[datetime] = None, end: Optional[datetime] = None,
                        range_mult: Union[int, float] = 1.0, trange: Optional[TimeRange] = None):
    """
    Attach the parsed time range to ``filter_`` (modified in place).

    Data whose creation time (generation time of `_id`) is out of the given time range will be filtered out.

    If `trange` is specified, `hours_within`, `start`, `end` and `range_mult` will be ignored.
    """
    if not trange:
        trange = TimeRange(
            range_hr=hours_within, start=start, end=end, range_mult=range_mult, end_autofill_now=False)

    # Translate each available boundary to an OID comparison.
    id_filter = {}
    for operator, boundary in (("$gt", trange.start), ("$lt", trange.end)):
        boundary_oid = dt_to_objectid(boundary)
        if boundary and boundary_oid:
            id_filter[operator] = boundary_oid

    if not id_filter:
        # No boundary available: leave the filter untouched.
        return

    if OID_KEY in filter_:
        # Keep the existing OID condition as `$eq` next to the range operators.
        filter_[OID_KEY] = {"$eq": filter_[OID_KEY], **id_filter}
    else:
        filter_[OID_KEY] = id_filter
def test_fill_all_none(self):
    """
    Check a range created without any argument.

    Such a range is expected to have no start, an end filled with the current time,
    no timezone and an infinite length, and to return itself as its only period.
    """
    tr = TimeRange()
    now = now_utc_aware()

    self.assertIsNone(tr.start)
    self.assertIsNone(tr.start_org)
    # `places=0` compares the difference rounded to whole seconds.
    self.assertAlmostEqual(0, abs((tr.end - now).total_seconds()), places=0)
    self.assertIsNone(tr.tzinfo_)

    # State flags
    self.assertFalse(tr.expandable)
    self.assertFalse(tr.expanded)
    self.assertTrue(tr.is_inf)

    # Lengths (in hours)
    self.assertEqual(math.inf, tr.hr_length_org)
    self.assertEqual(math.inf, tr.hr_length)

    self.assertEqual(f"- ~ {now.strftime('%m-%d')}", tr.expr_period_short)
    self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(now), places=0)

    prd = tr.get_periods()
    self.assertListEqual([tr], prd)
def trange_ensure_not_inf(days_collected, trange, tzinfo):
    """
    Ensure that the time range is not of :class:`math.inf` length.

    A range which is not infinite is returned as-is. Otherwise, a new range is built from the boundaries
    of ``trange`` with ``days_collected`` (converted to hours) as its hour range and ``tzinfo`` as timezone.
    """
    if not trange.is_inf:
        return trange

    return TimeRange(range_hr=days_collected * 24, start=trange.start, end=trange.end, tzinfo_=tzinfo)
def test_set_day_tz_naive(self):
    """Timezone-naive boundaries given together with ``tzinfo_`` come back timezone-aware in that timezone."""
    naive_start = datetime(2020, 2, 14, 1, 1, 1)
    naive_end = datetime(2020, 2, 17, 1, 1, 1)

    time_range = TimeRange(start=naive_start, end=naive_end, tzinfo_=pytz.UTC)

    # Same pair of checks for each boundary: it is aware, and equals the localized input.
    for naive_dt, actual_dt in ((naive_start, time_range.start), (naive_end, time_range.end)):
        self.assertFalse(is_tz_naive(actual_dt))
        self.assertEqual(pytz.UTC.localize(naive_dt), actual_dt)
def test_end(self):
    """
    Check a range created with ``end`` only.

    Such a range is expected to have no start, keep the given end, have no timezone
    and an infinite length, and to return itself as its only period.
    """
    end_dt = datetime(2020, 4, 4, 1, 1, 1, tzinfo=pytz.UTC)

    tr = TimeRange(end=end_dt)

    self.assertIsNone(tr.start)
    self.assertIsNone(tr.start_org)
    # `places=0` compares the difference rounded to whole seconds.
    self.assertAlmostEqual(0, abs((tr.end - end_dt).total_seconds()), places=0)
    self.assertIsNone(tr.tzinfo_)

    # State flags
    self.assertFalse(tr.expandable)
    self.assertFalse(tr.expanded)
    self.assertTrue(tr.is_inf)

    # Lengths (in hours)
    self.assertEqual(math.inf, tr.hr_length_org)
    self.assertEqual(math.inf, tr.hr_length)

    self.assertEqual(f"- ~ {end_dt.strftime('%m-%d')}", tr.expr_period_short)
    self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(end_dt.time()), places=0)

    prd = tr.get_periods()
    self.assertListEqual([tr], prd)
def test_nfill_range_only(self):
    """
    Check a range created with ``range_hr`` only and ``end_autofill_now`` disabled.

    The start is expected to be 120 hours before the current time while the end stays ``None``;
    the range is flagged as infinite although both length properties report 120 hours.
    """
    tr = TimeRange(range_hr=120, end_autofill_now=False)
    now = now_utc_aware()
    expected_start = now - timedelta(hours=120)

    # `places=0` compares the difference rounded to whole seconds.
    self.assertAlmostEqual(0, abs((expected_start - tr.start).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((expected_start - tr.start_org).total_seconds()), places=0)
    self.assertIsNone(tr.end)
    self.assertIsNone(tr.tzinfo_)

    # State flags
    self.assertFalse(tr.expandable)
    self.assertFalse(tr.expanded)
    self.assertTrue(tr.is_inf)

    # Lengths (in hours)
    self.assertAlmostEqual(120, tr.hr_length_org, places=0)
    self.assertAlmostEqual(120, tr.hr_length, places=0)

    self.assertEqual(f"{expected_start.strftime('%m-%d')} ~ -", tr.expr_period_short)
    self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(now), places=0)

    prd = tr.get_periods()
    self.assertListEqual([tr], prd)
def test_fill_range_hr_0(self):
    """
    Check a range created with ``range_hr=0``.

    Both the start and the end are expected to be the current time, giving a finite,
    zero-length range which returns itself as its only period.
    """
    tr = TimeRange(range_hr=0)
    now = now_utc_aware()
    expected_start_end = now

    # `places=0` compares the difference rounded to whole seconds.
    self.assertAlmostEqual(0, abs((expected_start_end - tr.start).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((expected_start_end - tr.start_org).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((tr.end - now).total_seconds()), places=0)
    self.assertIsNone(tr.tzinfo_)

    # State flags
    self.assertTrue(tr.expandable)
    self.assertFalse(tr.expanded)
    self.assertFalse(tr.is_inf)

    # Lengths (in hours)
    self.assertAlmostEqual(0, tr.hr_length_org, places=0)
    self.assertAlmostEqual(0, tr.hr_length, places=0)

    self.assertEqual(
        f"{expected_start_end.strftime('%m-%d')} ~ {now.strftime('%m-%d')}",
        tr.expr_period_short)
    self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(now), places=0)

    prd = tr.get_periods()
    self.assertListEqual([tr], prd)
def test_end_hr_range(self):
    """
    Check a range created with ``end`` and ``range_hr``.

    The start is expected to be 120 hours before the given end, giving a finite
    120-hour range which returns itself as its only period.
    """
    end_dt = datetime(2020, 4, 4, 1, 1, 1, tzinfo=pytz.UTC)
    start_dt_expected = end_dt - timedelta(hours=120)

    tr = TimeRange(end=end_dt, range_hr=120)

    # `places=0` compares the difference rounded to whole seconds.
    self.assertAlmostEqual(0, abs((start_dt_expected - tr.start).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((start_dt_expected - tr.start_org).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((tr.end - end_dt).total_seconds()), places=0)
    self.assertIsNone(tr.tzinfo_)

    # State flags
    self.assertTrue(tr.expandable)
    self.assertFalse(tr.expanded)
    self.assertFalse(tr.is_inf)

    # Lengths (in hours)
    self.assertAlmostEqual(120, tr.hr_length_org, places=0)
    self.assertAlmostEqual(120, tr.hr_length, places=0)

    self.assertEqual(
        f"{start_dt_expected.strftime('%m-%d')} ~ {end_dt.strftime('%m-%d')}",
        tr.expr_period_short)
    self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(end_dt.time()), places=0)

    prd = tr.get_periods()
    self.assertListEqual([tr], prd)
def test_nfill_start(self):
    """
    Check a range created with ``start`` only and ``end_autofill_now`` disabled.

    The start is expected to be kept while the end stays ``None``; the range is flagged as infinite
    although both length properties report the hours elapsed from the start until now.
    """
    start_dt = datetime(2020, 4, 4, 1, 1, 1, tzinfo=pytz.UTC)

    tr = TimeRange(start=start_dt, end_autofill_now=False)
    now = now_utc_aware()
    hr_diff = (now - start_dt).total_seconds() / 3600

    # `places=0` compares the difference rounded to whole seconds.
    self.assertAlmostEqual(0, abs((start_dt - tr.start).total_seconds()), places=0)
    self.assertAlmostEqual(0, abs((start_dt - tr.start_org).total_seconds()), places=0)
    self.assertIsNone(tr.end)
    self.assertIsNone(tr.tzinfo_)

    # State flags
    self.assertFalse(tr.expandable)
    self.assertFalse(tr.expanded)
    self.assertTrue(tr.is_inf)

    # Lengths (in hours)
    self.assertAlmostEqual(hr_diff, tr.hr_length_org, places=0)
    self.assertAlmostEqual(hr_diff, tr.hr_length, places=0)

    self.assertEqual(f"{start_dt.strftime('%m-%d')} ~ -", tr.expr_period_short)
    self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(now), places=0)

    prd = tr.get_periods()
    self.assertListEqual([tr], prd)
def data_days_collected(collection, filter_, *,
                        hr_range: Optional[int] = None,
                        start: Optional[datetime] = None, end: Optional[datetime] = None):
    """
    Return the count of days collected in the data.

    Notice that this is different from ``days_collected`` in ``__init__()``:
    this one connects to the database to calculate the actual days collected in the filtered dataset,
    while the one in ``__init__()`` is not checked and is assumed to be true.

    ``hr_range`` will be ignored if both ``start`` and ``end`` are specified.

    For a time range which is not infinite, the length of the range (in days) is returned without
    touching the database. Otherwise, the days between the generation time of the oldest matching
    document and ``end`` (or now, if ``end`` is not given) are returned, floored at 0.
    ``HourlyResult.DAYS_NONE`` is returned if no document matches or if ``start`` is in the future.
    """
    trange = TimeRange(range_hr=hr_range, start=start, end=end, end_autofill_now=False)

    if not trange.is_inf:
        return trange.hr_length / 24

    # Open-ended range: measure from the oldest document matching the filter.
    oldest = collection.find_one(filter_, sort=[(OID_KEY, pymongo.ASCENDING)])
    if not oldest:
        return HourlyResult.DAYS_NONE

    now = now_utc_aware()

    if start:
        start = make_tz_aware(start)
    if start and start > now:
        return HourlyResult.DAYS_NONE

    if end:
        end = make_tz_aware(end)

    until = end or now
    oldest_created_at = ObjectId(oldest[OID_KEY]).generation_time
    days_elapsed = (until - oldest_created_at).total_seconds() / 86400

    return max(days_elapsed, 0)
def member_daily_message_count(
        self, channel_oids: Union[ObjectId, List[ObjectId]], *,
        hours_within: Optional[int] = None, start: Optional[datetime] = None, end: Optional[datetime] = None,
        tzinfo_: PytzInfo = UTC.to_tzinfo()) -> MemberDailyMessageResult:
    """
    Count the messages per date and per member in the given channel(s).

    :param channel_oids: channel OID(s) used to build the base filter
    :param hours_within: hour range passed to the time range as ``range_hr``
    :param start: start of the time range
    :param end: end of the time range
    :param tzinfo_: timezone used for the time range and for formatting the dates in the pipeline
    :return: result built from the aggregated per-date, per-member counts
    """
    msg_filter = self._channel_oids_filter_(channel_oids)

    time_range = TimeRange(range_hr=hours_within, start=start, end=end, tzinfo_=tzinfo_)
    self._attach_time_range_(msg_filter, trange=time_range)

    # Group key: the date string derived from `_id` (in the given timezone) and the member.
    group_id = {
        MemberDailyMessageResult.KEY_DATE: {
            "$dateToString": {
                "date": "$_id",
                "format": MemberDailyMessageResult.FMT_DATE,
                "timezone": tzinfo_.tzidentifier
            }
        },
        MemberDailyMessageResult.KEY_MEMBER: "$" + MessageRecordModel.UserRootOid.key
    }
    stages = [
        {"$match": msg_filter},
        {"$group": {"_id": group_id, MemberDailyMessageResult.KEY_COUNT: {"$sum": 1}}}
    ]

    aggregated = list(self.aggregate(stages))
    days_collected = HourlyResult.data_days_collected(
        self, msg_filter, hr_range=hours_within, start=start, end=end)

    return MemberDailyMessageResult(aggregated, days_collected, tzinfo_, trange=time_range)
def test_malformed(self):
    """Constructing a range whose ``end`` is earlier than its ``start`` raises ``TimeRangeEndBeforeStart``."""
    later = datetime(2020, 4, 16, 1, 1, 1, tzinfo=pytz.UTC)
    earlier = datetime(2020, 4, 14, 1, 1, 1, tzinfo=pytz.UTC)

    # Callable form: the constructor is invoked by `assertRaises` with the given keyword arguments.
    self.assertRaises(TimeRangeEndBeforeStart, TimeRange, start=later, end=earlier)
def message_count_before_time(
        self, channel_oids: Union[ObjectId, List[ObjectId]], *,
        hours_within: Optional[int] = None, start: Optional[datetime] = None, end: Optional[datetime] = None,
        tzinfo_: PytzInfo = UTC.to_tzinfo()) -> CountBeforeTimeResult:
    """
    Count the messages per date which were created before the end time-of-day of the time range.

    For every matching document, the second of the day is computed from `_id` in the given timezone;
    only documents whose second of the day is less than ``end_time_seconds`` of the time range are counted.

    :param channel_oids: channel OID(s) used to build the base filter
    :param hours_within: hour range passed to the time range as ``range_hr``
    :param start: start of the time range
    :param end: end of the time range
    :param tzinfo_: timezone used for the time range and for the date operations in the pipeline
    :return: result built from the aggregated per-date counts, sorted by the group key ascending
    """
    msg_filter = self._channel_oids_filter_(channel_oids)

    time_range = TimeRange(range_hr=hours_within, start=start, end=end, tzinfo_=tzinfo_)
    self._attach_time_range_(msg_filter, trange=time_range)

    def date_in_tz():
        # Fresh operand each time so that no dict object is shared across the pipeline.
        return {"date": "$_id", "timezone": tzinfo_.tzidentifier}

    # hour * 3600 + minute * 60 + second
    sec_of_day_expr = {
        "$add": [
            {"$multiply": [{"$hour": date_in_tz()}, 3600]},
            {"$multiply": [{"$minute": date_in_tz()}, 60]},
            {"$second": date_in_tz()}
        ]
    }
    stage_project = {"$project": {CountBeforeTimeResult.KEY_SEC_OF_DAY: sec_of_day_expr}}
    stage_before_end = {
        "$match": {CountBeforeTimeResult.KEY_SEC_OF_DAY: {"$lt": time_range.end_time_seconds}}
    }
    stage_group = {
        "$group": {
            "_id": {
                CountBeforeTimeResult.KEY_DATE: {
                    "$dateToString": {
                        "date": "$_id",
                        "format": CountBeforeTimeResult.FMT_DATE,
                        "timezone": tzinfo_.tzidentifier
                    }
                }
            },
            CountBeforeTimeResult.KEY_COUNT: {"$sum": 1}
        }
    }
    stage_sort = {"$sort": {"_id": pymongo.ASCENDING}}

    stages = [{"$match": msg_filter}, stage_project, stage_before_end, stage_group, stage_sort]

    aggregated = list(self.aggregate(stages))
    days_collected = HourlyResult.data_days_collected(
        self, msg_filter, hr_range=hours_within, start=time_range.start_org, end=end)

    return CountBeforeTimeResult(aggregated, days_collected, tzinfo_, trange=time_range)
def get_user_messages_total_count(
        self, channel_oids: Union[ObjectId, List[ObjectId]], *,
        hours_within: Optional[int] = None, start: Optional[datetime] = None, end: Optional[datetime] = None,
        period_count: int = 3, tzinfo_: Optional[tzinfo] = None) -> MemberMessageCountResult:
    """
    Count the messages per member in the given channel(s), split by period if the time range is finite.

    The time range is built with ``period_count`` as its ``range_mult``. For a range which is not infinite,
    each of its periods becomes a ``$switch`` branch labelled with the period index (as a string),
    and that label is added to the group key.

    :param channel_oids: channel OID(s) used to build the base filter
    :param hours_within: hour range passed to the time range as ``range_hr``
    :param start: start of the time range
    :param end: end of the time range
    :param period_count: range multiplier of the time range; also forwarded to the result
    :param tzinfo_: timezone passed to the time range
    :return: result built from the aggregated counts
    """
    msg_filter = self._channel_oids_filter_(channel_oids)

    time_range = TimeRange(
        range_hr=hours_within, start=start, end=end, range_mult=period_count, tzinfo_=tzinfo_)
    self._attach_time_range_(msg_filter, trange=time_range)

    # Branches of the `$switch` expression, one per period.
    #
    # Skipped for full range (inf): `start` and `end` cannot be `None` for generating `ObjectId`,
    # however `start` and `end` for full range are `None`.
    period_branches = []
    if not time_range.is_inf:
        for period_idx, period in enumerate(time_range.get_periods()):
            lower_oid = dt_to_objectid(period.start)
            # The end is only converted if the start yielded a usable OID.
            upper_oid = dt_to_objectid(period.end) if lower_oid else None
            if not (lower_oid and upper_oid):
                continue

            in_period = {
                "$and": [
                    {"$gte": ["$" + OID_KEY, lower_oid]},
                    {"$lt": ["$" + OID_KEY, upper_oid]}
                ]
            }
            period_branches.append({"case": in_period, "then": str(period_idx)})

    group_key = {
        MemberMessageCountResult.KEY_MEMBER_ID: "$" + MessageRecordModel.UserRootOid.key
    }
    if period_branches:
        group_key[MemberMessageCountResult.KEY_INTERVAL_IDX] = {"$switch": {"branches": period_branches}}

    stages = [
        {"$match": msg_filter},
        {"$group": {OID_KEY: group_key, MemberMessageCountResult.KEY_COUNT: {"$sum": 1}}}
    ]

    return MemberMessageCountResult(list(self.aggregate(stages)), period_count, time_range)