def test_bit_operations():
    """Check BitOpAnd / BitOpOr on month events, incl. another system and nesting."""
    delete_all_events()

    now = datetime.utcnow()
    # Use the last day of the previous calendar month. Subtracting a fixed
    # 30 days stays inside the current month when run on the 31st, which
    # would make both marks land in the same month and break the counts below.
    last_month = now.replace(day=1) - timedelta(days=1)

    # 123 has been active for two months
    mark_event('active', 123, now=now)
    mark_event('active', 123, now=last_month)

    # 224 has only been active last_month
    mark_event('active', 224, now=last_month)

    # Assert basic premises
    assert MonthEvents('active', last_month.year, last_month.month).get_count() == 2
    assert MonthEvents('active', now.year, now.month).get_count() == 1

    # Try out with bit AND operation
    active_2_months = BitOpAnd(
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month)
    )
    assert active_2_months.get_count() == 1
    assert 123 in active_2_months
    assert 224 not in active_2_months

    # Try out with bit OR operation
    assert BitOpOr(
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month)
    ).get_count() == 2

    # Try out with a different system (passed as the first positional argument)
    active_2_months = BitOpAnd(
        'default_copy',
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month),
    )
    assert active_2_months.get_count() == 1
    assert active_2_months.system == 'default_copy'

    # Try nested operations
    active_2_months = BitOpAnd(
        BitOpAnd(
            MonthEvents('active', last_month.year, last_month.month),
            MonthEvents('active', now.year, now.month)
        ),
        MonthEvents('active', now.year, now.month)
    )
    assert 123 in active_2_months
    assert 224 not in active_2_months
def chain_events(base_event_name, events_to_chain, now, time_group,
                 system='default'):
    """
    Chain additional events with a base set of events.

    Note: ``OR`` operators will apply only to their direct predecessors (i.e.,
    ``A && B && C || D`` will be handled as ``A && B && (C || D)``, and
    ``A && B || C && D`` will be handled as ``A && (B || C) && D``).

    ``events_to_chain`` is never modified, so the same list can safely be
    reused across calls.

    :param str base_event_name: Name of event to chain additional events to/with
    :param list events_to_chain: List of additional event names to chain
        (e.g., ``[{'name': 'user:logged_in', 'op': 'and'}]``); an entry whose
        ``op`` is anything other than ``'or'`` (including missing) is ANDed
    :param datetime now: Time point at which to get event data
    :param str time_group: Time scale by which to group results; can be `days`,
        `weeks`, `months`, `years`
    :param str system: Which bitmapist should be used
    :returns: Bitmapist events collection, or ``''`` when the base event has
        no events marked
    """
    fn_get_events = _events_fn(time_group)
    base_event = fn_get_events(base_event_name, now, system)

    if not base_event.has_events_marked():
        return ''

    if not events_to_chain:
        return base_event

    # Work on local copies: the merge step below removes entries, and the
    # caller's list must stay intact for subsequent calls.
    ops = [spec.get('op') for spec in events_to_chain]
    events = [fn_get_events(spec.get('name'), now, system)
              for spec in events_to_chain]

    # Each OR should operate only on its immediate predecessor, e.g.,
    # `A && B && C || D` should be handled as ~ `A && B && (C || D)`,
    # and
    # `A && B || C && D` should be handled as ~ `A && (B || C) && D`.
    # Walk backwards so removals never shift an index still to be visited.
    # Index 0 is skipped: an OR there applies to the base event (see below).
    for idx in range(len(ops) - 1, 0, -1):
        if ops[idx] != 'or':
            continue
        or_event = events.pop(idx)
        ops.pop(idx)  # merged OR events must not be re-chained below
        events[idx - 1] = BitOpOr(events[idx - 1], or_event)

    for op, event in zip(ops, events):
        if op == 'or':
            base_event = BitOpOr(base_event, event)
        else:
            base_event = BitOpAnd(base_event, event)

    return base_event
def test_bitop_key_sharing():
    """Two identical BitOpAnd operations must resolve to one shared redis key."""
    today = datetime.utcnow()

    mark_event('task1', 111, now=today)
    mark_event('task2', 111, now=today)
    mark_event('task1', 222, now=today)
    mark_event('task2', 222, now=today)

    ev1_task1 = DayEvents('task1', today.year, today.month, today.day)
    ev1_task2 = DayEvents('task2', today.year, today.month, today.day)
    ev1_both = BitOpAnd(ev1_task1, ev1_task2)

    ev2_task1 = DayEvents('task1', today.year, today.month, today.day)
    ev2_task2 = DayEvents('task2', today.year, today.month, today.day)
    ev2_both = BitOpAnd(ev2_task1, ev2_task2)

    assert ev1_both.redis_key == ev2_both.redis_key
    # Compare the two distinct objects (not ev1_both with itself) so the
    # shared-key behaviour is actually exercised.
    assert len(ev1_both) == len(ev2_both) == 2

    # Deleting through one object must be visible through the other, since
    # both point at the same key.
    ev1_both.delete()
    assert len(ev1_both) == len(ev2_both) == 0
def test_bit_operations_complex():
    """AND of two per-day AND results has the same size as each of them."""
    now = datetime.utcnow()
    tom = now + timedelta(days=1)

    # Both users complete both tasks on both days.
    for user_id in (111, 222):
        for task in ('task1', 'task2'):
            for moment in (now, tom):
                mark_event(task, user_id, now=moment)

    def both_tasks_on(day):
        # Users that have task1 AND task2 marked on the given day.
        return BitOpAnd(
            DayEvents('task1', day.year, day.month, day.day),
            DayEvents('task2', day.year, day.month, day.day),
        )

    now_events = both_tasks_on(now)
    tom_events = both_tasks_on(tom)
    both_events = BitOpAnd(now_events, tom_events)

    assert len(now_events) == len(tom_events)
    assert len(now_events) == len(both_events)
def get_dates_data(select1, select2, select3,
                   time_group='days', system='default',
                   as_precent=1, num_results=25):
    """
    Fetch the data from bitmapist.

    :param :select1 First filter (could be `active`)
    :param :select2 Second filter (could be `song:played`)
    :param :select3 Third filter (could be `song:played`, optional)
    :param :time_group What is the data grouped by? Can be `days`, `weeks`,
        `months`, `years`
    :param :system What bitmapist should be used?
    :param :as_precent If truthy, percentages of the `select1` count are
        returned instead of raw counts. Defaults to `1`
    :param :num_results How many rows (time points) to return; forced to 3
        when `time_group` is `years`
    :return A list of rows, formatted like
        `[[datetime, select1_count, value_0, ..., value_12], ...]`; a value is
        `''` when there is nothing to compare against for that cell
    :raises ValueError: if `time_group` is not one of the supported values
    """
    num_results = int(num_results)

    # Pick the event factory, the first time point and the step for the group.
    if time_group == 'days':
        fn_get_events = _day_events_fn
        now = datetime.utcnow() - timedelta(days=num_results - 1)

        def timedelta_inc(d):
            return timedelta(days=d)
    elif time_group == 'weeks':
        fn_get_events = _weeks_events_fn
        now = datetime.utcnow() - relativedelta(weeks=num_results - 1)

        def timedelta_inc(w):
            return relativedelta(weeks=w)
    elif time_group == 'months':
        fn_get_events = _month_events_fn
        now = datetime.utcnow() - relativedelta(months=num_results - 1)
        # Snap to the first day of the month.
        now -= timedelta(days=now.day - 1)

        def timedelta_inc(m):
            return relativedelta(months=m)
    elif time_group == 'years':
        fn_get_events = _year_events_fn
        num_results = 3
        now = datetime.utcnow() - relativedelta(years=num_results - 1)

        def timedelta_inc(y):
            return relativedelta(years=y)
    else:
        # Previously this surfaced later as an UnboundLocalError.
        raise ValueError('Unsupported time_group: %r' % (time_group,))

    date_range = num_results

    dates = []
    for _ in range(date_range):
        result = [now]

        # Total count
        day_events = fn_get_events(select1, now, system)
        total_day_count = len(day_events)
        result.append(total_day_count)

        # One cell per step forward in time
        for d_delta in range(13):
            if total_day_count == 0:
                result.append('')
                continue

            delta_now = now + timedelta_inc(d_delta)

            delta2_events = fn_get_events(select2, delta_now, system)
            if not delta2_events.has_events_marked():
                result.append('')
                continue

            delta2_set_op = BitOpAnd(system, day_events, delta2_events)

            if not select3:
                delta_count = len(delta2_set_op)
                delta2_set_op.delete()
            else:
                delta3_events = fn_get_events(select3, delta_now, system)
                if not delta3_events.has_events_marked():
                    # Release the temporary AND result before skipping the
                    # cell; otherwise it is left behind.
                    delta2_set_op.delete()
                    result.append('')
                    continue

                delta3_set_op = BitOpAnd(system, delta2_set_op, delta3_events)
                delta_count = len(delta3_set_op)
                delta3_set_op.delete()
                delta2_set_op.delete()

            # Append to result
            if delta_count == 0:
                result.append(0.0)
            elif as_precent:
                result.append(
                    (float(delta_count) / float(total_day_count)) * 100)
            else:
                result.append(delta_count)

        dates.append(result)
        now = now + timedelta_inc(1)

    return dates
def get_dates_data(select1, select1b, select2, select2b,
                   time_group='days', system='default',
                   as_precent=1, num_results=25, num_of_rows=12):
    """
    Fetch the data from bitmapist.

    :param :select1 First filter (could be `active`)
    :param :select1b Extra filter ANDed with `select1` (could be `country:US`,
        optional)
    :param :select2 Second filter (could be `song:played`)
    :param :select2b Extra filter ANDed with `select2` (could be
        `playlist:created`, optional)
    :param :time_group What is the data grouped by? Can be `days`, `weeks`,
        `months`, `years`
    :param :system What bitmapist should be used?
    :param :as_precent If truthy, percentages of the `select1` count are
        returned instead of raw counts. Defaults to `1`
    :param :num_results How many rows (time points) to return; forced to 3
        when `time_group` is `years`
    :param :num_of_rows How many steps forward in time to evaluate per row;
        `num_of_rows + 1` cells are produced (offsets 0..num_of_rows)
    :return A list of rows, formatted like
        `[[datetime, select1_count, value_0, ...], ...]`; a value is `''`
        when there is nothing to compare against for that cell
    :raises ValueError: if `time_group` is not one of the supported values
    """
    num_results = int(num_results)
    num_of_rows = int(num_of_rows)

    # Pick the event factory, the first time point and the step for the group.
    if time_group == 'days':
        fn_get_events = _day_events_fn
        now = datetime.utcnow() - timedelta(days=num_results - 1)

        def timedelta_inc(d):
            return timedelta(days=d)
    elif time_group == 'weeks':
        fn_get_events = _weeks_events_fn
        now = datetime.utcnow() - relativedelta(weeks=num_results - 1)

        def timedelta_inc(w):
            return relativedelta(weeks=w)
    elif time_group == 'months':
        fn_get_events = _month_events_fn
        now = datetime.utcnow() - relativedelta(months=num_results - 1)
        # Snap to the first day of the month.
        now -= timedelta(days=now.day - 1)

        def timedelta_inc(m):
            return relativedelta(months=m)
    elif time_group == 'years':
        fn_get_events = _year_events_fn
        num_results = 3
        now = datetime.utcnow() - relativedelta(years=num_results - 1)

        def timedelta_inc(y):
            return relativedelta(years=y)
    else:
        # Previously this surfaced later as an UnboundLocalError.
        raise ValueError('Unsupported time_group: %r' % (time_group,))

    date_range = num_results

    dates = []
    try:
        for _ in range(date_range):
            result = [now]

            # events for select1 (+select1b)
            select1_events = fn_get_events(select1, now, system)
            if select1b:
                select1b_events = fn_get_events(select1b, now, system)
                select1_events = BitOpAnd(system, select1_events,
                                          select1b_events)

            select1_count = len(select1_events)
            result.append(select1_count)

            # Move in time
            for t_delta in range(num_of_rows + 1):
                if select1_count == 0:
                    result.append('')
                    continue

                delta_now = now + timedelta_inc(t_delta)

                # events for select2 (+select2b)
                select2_events = fn_get_events(select2, delta_now, system)
                if select2b:
                    select2b_events = fn_get_events(select2b, delta_now,
                                                    system)
                    select2_events = BitOpAnd(system, select2_events,
                                              select2b_events)

                if not select2_events.has_events_marked():
                    result.append('')
                    continue

                both_events = BitOpAnd(system, select1_events, select2_events)
                both_count = len(both_events)

                # Append to result
                if both_count == 0:
                    result.append(0.0)
                elif as_precent:
                    result.append(
                        (float(both_count) / float(select1_count)) * 100)
                else:
                    result.append(both_count)

            dates.append(result)
            now = now + timedelta_inc(1)
    finally:
        # clean up results of BitOps, even if a lookup above failed
        delete_runtime_bitop_keys()

    return dates
def test_get_event_names_prefix():
    """get_event_names(prefix='b') yields exactly the marked names starting with 'b'."""
    for name in {'foo', 'bar', 'baz', 'spam', 'egg'}:
        mark_event(name, 1)

    # Run a bit operation as well; the expected result below contains only
    # the plain event names.
    BitOpAnd(DayEvents('foo'), DayEvents('bar'))

    found = set(get_event_names(prefix='b', batch=2))
    assert found == {'bar', 'baz'}
def get_cohort(primary_event_name, secondary_event_name,
               additional_events=None, time_group='days',
               num_rows=10, num_cols=10, system='default',
               with_replacement=False):
    """
    Get the cohort data for multiple chained events at multiple points in time.

    :param str primary_event_name: Name of primary event for defining cohort
    :param str secondary_event_name: Name of secondary event for defining cohort
    :param list additional_events: List of additional events by which to filter
        cohort (e.g., ``[{'name': 'user:logged_in', 'op': 'and'}]``); defaults
        to no additional events. The list is not modified.
    :param str time_group: Time scale by which to group results; can be `days`,
        `weeks`, `months`, `years`
    :param int num_rows: How many results rows to get; corresponds to how far
        back to get results from current time
    :param int num_cols: How many results cols to get; corresponds to how far
        forward to get results from each time point
    :param str system: Which bitmapist should be used
    :param bool with_replacement: Whether more than one occurence of an event
        should be counted for a given user; e.g., if a user logged in multiple
        times, whether to include subsequent logins for the cohort
    :returns: Tuple of (list of lists of cohort results, list of dates for
        cohort, primary event total for each date). A cell is ``None`` when the
        row has no primary events or the cell's date lies in the future.
    """
    # Avoid a shared mutable default, and snapshot the caller's list.
    additional_events = (
        [] if additional_events is None else list(additional_events))

    cohort = []
    dates = []
    primary_event_totals = []  # for percents

    fn_get_events = _events_fn(time_group)

    # TIMES

    def increment_delta(t):
        return relativedelta(**{time_group: t})

    now = datetime.utcnow()
    # Start `num_rows - 1` periods back so that the last row is the current
    # period.
    event_time = now - relativedelta(**{time_group: num_rows - 1})

    if time_group == 'months':
        # Snap to the first day of the month.
        event_time -= relativedelta(days=event_time.day - 1)

    # COHORT

    try:
        for _ in range(num_rows):
            # get results for each date interval from current time point
            # for the row
            row = []
            primary_event = fn_get_events(primary_event_name, event_time,
                                          system)

            primary_total = len(primary_event)
            primary_event_totals.append(primary_total)

            dates.append(event_time)

            if not primary_total:
                row = [None] * num_cols
            else:
                for j in range(num_cols):
                    # get results for each event chain for current
                    # incremented time
                    incremented = event_time + increment_delta(j)

                    if incremented > now:
                        # date in future; no events and no need to go
                        # through chain
                        combined_total = None
                    else:
                        # Hand over a fresh copy for every cell so the chain
                        # stays identical across cells even if the callee
                        # alters its argument.
                        chained_events = chain_events(
                            secondary_event_name, list(additional_events),
                            incremented, time_group, system)

                        if chained_events:
                            combined_events = BitOpAnd(chained_events,
                                                       primary_event)
                            combined_total = len(combined_events)

                            if not with_replacement:
                                # Drop the users just counted so they are not
                                # counted again in later columns of this row.
                                primary_event = BitOpXor(primary_event,
                                                         combined_events)
                        else:
                            combined_total = 0

                    row.append(combined_total)

            cohort.append(row)
            event_time += increment_delta(1)
    finally:
        # Clean up results of BitOps, even if a lookup above failed
        delete_runtime_bitop_keys()

    return cohort, dates, primary_event_totals