Ejemplo n.º 1
0
def test_bit_operations():
    """Exercise BitOpAnd / BitOpOr over monthly ``active`` events.

    User 123 is marked active in both the current and the previous calendar
    month, user 224 only in the previous one.  The test then checks counts
    and membership for AND, OR, an AND bound to the ``default_copy`` system,
    and a nested AND.
    """
    delete_all_events()

    now = datetime.utcnow()
    # Last day of the previous calendar month.  Subtracting a fixed 30 days
    # would still land in the *current* month when the test runs on the 31st,
    # which breaks the per-month premises asserted below.
    last_month = now.replace(day=1) - timedelta(days=1)

    # 123 has been active for two months
    mark_event('active', 123, now=now)
    mark_event('active', 123, now=last_month)

    # 224 has only been active last_month
    mark_event('active', 224, now=last_month)

    # Assert basic premises
    assert MonthEvents('active', last_month.year, last_month.month).get_count() == 2
    assert MonthEvents('active', now.year, now.month).get_count() == 1

    # Try out with bit AND operation
    active_2_months = BitOpAnd(
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month)
    )
    assert active_2_months.get_count() == 1
    assert 123 in active_2_months
    assert 224 not in active_2_months

    # Try out with bit OR operation
    assert BitOpOr(
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month)
    ).get_count() == 2

    # Try out with a different system (passed as the leading string argument)
    active_2_months = BitOpAnd(
        'default_copy',
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month),
    )
    assert active_2_months.get_count() == 1
    assert active_2_months.system == 'default_copy'

    # Try nested operations
    active_2_months = BitOpAnd(
        BitOpAnd(
            MonthEvents('active', last_month.year, last_month.month),
            MonthEvents('active', now.year, now.month)
        ),
        MonthEvents('active', now.year, now.month)
    )

    assert 123 in active_2_months
    assert 224 not in active_2_months
Ejemplo n.º 2
0
def chain_events(base_event_name, events_to_chain, now, time_group,
                 system='default'):
    """
    Chain additional events with a base set of events.

    Note: ``OR`` operators will apply only to their direct predecessors (i.e.,
    ``A && B && C || D`` will be handled as ``A && B && (C || D)``, and
    ``A && B || C && D`` will be handled as ``A && (B || C) && D``).

    The ``events_to_chain`` list passed in is never modified, so the same
    list can safely be reused for several calls.

    :param str base_event_name: Name of event to chain additional events to/with
    :param list events_to_chain: List of additional event names to chain
                                 (e.g., ``[{'name': 'user:logged_in',
                                 'op': 'and'}]``); an entry whose ``op`` is
                                 anything other than ``'or'`` (or is missing)
                                 is combined with ``AND``
    :param datetime now: Time point at which to get event data
    :param str time_group: Time scale by which to group results; can be `days`,
                           `weeks`, `months`, `years`
    :param str system: Which bitmapist should be used
    :returns: Bitmapist events collection, or an empty string when the base
              event has no events marked at ``now``
    """

    fn_get_events = _events_fn(time_group)
    base_event = fn_get_events(base_event_name, now, system)

    if not base_event.has_events_marked():
        return ''

    if not events_to_chain:
        return base_event

    # Work on a private copy: entries are popped below while folding ORs, and
    # the caller's list must stay intact for subsequent calls.
    pending = list(events_to_chain)
    chained = [fn_get_events(entry.get('name'), now, system)
               for entry in pending]

    # Each OR should operate only on its immediate predecessor, e.g.,
    #     `A && B && C || D` should be handled as ~ `A && B && (C || D)`,
    #     and
    #     `A && B || C && D` should be handled as ~ `A && (B || C) && D`.
    op_or_indices = [idx for idx, entry in enumerate(pending)
                     if entry.get('op') == 'or']

    # Work backwards; least impact on operator combos + list indexing
    for idx in reversed(op_or_indices):
        # If first of events to chain, OR will just operate on base event
        if idx > 0:
            or_event = chained.pop(idx)

            # OR events should not be re-chained below
            pending.pop(idx)

            chained[idx - 1] = BitOpOr(chained[idx - 1], or_event)

    # ``pending`` and ``chained`` are kept the same length above, so they can
    # be walked in lockstep.
    for name_and_op, event in zip(pending, chained):
        if name_and_op.get('op') == 'or':
            base_event = BitOpOr(base_event, event)
        else:
            base_event = BitOpAnd(base_event, event)

    return base_event
Ejemplo n.º 3
0
def test_bit_operations():
    """Exercise BitOpAnd / BitOpOr over monthly ``active`` events.

    User 123 is marked active in both the current and the previous calendar
    month, user 224 only in the previous one.  The test then checks counts
    and membership for AND, OR, an AND bound to the ``default_copy`` system,
    and a nested AND.
    """
    delete_all_events()

    now = datetime.utcnow()
    # Last day of the previous calendar month.  Subtracting a fixed 30 days
    # would still land in the *current* month when the test runs on the 31st,
    # which breaks the per-month premises asserted below.
    last_month = now.replace(day=1) - timedelta(days=1)

    # 123 has been active for two months
    mark_event('active', 123, now=now)
    mark_event('active', 123, now=last_month)

    # 224 has only been active last_month
    mark_event('active', 224, now=last_month)

    # Assert basic premises
    assert MonthEvents('active', last_month.year, last_month.month).get_count() == 2
    assert MonthEvents('active', now.year, now.month).get_count() == 1

    # Try out with bit AND operation
    active_2_months = BitOpAnd(
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month)
    )
    assert active_2_months.get_count() == 1
    assert 123 in active_2_months
    assert 224 not in active_2_months

    # Try out with bit OR operation
    assert BitOpOr(
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month)
    ).get_count() == 2

    # Try out with a different system (passed as the leading string argument)
    active_2_months = BitOpAnd(
        'default_copy',
        MonthEvents('active', last_month.year, last_month.month),
        MonthEvents('active', now.year, now.month),
    )
    assert active_2_months.get_count() == 1
    assert active_2_months.system == 'default_copy'

    # Try nested operations
    active_2_months = BitOpAnd(
        BitOpAnd(
            MonthEvents('active', last_month.year, last_month.month),
            MonthEvents('active', now.year, now.month)
        ),
        MonthEvents('active', now.year, now.month)
    )

    assert 123 in active_2_months
    assert 224 not in active_2_months
Ejemplo n.º 4
0
def test_bitop_key_sharing():
    """Check that two equivalent BitOpAnd operations share one Redis key.

    Two AND operations are built from separately constructed but identical
    ``DayEvents`` pairs; they must report the same ``redis_key`` and the same
    length, both before and after one of them is deleted.
    """
    today = datetime.utcnow()

    mark_event('task1', 111, now=today)
    mark_event('task2', 111, now=today)
    mark_event('task1', 222, now=today)
    mark_event('task2', 222, now=today)

    ev1_task1 = DayEvents('task1', today.year, today.month, today.day)
    ev1_task2 = DayEvents('task2', today.year, today.month, today.day)
    ev1_both = BitOpAnd(ev1_task1, ev1_task2)

    ev2_task1 = DayEvents('task1', today.year, today.month, today.day)
    ev2_task2 = DayEvents('task2', today.year, today.month, today.day)
    ev2_both = BitOpAnd(ev2_task1, ev2_task2)

    assert ev1_both.redis_key == ev2_both.redis_key
    # Compare the two *different* objects; the original compared ev1_both
    # with itself, which left ev2_both unchecked.
    assert len(ev1_both) == len(ev2_both) == 2
    ev1_both.delete()
    # The keys were asserted equal above, so deleting through ev1_both is
    # expected to empty ev2_both as well.
    assert len(ev1_both) == len(ev2_both) == 0
Ejemplo n.º 5
0
def test_bitop_key_sharing():
    """Check that two equivalent BitOpAnd operations share one Redis key.

    Two AND operations are built from separately constructed but identical
    ``DayEvents`` pairs; they must report the same ``redis_key`` and the same
    length, both before and after one of them is deleted.
    """
    today = datetime.utcnow()

    mark_event('task1', 111, now=today)
    mark_event('task2', 111, now=today)
    mark_event('task1', 222, now=today)
    mark_event('task2', 222, now=today)

    ev1_task1 = DayEvents('task1', today.year, today.month, today.day)
    ev1_task2 = DayEvents('task2', today.year, today.month, today.day)
    ev1_both = BitOpAnd(ev1_task1, ev1_task2)

    ev2_task1 = DayEvents('task1', today.year, today.month, today.day)
    ev2_task2 = DayEvents('task2', today.year, today.month, today.day)
    ev2_both = BitOpAnd(ev2_task1, ev2_task2)

    assert ev1_both.redis_key == ev2_both.redis_key
    # Compare the two *different* objects; the original compared ev1_both
    # with itself, which left ev2_both unchecked.
    assert len(ev1_both) == len(ev2_both) == 2
    ev1_both.delete()
    # The keys were asserted equal above, so deleting through ev1_both is
    # expected to empty ev2_both as well.
    assert len(ev1_both) == len(ev2_both) == 0
Ejemplo n.º 6
0
def test_bit_operations_complex():
    """Nest BitOpAnd operations across two consecutive days.

    Users 111 and 222 are marked for both ``task1`` and ``task2`` today and
    tomorrow.  The per-day AND results and the AND of those two results must
    all have the same length.
    """
    today = datetime.utcnow()
    tomorrow = today + timedelta(days=1)

    # Mark every (user, task, day) combination.
    for user_id in (111, 222):
        for task in ('task1', 'task2'):
            for moment in (today, tomorrow):
                mark_event(task, user_id, now=moment)

    def both_tasks_on(day):
        # Users that have task1 AND task2 marked on the given day.
        first = DayEvents('task1', day.year, day.month, day.day)
        second = DayEvents('task2', day.year, day.month, day.day)
        return BitOpAnd(first, second)

    today_both = both_tasks_on(today)
    tomorrow_both = both_tasks_on(tomorrow)

    # AND of two results that are themselves AND operations.
    combined = BitOpAnd(today_both, tomorrow_both)

    assert len(today_both) == len(tomorrow_both)
    assert len(today_both) == len(combined)
Ejemplo n.º 7
0
def get_dates_data(select1, select2, select3,
                   time_group='days', system='default',
                   as_precent=1, num_results=25):
    """
    Fetch the data from bitmapist.

    For each of ``num_results`` consecutive periods, counts the ``select1``
    events of that period and then, for each of the 13 period offsets
    ``0..12``, how many of them also have ``select2`` (and ``select3``, when
    given) marked at the offset period.

    :param :select1 First filter (could be `active`)
    :param :select2 Second filter (could be `song:played`)
    :param :select3 Third filter (could be `song:played`, optional)
    :param :time_group What is the data grouped by? Can be `days`, `weeks`, `months`, `years`.
                       With `years` the number of results is always 3.
    :param :system What bitmapist should be used?
    :param :as_precent If truthy then percents are calculated and shown. Defaults to `1`
    :param :num_results How many periods (rows) to return, ending at the current one
    :return A list of rows formatted like `[datetime, total_count, v0, ..., v12]`
            where each `v` is `''` (nothing to report), `0.0`, a percentage
            or a raw count
    :raises ValueError if `time_group` is not one of the supported values
    """
    num_results = int(num_results)

    # Days
    if time_group == 'days':
        fn_get_events = _day_events_fn

        now = datetime.utcnow() - timedelta(days=num_results-1)
        timedelta_inc = lambda d: timedelta(days=d)
    # Weeks
    elif time_group == 'weeks':
        fn_get_events = _weeks_events_fn

        now = datetime.utcnow() - relativedelta(weeks=num_results-1)
        timedelta_inc = lambda w: relativedelta(weeks=w)
    # Months
    elif time_group == 'months':
        fn_get_events = _month_events_fn

        now = datetime.utcnow() - relativedelta(months=num_results-1)
        # Snap to the first day of that month
        now -= timedelta(days=now.day-1)
        timedelta_inc = lambda m: relativedelta(months=m)
    # Years
    elif time_group == 'years':
        fn_get_events = _year_events_fn

        # The requested number of results is overridden for yearly data
        num_results = 3

        now = datetime.utcnow() - relativedelta(years=num_results-1)
        timedelta_inc = lambda y: relativedelta(years=y)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError; fail early with a readable message instead.
        raise ValueError(
            "Unknown time_group %r; expected 'days', 'weeks', 'months' "
            "or 'years'" % (time_group,))

    dates = []

    for _ in range(num_results):
        result = [now]

        # Total count
        day_events = fn_get_events(select1, now, system)

        total_day_count = len(day_events)
        result.append(total_day_count)

        # Daily count
        for d_delta in range(0, 13):
            if total_day_count == 0:
                result.append('')
                continue

            delta_now = now + timedelta_inc(d_delta)

            delta2_events = fn_get_events(select2, delta_now, system)

            if not delta2_events.has_events_marked():
                result.append('')
                continue

            delta2_set_op = BitOpAnd(system, day_events, delta2_events)

            if not select3:
                delta_count = len(delta2_set_op)
                delta2_set_op.delete()
            else:
                delta3_events = fn_get_events(select3, delta_now, system)

                if not delta3_events.has_events_marked():
                    # Drop the temporary AND result before skipping, otherwise
                    # it is left behind.
                    delta2_set_op.delete()
                    result.append('')
                    continue

                delta3_set_op = BitOpAnd(system, delta2_set_op, delta3_events)
                delta_count = len(delta3_set_op)

                delta3_set_op.delete()
                delta2_set_op.delete()

            # Append to result
            if delta_count == 0:
                result.append(0.0)
            elif as_precent:
                result.append((float(delta_count) / float(total_day_count)) * 100)
            else:
                result.append(delta_count)

        dates.append(result)

        now = now + timedelta_inc(1)

    return dates
Ejemplo n.º 8
0
def get_dates_data(select1, select1b, select2, select2b,
                   time_group='days', system='default',
                   as_precent=1, num_results=25, num_of_rows=12):
    """
    Fetch the data from bitmapist.

    For each of ``num_results`` consecutive periods, counts the events
    matching ``select1`` (AND ``select1b`` when given) in that period and
    then, for each period offset ``0..num_of_rows``, how many of them also
    match ``select2`` (AND ``select2b`` when given) at the offset period.

    :param :select1 First filter (could be `active`)
    :param :select1b Extra filter ANDed with `select1` (could be `country:US`, optional)
    :param :select2 Second filter (could be `song:played`)
    :param :select2b Extra filter ANDed with `select2` (could be `playlist:created`, optional)
    :param :time_group What is the data grouped by? Can be `days`, `weeks`, `months`, `years`.
                       With `years` the number of results is always 3.
    :param :system What bitmapist should be used?
    :param :as_precent If truthy then percents are calculated and shown. Defaults to `1`
    :param :num_results How many periods (rows of the result) to return, ending at the current one
    :param :num_of_rows Highest period offset to report; each row carries
                        `num_of_rows + 1` offset values
    :return A list of rows formatted like `[datetime, select1_count, v0, ..., vN]`
            where each `v` is `''` (nothing to report), `0.0`, a percentage
            or a raw count
    :raises ValueError if `time_group` is not one of the supported values
    """
    num_results = int(num_results)
    num_of_rows = int(num_of_rows)

    # Days
    if time_group == 'days':
        fn_get_events = _day_events_fn

        now = datetime.utcnow() - timedelta(days=num_results-1)
        timedelta_inc = lambda d: timedelta(days=d)
    # Weeks
    elif time_group == 'weeks':
        fn_get_events = _weeks_events_fn

        now = datetime.utcnow() - relativedelta(weeks=num_results-1)
        timedelta_inc = lambda w: relativedelta(weeks=w)
    # Months
    elif time_group == 'months':
        fn_get_events = _month_events_fn

        now = datetime.utcnow() - relativedelta(months=num_results-1)
        # Snap to the first day of that month
        now -= timedelta(days=now.day-1)
        timedelta_inc = lambda m: relativedelta(months=m)
    # Years
    elif time_group == 'years':
        fn_get_events = _year_events_fn

        # The requested number of results is overridden for yearly data
        num_results = 3

        now = datetime.utcnow() - relativedelta(years=num_results-1)
        timedelta_inc = lambda y: relativedelta(years=y)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError; fail early with a readable message instead.
        raise ValueError(
            "Unknown time_group %r; expected 'days', 'weeks', 'months' "
            "or 'years'" % (time_group,))

    dates = []

    try:
        for _ in range(num_results):
            result = [now]

            # events for select1 (+select1b)
            select1_events = fn_get_events(select1, now, system)
            if select1b:
                select1b_events = fn_get_events(select1b, now, system)
                select1_events = BitOpAnd(system, select1_events, select1b_events)

            select1_count = len(select1_events)
            result.append(select1_count)

            # Move in time
            for t_delta in range(0, num_of_rows+1):
                if select1_count == 0:
                    result.append('')
                    continue

                delta_now = now + timedelta_inc(t_delta)

                # events for select2 (+select2b)
                select2_events = fn_get_events(select2, delta_now, system)
                if select2b:
                    select2b_events = fn_get_events(select2b, delta_now, system)
                    select2_events = BitOpAnd(system, select2_events, select2b_events)

                if not select2_events.has_events_marked():
                    result.append('')
                    continue

                both_events = BitOpAnd(system, select1_events, select2_events)
                both_count = len(both_events)

                # Append to result
                if both_count == 0:
                    result.append(0.0)
                elif as_precent:
                    result.append((float(both_count) / float(select1_count)) * 100)
                else:
                    result.append(both_count)

            dates.append(result)
            now = now + timedelta_inc(1)
    finally:
        # clean up results of BitOps, even when the loop above raised
        delete_runtime_bitop_keys()

    return dates
Ejemplo n.º 9
0
def test_get_event_names_prefix():
    """``get_event_names(prefix='b')`` must yield exactly ``bar`` and ``baz``."""
    marked = {'foo', 'bar', 'baz', 'spam', 'egg'}
    for name in marked:
        mark_event(name, 1)

    # Run a bit operation too; nothing derived from it appears in the
    # expected result below.
    BitOpAnd(DayEvents('foo'), DayEvents('bar'))

    found = set(get_event_names(prefix='b', batch=2))
    assert found == {'bar', 'baz'}
Ejemplo n.º 10
0
def get_cohort(primary_event_name, secondary_event_name,
               additional_events=None, time_group='days',
               num_rows=10, num_cols=10, system='default',
               with_replacement=False):
    """
    Get the cohort data for multiple chained events at multiple points in time.

    :param str primary_event_name: Name of primary event for defining cohort
    :param str secondary_event_name: Name of secondary event for defining cohort
    :param list additional_events: List of additional events by which to filter
                                   cohort (e.g., ``[{'name': 'user:logged_in',
                                   'op': 'and'}]``); ``None`` (the default)
                                   means no additional events. The list is
                                   never modified.
    :param str time_group: Time scale by which to group results; can be `days`,
                           `weeks`, `months`, `years`
    :param int num_rows: How many results rows to get; corresponds to how far
                         back to get results from current time
    :param int num_cols: How many results cols to get; corresponds to how far
                         forward to get results from each time point
    :param str system: Which bitmapist should be used
    :param bool with_replacement: Whether more than one occurrence of an event
                                  should be counted for a given user; e.g., if
                                  a user logged in multiple times, whether to
                                  include subsequent logins for the cohort
    :returns: Tuple of (list of lists of cohort results, list of dates for
              cohort, primary event total for each date). A cell is ``None``
              when the row has no primary events or the cell lies in the
              future.
    """

    # Snapshot the chain once; a fresh copy is handed to ``chain_events`` for
    # every cell so that each cell is computed from the same chain even if
    # the callee modifies the list it receives.
    additional_events = list(additional_events or [])

    cohort = []
    dates = []
    primary_event_totals = []  # for percents

    fn_get_events = _events_fn(time_group)

    # TIMES

    def increment_delta(t):
        return relativedelta(**{time_group: t})

    now = datetime.utcnow()
    # Start ``num_rows - 1`` periods back so that the last row is the current
    # period.
    event_time = now - relativedelta(**{time_group: num_rows - 1})

    if time_group == 'months':
        # Snap to the first day of the month
        event_time -= relativedelta(days=event_time.day - 1)

    # COHORT

    try:
        for _ in range(num_rows):
            # get results for each date interval from current time point for
            # the row
            row = []
            primary_event = fn_get_events(primary_event_name, event_time, system)

            primary_total = len(primary_event)
            primary_event_totals.append(primary_total)

            dates.append(event_time)

            if not primary_total:
                row = [None] * num_cols
            else:
                for j in range(num_cols):
                    # get results for each event chain for current incremented
                    # time
                    incremented = event_time + increment_delta(j)

                    if incremented > now:
                        # date in future; no events and no need to go through
                        # chain
                        combined_total = None

                    else:
                        chained_events = chain_events(secondary_event_name,
                                                      list(additional_events),
                                                      incremented, time_group,
                                                      system)

                        if chained_events:
                            combined_events = BitOpAnd(chained_events, primary_event)
                            combined_total = len(combined_events)

                            if not with_replacement:
                                # Remove the users counted in this cell from
                                # the set used for the following cells
                                primary_event = BitOpXor(primary_event, combined_events)

                        else:
                            combined_total = 0

                    row.append(combined_total)

            cohort.append(row)
            event_time += increment_delta(1)
    finally:
        # Clean up results of BitOps, even when the loop above raised
        delete_runtime_bitop_keys()

    return cohort, dates, primary_event_totals