Esempio n. 1
0
    def get_range(self,
                  base_key: Key,
                  start_time: datetime,
                  end_time: datetime = None,
                  count: int = 0) -> List[Tuple[Key, Any]]:
        """
        Returns the list of items from the store based on the given time range or count.

        Exactly one of `end_time` and `count` has to be supplied.

        :param base_key: Items which don't start with the base_key are filtered out.
        :param start_time: Start time for the range query.
        :param end_time: End time of the range query. If None, count is used.
        :param count: The number of items to be returned. Used if end_time is not specified.
            A negative count makes the range run from `start_time` back to the earliest
            representable time; a positive count makes it run forward to the latest one.
        :return: Whatever the key-type specific range helper returns for the computed range.
        :raises ValueError: If both, or neither, of `end_time` and `count` are set.
        """
        if end_time and count:
            raise ValueError('Only one of `end` or `count` can be set')

        # Without either bound there is no range to query; fail early with a clear
        # message instead of letting None reach the timezone / comparison logic below.
        if end_time is None and not count:
            raise ValueError('One of `end_time` or `count` must be set')

        if count:
            # The sign of count picks the open-ended boundary of the query.
            boundary = datetime.min if count < 0 else datetime.max
            end_time = boundary.replace(tzinfo=timezone.utc)

        end_time = self._add_timezone_if_required(end_time)
        start_time = self._add_timezone_if_required(start_time)

        # Normalise so that start_time is always the earlier of the two bounds.
        if end_time < start_time:
            start_time, end_time = end_time, start_time

        if base_key.key_type == KeyType.TIMESTAMP:
            start_key = Key(KeyType.TIMESTAMP, base_key.identity,
                            base_key.group, [], start_time)
            end_key = Key(KeyType.TIMESTAMP, base_key.identity, base_key.group,
                          [], end_time)
            return self._get_range_timestamp_key(start_key, end_key, count)

        return self._get_range_dimension_key(base_key, start_time, end_time,
                                             count)
Esempio n. 2
0
    def _prepare_window(self, start_time: datetime) -> None:
        """
        Loads the blocks that form the window and installs them as the window source.
        :param start_time: The anchor block start_time from where the window
        should be generated.
        """
        # The window is read from the store that backs the source schema.
        store_name = self._schema.source.store_schema.fully_qualified_name
        store = self._schema.schema_loader.get_store(store_name)

        window_type = self._schema.window_type
        is_time_window = any(
            Type.is_type_equal(window_type, unit)
            for unit in (Type.DAY, Type.HOUR))

        source_key = Key(self._schema.source.key_type, self._identity,
                         self._schema.source.name)

        if is_time_window:
            # DAY / HOUR windows are bounded by an explicit end time.
            window_end = self._get_end_time(start_time)
            stored_items = store.get_range(source_key, start_time, window_end)
        else:
            # Any other window type is bounded by a number of items.
            stored_items = store.get_range(source_key, start_time, None,
                                           self._schema.window_value)

        self._window_source = _WindowSource(self._load_blocks(stored_items))
        self._validate_view()
Esempio n. 3
0
def test_save_simple(store: DynamoStore) -> None:
    """A dict saved under a DIMENSION key is returned unchanged by `get` with an equal key."""
    payload = {'string_field': 'string', 'int_field': 1}
    store.save(Key(KeyType.DIMENSION, 'test_user', 'test_group'), payload)

    # Look the item up with a freshly built key rather than the saved instance.
    fetched = store.get(Key(KeyType.DIMENSION, 'test_user', 'test_group'))
    assert fetched == {'string_field': 'string', 'int_field': 1}
Esempio n. 4
0
def test_set_simple(empty_memory_store) -> None:
    """
    Tests that a value saved in the memory store can be read back with an equal key
    :return:
    """
    saved_key = Key(KeyType.DIMENSION, 'test_user', 'test_group')
    empty_memory_store.save(saved_key, 1)

    # A separately constructed key must resolve to the same item.
    lookup_key = Key(KeyType.DIMENSION, 'test_user', 'test_group')
    assert empty_memory_store.get(lookup_key) == 1
Esempio n. 5
0
def test_equals_dimension_key():
    """Checks `==` / `!=` on DIMENSION keys that differ by group or by dimension list."""
    plain = Key(KeyType.DIMENSION, 'a', 'b')
    with_c = Key(KeyType.DIMENSION, 'a', 'b', ['c'])

    assert plain == Key(KeyType.DIMENSION, 'a', 'b')
    assert plain != Key(KeyType.DIMENSION, 'a', 'c')
    assert plain != with_c
    assert with_c == Key(KeyType.DIMENSION, 'a', 'b', ['c'])
    assert with_c != Key(KeyType.DIMENSION, 'a', 'b', ['d'])
Esempio n. 6
0
def test_get_range_no_items_on_boundary(loaded_store: DynamoStore) -> None:
    """Range query between two keys yields five items; first has int_field 2, last has 6."""
    # NOTE(review): uses the positional Key(identity, group, timestamp) form and a
    # key-to-key get_range call - confirm this matches the API revision under test.
    range_start = Key('test_user', 'test_group',
                      datetime(2018, 1, 1, 2, 1, 1, 0, tzinfo=timezone.utc))
    range_end = Key('test_user', 'test_group',
                    datetime(2018, 1, 1, 6, 1, 1, 2, tzinfo=timezone.utc))

    items = loaded_store.get_range(range_start, range_end)

    assert len(items) == 5
    _, first_value = items[0]
    assert first_value['int_field'] == 2
    _, last_value = items[-1]
    assert last_value['int_field'] == 6
Esempio n. 7
0
def test_save_time(store: DynamoStore) -> None:
    """A dict saved under a timestamped key is returned unchanged by `get` with an equal key."""
    block_time = datetime(2018, 1, 1, 1, 1, 1, 1, tzinfo=timezone.utc)
    payload = {'string_field': 'string2', 'int_field': 2}
    store.save(Key('test_user', 'test_group', block_time), payload)

    # Look the item up with a freshly built key rather than the saved instance.
    fetched = store.get(Key('test_user', 'test_group', block_time))
    assert fetched == {'string_field': 'string2', 'int_field': 2}
Esempio n. 8
0
    def _prepare_key(self, timestamp: datetime = None):
        """
        Builds the Key for this aggregate.

        Without dimension fields the group is the aggregate name. Otherwise the
        group is the name, a '.', and the ':'-joined string values of the fields.
        """
        if not self._dimension_fields:
            return Key(self._identity, self._name, timestamp)

        dimension_values = [
            str(field.value) for field in self._dimension_fields.values()
        ]
        group = self._name + '.' + ':'.join(dimension_values)
        return Key(self._identity, group, timestamp)
Esempio n. 9
0
def test_set_get_date(empty_memory_store) -> None:
    """
    Tests that an item saved under a timestamped key is found again by an equal key
    """
    saved_at = datetime.utcnow()
    empty_memory_store.save(Key('user1', 'session', saved_at), 'test')

    # A new Key built from the same timestamp must address the same item.
    lookup_key = Key('user1', 'session', saved_at)
    assert empty_memory_store.get(lookup_key) == 'test'
Esempio n. 10
0
def test_sort_prefix_key():
    """Checks `sort_prefix_key` for DIMENSION and TIMESTAMP keys, with and without extras."""
    expectations = [
        (Key(KeyType.DIMENSION, 'user1', 'group1'), 'group1/'),
        (Key(KeyType.DIMENSION, 'user1', 'group1', ['a', 'b']), 'group1/a:b'),
        (Key(KeyType.TIMESTAMP, 'user1', 'group1'), 'group1//'),
        # The timestamp passed in is naive; the expected prefix carries a +00:00 offset.
        (Key(KeyType.TIMESTAMP, 'user1', 'group1', [],
             datetime(2018, 3, 7, 22, 35, 31)),
         'group1//2018-03-07T22:35:31+00:00'),
    ]

    for key, expected_prefix in expectations:
        assert key.sort_prefix_key == expected_prefix
Esempio n. 11
0
def test_key_type_and_args_error():
    """Key rejects a timestamp on DIMENSION keys and dimensions on TIMESTAMP keys."""
    timestamp_error = '`timestamp` should not be set for KeyType.DIMENSION.'
    with pytest.raises(ValueError, match=timestamp_error):
        Key(KeyType.DIMENSION, 'id', 'group', [],
            datetime(2018, 3, 7, 22, 35, 31))

    dimensions_error = '`dimensions` should not be set for KeyType.TIMESTAMP.'
    with pytest.raises(ValueError, match=dimensions_error):
        Key(KeyType.TIMESTAMP, 'id', 'group', ['dim1'], None)
Esempio n. 12
0
def test_less_than_dimension_key():
    """Checks the `<` operator on DIMENSION keys; each result must be an actual bool."""
    group_b = Key(KeyType.DIMENSION, 'a', 'b')
    group_c = Key(KeyType.DIMENSION, 'a', 'c')
    group_b_dim_c = Key(KeyType.DIMENSION, 'a', 'b', ['c'])
    group_b_dim_d = Key(KeyType.DIMENSION, 'a', 'b', ['d'])

    # Equal keys are not less than each other.
    assert (group_b < Key(KeyType.DIMENSION, 'a', 'b')) is False
    assert (group_b_dim_c < Key(KeyType.DIMENSION, 'a', 'b', ['c'])) is False

    # Keys in different groups do not compare as less-than.
    assert (group_b < group_c) is False

    # Within the same group the dimensions decide the outcome.
    assert (group_b < group_b_dim_c) is True
    assert (group_b_dim_c < group_b_dim_d) is True
def test_aggregate_final_state(
        activity_aggregate_schema: ActivityAggregateSchema,
        activity_events: List[Record]) -> None:
    """Evaluates all activity events and checks the three snapshots written on finalize."""
    # Initialize the starting state
    identity = 'user1'
    context = EvaluationContext()
    context.global_add('identity', identity)
    aggregate = ActivityAggregate(activity_aggregate_schema, identity, context)
    context.global_add(aggregate._schema.name, aggregate)

    for event in activity_events:
        evaluate_event(event, aggregate)

    aggregate.run_finalize()

    persisted = aggregate._store.get_all(identity)
    assert len(persisted) == 3

    # (block start, block end, expected sum, expected count)
    expected_blocks = [
        (datetime(2018, 1, 1, 1, 1, 1, 0, timezone.utc),
         datetime(2018, 1, 1, 1, 2, 1, 0, timezone.utc), 111, 3),
        (datetime(2018, 1, 1, 3, 1, 1, 0, timezone.utc),
         datetime(2018, 1, 1, 3, 1, 1, 0, timezone.utc), 1000, 1),
        (datetime(2018, 1, 2, 1, 1, 1, 0, timezone.utc),
         datetime(2018, 1, 2, 1, 1, 1, 0, timezone.utc), 10000, 1),
    ]

    for block_start, block_end, total, occurrences in expected_blocks:
        snapshot = persisted.get(Key('user1', 'activity_aggr', block_start))
        assert snapshot == {
            '_identity': 'user1',
            '_start_time': block_start.isoformat(),
            '_end_time': block_end.isoformat(),
            'sum': total,
            'count': occurrences
        }
Esempio n. 14
0
def test_greater_than_dimension_key():
    """Checks the `>` operator on DIMENSION keys; each result must be an actual bool."""
    group_b = Key(KeyType.DIMENSION, 'a', 'b')
    group_c = Key(KeyType.DIMENSION, 'a', 'c')
    group_b_dim_c = Key(KeyType.DIMENSION, 'a', 'b', ['c'])
    group_b_dim_d = Key(KeyType.DIMENSION, 'a', 'b', ['d'])

    # Equal keys are not greater than each other.
    assert (group_b > Key(KeyType.DIMENSION, 'a', 'b')) is False
    assert (group_b_dim_c > Key(KeyType.DIMENSION, 'a', 'b', ['c'])) is False

    # Keys in different groups do not compare as greater-than.
    assert (group_b > group_c) is False

    # Within the same group the dimensions decide the outcome.
    assert (group_b > group_b_dim_c) is False
    assert (group_b_dim_d > group_b_dim_c) is True
Esempio n. 15
0
def test_get_range_start_end(memory_store: MemoryStore) -> None:
    """
    Tests that a key-to-key range query leaves out the blocks lying on the boundary
    """
    lower_bound = datetime(2018, 3, 7, 19, 35, 31, 0, timezone.utc)
    upper_bound = datetime(2018, 3, 7, 22, 38, 31, 0, timezone.utc)

    blocks = memory_store.get_range(Key('user1', 'session', lower_bound),
                                    Key('user1', 'session', upper_bound))

    assert len(blocks) == 2
    expected_first_start = datetime(2018, 3, 7, 20, 35, 35, 0, timezone.utc)
    assert blocks[0][1]['_start_time'] == expected_first_start.isoformat()
Esempio n. 16
0
def test_get(memory_store: MemoryStore) -> None:
    """Reads one DIMENSION item and one TIMESTAMP item back from the pre-loaded store."""
    state_item = memory_store.get(Key(KeyType.DIMENSION, 'user1', 'state'))
    assert state_item == {
        'variable_1': 1,
        'variable_a': 'a',
        'variable_true': True
    }

    session_time = datetime(2018, 3, 7, 19, 35, 31, 0, timezone.utc)
    session_item = memory_store.get(
        Key(KeyType.TIMESTAMP, 'user1', 'session', [], session_time))
    assert session_item == {
        'events': 1,
        '_start_time': session_time.isoformat()
    }
Esempio n. 17
0
def test_two_key_fields_in_aggregate(
        identity_aggregate_schema_spec_with_two_key_fields: Dict[str, Any],
        store_spec: Dict[str, Any], records: List[Record]):
    """Evaluates all records and checks the three two-dimension snapshots written on finalize."""
    schema = identity_aggregate_schema(
        identity_aggregate_schema_spec_with_two_key_fields, store_spec)
    # Initialize the starting state
    identity = 'user1'
    context = EvaluationContext()
    context.global_add('identity', identity)
    aggregate = IdentityAggregate(schema, identity, context)
    context.global_add(aggregate._schema.name, aggregate)

    # Evaluate all the events
    for record in records:
        evaluate_event(record, aggregate)

    aggregate.run_finalize()

    persisted = aggregate._store.get_all('user1')
    assert len(persisted) == 3

    # (label, label_ascii, expected sum, expected count)
    expected_rows = [
        ('a', 97, 110, 2),
        ('b', 98, 1, 1),
        ('c', 99, 11000, 2),
    ]

    for label, label_ascii, total, occurrences in expected_rows:
        # The key dimensions are the string forms of both key fields.
        dimension_key = Key(KeyType.DIMENSION, 'user1', 'label_aggr',
                            [label, str(label_ascii)])
        assert persisted.get(dimension_key) == {
            '_identity': 'user1',
            'label': label,
            'label_ascii': label_ascii,
            'sum': total,
            'count': occurrences
        }
Esempio n. 18
0
def test_split_when_label_evaluates_to_none(
        identity_aggregate_schema_spec: Dict[str, Any],
        store_spec: Dict[str, Any], records: List[Record]):
    """Makes the dimension expression fail for label 'a' and checks only 'b' is persisted."""
    # The expression divides by zero whenever the source label is 'a'.
    failing_expression = '1/0 if source.label == \'a\' else source.label'
    identity_aggregate_schema_spec['Dimensions'][0]['Value'] = failing_expression
    schema = identity_aggregate_schema(identity_aggregate_schema_spec,
                                       store_spec)
    # Initialize the starting state
    identity = 'user1'
    context = EvaluationContext()
    context.global_add('identity', identity)
    aggregate = IdentityAggregate(schema, identity, context)
    context.global_add(aggregate._schema.name, aggregate)

    # Check for error states
    for position in (0, 1, 2):
        evaluate_event(records[position], aggregate)
    assert aggregate._dimension_fields['label'].value == 'b'

    aggregate.run_finalize()

    persisted = aggregate._store.get_all(identity)
    assert len(persisted) == 1

    snapshot = persisted.get(Key('user1', 'label_aggr.b'))
    assert snapshot == {
        '_identity': 'user1',
        'label': 'b',
        'sum': 1,
        'count': 1
    }
Esempio n. 19
0
def test_get_range_start_end_time_no_dimensions_match(
        memory_store: MemoryStore) -> None:
    """A time-range query under the 'dimC' dimension key returns no blocks."""
    range_start = datetime(2018, 3, 7, 19, 35, 31, 0, timezone.utc)
    range_end = datetime(2018, 3, 7, 22, 38, 31, 0, timezone.utc)
    base_key = Key(KeyType.DIMENSION, 'user1', 'session_dim', ['dimC'])

    blocks = memory_store.get_range(base_key, range_start, range_end)

    assert len(blocks) == 0
Esempio n. 20
0
def test_no_variable_aggreate_data_stored():
    """Runs the stream spec over the raw data and checks no 'vars' key is collected."""
    _runner, data = execute_runner('tests/data/stream.yml', None,
                                   ['tests/data/raw.json'])

    # Index the collected (key, value) pairs by key.
    block_data = {}
    for key, value in data.collect():
        block_data[key] = value

    # Variables should not be stored
    assert Key('userA', 'vars') not in block_data
Esempio n. 21
0
def test_block_aggregate_schema_evaluate_without_split(
        block_aggregate_schema_spec, schema_loader):
    """Evaluates a block aggregate once and checks its fields; nothing is saved yet."""
    schema_name = schema_loader.add_schema_spec(block_aggregate_schema_spec)
    schema = BlockAggregateSchema(schema_name, schema_loader)

    identity = 'userA'
    event_time = datetime(2018, 3, 7, 19, 35, 31, 0, timezone.utc)
    aggregate = create_block_aggregate(schema, event_time, identity)
    aggregate.run_evaluate()

    # Check eval results of various fields
    assert len(aggregate._nested_items) == 4
    expected_fields = {
        '_identity': identity,
        'event_count': 1,
        '_start_time': event_time,
        '_end_time': event_time
    }
    assert check_fields(aggregate._nested_items, expected_fields)

    # aggregate snapshot should not exist in store
    snapshot_key = Key(identity=aggregate._identity,
                       group=aggregate._name,
                       timestamp=aggregate._start_time)
    assert aggregate._store.get(snapshot_key) is None
Esempio n. 22
0
 def _persist(self, timestamp=None) -> None:
     """
     Persists the current data group
     :param timestamp: Optional timestamp to include in the Key construction
     """
     if self._store:
         self._store.save(Key(self._identity, self._name, timestamp), self._snapshot)
Esempio n. 23
0
def test_streaming_transformer_finalize(schema_loader: SchemaLoader,
                                        schema_spec: Dict[str, Any]) -> None:
    """Finalize saves nothing before any record is evaluated and one snapshot after."""
    schema_name = schema_loader.add_schema_spec(schema_spec)
    schema = StreamingTransformerSchema(schema_name, schema_loader)
    transformer = StreamingTransformer(schema, 'user1')
    store = schema_loader.get_store('test.memstore')

    # Finalizing without having evaluated a record leaves the store empty.
    transformer.run_finalize()
    before = store.get(Key(KeyType.DIMENSION, 'user1', 'test_group'))
    assert before is None

    # After one evaluated record the snapshot is persisted.
    transformer.run_evaluate(Record())
    transformer.run_finalize()
    after = store.get(Key(KeyType.DIMENSION, 'user1', 'test_group'))
    assert after == {
        '_identity': 'user1',
        'events': 1
    }
Esempio n. 24
0
def test_get_range_count_backward(loaded_store: DynamoStore) -> None:
    """A count of -3 returns three items; first has int_field 5, last has 3."""
    anchor_time = datetime(2018, 1, 1, 6, 1, 1, 1, tzinfo=timezone.utc)
    anchor_key = Key('test_user', 'test_group', anchor_time)

    items = loaded_store.get_range(anchor_key, None, -3)

    assert len(items) == 3
    _, first_value = items[0]
    assert first_value['int_field'] == 5
    _, last_value = items[-1]
    assert last_value['int_field'] == 3
Esempio n. 25
0
def test_get_range_count_positive_partial_dimensions_match(
        loaded_store: DynamoStore) -> None:
    """A count of 2 under the 'dimA' key returns two blocks, the first starting 21:36:31."""
    base_key = Key(KeyType.DIMENSION, 'user1', 'session_dim', ['dimA'])
    anchor_time = datetime(2018, 3, 7, 19, 35, 31, 0, timezone.utc)

    blocks = loaded_store.get_range(base_key, anchor_time, None, 2)

    assert len(blocks) == 2
    expected_first_start = datetime(2018, 3, 7, 21, 36, 31, 0, timezone.utc)
    assert blocks[0][1]['_start_time'] == expected_first_start.isoformat()
Esempio n. 26
0
def test_get_range_count_negative_partial_dimensions_match(
        memory_store: MemoryStore) -> None:
    """A count of -2 under the 'dimA' key returns two blocks, the first starting 19:35:31."""
    base_key = Key(KeyType.DIMENSION, 'user1', 'session_dim', ['dimA'])
    anchor_time = datetime(2018, 3, 7, 22, 38, 31, 0, timezone.utc)

    blocks = memory_store.get_range(base_key, anchor_time, None, -2)

    assert len(blocks) == 2
    expected_first_start = datetime(2018, 3, 7, 19, 35, 31, 0, timezone.utc)
    assert blocks[0][1]['_start_time'] == expected_first_start.isoformat()
Esempio n. 27
0
def stream_transformer(schema_loader, stream_schema_spec):
    """Builds a StreamingTransformer for 'user1' and restores a 'state' snapshot into it."""
    schema_name = schema_loader.add_schema_spec(stream_schema_spec)
    schema = schema_loader.get_schema_object(schema_name)
    transformer = StreamingTransformer(schema, 'user1')

    # Seed the transformer with previously stored state.
    restored_state = {
        Key(KeyType.DIMENSION, 'user1', 'state'): {
            'country': 'US'
        }
    }
    transformer.run_restore(restored_state)
    return transformer
Esempio n. 28
0
def test_aggregate_finalize(aggregate_schema_with_store):
    """Finalizing an aggregate writes its current snapshot to the store under its key."""
    aggregate = MockAggregate(schema=aggregate_schema_with_store,
                              identity="12345",
                              evaluation_context=EvaluationContext())
    aggregate.run_finalize()

    stored_key = Key(identity="12345", group="user")
    stored_snapshot = aggregate._store.get(stored_key)

    assert stored_snapshot is not None
    assert stored_snapshot == aggregate._snapshot
Esempio n. 29
0
def test_get_range_count_negative_from_first_element(
        memory_store: MemoryStore, key_type_and_group) -> None:
    """
    Tests that a backward count query anchored at 2018-03-07 19:35:31 UTC yields no blocks
    """
    # presumably the anchor is the earliest stored block - verify against the fixture
    key_type = key_type_and_group[0]
    group = key_type_and_group[1]
    base_key = Key(key_type, 'user1', group)
    anchor_time = datetime(2018, 3, 7, 19, 35, 31, 0, timezone.utc)

    blocks = memory_store.get_range(base_key, anchor_time, None, -2)

    assert len(blocks) == 0
Esempio n. 30
0
def test_no_variable_aggreate_data_stored():
    """Runs the stream spec over the raw data and checks 'userA' has no 'vars' key stored."""
    _runner, data = execute_runner('tests/data/stream.yml', None,
                                   ['tests/data/raw.json'])

    # Each collected entry is (identity, (block data, <ignored>)); keep the block data.
    block_data = {
        identity: per_identity_blocks
        for identity, (per_identity_blocks, _) in data.collect()
    }

    # Variables should not be stored
    vars_key = Key(KeyType.DIMENSION, 'userA', 'vars')
    assert vars_key not in block_data['userA']