Example #1
    def digest(self,
               key: str,
               minimum_delay: Optional[int] = None,
               timestamp: Optional[float] = None) -> Any:
        if minimum_delay is None:
            minimum_delay = self.minimum_delay

        if timestamp is None:
            timestamp = time.time()

        connection = self._get_connection(key)
        with self._get_timeline_lock(key, duration=30).acquire():
            try:
                response = script(
                    connection,
                    [key],
                    [
                        "DIGEST_OPEN",
                        self.namespace,
                        self.ttl,
                        timestamp,
                        key,
                        self.capacity if self.capacity else -1,
                    ],
                )
            except ResponseError as e:
                if "err(invalid_state):" in str(e):
                    raise InvalidState(
                        "Timeline is not in the ready state.") from e
                else:
                    raise

            # Materialize the records eagerly: in Python 3, `map` returns a
            # one-shot iterator, and `records` is iterated a second time below
            # when building the DIGEST_CLOSE arguments.
            records = [
                Record(
                    record_key.decode("utf-8"),
                    self.codec.decode(value) if value is not None else None,
                    float(record_timestamp),
                )
                for record_key, value, record_timestamp in response
            ]

            # If the record value is `None`, this means the record data was
            # missing (it was presumably evicted by Redis) so we don't need to
            # return it here.
            yield [record for record in records if record.value is not None]

            script(
                connection,
                [key],
                [
                    "DIGEST_CLOSE", self.namespace, self.ttl, timestamp, key,
                    minimum_delay
                ] + [record.key for record in records],
            )
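The single `yield` followed by cleanup suggests this generator is consumed as a context manager (e.g. via `contextlib.contextmanager`): the caller receives the records, and the DIGEST_CLOSE script runs only when the block resumes the generator. A minimal usage sketch; `backend` and `deliver` are illustrative names, not from the source:

    # Hypothetical caller: records are handed to the body of the `with`
    # block; the DIGEST_CLOSE cleanup after the `yield` runs only if the
    # body completes without raising.
    with backend.digest("timeline:mailbox:1") as records:
        for record in records:
            deliver(record)  # placeholder for real delivery logic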
Example #2
    def digest(self, key, minimum_delay=None, timestamp=None):
        if minimum_delay is None:
            minimum_delay = self.minimum_delay

        if timestamp is None:
            timestamp = time.time()

        connection = self._get_connection(key)
        with self._get_timeline_lock(key, duration=30).acquire():
            try:
                response = script(connection, [key], [
                    'DIGEST_OPEN',
                    self.namespace,
                    self.ttl,
                    timestamp,
                    key,
                    self.capacity if self.capacity else -1,
                ])
            except ResponseError as e:
                if 'err(invalid_state):' in six.text_type(e):
                    six.raise_from(
                        InvalidState('Timeline is not in the ready state.'),
                        e,
                    )
                else:
                    raise

            # Materialize the records eagerly: under Python 3, `map` returns a
            # one-shot iterator, and `records` is iterated a second time below
            # when building the DIGEST_CLOSE arguments.
            records = [
                Record(
                    record_key,
                    self.codec.decode(value) if value is not None else None,
                    float(record_timestamp),
                )
                for record_key, value, record_timestamp in response
            ]

            # If the record value is `None`, this means the record data was
            # missing (it was presumably evicted by Redis) so we don't need to
            # return it here.
            yield [record for record in records if record.value is not None]

            script(
                connection,
                [key],
                [
                    'DIGEST_CLOSE', self.namespace, self.ttl, timestamp, key,
                    minimum_delay
                ] + [record.key for record in records],
            )
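This variant targets both Python 2 and 3 via `six`, which is exactly why the materialization above matters: Python 2's `map` and `filter` return lists, but Python 3's return one-shot iterators. A quick standalone demonstration of the pitfall:

    # Why `records` must be a list under Python 3: a `map` iterator is
    # exhausted by its first traversal, so a second pass (such as building
    # the DIGEST_CLOSE arguments) would silently see nothing.
    records = map(str.upper, ["a", "b"])
    print(list(records))  # ['A', 'B']
    print(list(records))  # [] -- the iterator is already exhausted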
Example #3
    def digest(self, key, minimum_delay=None):
        if minimum_delay is None:
            minimum_delay = self.minimum_delay

        timeline_key = make_timeline_key(self.namespace, key)
        digest_key = make_digest_key(timeline_key)

        connection = self.cluster.get_local_client_for_key(timeline_key)

        with Lock(timeline_key, nowait=True, timeout=30):
            # Check to ensure the timeline is in the correct state ("ready")
            # before sending. This acts as a throttling mechanism to prevent
            # sending a digest before its next scheduled delivery time in a
            # race condition scenario.
            if connection.zscore(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key) is None:
                raise InvalidState('Timeline is not in the ready state.')

            with connection.pipeline() as pipeline:
                pipeline.watch(digest_key)  # This shouldn't be necessary, but better safe than sorry?

                if pipeline.exists(digest_key):
                    pipeline.multi()
                    pipeline.zunionstore(digest_key, (timeline_key, digest_key), aggregate='max')
                    pipeline.delete(timeline_key)
                    pipeline.expire(digest_key, self.ttl)
                    pipeline.execute()
                else:
                    pipeline.multi()
                    pipeline.rename(timeline_key, digest_key)
                    pipeline.expire(digest_key, self.ttl)
                    try:
                        pipeline.execute()
                    except ResponseError as error:
                        if 'no such key' in str(error):
                            logger.debug('Could not move timeline for digestion (likely has no contents.)')
                        else:
                            raise

            # XXX: This must select all records, even though not all of them will
            # be returned if they exceed the capacity, to ensure that all records
            # will be garbage collected.
            records = connection.zrevrange(digest_key, 0, -1, withscores=True)
            if not records:
                logger.info('Retrieved timeline containing no records.')

            def get_records_for_digest():
                with connection.pipeline(transaction=False) as pipeline:
                    for record_key, timestamp in records:
                        pipeline.get(make_record_key(timeline_key, record_key))

                    for (record_key, timestamp), value in zip(records, pipeline.execute()):
                        # We have to handle failures if the key does not exist --
                        # this could happen due to evictions or race conditions
                        # where the record was added to a timeline while it was
                        # already being digested.
                        if value is None:
                            logger.warning('Could not retrieve event for timeline.')
                        else:
                            yield Record(record_key, self.codec.decode(value), timestamp)

            yield itertools.islice(get_records_for_digest(), self.capacity)

            def cleanup_records(pipeline):
                record_keys = [make_record_key(timeline_key, record_key) for record_key, score in records]
                pipeline.delete(digest_key, *record_keys)

            def reschedule():
                with connection.pipeline() as pipeline:
                    pipeline.watch(digest_key)  # This shouldn't be necessary, but better safe than sorry?
                    pipeline.multi()

                    cleanup_records(pipeline)
                    pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
                    pipeline.zadd(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), time.time() + minimum_delay, key)
                    pipeline.setex(make_last_processed_timestamp_key(timeline_key), self.ttl, int(time.time()))
                    pipeline.execute()

            def unschedule():
                with connection.pipeline() as pipeline:
                    # Watch the timeline to ensure that no other transactions add
                    # events to the timeline while we are trying to delete it.
                    pipeline.watch(timeline_key)
                    pipeline.multi()
                    if connection.zcard(timeline_key) == 0:
                        cleanup_records(pipeline)
                        pipeline.delete(make_last_processed_timestamp_key(timeline_key))
                        pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
                        pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), key)
                        pipeline.execute()

            # If there were records in the digest, reschedule the timeline so
            # that any records added during digestion are processed in a later
            # digest. If there were no records, we can try to remove the
            # timeline from the digestion schedule entirely.
            if records:
                reschedule()
            else:
                try:
                    unschedule()
                except WatchError:
                    logger.debug('Could not remove timeline from schedule, rescheduling instead')
                    reschedule()
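The `reschedule`/`unschedule` helpers lean on redis-py's optimistic-locking protocol: `watch()` puts the pipeline into immediate-execution mode, `multi()` starts buffering commands, and `execute()` raises `WatchError` if any watched key changed in the meantime. A minimal self-contained sketch of that pattern; the key name is illustrative:

    # Optimistic locking with redis-py, as used by `unschedule` above. If a
    # concurrent client touches the watched key between WATCH and EXEC,
    # `execute()` raises WatchError and the transaction is discarded.
    import redis
    from redis.exceptions import WatchError

    client = redis.Redis()
    with client.pipeline() as pipeline:
        try:
            pipeline.watch("timeline:example")  # illustrative key name
            if pipeline.zcard("timeline:example") == 0:  # immediate mode
                pipeline.multi()
                pipeline.delete("timeline:example")
                pipeline.execute()
        except WatchError:
            pass  # lost the race; fall back (e.g. reschedule instead)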