Example #1
def consumer_func(divisor):
    ''' Checks whether a number is divisible by the divisor without remainder '''
    redis = get_connection()
    timeout = 100
    retries = 0
    last_id = '$'

    while True:
        reply = redis.xread({KEY: last_id}, count=1, block=timeout)
        if not reply:
            if retries == 5:
                print(f'CONSUMER: Waited long enough for multiples of {divisor} - bye bye...')
                break
            retries += 1
            timeout *= 2
            continue

        timeout = 100
        retries = 0

        # Process the messages
        for _, messages in reply:
            for message in messages:
                last_id = message[0]
                n = int(message[1]['n'])
                if n % divisor == 0:
                    print(f'CONSUMER: The number {n} can be divided by {divisor}')
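These examples rely on a shared util.connection helper (Example #12 imports it explicitly). A minimal sketch of what get_connection might look like, assuming a local Redis and decode_responses=True so stream fields come back as strings; the host, port, and optional client naming are assumptions:

# util/connection.py - hypothetical sketch of the helper the examples assume
import redis

def get_connection(name=None):
    ''' Returns a Redis connection, optionally labelled for CLIENT LIST '''
    conn = redis.Redis(host='localhost', port=6379, decode_responses=True)
    if name:
        # Label the connection so individual producers and consumers
        # are identifiable in CLIENT LIST output
        conn.client_setname(name)
    return conn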
Example #2
def producer_func():
    ''' Natural Numbers Stream producer '''
    redis = get_connection()
    n = 0
    while True:
        data = {'n': n}
        _id = redis.xadd(KEY, data)
        print(f'PRODUCER: Produced the number {n}')
        n += 1
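Note that this producer appends forever, so the stream grows without bound. XADD can cap a stream's length instead; a hedged variant of the call above (the limit of 1000 is an arbitrary assumption, and approximate=True opts into the cheaper '~' trimming):

        # Keep roughly the most recent 1000 entries
        _id = redis.xadd(KEY, data, maxlen=1000, approximate=True)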
Example #3
def averages_consumer_func():
    redis = get_connection()

    # Recover our last message ID context or default to 0
    last_message_id = "0"
    h = redis.hgetall(const.AVERAGES_CONSUMER_STATE_KEY)

    if h:
        last_message_id = h["last_message_id"]

    log(
        AVERAGES_CONSUMER_PREFIX,
        f"Starting averages consumer in stream {const.AVERAGES_STREAM_KEY} at message {last_message_id}."
    )

    while True:
        # Get the next message from the stream, if any
        stream_dict = {const.AVERAGES_STREAM_KEY: last_message_id}

        response = redis.xread(stream_dict, count=1, block=5000)

        if response:
            msg = get_message_from_response(response)

            # Get the ID of the message that was just read.
            msg_id = msg[0]

            # Get the average temperature value from the message.
            msg_average_temperature = msg[1]["average_temp_f"]

            # Get the date value from the message.
            msg_date = msg[1]["date"]

            # Get the hour value from the message.
            msg_hour = msg[1]["hour"]

            # Get the number of observations from the message.
            msg_num_observations = msg[1]["num_observations"]

            log(
                AVERAGES_CONSUMER_PREFIX,
                f"Average temperature for {msg_date} at {msg_hour} was {msg_average_temperature}F ({msg_num_observations} observations)."
            )

            # Update our last message for the next XREAD.
            last_message_id = msg_id

            # Store current state in Redis.
            redis.hset(const.AVERAGES_CONSUMER_STATE_KEY,
                       mapping={"last_message_id": last_message_id})
        else:
            log(
                AVERAGES_CONSUMER_PREFIX,
                f"Waiting for new messages in stream {const.AVERAGES_STREAM_KEY}"
            )
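The get_message_from_response helper is not shown in these examples. An XREAD reply has the shape [[stream_name, [(message_id, fields), ...]], ...], so with count=1 a plausible sketch simply unwraps the first message of the first stream:

def get_message_from_response(response):
    ''' Returns the (message_id, fields) tuple from a single-message XREAD reply '''
    # response[0] is [stream_name, messages]; messages[0] is (id, fields)
    return response[0][1][0]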
Example #4
def producer_func():
    ''' Natural Numbers Stream producer '''
    redis = get_connection('PRODUCER')
    n = 0

    while True:
        data = {'n': n}
        _id = redis.xadd(KEY, data)
        n += 1
        sleep(random.random() / MEMBERS)
Example #5
def data_for_item(item_id=None):
    connection = get_connection()
    with connection:
        jobs = jobs_producing(item_id, connection)
        for job in jobs:
            operations = fetch_job_operations(job["id"], connection)
            job["operations"] = operations
            state = json.loads(job.pop("state"))
            job["state"] = state[-1]["operation"]

    return jobs
Example #6
def consumer_func(name):
    ''' An implementation of a group consumer '''
    redis = get_connection(name)
    timeout = 100
    retries = 0
    recovery = True
    from_id = '0'

    while True:
        count = random.randint(1, 5)
        reply = redis.xreadgroup(GROUP,
                                 name, {KEY: from_id},
                                 count=count,
                                 block=timeout)
        if not reply:
            if retries == 5:
                print(f'{name}: Waited long enough - bye bye...')
                break
            retries += 1
            timeout *= 2
            continue

        timeout = 100
        retries = 0

        if recovery:
            # Verify that there are messages to recover. The zeroth member of the
            # reply contains the following:
            #
            # At element 0: the name of the stream
            #
            # At element 1: a list of pending messages, if any.
            #
            # If there are messages, we recover them.
            #
            # Example contents for "reply":
            #
            # [['numbers', [('1557775037438-0', {'n': '8'})]]]
            if reply[0][1]:
                print(f'{name}: Recovering pending messages...')
            else:
                # If there are no messages to recover, switch to fetching new messages
                # and call xreadgroup again.
                recovery = False
                from_id = '>'
                continue

        # Process the messages
        for _, messages in reply:
            for message in messages:
                n = int(message[1]['n'])
                if prime(n):
                    print(f'{name}: {n} is a prime number')
                redis.xack(KEY, GROUP, message[0])
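The prime() predicate used by the group consumers is assumed; a minimal trial-division sketch, adequate for the small numbers in this demo:

def prime(n):
    ''' Naive trial-division primality test '''
    if n < 2:
        return False
    i = 2
    while i * i <= n:
        if n % i == 0:
            return False
        i += 1
    return True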
Example #7
def main():
    current_stream_key = ""
    last_message_id = "0"
    current_hourly_total = 0
    current_hourly_count = 0

    # The aggregator consumer needs to know its initial start
    # point stream partition name for reading temperatures from.
    # Try to read a stream name from arguments, if not supplied
    # then we are resuming from a crash so should instead load
    # the saved state from Redis.
    if len(sys.argv) == 2:
        current_stream_key = sys.argv[1]

        # Do very basic validation that we might have been supplied
        # with a stream name from the command line.
        if not current_stream_key.startswith(const.STREAM_KEY_BASE):
            print("Invalid stream key supplied.")
            sys.exit(1)

        # When starting for first time, clear any prior saved state
        # and remove the temps:averages stream.
        reset_state()
    else:
        # Load aggregator consumer saved state from Redis.
        redis = get_connection()

        h = redis.hgetall(const.AGGREGATING_CONSUMER_STATE_KEY)

        if not h:
            print("No stream key and last message ID found in Redis.")
            print(
                "Start the consumer with stream key and last message ID parameters."
            )
            sys.exit(1)
        else:
            current_stream_key = h["current_stream_key"]
            last_message_id = h["last_message_id"]
            current_hourly_total = int(h["current_hourly_total"])
            current_hourly_count = int(h["current_hourly_count"])

    # Start the aggregating consumer process.
    aggregating_consumer = Process(target=aggregating_consumer_func,
                                   args=(current_stream_key, last_message_id,
                                         current_hourly_total,
                                         current_hourly_count))
    aggregating_consumer.start()

    # Start the averages consumer process which always loads its
    # own saved state from Redis.
    averages_consumer = Process(target=averages_consumer_func, args=())
    averages_consumer.start()
Example #8
def reset_state():
    redis = get_connection()

    keys_to_delete = []

    # Delete the keys used by the consumers to hold state.
    keys_to_delete.append(const.AGGREGATING_CONSUMER_STATE_KEY)
    keys_to_delete.append(const.AVERAGES_CONSUMER_STATE_KEY)
    keys_to_delete.append(const.AVERAGES_STREAM_KEY)

    redis.delete(*keys_to_delete)
    print(
        f"Deleted {const.AVERAGES_STREAM_KEY} stream and consumer state keys.")
def reset_state():
    redis = get_connection()

    # Delete any old streams that have not yet expired.
    keys_to_delete = []
    stream_timestamp = TIMESTAMP_START

    print("Deleting old streams:")
    for day in range(DAYS_TO_GENERATE):
        stream_key_name = f"{const.STREAM_KEY_BASE}:{datetime.utcfromtimestamp(stream_timestamp).strftime('%Y%m%d')}"
        print(stream_key_name)
        keys_to_delete.append(stream_key_name)
        stream_timestamp += ONE_DAY_SECONDS

    redis.delete(*keys_to_delete)
Example #10
def consumer_func(name):
    ''' An implementation of a group consumer '''
    redis = get_connection(name)
    timeout = 100
    retries = 0
    recovery = True
    from_id = '0'

    while True:
        count = random.randint(1, 5)
        reply = redis.xreadgroup(GROUP,
                                 name, {KEY: from_id},
                                 count=count,
                                 block=timeout)
        if not reply:
            if retries == 5:
                print(f'{name}: Waited long enough - bye bye...')
                break
            retries += 1
            timeout *= 2
            continue

        timeout = 100
        retries = 0

        if recovery:
            # Verify there are messages to recover across all streams in the reply
            pending = any(messages for _, messages in reply)

            if pending:
                print(f'{name}: Recovering pending messages...')
            else:
                # If there are no messages to recover, switch to fetching new
                # messages and call xreadgroup again
                recovery = False
                from_id = '>'
                continue

        # Process the messages
        for _, messages in reply:
            for message in messages:
                n = int(message[1]['n'])
                if prime(n):
                    print(f'{name}: {n} is a prime number')
                redis.xack(KEY, GROUP, message[0])
Example #11
def main():
    reset_state()

    measurement = Measurement()
    previous_stream_key = ""
    current_timestamp = TIMESTAMP_START

    # End data production a configurable number of days after we began.
    end_timestamp = TIMESTAMP_START + (ONE_DAY_SECONDS * DAYS_TO_GENERATE)

    redis = get_connection()

    stream_key = ""

    while current_timestamp < end_timestamp:
        # Get the stream partition key name that this timestamp should
        # be written to.
        stream_key = get_stream_key_for_timestamp(current_timestamp)

        # Get a temperature reading.
        entry = measurement.get_next()

        # Publish to the current stream partition and set
        # or update expiry time on the stream partition.
        # This is done as a pipeline so that both commands are
        # executed with a single round trip to the Redis Server
        # for performance reasons.  An alternative strategy might
        # be to only update the expiry time every 100th message
        # or similar.
        # Pipeline: https://redis.io/topics/pipelining
        pipe = redis.pipeline()
        pipe.xadd(stream_key, entry, current_timestamp)
        pipe.expireat(stream_key, current_timestamp + PARTITION_EXPIRY_TIME)
        pipe.execute()

        # Have we started a new stream?
        if (stream_key != previous_stream_key):
            # A new day's stream started.
            print(f"Populating stream partition {stream_key}.")
            previous_stream_key = stream_key

        # Move on to the next timestamp value.
        current_timestamp += TEMPERATURE_READING_INTERVAL_SECONDS
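The get_stream_key_for_timestamp helper is not shown; going by the daily partition key format that Example #9's reset_state builds, a plausible sketch:

def get_stream_key_for_timestamp(timestamp):
    ''' Maps a Unix timestamp to its daily stream partition key '''
    day = datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')
    return f"{const.STREAM_KEY_BASE}:{day}"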
Example #12
'''
Sum the numbers in the Stream and block for new messages
'''

from util.connection import get_connection

if __name__ == '__main__':
    redis = get_connection()

    key = 'numbers'
    sum_key = f'{key}:blockingread_sum'
    timeout = 100
    retries = 0

    # Load the previous state or initialize it
    h = redis.hgetall(sum_key)
    if not h:
        # Assume first run and initialize when the Hash does not exist
        last_id = '0-0'
        n_sum = 0
    else:
        last_id = h['last_id']
        n_sum = int(h['n_sum'])

    while True:
        # Get the next message
        reply = redis.xread({key: last_id}, count=1, block=timeout)

        # An empty response means we've timed out
        if not reply:
            # Try an exponential backoff
            if retries == 5:
                print('Waited long enough - bye bye...')
                break
            retries += 1
            timeout *= 2
            continue

        timeout = 100
        retries = 0

        # Add each message's number to the running sum
        for _, messages in reply:
            for message in messages:
                last_id = message[0]
                n_sum += int(message[1]['n'])

        # Persist progress so a restart can resume from the last message ID
        redis.hset(sum_key, mapping={'last_id': last_id, 'n_sum': n_sum})
        print(f'Sum so far: {n_sum}')
Example #13
def aggregating_consumer_func(current_stream_key, last_message_id,
                              current_hourly_total, current_hourly_count):
    log(
        AGGREGATING_CONSUMER_PREFIX,
        f"Starting aggregating consumer in stream {current_stream_key} at message {last_message_id}."
    )

    redis = get_connection()

    while True:
        # Get the next message from the stream, if any
        stream_dict = {current_stream_key: last_message_id}

        response = redis.xread(stream_dict, count=1, block=5000)

        if not response:
            # We either need to switch to another stream partition
            # or wait for more messages to appear on the one we are
            # on if no newer partitions exist.

            # Get the name of the next stream partition to process
            # (one day later than the partition currently being processed).
            new_stream_key = get_next_stream_partition_key_name(
                current_stream_key)

            # Does the next partition exist?  If so, read from it;
            # otherwise stick with this stream which will block as we
            # are at the latest partition now.
            if (redis.exists(new_stream_key) == 1):
                # We are still catching up and have not reached
                # the latest stream partition yet, so move on to
                # consuming the next partition.
                current_stream_key = new_stream_key

                log(AGGREGATING_CONSUMER_PREFIX,
                    f"Changing partition to consume stream: {new_stream_key}")
            else:
                # We are currently on the latest stream partition
                # and have caught up with the producer so should
                # block for a while then try reading it again.
                log(
                    AGGREGATING_CONSUMER_PREFIX,
                    f"Waiting for new messages in stream {current_stream_key}, or new stream partition."
                )
        else:
            # Read the response that we got from Redis
            msg = get_message_from_response(response)

            # Get the ID of the message that was just read.
            msg_id = msg[0]

            # Get the timestamp value from the message ID
            # (everything before the - in the ID).
            msg_timestamp = msg_id.split("-")[0]

            # Get the temperature value from the message.
            msg_temperature = msg[1]["temp_f"]

            # Get hour for this message
            msg_date = datetime.utcfromtimestamp(int(msg_timestamp))
            msg_hour = msg_date.hour

            # Get the hour for the last message
            last_message_hour = 0
            if "-" in last_message_id:
                last_message_timestamp = last_message_id.split("-")[0]
                last_message_date = datetime.utcfromtimestamp(
                    int(last_message_timestamp))
                last_message_hour = last_message_date.hour

            # Did we start a new hour? (Guard against the very first message,
            # when there is no completed hour to publish yet.)
            if last_message_hour != msg_hour and current_hourly_count > 0:
                # Starting a new hour, push our result to the averages stream.
                formatted_date = last_message_date.strftime('%Y/%m/%d')

                # Publish result for this hour, trimming the stream each
                # time a new message is added.
                payload = {
                    "hour": last_message_hour,
                    "date": formatted_date,
                    "average_temp_f": int(current_hourly_total / current_hourly_count),
                    "num_observations": current_hourly_count
                }

                # Publish the hourly average value to the temps:averages stream
                # and trim the stream's length to around 120 entries.
                redis.xadd(const.AVERAGES_STREAM_KEY,
                           payload,
                           "*",
                           maxlen=120,
                           approximate=True)

                # Reset values and put the current message's temperature
                # into the new hour.
                current_hourly_total = int(msg_temperature)
                current_hourly_count = 1
            else:
                # Still working through current hour.
                current_hourly_total += int(msg_temperature)
                current_hourly_count += 1

            # Update the last ID we've seen.
            last_message_id = msg_id

            # Store current state in Redis in case we crash and
            # have to resume.  Here we are storing this every
            # time we read a message, depending on the nature of
            # your workload you may be able to update it less
            # frequently, for example after reading 100 messages.
            redis.hset(
                const.AGGREGATING_CONSUMER_STATE_KEY, mapping={
                    "current_stream_key": current_stream_key,
                    "last_message_id": last_message_id,
                    "current_hourly_total": current_hourly_total,
                    "current_hourly_count": current_hourly_count
                })
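The get_next_stream_partition_key_name helper is likewise assumed. Given the daily YYYYMMDD key suffix, a sketch that advances the partition by one day (assumes timedelta is imported alongside datetime):

def get_next_stream_partition_key_name(current_stream_key):
    ''' Returns the stream partition key for the day after the given one '''
    day = datetime.strptime(current_stream_key.split(":")[-1], "%Y%m%d")
    return f"{const.STREAM_KEY_BASE}:{(day + timedelta(days=1)).strftime('%Y%m%d')}"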
Example #14
def setup():
    ''' Initializes the Stream and the primes consumer group '''
    redis = get_connection()
    redis.delete(KEY)
    redis.xgroup_create(KEY, GROUP, mkstream=True)
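xgroup_create raises a ResponseError ("BUSYGROUP Consumer Group name already exists") when the group is already present. The delete above avoids that here, but a hedged idempotent variant for running setup against a live stream:

from redis.exceptions import ResponseError

def ensure_group(conn, key, group):
    ''' Creates the consumer group only if it does not already exist '''
    try:
        conn.xgroup_create(key, group, mkstream=True)
    except ResponseError as e:
        if 'BUSYGROUP' not in str(e):
            raise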
Example #15
def setup():
    ''' Initializes the Stream '''
    redis = get_connection()
    redis.delete(KEY)