def consumer_func(divisor):
    ''' Checks whether a number is divisible by the divisor without remainder '''
    redis = get_connection()
    timeout = 100
    retries = 0
    last_id = '$'
    while True:
        reply = redis.xread({KEY: last_id}, count=1, block=timeout)
        if not reply:
            if retries == 5:
                print(f'CONSUMER: Waited long enough for {divisor}s - bye bye...')
                break
            retries += 1
            timeout *= 2
            continue
        timeout = 100
        retries = 0

        # Process the messages
        for _, messages in reply:
            for message in messages:
                last_id = message[0]
                n = int(message[1]['n'])
                if n % divisor == 0:
                    print(f'CONSUMER: The number {n} can be divided by {divisor}')
def producer_func():
    ''' Natural Numbers Stream producer '''
    redis = get_connection()
    n = 0
    while True:
        data = {'n': n}
        _id = redis.xadd(KEY, data)
        print(f'PRODUCER: Produced the number {n}')
        n += 1
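A minimal sketch of how the producer and consumer above might be run together with multiprocessing. The KEY value and the idea that get_connection (imported from util.connection elsewhere in this section) returns a client with decode_responses enabled are assumptions inferred from the surrounding snippets, not shown code.

from multiprocessing import Process

KEY = 'numbers'  # assumed stream name, matching the blocking-sum example below

if __name__ == '__main__':
    # One producer plus one consumer per divisor, each in its own process.
    # The producer never stops adding numbers, so this sketch runs until
    # interrupted (e.g. with Ctrl-C).
    Process(target=producer_func).start()
    for divisor in (2, 3, 5):
        Process(target=consumer_func, args=(divisor,)).start()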
def averages_consumer_func():
    redis = get_connection()

    # Recover our last message ID context or default to 0
    last_message_id = "0"
    h = redis.hgetall(const.AVERAGES_CONSUMER_STATE_KEY)

    if h:
        last_message_id = h["last_message_id"]

    log(
        AVERAGES_CONSUMER_PREFIX,
        f"Starting averages consumer in stream {const.AVERAGES_STREAM_KEY} at message {last_message_id}."
    )

    while True:
        # Get the next message from the stream, if any
        stream_dict = {const.AVERAGES_STREAM_KEY: last_message_id}
        response = redis.xread(stream_dict, count=1, block=5000)

        if response:
            msg = get_message_from_response(response)

            # Get the ID of the message that was just read.
            msg_id = msg[0]

            # Get the average temperature value from the message.
            msg_average_temperature = msg[1]["average_temp_f"]

            # Get the date value from the message.
            msg_date = msg[1]["date"]

            # Get the hour value from the message.
            msg_hour = msg[1]["hour"]

            # Get the number of observations from the message.
            msg_num_observations = msg[1]["num_observations"]

            log(
                AVERAGES_CONSUMER_PREFIX,
                f"Average temperature for {msg_date} at {msg_hour} was {msg_average_temperature}F ({msg_num_observations} observations)."
            )

            # Update our last message ID for the next XREAD.
            last_message_id = msg_id

            # Store current state in Redis.
            redis.hset(const.AVERAGES_CONSUMER_STATE_KEY,
                       mapping={"last_message_id": last_message_id})
        else:
            log(
                AVERAGES_CONSUMER_PREFIX,
                f"Waiting for new messages in stream {const.AVERAGES_STREAM_KEY}"
            )
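Neither log nor get_message_from_response is shown in this section. Sketches of what they might look like, based on how they are called above and on the XREAD reply shape documented in the group consumer below ([['numbers', [('1557775037438-0', {'n': '8'})]]]):

def log(prefix, message):
    # Hypothetical helper: prefix each line of output with the component name.
    print(f"{prefix}: {message}")

def get_message_from_response(response):
    # response[0] is the (stream name, messages) pair for the only stream
    # read; [1] selects its message list and [0] the single message that
    # count=1 asked for, as an (ID, fields dict) tuple.
    return response[0][1][0]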
def producer_func():
    ''' Natural Numbers Stream producer '''
    redis = get_connection('PRODUCER')
    n = 0
    while True:
        data = {'n': n}
        _id = redis.xadd(KEY, data)
        n += 1
        sleep(random.random() / MEMBERS)
def data_for_item(item_id=None):
    connection = get_connection()
    with connection:
        jobs = jobs_producing(item_id, connection)
        for job in jobs:
            operations = fetch_job_operations(job["id"], connection)
            job["operations"] = operations
            state = json.loads(job.pop("state"))
            job["state"] = state[-1]["operation"]
    return jobs
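data_for_item flattens each job's JSON state history down to its most recent operation: for example, if a job's state column held [{"operation": "cutting"}, {"operation": "polishing"}], the returned job would carry "state": "polishing" alongside its operations list. (These operation values are illustrative; jobs_producing and fetch_job_operations are defined elsewhere.)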
def consumer_func(name):
    ''' An implementation of a group consumer '''
    redis = get_connection(name)
    timeout = 100
    retries = 0
    recovery = True
    from_id = '0'
    while True:
        count = random.randint(1, 5)
        reply = redis.xreadgroup(GROUP, name, {KEY: from_id}, count=count, block=timeout)
        if not reply:
            if retries == 5:
                print(f'{name}: Waited long enough - bye bye...')
                break
            retries += 1
            timeout *= 2
            continue
        timeout = 100
        retries = 0

        if recovery:
            # Verify that there are messages to recover. The zeroth member of the
            # reply contains the following:
            #
            # At element 0: the name of the stream
            #
            # At element 1: a list of pending messages, if any.
            #
            # If there are messages, we recover them.
            #
            # Example contents for "reply":
            #
            # [['numbers', [('1557775037438-0', {'n': '8'})]]]
            if reply[0][1]:
                print(f'{name}: Recovering pending messages...')
            else:
                # If there are no messages to recover, switch to fetching new messages
                # and call xreadgroup again.
                recovery = False
                from_id = '>'
                continue

        # Process the messages
        for _, messages in reply:
            for message in messages:
                n = int(message[1]['n'])
                if prime(n):
                    print(f'{name}: {n} is a prime number')
                redis.xack(KEY, GROUP, message[0])
def main():
    current_stream_key = ""
    last_message_id = "0"
    current_hourly_total = 0
    current_hourly_count = 0

    # The aggregating consumer needs to know the stream partition
    # name to start reading temperatures from. Try to read a stream
    # name from the arguments; if none is supplied, we are resuming
    # from a crash and should instead load the saved state from Redis.
    if len(sys.argv) == 2:
        current_stream_key = sys.argv[1]

        # Do very basic validation that we might have been supplied
        # with a stream name from the command line.
        if not current_stream_key.startswith(const.STREAM_KEY_BASE):
            print("Invalid stream key supplied.")
            sys.exit(1)

        # When starting for the first time, clear any prior saved state
        # and remove the temps:averages stream.
        reset_state()
    else:
        # Load the aggregating consumer's saved state from Redis.
        redis = get_connection()
        h = redis.hgetall(const.AGGREGATING_CONSUMER_STATE_KEY)

        if not h:
            print("No stream key and last message ID found in Redis.")
            print("Start the consumer with a stream key parameter.")
            sys.exit(1)

        current_stream_key = h["current_stream_key"]
        last_message_id = h["last_message_id"]
        current_hourly_total = int(h["current_hourly_total"])
        current_hourly_count = int(h["current_hourly_count"])

    # Start the aggregating consumer process.
    aggregating_consumer = Process(target=aggregating_consumer_func,
                                   args=(current_stream_key, last_message_id,
                                         current_hourly_total,
                                         current_hourly_count))
    aggregating_consumer.start()

    # Start the averages consumer process, which always loads its
    # own saved state from Redis.
    averages_consumer = Process(target=averages_consumer_func, args=())
    averages_consumer.start()
def reset_state():
    redis = get_connection()
    keys_to_delete = []

    # Delete the keys used by the consumers to hold state.
    keys_to_delete.append(const.AGGREGATING_CONSUMER_STATE_KEY)
    keys_to_delete.append(const.AVERAGES_CONSUMER_STATE_KEY)
    keys_to_delete.append(const.AVERAGES_STREAM_KEY)

    redis.delete(*keys_to_delete)
    print(f"Deleted {const.AVERAGES_STREAM_KEY} stream and consumer state keys.")
def reset_state():
    redis = get_connection()

    # Delete any old streams that have not yet expired.
    keys_to_delete = []
    stream_timestamp = TIMESTAMP_START

    print("Deleting old streams:")

    for day in range(DAYS_TO_GENERATE):
        stream_key_name = f"{const.STREAM_KEY_BASE}:{datetime.utcfromtimestamp(stream_timestamp).strftime('%Y%m%d')}"
        print(stream_key_name)
        keys_to_delete.append(stream_key_name)
        stream_timestamp += ONE_DAY_SECONDS

    redis.delete(*keys_to_delete)
def consumer_func(name):
    ''' An implementation of a group consumer '''
    redis = get_connection(name)
    timeout = 100
    retries = 0
    recovery = True
    from_id = '0'
    while True:
        count = random.randint(1, 5)
        reply = redis.xreadgroup(GROUP, name, {KEY: from_id}, count=count, block=timeout)
        if not reply:
            if retries == 5:
                print(f'{name}: Waited long enough - bye bye...')
                break
            retries += 1
            timeout *= 2
            continue
        timeout = 100
        retries = 0

        if recovery:
            # Verify there are messages to recover
            for _, messages in reply:
                if messages:
                    print(f'{name}: Recovering pending messages...')
                    break
            else:
                # If there are no messages to recover, switch to fetching new messages
                recovery = False
                from_id = '>'
                continue

        # Process the messages
        for _, messages in reply:
            for message in messages:
                n = int(message[1]['n'])
                if prime(n):
                    print(f'{name}: {n} is a prime number')
                redis.xack(KEY, GROUP, message[0])
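Both group consumers above recover only the messages pending for their own consumer name. On Redis 6.2 or later, with a correspondingly recent redis-py, XAUTOCLAIM can also take over messages that a dead consumer left pending too long. A minimal sketch under those assumptions, reusing the KEY, GROUP and prime names from above:

def claim_stale_messages(redis, name, min_idle_ms=60000):
    # Claim any message in the group's pending entries list that has been
    # idle for at least min_idle_ms, transferring ownership to this consumer.
    result = redis.xautoclaim(KEY, GROUP, name, min_idle_time=min_idle_ms,
                              start_id='0-0')
    # The reply begins with the next cursor, followed by the claimed
    # messages (Redis 7 appends a third element of deleted message IDs).
    for message in result[1]:
        n = int(message[1]['n'])
        if prime(n):
            print(f'{name}: {n} is a prime number (recovered)')
        redis.xack(KEY, GROUP, message[0])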
def main():
    reset_state()
    measurement = Measurement()
    previous_stream_key = ""
    current_timestamp = TIMESTAMP_START

    # End data production a configurable number of days after we began.
    end_timestamp = TIMESTAMP_START + (ONE_DAY_SECONDS * DAYS_TO_GENERATE)
    redis = get_connection()
    stream_key = ""

    while current_timestamp < end_timestamp:
        # Get the stream partition key name that this timestamp should
        # be written to.
        stream_key = get_stream_key_for_timestamp(current_timestamp)

        # Get a temperature reading.
        entry = measurement.get_next()

        # Publish to the current stream partition and set
        # or update the expiry time on the stream partition.
        # This is done as a pipeline so that both commands are
        # executed with a single round trip to the Redis server
        # for performance reasons. An alternative strategy might
        # be to only update the expiry time every 100th message
        # or similar.
        # Pipeline: https://redis.io/topics/pipelining
        pipe = redis.pipeline()
        pipe.xadd(stream_key, entry, current_timestamp)
        pipe.expireat(stream_key, current_timestamp + PARTITION_EXPIRY_TIME)
        pipe.execute()

        # Have we started a new stream?
        if stream_key != previous_stream_key:
            # A new day's stream started.
            print(f"Populating stream partition {stream_key}.")
            previous_stream_key = stream_key

        # Move on to the next timestamp value.
        current_timestamp += TEMPERATURE_READING_INTERVAL_SECONDS
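get_stream_key_for_timestamp is not shown here. Based on the key names that reset_state above builds from const.STREAM_KEY_BASE, a sketch might be:

from datetime import datetime

def get_stream_key_for_timestamp(timestamp):
    # One stream partition per UTC day, e.g. temps:20210101 when
    # const.STREAM_KEY_BASE is "temps" (the base name is an assumption).
    day = datetime.utcfromtimestamp(timestamp).strftime('%Y%m%d')
    return f"{const.STREAM_KEY_BASE}:{day}"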
''' Sum the numbers in the Stream and block for new messages '''
from util.connection import get_connection

if __name__ == '__main__':
    redis = get_connection()
    key = 'numbers'
    sum_key = f'{key}:blockingread_sum'
    timeout = 100
    retries = 0

    # Load the previous state or initialize it
    h = redis.hgetall(sum_key)
    if not h:
        # Assume first run and initialize when the Hash does not exist
        last_id = '0-0'
        n_sum = 0
    else:
        last_id = h['last_id']
        n_sum = int(h['n_sum'])

    while True:
        # Get the next message
        reply = redis.xread({key: last_id}, count=1, block=timeout)

        # An empty response means we've timed out
        if not reply:
            # Try an exponential backoff, as in the other consumers,
            # and give up after five empty reads
            if retries == 5:
                print(f'Waited long enough - the sum so far is {n_sum}')
                break
            retries += 1
            timeout *= 2
            continue
        timeout = 100
        retries = 0

        # Add each number to the sum and persist the state to the Hash
        for _, messages in reply:
            for message in messages:
                last_id = message[0]
                n_sum += int(message[1]['n'])
                redis.hset(sum_key, mapping={'last_id': last_id,
                                             'n_sum': n_sum})
def aggregating_consumer_func(current_stream_key, last_message_id,
                              current_hourly_total, current_hourly_count):
    log(
        AGGREGATING_CONSUMER_PREFIX,
        f"Starting aggregating consumer in stream {current_stream_key} at message {last_message_id}."
    )
    redis = get_connection()

    while True:
        # Get the next message from the stream, if any
        stream_dict = {current_stream_key: last_message_id}
        response = redis.xread(stream_dict, count=1, block=5000)

        if not response:
            # We either need to switch to another stream partition,
            # or wait for more messages to appear on the one we are
            # on if no newer partitions exist.

            # Get the name of the next stream partition to process
            # (one day later than the partition currently being processed).
            new_stream_key = get_next_stream_partition_key_name(current_stream_key)

            # Does the next partition exist? If so, read from it;
            # otherwise stick with this stream, which will block as we
            # are at the latest partition now.
            if redis.exists(new_stream_key) == 1:
                # We are still catching up and have not reached
                # the latest stream partition yet, so move on to
                # consuming the next partition.
                current_stream_key = new_stream_key
                log(AGGREGATING_CONSUMER_PREFIX,
                    f"Changing partition to consume stream: {new_stream_key}")
            else:
                # We are currently on the latest stream partition
                # and have caught up with the producer, so should
                # block for a while then try reading it again.
                log(
                    AGGREGATING_CONSUMER_PREFIX,
                    f"Waiting for new messages in stream {current_stream_key}, or new stream partition."
                )
        else:
            # Read the response that we got from Redis.
            msg = get_message_from_response(response)

            # Get the ID of the message that was just read.
            msg_id = msg[0]

            # Get the timestamp value from the message ID
            # (everything before the - in the ID).
            msg_timestamp = msg_id.split("-")[0]

            # Get the temperature value from the message.
            msg_temperature = msg[1]["temp_f"]

            # Get the hour for this message.
            msg_date = datetime.utcfromtimestamp(int(msg_timestamp))
            msg_hour = msg_date.hour

            # Get the hour for the last message.
            last_message_hour = 0
            if "-" in last_message_id:
                last_message_timestamp = last_message_id.split("-")[0]
                last_message_date = datetime.utcfromtimestamp(
                    int(last_message_timestamp))
                last_message_hour = last_message_date.hour

            # Did we start a new hour?
            if last_message_hour != msg_hour:
                # Starting a new hour, so push our result to the averages stream.
                formatted_date = last_message_date.strftime('%Y/%m/%d')

                # Build the result for this hour.
                payload = {
                    "hour": last_message_hour,
                    "date": formatted_date,
                    "average_temp_f": int(current_hourly_total / current_hourly_count),
                    "num_observations": current_hourly_count
                }

                # Publish the hourly average value to the temps:averages stream,
                # trimming the stream's length to around 120 entries each time
                # a new message is added.
                redis.xadd(const.AVERAGES_STREAM_KEY, payload, "*",
                           maxlen=120, approximate=True)

                # Reset values and put the current message's temperature
                # into the new hour.
                current_hourly_total = int(msg_temperature)
                current_hourly_count = 1
            else:
                # Still working through the current hour.
                current_hourly_total += int(msg_temperature)
                current_hourly_count += 1

            # Update the last ID we've seen.
            last_message_id = msg_id

            # Store the current state in Redis in case we crash and
            # have to resume. Here we are storing this every time we
            # read a message; depending on the nature of your workload,
            # you may be able to update it less frequently, for example
            # after reading 100 messages.
            redis.hset(
                const.AGGREGATING_CONSUMER_STATE_KEY,
                mapping={
                    "current_stream_key": current_stream_key,
                    "last_message_id": last_message_id,
                    "current_hourly_total": current_hourly_total,
                    "current_hourly_count": current_hourly_count
                })
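get_next_stream_partition_key_name is also not shown in this section. Given the one-partition-per-day key format used by the producer and reset_state, a sketch might be:

from datetime import datetime, timedelta

def get_next_stream_partition_key_name(current_stream_key):
    # Parse the date out of a key such as temps:20210101, add one day,
    # and rebuild the key for the following day's partition.
    day = datetime.strptime(current_stream_key.split(":")[-1], "%Y%m%d")
    next_day = day + timedelta(days=1)
    return f"{const.STREAM_KEY_BASE}:{next_day.strftime('%Y%m%d')}"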
def setup():
    ''' Initializes the Stream and the primes consumer group '''
    redis = get_connection()
    redis.delete(KEY)
    redis.xgroup_create(KEY, GROUP, mkstream=True)
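This setup pairs with the group consumer and producer shown earlier. A sketch of how they might be wired together, assuming a MEMBERS constant gives the number of group consumers (as the sleep interval in the producer above hints):

from multiprocessing import Process

if __name__ == '__main__':
    setup()
    Process(target=producer_func).start()
    for i in range(MEMBERS):
        Process(target=consumer_func, args=(f'CONSUMER-{i}',)).start()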
def setup():
    ''' Initializes the Stream '''
    redis = get_connection()
    redis.delete(KEY)