# imports shared by the snippets in this section
import json
import math
import os
import time
from datetime import datetime


def publish_to_failure_topic(event, reason):
    # todo: guard against publish_sns itself failing
    print('Event failed: %s' % event)
    if 'failure_topic' in event:
        payload = {'error': reason, 'event': event}
        publish_sns(event['failure_topic'], json.dumps(payload))
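publish_sns isn't defined in this excerpt; a minimal sketch of what it might look like, assuming a boto3 SNS client and that callers pass the full topic ARN:

import boto3

sns = boto3.client('sns')


def publish_sns(topic_arn, message):
    # boto3's SNS publish; it raises on failure, which callers handle
    sns.publish(TopicArn=topic_arn, Message=message)

Keeping the client at module level lets Lambda reuse the connection across warm invocations.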
def schedule_events(events):
    successful_ids = []
    failed_ids = []
    to_be_scheduled = []
    events_by_id = {}
    queue_url = os.environ.get('QUEUE_URL')
    for event in events:
        events_by_id[event['sk']] = event

        delta = datetime.fromisoformat(event['date']) - datetime.utcnow()
        delay = delta.total_seconds()
        rounded_delay = math.ceil(delay)
        # schedule the event a second earlier to help with delays in sqs/lambda cold start
        # the emitter will wait accordingly
        rounded_delay -= 1
        if rounded_delay < 0:
            rounded_delay = 0
        print(
            f'ID {event["sk"]} is supposed to emit in {rounded_delay}s which is {delay - rounded_delay}s before target.'
        )

        message = {
            'payload': event['payload'],
            'target': event['target'],
            'sk': event['sk'],
            'pk': int(event['pk']),
            'date': event['date']
        }
        if 'failure_topic' in event:
            message['failure_topic'] = event['failure_topic']
        sqs_message = {
            'Id': event['sk'],
            'MessageBody': json.dumps(message),
            # SQS caps DelaySeconds at 900 (15 minutes)
            'DelaySeconds': rounded_delay
        }
        to_be_scheduled.append(sqs_message)
        # send_message_batch accepts at most 10 entries, so flush in batches of 10
        if len(to_be_scheduled) == 10:
            successes, failures = publish_sqs(queue_url, to_be_scheduled)
            failed_ids.extend(failures)
            successful_ids.extend(successes)
            to_be_scheduled = []

    # flush the remaining partial batch
    if to_be_scheduled:
        successes, failures = publish_sqs(queue_url, to_be_scheduled)
        failed_ids.extend(failures)
        successful_ids.extend(successes)
    print(f'Success: {len(successful_ids)}, Failed: {len(failed_ids)}')

    if failed_ids:
        print(f'Failed to schedule the following events: {failed_ids}')
    for event_id in failed_ids:
        item = events_by_id[event_id]
        # todo: instead of publishing the error we should reschedule automatically;
        # a failure here can happen if sqs does not respond
        if 'failure_topic' in item:
            payload = {'error': 'ERROR', 'event': item['payload']}
            publish_sns(item['failure_topic'], json.dumps(payload))
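publish_sqs is likewise not shown. A plausible sketch, assuming boto3's send_message_batch, which reports per-entry outcomes in its Successful and Failed lists; since each entry's Id is the event's sk, the response maps straight back to event ids:

import boto3

sqs = boto3.client('sqs')


def publish_sqs(queue_url, entries):
    # SQS batch send; an empty batch would be rejected, so short-circuit it
    if not entries:
        return [], []
    response = sqs.send_message_batch(QueueUrl=queue_url, Entries=entries)
    successes = [msg['Id'] for msg in response.get('Successful', [])]
    failures = [msg['Id'] for msg in response.get('Failed', [])]
    return successes, failures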
def handle(items):
    failed_ids = []
    print(f'Processing {len(items)} records')
    # sort the items so that we process the earliest first
    items.sort(key=lambda x: x['date'])
    for item in items:
        event_id = item['id']
        # the event we received may have been scheduled early
        scheduled_execution = datetime.fromisoformat(item['date'])
        delay = (scheduled_execution - datetime.utcnow()).total_seconds()
        # remove another 10ms as there will be a short delay between
        # the emitter, the target sns and its consumer
        delay -= 0.01
        # if there is a positive delay then wait until it's time
        if delay > 0:
            time.sleep(delay)
        try:
            publish_sns(item['target'], item['payload'])
            print('event.emitted %s' % json.dumps({
                'id': event_id,
                'timestamp': str(datetime.utcnow()),
                'scheduled': str(scheduled_execution)
            }))
        except Exception as e:
            print(str(e))
            failed_ids.append(event_id)

    failed_items = []
    for event_id in failed_ids:
        try:
            event = EventWrapper.get(hash_key=event_id)
            event.status = 'FAILED'
            failed_items.append(event)
            # notify the event owner if a failure topic was configured
            if event.failure_topic is not None:
                payload = {'error': 'ERROR', 'event': event.payload}
                publish_sns(event.failure_topic, json.dumps(payload))
        except Exception as e:
            print(
                f"Failure update: Skipped {event_id} because it doesn't exist anymore"
            )
            print(str(e))
    save_with_retry(failed_items)
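EventWrapper and save_with_retry aren't shown either. EventWrapper.get(hash_key=...) matches PynamoDB's Model.get signature, so the following sketch assumes a PynamoDB model; the retry strategy (three attempts with exponential backoff) is an assumption:

import time


def save_with_retry(items, attempts=3):
    # persist each updated model, retrying transient DynamoDB errors
    # with exponential backoff; log and move on after the final attempt
    for item in items:
        for attempt in range(attempts):
            try:
                item.save()
                break
            except Exception as e:
                if attempt == attempts - 1:
                    print(f'Giving up after {attempts} attempts: {e}')
                else:
                    time.sleep(2 ** attempt)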
def handle(events):
    successful_ids = []
    failed_ids = []
    to_be_scheduled = []
    events_by_id = {}
    for event_id in events:
        event_response = table.query(
            KeyConditionExpression=Key('id').eq(event_id))
        if event_response['Count'] == 0:
            print("Event %s doesn't exist anymore" % event_id)
            continue
        item = event_response['Items'][0]
        events_by_id[event_id] = item

        delta = datetime.fromisoformat(item['date']) - datetime.utcnow()
        delay = delta.total_seconds()
        rounded_delay = math.ceil(delay)
        # schedule the event a second earlier to help with delays in sqs/lambda cold start
        # the emitter will wait accordingly
        rounded_delay -= 1
        if rounded_delay < 0:
            rounded_delay = 0
        print(
            f'ID {event_id} is supposed to emit in {rounded_delay}s which is {delay - rounded_delay}s before target.'
        )

        event = {
            'payload': item['payload'],
            'target': item['target'],
            'id': item['id'],
            'date': item['date']
        }
        if 'failure_topic' in item:
            event['failure_topic'] = item['failure_topic']
        sqs_message = {
            'Id': event_id,
            'MessageBody': json.dumps(event),
            # SQS caps DelaySeconds at 900 (15 minutes)
            'DelaySeconds': rounded_delay
        }
        to_be_scheduled.append(sqs_message)
        # send_message_batch accepts at most 10 entries, so flush in batches of 10
        if len(to_be_scheduled) == 10:
            successes, failures = send_to_sqs(to_be_scheduled)
            failed_ids.extend(failures)
            successful_ids.extend(successes)
            to_be_scheduled = []

    # flush the remaining partial batch
    if to_be_scheduled:
        successes, failures = send_to_sqs(to_be_scheduled)
        failed_ids.extend(failures)
        successful_ids.extend(successes)
    print(f'Success: {len(successful_ids)}, Failed: {len(failed_ids)}')

    to_save = []
    for event_id in successful_ids:
        item = events_by_id[event_id]
        item['status'] = 'SCHEDULED'
        to_save.append(item)
    for event_id in failed_ids:
        item = events_by_id[event_id]
        item['status'] = 'FAILED'
        to_save.append(item)
        # todo: instead of publishing the error we should reschedule automatically;
        # a failure here can happen if sqs does not respond
        if 'failure_topic' in item:
            payload = {'error': 'ERROR', 'event': item['payload']}
            publish_sns(item['failure_topic'], json.dumps(payload))
    save_with_retry(to_save)
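table and Key are module-level setup that this excerpt omits. A minimal sketch, assuming boto3's DynamoDB resource; the TABLE_NAME environment variable name is an assumption:

import os

import boto3
from boto3.dynamodb.conditions import Key

# DynamoDB table holding the scheduled events
table = boto3.resource('dynamodb').Table(os.environ['TABLE_NAME'])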
def handle(items):
    print(f'Processing {len(items)} records')
    # sort the items so that we process the earliest first
    items.sort(key=lambda x: x['date'])
    failed_events = []
    delays_ms = []
    for item in items:
        event_id = item['sk']
        # the event we received may have been scheduled early
        scheduled_execution = datetime.fromisoformat(item['date'])
        delay = (scheduled_execution - datetime.utcnow()).total_seconds()
        # remove another 10ms as there will be a short delay between
        # the emitter, the target sns and its consumer
        delay -= 0.01
        # if there is a positive delay then wait until it's time
        if delay > 0:
            time.sleep(delay)
        try:
            publish_sns(item['target'], item['payload'])
            now = datetime.utcnow()
            print('event.emitted %s' % json.dumps({
                'sk': event_id,
                'timestamp': str(now),
                'scheduled': str(scheduled_execution)
            }))
            # log how far past the scheduled time the event actually went out
            actual_delay = int(
                (now - scheduled_execution).total_seconds() * 1000)
            print(json.dumps({
                'event_id': event_id,
                'timestamp': str(now),
                'scheduled': str(scheduled_execution),
                'delay': actual_delay,
                'log_type': 'emit_delay'
            }))
            delays_ms.append(actual_delay)
        except Exception as e:
            print(f'Failed to emit event {event_id}: {str(e)}')
            failed_events.append(item)

    # aggregate the observed delays into value/count pairs for a metrics
    # backend that accepts distributions (see the sketch below)
    delays_grouped = {}
    for delay in delays_ms:
        delays_grouped[delay] = delays_grouped.get(delay, 0) + 1
    values = list(delays_grouped.keys())
    counts = list(delays_grouped.values())

    for event in failed_events:
        try:
            # items here are plain dicts, so look the topic up by key
            if 'failure_topic' in event:
                payload = {'error': 'ERROR', 'event': event['payload']}
                publish_sns(event['failure_topic'], json.dumps(payload))
        except Exception as e:
            print(
                f"Failed to emit event {event['sk']} to failure topic: {str(e)}"
            )
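The values/counts pair built above is left unused in this excerpt, but it is exactly the shape CloudWatch's PutMetricData accepts for distribution metrics. A speculative completion, with the namespace and metric name as assumptions:

import boto3

cloudwatch = boto3.client('cloudwatch')


def publish_delay_metrics(values, counts):
    # CloudWatch accepts up to 150 value/count pairs per datum,
    # which easily covers one Lambda batch of records
    if not values:
        return
    cloudwatch.put_metric_data(
        Namespace='Scheduler',
        MetricData=[{
            'MetricName': 'emit_delay',
            'Values': values,
            'Counts': counts,
            'Unit': 'Milliseconds'
        }]
    )

handle could then call publish_delay_metrics(values, counts) right after building the two lists.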