def __init__(self, db, expand_targets, message_send_enqueue, sender_app):
    """Initialise quota bookkeeping and start the background refresh.

    Stores the collaborators it is given, resolves ``sender_app`` through
    ``iris.cache.applications``, creates the empty per-application state,
    registers the two quota-exceeded counters and spawns ``self.refresh``.

    Args:
        db: Stored as ``self.db``.
        expand_targets: Stored as ``self.expand_targets``.
        message_send_enqueue: Stored as ``self.message_send_enqueue``.
        sender_app: Name looked up in ``iris.cache.applications``. When it is
            falsy or the lookup yields a falsy value, ``self.iris_application``
            ends up falsy and a warning/error is logged.
    """
    self.db = db
    self.expand_targets = expand_targets
    self.message_send_enqueue = message_send_enqueue

    # Resolve the application used for quota breach notifications.
    self.iris_application = None
    if not sender_app:
        logger.warning(
            'Iris sender_app not configured so notifications for quota breaches will not work'
        )
    else:
        self.iris_application = iris.cache.applications.get(sender_app)
        if self.iris_application:
            logger.info(
                'Using iris application (%s) for sender quota notifications.',
                sender_app)
        else:
            logger.error(
                'Invalid iris application (%s) used for sender. Quota breach notificiations/incidents will not work.',
                sender_app)

    # application: (hard_buckets, soft_buckets, hard_limit, soft_limit,
    #               wait_time, plan_name, (target_name, target_role))
    self.rates = {}

    # application: (incident_id, time())
    self.last_incidents = {}
    # application: time()
    self.last_soft_quota_notification_time = {}

    # One semaphore per bookkeeping dict above.
    self.last_incidents_mutex = Semaphore()
    self.last_soft_quota_notification_time_mutex = Semaphore()

    metrics.add_new_metrics({
        'quota_hard_exceed_cnt': 0,
        'quota_soft_exceed_cnt': 0,
    })

    spawn(self.refresh)
def distributed_send_message(message):
    """Send ``message`` through a slave when possible, otherwise locally.

    When both ``rpc.num_slaves`` and ``rpc.sender_slaves`` are truthy, the
    addresses yielded by ``rpc.sender_slaves`` are tried in order, for at most
    ``rpc.num_slaves`` attempts. The first slave that accepts the message ends
    the call. Otherwise the message is sent with the local ``send_message``
    and the per-mode stat and per-application/mode counter are updated.

    Returns:
        True once a slave accepted the message, or the local send returned a
        non-None runtime.

    Raises:
        Exception: The local send returned None.
    """
    if rpc.num_slaves and rpc.sender_slaves:
        for attempt, slave_address in enumerate(rpc.sender_slaves):
            if attempt >= rpc.num_slaves:
                logger.error(
                    'Failed using all configured slaves; resorting to local send_message'
                )
                break
            if rpc.send_message_to_slave(message, slave_address):
                return True

    logger.info('Sending message (ID %s) locally',
                message.get('message_id', '?'))

    runtime = send_message(message)
    add_mode_stat(message['mode'], runtime)

    # The counter is registered and bumped whether or not the send succeeded.
    counter_name = 'app_%(application)s_mode_%(mode)s_cnt' % message
    metrics.add_new_metrics({counter_name: 0})
    metrics.incr(counter_name)

    if runtime is None:
        raise Exception('Failed sending message')
    return True
def handle_slave_send(socket, address, req):
    """Send a message handed over by a master and reply with the outcome.

    The message is taken from ``req['data']`` and passed to
    ``send_funcs['send_message']``. A non-None runtime is reported as
    ``'OK'``; a None runtime or any exception raised while sending is
    reported as ``'FAIL'``. The reply is msgpack-encoded and written to
    ``socket`` in every case.

    Args:
        socket: Connection to reply on; only ``sendall`` is used.
        address: Identifies the master in log messages.
        req: Mapping whose ``'data'`` entry is the message dict.
    """
    message = req['data']
    message_id = message.get('message_id', '?')

    try:
        runtime = send_funcs['send_message'](message)
        add_mode_stat(message['mode'], runtime)

        metrics_key = 'app_%(application)s_mode_%(mode)s_cnt' % message
        metrics.add_new_metrics({metrics_key: 0})
        metrics.incr(metrics_key)

        if runtime is not None:
            response = 'OK'
            access_logger.info(
                'Message (ID %s) from master %s sent successfully',
                message_id, address)
            metrics.incr('slave_message_send_success_cnt')
        else:
            response = 'FAIL'
            access_logger.error(
                'Got falsy value from send_message for message (ID %s) from master %s: %s',
                message_id, address, runtime)
            metrics.incr('slave_message_send_fail_cnt')
    except Exception:
        response = 'FAIL'
        # Supply the values for both placeholders; without them the record is
        # emitted with literal "%s" and the failing message cannot be
        # identified from the log.
        logger.exception('Sending message (ID %s) from master %s failed.',
                         message_id, address)
        metrics.incr('slave_message_send_fail_cnt')

    socket.sendall(msgpack.packb(response))
def distributed_send_message(message):
    """Send ``message`` via a slave when this node is master, else locally.

    On the master, and only when ``coordinator.slave_count`` and
    ``coordinator.slaves`` are both truthy, the addresses yielded by
    ``coordinator.slaves`` are tried in order for at most
    ``coordinator.slave_count`` attempts. If no slave accepts the message
    (or this node is not the master), the message is sent with the local
    ``send_message`` and the per-mode stat and per-application/mode counter
    are updated.

    Returns:
        True once a slave accepted the message, or the local send returned a
        non-None runtime.

    Raises:
        Exception: The local send returned None.
    """
    # If I am the master, attempt sending my messages through my slaves.
    if coordinator.am_i_master():
        try:
            if coordinator.slave_count and coordinator.slaves:
                for attempt, slave_address in enumerate(coordinator.slaves):
                    if attempt >= coordinator.slave_count:
                        logger.error('Failed using all configured slaves; resorting to local send_message')
                        break
                    if rpc.send_message_to_slave(message, slave_address):
                        return True
        except StopIteration:
            logger.warning('No more slaves. Sending locally.')

    logger.info('Sending message (ID %s) locally',
                message.get('message_id', '?'))

    runtime = send_message(message)
    add_mode_stat(message['mode'], runtime)

    # The counter is registered and bumped whether or not the send succeeded.
    counter_name = 'app_%(application)s_mode_%(mode)s_cnt' % message
    metrics.add_new_metrics({counter_name: 0})
    metrics.incr(counter_name)

    if runtime is None:
        raise Exception('Failed sending message')
    return True
def __init__(self, db, expand_targets, message_send_enqueue, sender_app, rate_configs):
    """Initialise quota bookkeeping, default limits and the refresh task.

    Builds ``self.default_rate_def`` from ``rate_configs``, stores the
    collaborators it is given, resolves ``sender_app`` through
    ``iris.cache.applications``, creates the empty per-application state,
    registers the two quota-exceeded counters and spawns ``self.refresh``.

    Args:
        db: Stored as ``self.db``.
        expand_targets: Stored as ``self.expand_targets``.
        message_send_enqueue: Stored as ``self.message_send_enqueue``.
        sender_app: Name looked up in ``iris.cache.applications``. When it is
            falsy or the lookup yields a falsy value, ``self.iris_application``
            ends up falsy and a warning/error is logged.
        rate_configs: Mapping read with ``.get`` for the keys ``hard_limit``
            (default 6000), ``soft_limit`` (600), ``hard_duration`` (1),
            ``soft_duration`` (1), ``wait_time`` (3600), ``plan_name``,
            ``target_name`` and ``target_role`` (all default None).
    """
    self.db = db

    # configure default rate limiting params for messages
    limits = (
        rate_configs.get('hard_limit', 6000),
        rate_configs.get('soft_limit', 600),
        rate_configs.get('hard_duration', 1),
        rate_configs.get('soft_duration', 1),
        rate_configs.get('wait_time', 3600),
        rate_configs.get('plan_name', None),
    )
    target_name = rate_configs.get('target_name', None)
    target_role = rate_configs.get('target_role', None)
    # A target is only recorded when both halves are configured.
    target = (target_name, target_role) if target_name and target_role else None

    # (hard_limit, soft_limit, hard_duration, soft_duration, wait_time,
    #  plan_name, target)
    self.default_rate_def = limits + (target,)

    self.expand_targets = expand_targets
    self.message_send_enqueue = message_send_enqueue

    # Resolve the application used for quota breach notifications.
    self.iris_application = None
    if not sender_app:
        logger.warning(
            'Iris sender_app not configured so notifications for quota breaches will not work'
        )
    else:
        self.iris_application = iris.cache.applications.get(sender_app)
        if self.iris_application:
            logger.info(
                'Using iris application (%s) for sender quota notifications.',
                sender_app)
        else:
            logger.error(
                'Invalid iris application (%s) used for sender. Quota breach notificiations/incidents will not work.',
                sender_app)

    # application: (hard_buckets, soft_buckets, hard_limit, soft_limit,
    #               wait_time, plan_name, (target_name, target_role))
    self.rates = {}

    # application: (incident_id, time())
    self.last_incidents = {}
    # application: time()
    self.last_soft_quota_notification_time = {}

    # One semaphore per bookkeeping dict above.
    self.last_incidents_mutex = Semaphore()
    self.last_soft_quota_notification_time_mutex = Semaphore()

    metrics.add_new_metrics({
        'quota_hard_exceed_cnt': 0,
        'quota_soft_exceed_cnt': 0,
    })

    spawn(self.refresh)
def refresh(self):
    """Reload the per-application quota rules forever, every 60 seconds.

    Each pass reads ``self.get_new_rules()`` and reconciles ``self.rates``
    with it: applications that disappeared are pruned (together with their
    ``self.last_incidents`` entry), new applications get zero-filled bucket
    deques, and every application present afterwards has its limits
    re-applied and a fresh ``0`` slot appended to both buckets. Usage
    percentage metrics are then registered for every configured application.

    This method never returns.
    """
    while True:
        logger.info('Refreshing app quotas')

        # Desired state, keyed by application. Durations are divided by 60
        # before being used as bucket counts (presumably seconds -> minutes;
        # verify against the rules source).
        new_rates = {}
        for rule in self.get_new_rules():
            (application, hard_limit, soft_limit, hard_duration, soft_duration,
             target_name, target_role, plan_name, wait_time) = rule
            new_rates[application] = (
                hard_limit, soft_limit,
                hard_duration / 60, soft_duration / 60,
                wait_time, plan_name, (target_name, target_role))

        # NOTE: both are live views; ``old_keys`` tracks every change made to
        # ``self.rates`` below.
        old_keys = self.rates.viewkeys()
        new_keys = new_rates.viewkeys()

        # Remove old application entries
        for stale_app in old_keys - new_keys:
            logger.info('Pruning old application quota for %s', stale_app)
            try:
                del self.rates[stale_app]
                del self.last_incidents[stale_app]
            except KeyError:
                pass

        # Create new ones with fresh buckets
        for fresh_app in new_keys - old_keys:
            (hard_limit, soft_limit, hard_duration, soft_duration,
             wait_time, plan_name, target) = new_rates[fresh_app]
            hard_buckets = deque([0] * hard_duration, maxlen=hard_duration)
            soft_buckets = deque([0] * soft_duration, maxlen=soft_duration)
            self.rates[fresh_app] = (hard_buckets, soft_buckets,
                                     hard_limit, soft_limit,
                                     wait_time, plan_name, target)

        # Update existing ones + append new time interval. Keep the same
        # bucket object if the duration hasn't changed, otherwise build a
        # new deque sized to the new duration from the old contents.
        # Because ``old_keys`` is a live view, this intersection also covers
        # the applications created just above.
        for app in new_keys & old_keys:
            (hard_limit, soft_limit, hard_duration, soft_duration,
             wait_time, plan_name, target) = new_rates[app]

            hard_buckets = self.rates[app][0]
            if len(hard_buckets) != hard_duration:
                hard_buckets = deque(hard_buckets, maxlen=hard_duration)

            soft_buckets = self.rates[app][1]
            if len(soft_buckets) != soft_duration:
                soft_buckets = deque(soft_buckets, maxlen=soft_duration)

            self.rates[app] = (hard_buckets, soft_buckets,
                               hard_limit, soft_limit,
                               wait_time, plan_name, target)

            # Increase minute interval for hard + soft buckets
            hard_buckets.append(0)
            soft_buckets.append(0)

        usage_metrics = {
            'app_%s_quota_%s_usage_pct' % (app, quota_type): 0
            for quota_type in ('hard', 'soft')
            for app in new_keys
        }
        metrics.add_new_metrics(usage_metrics)

        logger.info('Refreshed app quotas: %s', ', '.join(new_keys))
        sleep(60)
def __init__(self, db, expand_targets, sender_app):
    """Initialise quota bookkeeping and start the background refresh.

    Stores the collaborators it is given, resolves ``sender_app`` through
    ``applications``, creates the empty per-application state, registers the
    two quota-exceeded counters and spawns ``self.refresh``.

    Args:
        db: Stored as ``self.db``.
        expand_targets: Stored as ``self.expand_targets``.
        sender_app: Name looked up in ``applications``. When it is falsy no
            lookup happens; when the lookup yields a falsy value an error is
            logged. In both cases ``self.iris_application`` ends up falsy.
    """
    self.db = db
    self.expand_targets = expand_targets

    # Resolve the application used for quota breach notifications.
    resolved_app = None
    if sender_app:
        resolved_app = applications.get(sender_app)
    self.iris_application = resolved_app
    if sender_app and not resolved_app:
        logger.error(
            'Invalid iris application (%s) used for sender. Quota breach notificiations/incidents will not work.',
            sender_app)

    # application: (hard_buckets, soft_buckets, hard_limit, soft_limit,
    #               wait_time, plan_name, (target_name, target_role))
    self.rates = {}

    # application: (incident_id, time())
    self.last_incidents = {}

    metrics.add_new_metrics({
        'quota_hard_exceed_cnt': 0,
        'quota_soft_exceed_cnt': 0,
    })

    spawn(self.refresh)
def fetch_and_send_message():
    """Take one message from ``send_queue`` and drive it through delivery.

    Steps, in order:

    1. Resolve the target contact; without one the message is marked as
       having no contact and processing stops.
    2. If ``quota.allow_send`` rejects the message it is dropped; when it has
       an ID, the change is audited, the mode is switched to ``drop`` (if
       that mode's ID is known), and the message is rendered and marked sent.
    3. A message whose mode is already ``drop`` is rendered/marked sent (when
       it has an ID) and counted, without being sent.
    4. After rendering, a body longer than ``MAX_MESSAGE_BODY_LENGTH`` is
       truncated and the message is dropped and marked sent.
    5. Otherwise the message is sent with ``distributed_send_message``. If
       that raises and the mode is not ``email``, a fallback mode is applied
       (when available), the message is re-rendered and sent once more.

    The sent status is recorded for messages that have an ID.
    """
    message = send_queue.get()

    has_contact = set_target_contact(message)
    if not has_contact:
        mark_message_has_no_contact(message)
        return

    # Later code indexes message['message_id'] directly, so make sure the
    # key always exists.
    if 'message_id' not in message:
        message['message_id'] = None

    drop_mode_id = api_cache.modes.get('drop')

    # If this app breaches hard quota, drop message on floor, and update in UI if it has an ID
    if not quota.allow_send(message):
        logger.warning(
            'Hard message quota exceeded; Dropping this message on floor: %s',
            message)
        if message['message_id']:
            spawn(auditlog.message_change, message['message_id'],
                  auditlog.MODE_CHANGE, message.get('mode', '?'), 'drop',
                  'Dropping due to hard quota violation.')

            # If we know the ID for the mode drop, reflect that for the message
            if drop_mode_id:
                message['mode'] = 'drop'
                message['mode_id'] = drop_mode_id
            else:
                logger.error(
                    'Can\'t mark message %s as dropped as we don\'t know the mode ID for %s',
                    message, 'drop')

            # Render, so we're able to populate the message table with the proper subject/etc as well as
            # information that it was dropped.
            render(message)
            mark_message_as_sent(message)
        return

    # If we're set to drop this message, no-op this before message gets sent to a vendor
    if message.get('mode') == 'drop':
        if message['message_id']:
            render(message)
            mark_message_as_sent(message)
        add_mode_stat('drop', 0)

        metrics_key = 'app_%(application)s_mode_drop_cnt' % message
        metrics.add_new_metrics({metrics_key: 0})
        metrics.incr(metrics_key)
        return

    render(message)

    # Drop this message, and mark it as dropped, rather than sending it, if its body is too long and we were normally
    # going to send it anyway.
    body_length = len(message['body'])
    if body_length > MAX_MESSAGE_BODY_LENGTH:
        logger.warning(
            'Message id %s has a ridiculously long body (%s chars). Dropping it.',
            message['message_id'], body_length)
        spawn(
            auditlog.message_change, message['message_id'],
            auditlog.MODE_CHANGE, message.get('mode', '?'), 'drop',
            'Dropping due to excessive body length (%s > %s chars)' %
            (body_length, MAX_MESSAGE_BODY_LENGTH))

        metrics.incr('msg_drop_length_cnt')

        # Truncate this here to avoid a duplicate log message in mark_message_as_sent(), as we still need to call
        # that to update the body/subject
        message['body'] = message['body'][:MAX_MESSAGE_BODY_LENGTH]

        if drop_mode_id:
            message['mode'] = 'drop'
            message['mode_id'] = drop_mode_id

        mark_message_as_sent(message)
        return

    success = None
    try:
        success = distributed_send_message(message)
    except Exception:
        logger.exception('Failed to send message: %s', message)
        if message['mode'] == 'email':
            # Email has no further fallback here; record the failure.
            logger.error(
                'unable to send %(mode)s %(message_id)s %(application)s %(destination)s %(subject)s %(body)s',
                message)
            metrics.incr('task_failure')
        else:
            logger.error(
                'reclassifying as email %(mode)s %(message_id)s %(application)s %(destination)s %(subject)s %(body)s',
                message)
            old_mode = message['mode']
            if set_target_fallback_mode(message):
                update_message_mode(message)
                auditlog.message_change(
                    message['message_id'], auditlog.MODE_CHANGE, old_mode,
                    message['mode'],
                    'Changing mode due to original mode failure')
            render(message)
            try:
                success = distributed_send_message(message)
            # nope - log and bail
            except Exception:
                metrics.incr('task_failure')
                logger.error(
                    'unable to send %(mode)s %(message_id)s %(application)s %(destination)s %(subject)s %(body)s',
                    message)

    if success:
        metrics.incr('message_send_cnt')
        if message['message_id']:
            mark_message_as_sent(message)

    if message['message_id']:
        update_message_sent_status(message, success)