def evaluate_triggers(self):
    last_finished_run = TriggerRun.objects.filter(finished=True).order_by('-ran_at').first()
    if last_finished_run:
        issues = Issue.objects.filter(last_seen__gte=last_finished_run.ran_at)
    else:
        issues = Issue.objects.all()
    logger.info(f'Found {len(issues)} issues since last finished run.')

    # Clear caches since we're starting a new run
    latest_nightly_appbuildid.cache_clear()

    # Evaluate triggers
    for trigger_class in get_trigger_classes():
        trigger = trigger_class(self.alert_backend, self.dry_run, self.now)
        for issue in issues:
            logger.debug(
                f'Evaluating {trigger_class.__name__} against issue {issue.fingerprint}.'
            )

            # Don't let a single failure block all trigger evaluations
            try:
                trigger.evaluate(issue)
            except Exception:
                capture_exception(
                    f'Error while running trigger {trigger_class.__name__} against issue '
                    f'{issue.fingerprint}'
                )
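
# A minimal sketch of the trigger interface that evaluate_triggers assumes:
# each class returned by get_trigger_classes() is constructed with
# (alert_backend, dry_run, now) and exposes evaluate(issue). The class name,
# the issue.first_seen field, and the alert recipient below are hypothetical,
# not part of the real trigger set.
class ExampleNewIssueTrigger:
    def __init__(self, alert_backend, dry_run, now):
        self.alert_backend = alert_backend
        self.dry_run = dry_run
        self.now = now

    def evaluate(self, issue):
        # Alert on issues first seen within the last day (assumed field).
        if (self.now - issue.first_seen).days < 1 and not self.dry_run:
            self.alert_backend.send_alert(
                to='example@example.com',
                subject=f'New issue: {issue.fingerprint}',
                body='A new issue was seen in the last day.',
            )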
def listen(queue_backend, worker_message_count):
    """
    Listen for incoming events and process them. This is the entrypoint
    for worker processes.
    """
    logger = logging.getLogger('bec-alerts.processor.worker')
    logger.info('Waiting for an event')

    # Exit after worker_message_count events have been processed.
    messages_processed = 0
    while messages_processed < worker_message_count:
        try:
            for event_data in queue_backend.receive_events():
                event = SentryEvent(event_data)
                logger.debug(f'Received event ID: {event.id}')

                # The nested try keeps an error on a single event from
                # stopping us from processing the rest of the received events.
                try:
                    process_event(event)
                    messages_processed += 1
                except Exception:
                    capture_exception(f'Error processing event: {event.id}')
        except Exception:
            capture_exception('Error receiving message')
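
# A minimal sketch of the queue backend interface that listen() relies on:
# receive_events() is expected to yield one decoded event payload per
# message. This in-memory stand-in is hypothetical; it is useful mainly for
# exercising listen() in tests without SQS.
class InMemoryQueueBackend:
    def __init__(self, events):
        self.events = list(events)

    def receive_events(self):
        # Yield events until the queue is drained.
        while self.events:
            yield self.events.pop(0)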
def main(
    once,
    dry_run,
    console_alerts,
    sleep_delay,
    from_email,
    endpoint_url,
    connect_timeout,
    read_timeout,
    verify_email,
    datadog_counter_name,
    sentry_dsn,
):
    """Evaluate alert triggers and send alerts."""
    initialize_error_reporting(sentry_dsn)

    try:
        datadog.initialize()
        if console_alerts:
            alert_backend = ConsoleAlertBackend()
        else:
            alert_backend = EmailAlertBackend(
                from_email=from_email,
                endpoint_url=endpoint_url,
                connect_timeout=connect_timeout,
                read_timeout=read_timeout,
                verify_email=verify_email,
            )
    except Exception:
        # Just make sure Sentry knows that we failed on startup
        capture_exception('Failed during watcher startup')
        raise

    while True:
        try:
            evaluator = TriggerEvaluator(alert_backend, dry_run)
            evaluator.run_job()
        except Exception:
            capture_exception('Error evaluating triggers')
        finally:
            datadog.statsd.increment(datadog_counter_name)

        if once:
            break
        time.sleep(sleep_delay)
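
# A hedged sketch of what ConsoleAlertBackend might look like: the watcher
# only requires the same send_alert(to, subject, body) interface as the
# email backend. The exact formatting in the real backend may differ.
class ExampleConsoleAlertBackend:
    def send_alert(self, to, subject, body):
        # Print the alert instead of emailing it, for local development.
        print(f'To: {to}\nSubject: {subject}\n\n{body}')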
def send_alert(self, to, subject, body):
    try:
        self.ses.send_email(
            Destination={'ToAddresses': [to]},
            Message={
                'Body': {
                    'Text': {
                        'Charset': 'UTF-8',
                        'Data': body,
                    },
                },
                'Subject': {
                    'Charset': 'UTF-8',
                    'Data': subject,
                },
            },
            Source=self.from_email,
        )
    except ClientError as err:
        capture_exception(f'Could not send email: {err.response["Error"]["Message"]}')
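
# A sketch of how the self.ses client used above could be constructed with
# boto3, wiring through the endpoint_url and timeout parameters that
# EmailAlertBackend receives. Whether the real backend builds the client
# exactly this way is an assumption.
import boto3
from botocore.config import Config

class ExampleEmailAlertBackend:
    def __init__(self, from_email, endpoint_url, connect_timeout, read_timeout, verify_email):
        self.from_email = from_email
        self.ses = boto3.client(
            'ses',
            endpoint_url=endpoint_url,
            config=Config(connect_timeout=connect_timeout, read_timeout=read_timeout),
        )
        if verify_email:
            # SES requires sender addresses to be verified before use.
            self.ses.verify_email_identity(EmailAddress=from_email)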
def main(
    queue_name,
    endpoint_url,
    connect_timeout,
    read_timeout,
    process_count,
    worker_message_count,
    sentry_dsn,
):
    """
    Listen for incoming events from Sentry and aggregate the data we care
    about from them.

    Manages a pool of subprocesses that perform the listening and processing.
    """
    initialize_error_reporting(sentry_dsn)
    logger = logging.getLogger('bec-alerts.processor')
    worker_ids = itertools.count()

    try:
        queue_backend = SQSQueueBackend(
            queue_name=queue_name,
            endpoint_url=endpoint_url,
            connect_timeout=connect_timeout,
            read_timeout=read_timeout,
        )
    except Exception:
        capture_exception('Error initializing queue backend, will exit.')
        return

    logger.info('Starting processor workers')
    processes = []
    listen_kwargs = {
        'queue_backend': queue_backend,
        'worker_message_count': worker_message_count,
    }
    for _ in range(process_count):
        process = Process(target=listen, kwargs=listen_kwargs)
        process.name = f'worker-{next(worker_ids)}'
        processes.append(process)

    try:
        for process in processes:
            process.start()

        # Watch for terminated processes and replace them
        while True:
            for k, process in enumerate(processes):
                if not process.is_alive():
                    logger.info('Worker died, restarting process.')
                    processes[k] = Process(target=listen, kwargs=listen_kwargs)
                    processes[k].name = f'worker-{next(worker_ids)}'
                    processes[k].start()
            time.sleep(5)
    except KeyboardInterrupt:
        for process in processes:
            if process.is_alive():
                process.terminate()
    except Exception:
        capture_exception()
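
# A hedged sketch of how SQSQueueBackend.receive_events() could be built on
# boto3 long polling: fetch a batch, yield each decoded body, and delete
# each message after it has been yielded. The real backend's details (batch
# size, payload format, error handling) are assumptions.
import json
import boto3
from botocore.config import Config

class ExampleSQSQueueBackend:
    def __init__(self, queue_name, endpoint_url, connect_timeout, read_timeout):
        self.sqs = boto3.client(
            'sqs',
            endpoint_url=endpoint_url,
            config=Config(connect_timeout=connect_timeout, read_timeout=read_timeout),
        )
        self.queue_url = self.sqs.get_queue_url(QueueName=queue_name)['QueueUrl']

    def receive_events(self):
        # Long polling: read_timeout must exceed WaitTimeSeconds.
        response = self.sqs.receive_message(
            QueueUrl=self.queue_url,
            MaxNumberOfMessages=10,
            WaitTimeSeconds=20,
        )
        for message in response.get('Messages', []):
            yield json.loads(message['Body'])
            self.sqs.delete_message(
                QueueUrl=self.queue_url,
                ReceiptHandle=message['ReceiptHandle'],
            )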