Exemple #1
0
    def evaluate_triggers(self):
        """Evaluate every registered trigger against recently-seen issues.

        Issues are limited to those whose ``last_seen`` is at or after the
        most recent *finished* trigger run; if no finished run exists yet,
        every issue is evaluated.
        """
        last_finished_run = TriggerRun.objects.filter(
            finished=True).order_by('-ran_at').first()
        if last_finished_run:
            issues = Issue.objects.filter(
                last_seen__gte=last_finished_run.ran_at)
        else:
            # First run ever: consider all known issues.
            issues = Issue.objects.all()

        logger.info(f'Found {len(issues)} issues since last finished run.')

        # Clear caches since we're starting a new run
        latest_nightly_appbuildid.cache_clear()

        # Evaluate triggers
        for trigger_class in get_trigger_classes():
            trigger = trigger_class(self.alert_backend, self.dry_run, self.now)
            for issue in issues:
                logger.debug(
                    f'Evaluating {trigger_class.__name__} against issue {issue.fingerprint}.'
                )

                # Don't let a single failure block all trigger evaluations
                try:
                    trigger.evaluate(issue)
                except Exception:
                    # Use trigger_class.__name__: the trigger *instance* has
                    # no __name__ attribute, so the previous
                    # trigger.__name__ raised AttributeError inside this
                    # handler, masking the original error.
                    capture_exception(
                        f'Error while running trigger {trigger_class.__name__} against issue '
                        f'{issue.fingerprint}')
Exemple #2
0
def listen(queue_backend, worker_message_count):
    """
    Listen for incoming events and process them.

    This is the entrypoint for worker processes.

    :param queue_backend: Queue to pull raw event payloads from; must
        provide a ``receive_events()`` iterable.
    :param worker_message_count: Exit after this many events have been
        successfully processed, so the parent process can recycle us.
    """
    logger = logging.getLogger('bec-alerts.processor.worker')
    logger.info('Waiting for an event')

    # Exit after worker_message_count events have been processed.
    messages_processed = 0
    while messages_processed < worker_message_count:
        try:
            for event_data in queue_backend.receive_events():
                event = SentryEvent(event_data)
                logger.debug(f'Received event ID: {event.id}')

                # The nested try avoids errors on a single event stopping us
                # from processing the rest of the received events.
                try:
                    process_event(event)
                    messages_processed += 1
                except Exception:
                    # The bound exception was unused; capture_exception
                    # reports the active exception on its own.
                    capture_exception(f'Error processing event: {event.id}')
        except Exception:
            capture_exception('Error receiving message')
Exemple #3
0
def main(
    once,
    dry_run,
    console_alerts,
    sleep_delay,
    from_email,
    endpoint_url,
    connect_timeout,
    read_timeout,
    verify_email,
    datadog_counter_name,
    sentry_dsn,
):
    """Evaluate alert triggers and send alerts.

    Runs forever (sleeping ``sleep_delay`` seconds between passes) unless
    ``once`` is true, in which case a single evaluation pass is performed.
    A datadog counter is incremented after every pass, successful or not,
    as a liveness signal.
    """
    initialize_error_reporting(sentry_dsn)

    try:
        datadog.initialize()

        # Console backend is intended for local development / dry runs;
        # email via SES is the production path.
        if console_alerts:
            alert_backend = ConsoleAlertBackend()
        else:
            alert_backend = EmailAlertBackend(
                from_email=from_email,
                endpoint_url=endpoint_url,
                connect_timeout=connect_timeout,
                read_timeout=read_timeout,
                verify_email=verify_email,
            )
    except Exception:
        # Just make sure Sentry knows that we failed on startup
        capture_exception('Failed during watcher startup')
        raise

    while True:
        try:
            evaluator = TriggerEvaluator(alert_backend, dry_run)
            evaluator.run_job()
        except Exception:
            # Binding was unused; capture_exception reports the active
            # exception itself.
            capture_exception('Error evaluating triggers')
        finally:
            datadog.statsd.increment(datadog_counter_name)

        if once:
            break
        time.sleep(sleep_delay)
 def send_alert(self, to, subject, body):
     """Send a plain-text email alert to a single recipient via SES.

     SES client errors are reported through capture_exception rather
     than propagated to the caller.
     """
     message = {
         'Body': {
             'Text': {
                 'Charset': 'UTF-8',
                 'Data': body,
             },
         },
         'Subject': {
             'Charset': 'UTF-8',
             'Data': subject,
         },
     }
     try:
         self.ses.send_email(
             Destination={'ToAddresses': [to]},
             Message=message,
             Source=self.from_email,
         )
     except ClientError as err:
         capture_exception(
             f'Could not send email: {err.response["Error"]["Message"]}')
Exemple #5
0
def main(
    queue_name,
    endpoint_url,
    connect_timeout,
    read_timeout,
    process_count,
    worker_message_count,
    sentry_dsn,
):
    """
    Listen for incoming events from Sentry and aggregate the data we
    care about from them.

    Manages a pool of subprocesses that perform the listening and
    processing. Dead workers are replaced; Ctrl-C terminates the pool.
    """
    initialize_error_reporting(sentry_dsn)
    logger = logging.getLogger('bec-alerts.processor')
    # Monotonic counter so restarted workers get fresh, unique names.
    worker_ids = itertools.count()

    try:
        queue_backend = SQSQueueBackend(
            queue_name=queue_name,
            endpoint_url=endpoint_url,
            connect_timeout=connect_timeout,
            read_timeout=read_timeout,
        )
    except Exception:
        capture_exception('Error initializing queue backend, will exit.')
        return

    logger.info('Starting processor workers')
    processes = []
    listen_kwargs = {
        'queue_backend': queue_backend,
        'worker_message_count': worker_message_count,
    }
    for _ in range(process_count):
        process = Process(target=listen, kwargs=listen_kwargs)
        process.name = f'worker-{next(worker_ids)}'
        processes.append(process)

    try:
        for process in processes:
            process.start()

        # Watch for terminated processes and replace them
        while True:
            for k, process in enumerate(processes):
                if not process.is_alive():
                    logger.info('Worker died, restarting process.')
                    processes[k] = Process(target=listen, kwargs=listen_kwargs)
                    processes[k].name = f'worker-{next(worker_ids)}'
                    processes[k].start()
            # Sleep once per sweep over the whole pool. The sleep was
            # previously inside the for loop, which made each sweep take
            # 5 * process_count seconds and delayed worker restarts.
            time.sleep(5)
    except KeyboardInterrupt:
        for process in processes:
            if process.is_alive():
                process.terminate()
    except Exception:
        capture_exception()