Example #1
0
 def _initialize_sentry(self):
     """Set up the Sentry SDK and the log handlers that feed it.

     Builds the breadcrumb and event handlers at their configured levels,
     initialises the SDK with the stored DSN and the ``before_send`` hook,
     and, when a crawl context is present, tags the scope with a
     ``CRAWL_REFERENCE`` of the form ``<s3_bucket>/<s3_directory>``
     (``UNKNOWN`` stands in for a missing key).
     """
     self._breadcrumb_handler = BreadcrumbHandler(
         level=self._log_level_sentry_breadcrumb,
     )
     self._event_handler = EventHandler(
         level=self._log_level_sentry_event,
     )

     sentry_sdk.init(
         dsn=self._sentry_dsn,
         before_send=self._sentry_before_send,
     )

     with sentry_sdk.configure_scope() as scope:
         context = self._crawl_context
         if context:
             bucket = context.get('s3_bucket', 'UNKNOWN')
             directory = context.get('s3_directory', 'UNKNOWN')
             scope.set_tag('CRAWL_REFERENCE', '%s/%s' % (bucket, directory))
Example #2
0
 def _initialize_sentry(self):
     """Set up the Sentry SDK and the log handlers that feed it.

     Builds the breadcrumb and event handlers at their configured levels,
     initialises the SDK with the stored DSN and the ``before_send`` hook,
     and, when a crawl reference is set, attaches it to the scope as the
     ``CRAWL_REFERENCE`` tag.
     """
     self._breadcrumb_handler = BreadcrumbHandler(
         level=self._log_level_sentry_breadcrumb,
     )
     self._event_handler = EventHandler(
         level=self._log_level_sentry_event,
     )

     sentry_sdk.init(
         dsn=self._sentry_dsn,
         before_send=self._sentry_before_send,
     )

     with sentry_sdk.configure_scope() as scope:
         reference = self._crawl_reference
         if reference:
             scope.set_tag("CRAWL_REFERENCE", reference)
Example #3
0
def init_log():
    """Route standard-library logging through loguru and register the sinks.

    Steps, in order:
      * raise the ``chardet`` and ``urllib3`` loggers to WARNING;
      * install an intercepting handler as the root ``logging`` handler so
        stdlib records are re-emitted via loguru;
      * add the Sentry event and breadcrumb handlers (both at ERROR);
      * add a file sink at ``logs/spider.log`` rotated daily and kept for
        five days;
      * when ``core_env`` is ``"dev"``, replace handler id 0 with a
        TRACE-level stderr sink.
    """
    logging.getLogger("chardet").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)

    class InterceptHandler(logging.Handler):
        """Forward standard ``logging`` records to loguru."""

        def emit(self, record):
            # loguru only accepts level names it has registered. A stdlib
            # record with a custom level (e.g. "Level 15") would make
            # ``log()`` raise ValueError, so fall back to the numeric level.
            try:
                level = logger.level(record.levelname).name
            except ValueError:
                level = record.levelno

            # Retrieve context where the logging call occurred, this happens to be in the 6th frame upward
            logger_opt = logger.opt(depth=6, exception=record.exc_info)
            logger_opt.log(level, record.getMessage())

    logging.basicConfig(handlers=[InterceptHandler()], level=logging.DEBUG)

    logger.add(EventHandler(), format="{message}", level="ERROR")
    logger.add(BreadcrumbHandler(), format="{message}", level="ERROR")
    logger.add("logs/spider.log",
               rotation="1 day",
               retention="5 days",
               enqueue=True)
    if core_env == "dev":
        # NOTE(review): id 0 is presumably loguru's default stderr sink;
        # remove() raises if that handler is already gone - confirm callers
        # only run this once.
        logger.remove(0)
        logger.add(sys.stderr, level="TRACE")
Example #4
0
def configure():
    """Wire stdlib logging into the shared logger and register all sinks.

    Installs ``InterceptHandler`` as the root ``logging`` handler, patches
    ``debug``/``warning`` onto the logger's class, drops every existing
    sink, adds the Sentry breadcrumb (DEBUG) and event (ERROR) handlers when
    ``Sentry.dsn`` is truthy, adds each entry of ``LoggingHandlers``, and
    finally registers the message filters for noisy loggers.
    """
    # noinspection PyArgumentList
    logging.basicConfig(handlers=[InterceptHandler()], level=logging.NOTSET)

    # monkey patching
    logger_cls = logger.__class__
    logger_cls.debug = debug
    logger_cls.warning = warning

    logger.remove()

    if Sentry.dsn:
        # Breadcrumbs first, then events - same order as they were added
        # originally.
        sentry_sinks = (
            (BreadcrumbHandler, logging.DEBUG),
            (EventHandler, logging.ERROR),
        )
        for handler_cls, threshold in sentry_sinks:
            logger.add(
                handler_cls(level=threshold),
                level=threshold,
                format="{name} - {message}",
                backtrace=False,
                diagnose=False,
            )

    for sink_options in LoggingHandlers:
        logger.add(**sink_options)

    # (logger name, message fragment) pairs handed to the log filter.
    filtered_messages = (
        ("discord.client", "Dispatching event "),
        ("discord.gateway", "WebSocket Event: "),
        ("discord.gateway", "websocket alive with sequence"),
        ("discord.gateway", "Unknown event "),
        ("discord.http", "has received"),
        ("discord.http", "has returned"),
        ("PIL.TiffImagePlugin", "tag: "),
    )
    for logger_name, fragment in filtered_messages:
        log_filter.add_filter(logger_name, fragment)
Example #5
0
class MPLogger(object):
    """Configure OpenWPM logging across processes.

    A listener thread owns a logging server socket. Records read from that
    socket are written to the log file and, when the ``SENTRY_DSN``
    environment variable is set, passed to the Sentry breadcrumb and event
    handlers. Messages tagged ``'EXT'`` are re-dispatched through the
    ``openwpm`` logger instead. The ``openwpm`` logger itself gets a console
    handler and a socket handler pointing at the listener.
    """

    def __init__(self,
                 log_file,
                 crawl_context=None,
                 log_level_console=logging.INFO,
                 log_level_file=logging.DEBUG,
                 log_level_sentry_breadcrumb=logging.DEBUG,
                 log_level_sentry_event=logging.ERROR):
        """Store the configuration, set up Sentry if available, start logging.

        Args:
            log_file: Path of the log file; ``~`` is expanded.
            crawl_context: Optional mapping; its ``s3_bucket`` and
                ``s3_directory`` entries are used to tag Sentry events.
            log_level_console: Level of the console handler.
            log_level_file: Level of the file handler.
            log_level_sentry_breadcrumb: Level of the Sentry breadcrumb
                handler.
            log_level_sentry_event: Level of the Sentry event handler.
        """
        self._crawl_context = crawl_context
        self._log_level_console = log_level_console
        self._log_level_file = log_level_file
        self._log_level_sentry_breadcrumb = log_level_sentry_breadcrumb
        self._log_level_sentry_event = log_level_sentry_event
        # Configure log handlers
        self._status_queue = JoinableQueue()
        self._log_file = os.path.expanduser(log_file)

        # Configure sentry (if available). This has to happen before
        # _initialize_loggers() starts the listener thread: that thread reads
        # self._sentry_dsn and the sentry handlers for each record it
        # serializes, so they must already exist.
        self._sentry_dsn = os.getenv('SENTRY_DSN', None)
        if self._sentry_dsn:
            self._initialize_sentry()

        self._initialize_loggers()

    def _initialize_loggers(self):
        """Set up console logging and serialized file logging.

        The logger and socket handler are set to log at the logging.DEBUG level
        and filtering happens at the outputs (console, file, and sentry)."""
        logger = logging.getLogger('openwpm')
        logger.setLevel(logging.DEBUG)

        # Remove any previous handlers to avoid registering duplicates
        if logger.handlers:
            logger.handlers = []

        # Start file handler and listener thread (for serialization)
        handler = logging.FileHandler(self._log_file)
        formatter = logging.Formatter(
            "%(asctime)s - %(processName)-11s[%(threadName)-10s]"
            "- %(module)-20s - %(levelname)-8s: %(message)s")
        handler.setFormatter(formatter)
        handler.setLevel(self._log_level_file)
        self._file_handler = handler

        self._listener = threading.Thread(target=self._start_listener)
        self._listener.daemon = True
        self._listener.start()
        # The listener publishes the address of its socket on the status
        # queue; wait for it so the socket handler below can connect.
        self.logger_address = self._status_queue.get(timeout=60)
        self._status_queue.task_done()

        # Attach console handler to log to console
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(self._log_level_console)
        formatter = logging.Formatter(
            '%(module)-20s - %(levelname)-8s - %(message)s')
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

        # Attach socket handler to logger to serialize writes to file
        socket_handler = ClientSocketHandler(*self.logger_address)
        socket_handler.setLevel(logging.DEBUG)
        logger.addHandler(socket_handler)

    def _sentry_before_send(self, event, hint):
        """Update sentry events before they are sent

        Note: we want to be very conservative in handling errors here. If this
        method throws an error, Sentry silently discards it and no record is
        sent. It's much better to have Sentry send an unparsed error than no
        error.

        Args:
            event: The Sentry event dict; modified in place.
            hint: Unused.

        Returns:
            The same ``event`` object.
        """

        # Strip "BROWSER X: " prefix to clean up logs
        if 'logentry' in event and 'message' in event['logentry']:
            if re.match(BROWSER_PREFIX, event['logentry']['message']):
                event['logentry']['message'] = re.sub(
                    BROWSER_PREFIX, '', event['logentry']['message'])

        # Add traceback info to fingerprint for logs that contain a traceback
        try:
            event['logentry']['message'] = event['extra']['exception'].strip()
        except (KeyError, AttributeError, TypeError):
            # Missing keys, or an 'exception' entry that is not a string:
            # keep the event as it is rather than letting the error escape
            # and lose the event altogether.
            pass

        # Combine neterrors of the same type
        try:
            if 'about:neterror' in event['extra']['exception']:
                error_text = parse_neterror(event['extra']['exception'])
                event['fingerprint'] = ['neterror-%s' % error_text]
        except Exception:
            # Deliberately broad: see the note in the docstring.
            pass

        return event

    def _initialize_sentry(self):
        """Set up the Sentry SDK and the handlers that feed it.

        Creates the breadcrumb and event handlers at their configured
        levels, initialises the SDK with the DSN read from the environment
        and, when a crawl context was given, tags the scope with
        ``CRAWL_REFERENCE`` = ``<s3_bucket>/<s3_directory>`` (``UNKNOWN``
        for a missing key)."""
        self._breadcrumb_handler = BreadcrumbHandler(
            level=self._log_level_sentry_breadcrumb)
        self._event_handler = EventHandler(level=self._log_level_sentry_event)
        sentry_sdk.init(dsn=self._sentry_dsn,
                        before_send=self._sentry_before_send)
        with sentry_sdk.configure_scope() as scope:
            if self._crawl_context:
                scope.set_tag(
                    'CRAWL_REFERENCE', '%s/%s' %
                    (self._crawl_context.get('s3_bucket', 'UNKNOWN'),
                     self._crawl_context.get('s3_directory', 'UNKNOWN')))

    def _start_listener(self):
        """Start listening socket for remote logs from extension"""
        server = serversocket(name="loggingserver")
        self._status_queue.put(server.sock.getsockname())
        server.start_accepting()
        self._status_queue.join()  # block to allow parent to retrieve address

        while True:
            # Check for shutdown: anything on the status queue at this point
            # is treated as the shutdown signal put there by close().
            if not self._status_queue.empty():
                self._status_queue.get()
                server.close()
                time.sleep(3)  # TODO: the socket needs a better way of closing
                # Drain whatever is still queued before exiting.
                while not server.queue.empty():
                    obj = server.queue.get()
                    self._process_record(obj)
                self._status_queue.task_done()
                break

            # Process logs
            try:
                obj = server.queue.get(True, 10)
                self._process_record(obj)
            except EmptyQueue:
                pass

    def _process_record(self, obj):
        """Dispatch a received object to the extension or serialized path.

        A two-item object whose first item is ``'EXT'`` is an extension log;
        everything else is handled as a serialized log record.
        """
        if len(obj) == 2 and obj[0] == 'EXT':
            self._handle_extension_log(obj)
        else:
            self._handle_serialized_writes(obj)

    def _handle_extension_log(self, obj):
        """Pass messages received from the extension to logger"""
        obj = json.loads(obj[1])
        record = logging.LogRecord(name=__name__,
                                   level=obj['level'],
                                   pathname=obj['pathname'],
                                   lineno=obj['lineno'],
                                   msg=obj['msg'],
                                   args=obj['args'],
                                   exc_info=obj['exc_info'],
                                   func=obj['func'])
        logger = logging.getLogger('openwpm')
        logger.handle(record)

    def _handle_serialized_writes(self, obj):
        """Handle records that must be serialized to the main process

        This is currently records that are written to a file on disk
        and those sent to Sentry.
        """
        # NOTE(review): dill.loads executes arbitrary code on malicious
        # input; this assumes only trusted processes can reach the logging
        # socket - confirm.
        if obj['exc_info']:
            obj['exc_info'] = dill.loads(obj['exc_info'])
        if obj['args']:
            obj['args'] = dill.loads(obj['args'])
        record = logging.makeLogRecord(obj)
        # emit() is called directly on the file handler, so its level is not
        # consulted here; the sentry handlers are level-checked explicitly.
        self._file_handler.emit(record)
        if self._sentry_dsn:
            if record.levelno >= self._breadcrumb_handler.level:
                self._breadcrumb_handler.handle(record)
            if record.levelno >= self._event_handler.level:
                self._event_handler.handle(record)

    def close(self):
        """Signal the listener thread to shut down and wait for it to exit."""
        self._status_queue.put("SHUTDOWN")
        self._status_queue.join()
        self._listener.join()
Example #6
0
from nonebot.log import logger
from sentry_sdk.integrations.logging import EventHandler, BreadcrumbHandler

from .config import Config

driver = get_driver()
global_config = driver.config
config = Config(**global_config.dict())

assert config.sentry_dsn, "Sentry DSN must provided!"

# Every config entry except the environment is forwarded to the SDK with the
# first seven characters of its key dropped (presumably the "sentry_" prefix -
# confirm against Config). The environment falls back to the driver's env.
_sentry_init_kwargs = {
    option[7:]: setting
    for option, setting in config.dict().items()
    if option != "sentry_environment"
}
sentry_sdk.init(
    **_sentry_init_kwargs,
    environment=config.sentry_environment or driver.env,
    default_integrations=False,
)


class Filter:
    """Callable log filter that passes records at or above a named level."""

    def __init__(self, level="INFO") -> None:
        # Name of the minimum level; resolved to a number on every call.
        self.level = level

    def __call__(self, record):
        """Return True when ``record``'s level number reaches the threshold."""
        threshold = logger.level(self.level).no
        return threshold <= record["level"].no


# Register the Sentry handlers as sinks, each gated by a Filter at the same
# level the handler is created with: events from ERROR, breadcrumbs from INFO.
for _handler_cls, _threshold in ((EventHandler, "ERROR"),
                                 (BreadcrumbHandler, "INFO")):
    logger.add(_handler_cls(_threshold), filter=Filter(_threshold))