def _initialize_sentry(self):
    """Set up the Sentry handlers, the Sentry SDK and the crawl tag.

    Builds the breadcrumb and event handlers at their configured levels,
    initialises the SDK with ``self._sentry_dsn`` (events pass through
    ``self._sentry_before_send``), and, when a crawl context is set, tags
    the scope with a ``CRAWL_REFERENCE`` of the form
    ``<s3_bucket>/<s3_directory>``.
    """
    breadcrumb_level = self._log_level_sentry_breadcrumb
    self._breadcrumb_handler = BreadcrumbHandler(level=breadcrumb_level)

    event_level = self._log_level_sentry_event
    self._event_handler = EventHandler(level=event_level)

    sentry_sdk.init(
        dsn=self._sentry_dsn,
        before_send=self._sentry_before_send,
    )

    with sentry_sdk.configure_scope() as scope:
        context = self._crawl_context
        if context:
            # Missing keys fall back to 'UNKNOWN' so the tag is always
            # well-formed.
            bucket = context.get('s3_bucket', 'UNKNOWN')
            directory = context.get('s3_directory', 'UNKNOWN')
            scope.set_tag('CRAWL_REFERENCE', '%s/%s' % (bucket, directory))
def _initialize_sentry(self):
    """Set up the Sentry handlers, the Sentry SDK and the crawl tag.

    Builds the breadcrumb and event handlers at their configured levels,
    initialises the SDK with ``self._sentry_dsn`` (events pass through
    ``self._sentry_before_send``), and, when ``self._crawl_reference`` is
    truthy, tags the scope with it as ``CRAWL_REFERENCE``.
    """
    breadcrumb_level = self._log_level_sentry_breadcrumb
    self._breadcrumb_handler = BreadcrumbHandler(level=breadcrumb_level)

    event_level = self._log_level_sentry_event
    self._event_handler = EventHandler(level=event_level)

    sentry_sdk.init(
        dsn=self._sentry_dsn,
        before_send=self._sentry_before_send,
    )

    with sentry_sdk.configure_scope() as scope:
        reference = self._crawl_reference
        if reference:
            scope.set_tag("CRAWL_REFERENCE", reference)
def init_log():
    """Configure application logging.

    * Raises the ``chardet`` and ``urllib3`` standard-library loggers to
      WARNING.
    * Installs an intercepting handler on the root standard-library logger
      so that its records are re-emitted through ``logger``.
    * Adds the Sentry event and breadcrumb handlers (both at ERROR) and a
      rotating ``logs/spider.log`` file sink to ``logger``.
    * When ``core_env`` is ``"dev"``, removes sink ``0`` and adds a
      TRACE-level ``sys.stderr`` sink instead.
    """
    logging.getLogger("chardet").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)

    class InterceptHandler(logging.Handler):
        """Forward standard-library log records to ``logger``."""

        def emit(self, record):
            # Standard-library level names (e.g. "Level 25" or custom
            # levels) are not necessarily registered with ``logger``;
            # passing an unknown name would raise ValueError. Fall back to
            # the numeric level in that case.
            try:
                level = logger.level(record.levelname).name
            except ValueError:
                level = record.levelno

            # Retrieve context where the logging call occurred, this
            # happens to be in the 6th frame upward
            logger_opt = logger.opt(depth=6, exception=record.exc_info)
            logger_opt.log(level, record.getMessage())

    logging.basicConfig(handlers=[InterceptHandler()], level=logging.DEBUG)

    logger.add(EventHandler(), format="{message}", level="ERROR")
    logger.add(BreadcrumbHandler(), format="{message}", level="ERROR")
    logger.add(
        "logs/spider.log",
        rotation="1 day",
        retention="5 days",
        enqueue=True,
    )

    if core_env == "dev":
        logger.remove(0)
        logger.add(sys.stderr, level="TRACE")
def configure():
    """Route standard logging through ``logger`` and register all sinks.

    Installs ``InterceptHandler`` on the root standard-library logger,
    replaces ``debug``/``warning`` on the logger's class, clears the
    existing sinks, adds the Sentry sinks when ``Sentry.dsn`` is set, adds
    every entry of ``LoggingHandlers``, and finally registers the
    (logger name, message text) pairs below with ``log_filter``.
    """
    # noinspection PyArgumentList
    logging.basicConfig(handlers=[InterceptHandler()], level=0)

    # monkey patching
    logger_cls = logger.__class__
    logger_cls.debug = debug
    logger_cls.warning = warning

    logger.remove()

    if Sentry.dsn:
        # Each Sentry handler is created and registered at the same level.
        sentry_sinks = (
            (BreadcrumbHandler, logging.DEBUG),
            (EventHandler, logging.ERROR),
        )
        for sink_cls, threshold in sentry_sinks:
            logger.add(
                sink_cls(level=threshold),
                level=threshold,
                format="{name} - {message}",
                backtrace=False,
                diagnose=False,
            )

    for sink_options in LoggingHandlers:
        logger.add(**sink_options)

    filtered_messages = (
        ("discord.client", "Dispatching event "),
        ("discord.gateway", "WebSocket Event: "),
        ("discord.gateway", "websocket alive with sequence"),
        ("discord.gateway", "Unknown event "),
        ("discord.http", "has received"),
        ("discord.http", "has returned"),
        ("PIL.TiffImagePlugin", "tag: "),
    )
    for logger_name, text in filtered_messages:
        log_filter.add_filter(logger_name, text)
class MPLogger(object):
    """Configure OpenWPM logging across processes"""

    def __init__(self, log_file, crawl_context=None,
                 log_level_console=logging.INFO,
                 log_level_file=logging.DEBUG,
                 log_level_sentry_breadcrumb=logging.DEBUG,
                 log_level_sentry_event=logging.ERROR):
        """Set up console, file and (optionally) Sentry logging.

        Parameters
        ----------
        log_file : str
            Path of the log file; ``~`` is expanded.
        crawl_context : dict, optional
            When set, its ``s3_bucket`` / ``s3_directory`` entries are used
            to tag Sentry events.
        log_level_console, log_level_file : int
            Minimum levels for the console and file outputs.
        log_level_sentry_breadcrumb, log_level_sentry_event : int
            Minimum levels for Sentry breadcrumbs and events.
        """
        self._crawl_context = crawl_context
        self._log_level_console = log_level_console
        self._log_level_file = log_level_file
        self._log_level_sentry_breadcrumb = log_level_sentry_breadcrumb
        self._log_level_sentry_event = log_level_sentry_event

        # Configure log handlers
        self._status_queue = JoinableQueue()
        self._log_file = os.path.expanduser(log_file)
        self._initialize_loggers()

        # Configure sentry (if available)
        # NOTE(review): the listener thread started by _initialize_loggers
        # reads self._sentry_dsn and the Sentry handlers; a record arriving
        # before they are assigned here would raise AttributeError --
        # confirm whether that window matters in practice.
        self._sentry_dsn = os.getenv('SENTRY_DSN', None)
        if self._sentry_dsn:
            self._initialize_sentry()

    def _initialize_loggers(self):
        """Set up console logging and serialized file logging.

        The logger and socket handler are set to log at the logging.DEBUG
        level and filtering happens at the outputs (console, file, and
        sentry)."""
        logger = logging.getLogger('openwpm')
        logger.setLevel(logging.DEBUG)

        # Remove any previous handlers to avoid registering duplicates
        if logger.handlers:
            logger.handlers = []

        # Start file handler and listener thread (for serialization)
        handler = logging.FileHandler(self._log_file)
        formatter = logging.Formatter(
            "%(asctime)s - %(processName)-11s[%(threadName)-10s]"
            "- %(module)-20s - %(levelname)-8s: %(message)s")
        handler.setFormatter(formatter)
        handler.setLevel(self._log_level_file)
        self._file_handler = handler

        self._listener = threading.Thread(target=self._start_listener)
        self._listener.daemon = True
        self._listener.start()
        # The listener publishes its socket address on the status queue.
        self.logger_address = self._status_queue.get(timeout=60)
        self._status_queue.task_done()

        # Attach console handler to log to console
        consoleHandler = logging.StreamHandler(sys.stdout)
        consoleHandler.setLevel(self._log_level_console)
        formatter = logging.Formatter(
            '%(module)-20s - %(levelname)-8s - %(message)s')
        consoleHandler.setFormatter(formatter)
        logger.addHandler(consoleHandler)

        # Attach socket handler to logger to serialize writes to file
        socketHandler = ClientSocketHandler(*self.logger_address)
        socketHandler.setLevel(logging.DEBUG)
        logger.addHandler(socketHandler)

    def _sentry_before_send(self, event, hint):
        """Update sentry events before they are sent

        Note: we want to be very conservative in handling errors here. If
        this method throws an error, Sentry silently discards it and no
        record is sent. It's much better to have Sentry send an unparsed
        error than no error.
        """

        # Strip "BROWSER X: " prefix to clean up logs
        if 'logentry' in event and 'message' in event['logentry']:
            if re.match(BROWSER_PREFIX, event['logentry']['message']):
                event['logentry']['message'] = re.sub(
                    BROWSER_PREFIX, '', event['logentry']['message'])

        # Use the traceback text as the log message for logs that carry
        # one. A missing key, a non-dict container or a non-string
        # exception value must not propagate, or the event would be lost.
        try:
            event['logentry']['message'] = event['extra']['exception'].strip()
        except (KeyError, AttributeError, TypeError):
            pass

        # Combine neterrors of the same type
        try:
            if 'about:neterror' in event['extra']['exception']:
                error_text = parse_neterror(event['extra']['exception'])
                event['fingerprint'] = ['neterror-%s' % error_text]
        except Exception:
            # Deliberately best-effort: see the note in the docstring.
            pass

        return event

    def _initialize_sentry(self):
        """If running a cloud crawl, we can pull the sentry endpoint
        and related config variables from the environment"""
        self._breadcrumb_handler = BreadcrumbHandler(
            level=self._log_level_sentry_breadcrumb)
        self._event_handler = EventHandler(level=self._log_level_sentry_event)
        sentry_sdk.init(dsn=self._sentry_dsn,
                        before_send=self._sentry_before_send)
        with sentry_sdk.configure_scope() as scope:
            if self._crawl_context:
                scope.set_tag(
                    'CRAWL_REFERENCE', '%s/%s' %
                    (self._crawl_context.get('s3_bucket', 'UNKNOWN'),
                     self._crawl_context.get('s3_directory', 'UNKNOWN')))

    def _start_listener(self):
        """Start listening socket for remote logs from extension"""
        socket = serversocket(name="loggingserver")
        self._status_queue.put(socket.sock.getsockname())
        socket.start_accepting()
        self._status_queue.join()  # block to allow parent to retrieve address

        while True:
            # Check for shutdown
            if not self._status_queue.empty():
                self._status_queue.get()
                socket.close()
                time.sleep(3)  # TODO: the socket needs a better way of closing
                # Drain whatever is still queued before acknowledging.
                while not socket.queue.empty():
                    obj = socket.queue.get()
                    self._process_record(obj)
                self._status_queue.task_done()
                break

            # Process logs
            try:
                obj = socket.queue.get(True, 10)
                self._process_record(obj)
            except EmptyQueue:
                pass

    def _process_record(self, obj):
        """Dispatch a received object: ``('EXT', json)`` pairs are
        extension logs, anything else is a serialized log record."""
        if len(obj) == 2 and obj[0] == 'EXT':
            self._handle_extension_log(obj)
        else:
            self._handle_serialized_writes(obj)

    def _handle_extension_log(self, obj):
        """Pass messages received from the extension to logger"""
        obj = json.loads(obj[1])
        record = logging.LogRecord(name=__name__,
                                   level=obj['level'],
                                   pathname=obj['pathname'],
                                   lineno=obj['lineno'],
                                   msg=obj['msg'],
                                   args=obj['args'],
                                   exc_info=obj['exc_info'],
                                   func=obj['func'])
        logger = logging.getLogger('openwpm')
        logger.handle(record)

    def _handle_serialized_writes(self, obj):
        """Handle records that must be serialized to the main process

        This is currently records that are written to a file on disk
        and those sent to Sentry. Each output only receives records at or
        above its own level.
        """
        # NOTE(security): dill.loads executes arbitrary code embedded in
        # its input; this is only safe if everything that can write to the
        # logging socket is trusted.
        if obj['exc_info']:
            obj['exc_info'] = dill.loads(obj['exc_info'])
        if obj['args']:
            obj['args'] = dill.loads(obj['args'])
        record = logging.makeLogRecord(obj)

        # Handler.emit() does not consult the handler's level, so the check
        # has to be made here for log_level_file to take effect.
        if record.levelno >= self._file_handler.level:
            self._file_handler.emit(record)

        if self._sentry_dsn:
            if record.levelno >= self._breadcrumb_handler.level:
                self._breadcrumb_handler.handle(record)
            if record.levelno >= self._event_handler.level:
                self._event_handler.handle(record)

    def close(self):
        """Signal the listener thread to shut down and wait for it."""
        self._status_queue.put("SHUTDOWN")
        self._status_queue.join()
        self._listener.join()
from nonebot.log import logger
from sentry_sdk.integrations.logging import EventHandler, BreadcrumbHandler

from .config import Config

driver = get_driver()
global_config = driver.config
config = Config(**global_config.dict())
assert config.sentry_dsn, "Sentry DSN must provided!"

# Every config entry except "sentry_environment" is forwarded to the SDK with
# its first seven characters dropped (presumably the "sentry_" prefix); the
# environment falls back to the driver's env when not configured.
sentry_sdk.init(
    **{
        option[7:]: setting
        for option, setting in config.dict().items()
        if option != "sentry_environment"
    },
    environment=config.sentry_environment or driver.env,
    default_integrations=False,
)


class Filter:
    """Callable that accepts records at or above a named log level."""

    def __init__(self, level="INFO") -> None:
        self.level = level

    def __call__(self, record):
        # The threshold is looked up on every call rather than cached.
        threshold = logger.level(self.level).no
        return threshold <= record["level"].no


logger.add(EventHandler("ERROR"), filter=Filter("ERROR"))
logger.add(BreadcrumbHandler("INFO"), filter=Filter("INFO"))