def strip_tags(html):
    s = HTMLTagStripper()
    try:
        s.feed(html)
    except HTMLParseError:
        get_logger().error('error stripping tags', raw_html=html)
    return s.get_data()

def publish(self, **kwargs):
    def check_schema(**kwargs):
        for kw in kwargs:
            assert kw in self.schema

    try:
        check_schema(**kwargs)
        self.value.update(kwargs or {})
        # If we got a 'heartbeat_at' datetime argument, publish this
        # heartbeat with that timestamp.
        if 'heartbeat_at' in kwargs and \
                isinstance(kwargs['heartbeat_at'], datetime):
            epoch = time.mktime(kwargs.get('heartbeat_at').timetuple())
            self.heartbeat_at = epoch
            self.value['heartbeat_at'] = str(kwargs['heartbeat_at'])
        else:
            self.heartbeat_at = time.time()
            self.value['heartbeat_at'] = str(datetime.fromtimestamp(
                self.heartbeat_at))
        self.store.publish(
            self.key, self.device_id, json.dumps(self.value),
            self.heartbeat_at)
        if 'action' in self.value:
            del self.value['action']
    except Exception:
        log = get_logger()
        log.error('Error while writing the heartbeat status',
                  account_id=self.key.account_id,
                  folder_id=self.key.folder_id,
                  device_id=self.device_id,
                  exc_info=True)

def __init__(self, cpu_id, total_cpus, poll_interval=10):
    self.keep_running = True
    self.host = platform.node()
    self.cpu_id = cpu_id
    self.total_cpus = total_cpus
    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(cpu_id=cpu_id)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval
    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)

    self.sync_hosts_for_shards = {}
    for database in config['DATABASE_HOSTS']:
        for shard in database['SHARDS']:
            # If no sync hosts are explicitly configured for the shard,
            # then try to steal from it. That way if you turn up a new
            # shard without properly allocating sync hosts to it, accounts
            # on it will still be started.
            self.sync_hosts_for_shards[shard['ID']] = shard.get(
                'SYNC_HOSTS') or [self.host]

def __init__(self, process_identifier, cpu_id, poll_interval=10):
    self.host = platform.node()
    self.cpu_id = cpu_id
    self.process_identifier = process_identifier
    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(cpu_id=cpu_id)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval
    self.semaphore = BoundedSemaphore(1)
    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self.zone = config.get('ZONE')
    self.queue_client = QueueClient(self.zone)

def __init__(self, process_identifier, cpu_id, poll_interval=10):
    self.host = platform.node()
    self.cpu_id = cpu_id
    self.process_identifier = process_identifier
    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(cpu_id=cpu_id)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval
    self.semaphore = BoundedSemaphore(1)
    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self.zone = config.get('ZONE')
    self.queue_client = QueueClient(self.zone)

    # We call cpu_percent in a non-blocking way. Because of the way
    # this function works, it'll always return 0.0 the first time
    # we call it. See: https://pythonhosted.org/psutil/#psutil.cpu_percent
    # for more details.
    psutil.cpu_percent(percpu=True)

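# Illustration (not from the original source) of the priming behavior the
# constructor above relies on: psutil.cpu_percent(percpu=True) compares CPU
# times against the previous call, so the very first non-blocking call has
# no baseline and returns 0.0 for every CPU.
import time

import psutil

print(psutil.cpu_percent(percpu=True))  # first call: [0.0, 0.0, ...]
time.sleep(1)
print(psutil.cpu_percent(percpu=True))  # now: usage since the last call
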
def __init__(self, account):
    self.account_id = account.id
    self.log = get_logger()
    self.log.bind(account_id=account.id)

    if isinstance(account, GenericAccount):
        self.smtp_username = account.smtp_username
        self.ssl_required = account.ssl_required
    else:
        # Non-generic accounts have no smtp_username, ssl_required.
        self.smtp_username = account.email_address
        self.ssl_required = True

    self.email_address = account.email_address
    self.provider_name = account.provider
    self.sender_name = account.name
    self.smtp_endpoint = account.smtp_endpoint
    self.auth_type = provider_info(self.provider_name)['auth']

    if self.auth_type == 'oauth2':
        try:
            self.auth_token = token_manager.get_token(account)
        except OAuthError:
            raise SendMailException(
                'Could not authenticate with the SMTP server.', 403)
    else:
        assert self.auth_type == 'password'
        if isinstance(account, GenericAccount):
            self.auth_token = account.smtp_password
        else:
            # Non-generic accounts have no smtp_password.
            self.auth_token = account.password

def default_json_error(ex):
    """Exception -> Flask JSON responder."""
    logger = get_logger()
    logger.error("Uncaught error thrown by Flask/Werkzeug", exc_info=ex)
    response = jsonify(message=str(ex), type="api_error")
    response.status_code = ex.code if isinstance(ex, HTTPException) else 500
    return response

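# A minimal sketch of how a handler like this is typically wired up; the
# `app` object and the choice to cover every standard HTTP error code are
# assumptions for illustration, not taken from the original source.
from flask import Flask
from werkzeug.exceptions import default_exceptions

app = Flask(__name__)

# Route both unexpected exceptions and the standard HTTP errors through
# the JSON responder so API clients always receive a JSON body.
app.register_error_handler(Exception, default_json_error)
for code in default_exceptions:
    app.register_error_handler(code, default_json_error)
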
def __init__(self, process_identifier, process_number,
             poll_interval=SYNC_POLL_INTERVAL):
    self.host = platform.node()
    self.process_number = process_number
    self.process_identifier = process_identifier
    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(process_number=process_number)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval
    self.semaphore = BoundedSemaphore(1)
    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self.zone = config.get('ZONE')
    self.queue_client = QueueClient(self.zone)
    self.rolling_cpu_counts = collections.deque(maxlen=NUM_CPU_SAMPLES)
    self.last_unloaded_account = time.time()

    # Fill the queue with initial values.
    null_cpu_values = [0.0 for cpu in psutil.cpu_percent(percpu=True)]
    for i in range(NUM_CPU_SAMPLES):
        self.rolling_cpu_counts.append(null_cpu_values)

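# A hedged sketch of how the rolling per-CPU samples collected above might
# be consumed; the method name `_smoothed_cpu_usage` and the averaging
# policy are illustrative assumptions, not the original implementation.
def _smoothed_cpu_usage(self):
    # Take a fresh non-blocking sample, then average each CPU's usage
    # across the last NUM_CPU_SAMPLES polls to damp out transient spikes.
    self.rolling_cpu_counts.append(psutil.cpu_percent(percpu=True))
    samples = list(self.rolling_cpu_counts)
    return [sum(per_cpu) / len(samples) for per_cpu in zip(*samples)]
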
def receive_checkout(dbapi_connection, connection_record, connection_proxy):
    '''Log checkedout and overflow when a connection is checked out.'''
    hostname = gethostname().replace(".", "-")
    process_name = str(config.get("PROCESS_NAME", "unknown"))

    statsd_client.gauge(".".join(
        ["dbconn", database_name, hostname, process_name, "checkedout"]),
        connection_proxy._pool.checkedout())

    statsd_client.gauge(".".join(
        ["dbconn", database_name, hostname, process_name, "overflow"]),
        connection_proxy._pool.overflow())

    # Keep track of where and why this connection was checked out.
    log = get_logger()
    context = log._context._dict.copy()
    f, name = find_first_app_frame_and_name(ignores=['sqlalchemy',
                                                     'inbox.ignition',
                                                     'nylas.logging'])
    source = '{}:{}'.format(name, f.f_lineno)

    pool_tracker[dbapi_connection] = {
        'source': source,
        'context': context,
        'checkedout_at': time.time()
    }

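# Sketch of how a listener with this signature is registered on an engine's
# connection pool via SQLAlchemy's event system; `engine` is an assumed
# create_engine() result, not taken from the original source.
from sqlalchemy import event

event.listen(engine, 'checkout', receive_checkout)
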
def test_use_starttls():
    conn = SMTPConnection(account_id=1,
                          email_address='*****@*****.**',
                          auth_type='password',
                          auth_token='secret_password',
                          smtp_endpoint=('smtp.gmail.com', 587),
                          log=get_logger())
    assert isinstance(conn.connection, smtplib.SMTP)

def __init__(self, max_blocking_time=MAX_BLOCKING_TIME):
    self.max_blocking_time = max_blocking_time
    self.time_spent_by_context = collections.defaultdict(float)
    self.total_switches = 0
    self._last_switch_time = None
    self._switch_flag = False
    self._active_greenlet = None
    self._main_thread_id = gevent._threading.get_ident()
    self._hub = gevent.hub.get_hub()
    self.log = get_logger()

def publish(self, **kwargs):
    try:
        self.heartbeat_at = time.time()
        self.store.publish(self.key, self.heartbeat_at)
    except Exception:
        log = get_logger()
        log.error('Error while writing the heartbeat status',
                  account_id=self.key.account_id,
                  folder_id=self.key.folder_id,
                  device_id=self.device_id,
                  exc_info=True)

def test_use_starttls():
    conn = SMTPConnection(
        account_id=1,
        email_address="*****@*****.**",
        smtp_username="******",
        auth_type="password",
        auth_token="secret_password",
        smtp_endpoint=("smtp.gmail.com", 587),
        log=get_logger(),
    )
    assert isinstance(conn.connection, smtplib.SMTP)

def test_use_smtp_over_ssl():
    # Auth won't actually work but we just want to test connection
    # initialization here and below.
    SMTPConnection.smtp_password = mock.Mock()
    conn = SMTPConnection(account_id=1,
                          email_address='*****@*****.**',
                          auth_type='password',
                          auth_token='secret_password',
                          smtp_endpoint=('smtp.gmail.com', 465),
                          log=get_logger())
    assert isinstance(conn.connection, smtplib.SMTP_SSL)

def start():
    g.log = get_logger()
    try:
        watch_state = request.headers[GOOGLE_RESOURCE_STATE_STRING]
        g.watch_channel_id = request.headers[GOOGLE_CHANNEL_ID_STRING]
        g.watch_resource_id = request.headers[GOOGLE_RESOURCE_ID_STRING]
    except KeyError:
        raise InputError('Malformed headers')

    if watch_state == 'sync':
        return resp(204)

def test_use_plain():
    ssl = True
    with pytest.raises(SendMailException):
        conn = SMTPConnection(account_id=1,
                              email_address='*****@*****.**',
                              smtp_username='******',
                              auth_type='password',
                              auth_token='testpwd',
                              smtp_endpoint=('tivertical.com', 587),
                              ssl_required=ssl,
                              log=get_logger())

    ssl = False
    conn = SMTPConnection(account_id=1,
                          email_address='*****@*****.**',
                          smtp_username='******',
                          auth_type='password',
                          auth_token='testpwd',
                          smtp_endpoint=('tivertical.com', 587),
                          ssl_required=ssl,
                          log=get_logger())
    assert isinstance(conn.connection, smtplib.SMTP)

def start():
    g.db_session = new_session(engine)
    g.log = get_logger()
    try:
        watch_state = request.headers[GOOGLE_RESOURCE_STATE_STRING]
        g.watch_channel_id = request.headers[GOOGLE_CHANNEL_ID_STRING]
        g.watch_resource_id = request.headers[GOOGLE_RESOURCE_ID_STRING]
    except KeyError:
        raise InputError("Malformed headers")

    if watch_state == "sync":
        return resp(204)

def test_handle_disconnect(monkeypatch, smtp_port):
    def simulate_disconnect(self):
        raise smtplib.SMTPServerDisconnected()

    monkeypatch.setattr('smtplib.SMTP.rset', simulate_disconnect)
    monkeypatch.setattr('smtplib.SMTP.mail', lambda *args: (550, 'NOPE'))
    conn = SMTPConnection(account_id=1,
                          email_address='*****@*****.**',
                          auth_type='password',
                          auth_token='secret_password',
                          smtp_endpoint=('smtp.gmail.com', smtp_port),
                          log=get_logger())
    with pytest.raises(smtplib.SMTPSenderRefused):
        conn.sendmail(['*****@*****.**'], 'hello there')

def has_contacts_and_events(account_id):
    try:
        client = get_redis_client(STATUS_DATABASE)
        batch_client = client.pipeline()
        batch_client.exists(HeartbeatStatusKey.contacts(account_id))
        batch_client.exists(HeartbeatStatusKey.events(account_id))
        values = batch_client.execute()
        return (values[0], values[1])
    except Exception:
        log = get_logger()
        log.error('Error while reading the heartbeat status',
                  account_id=account_id, exc_info=True)
        return (False, False)

def test_root_filelogger(logfile):
    logger = get_logger()
    logger.info('INFO')
    logger.warning('WARNING')
    logger.error('ERROR')
    # NOTE: This slurps the whole logfile. Hope it's not big.
    log_contents = logfile.read()

    loglines = [json.loads(l) for l in log_contents.strip().split('\n')]
    assert [l['event'] for l in loglines] == ['INFO', 'WARNING', 'ERROR']
    for l in loglines:
        assert l['module'].startswith(__name__)
        assert 'greenlet_id' in l

def timed_fn(self, *args, **kwargs):
    start_time = time.time()
    ret = fn(self, *args, **kwargs)
    # Some modules (e.g. gmail.py) don't have self.log; fall back to the
    # root logger so timing info is never dropped. (The original try/except
    # left fn_logger unbound when self.log existed but was falsy.)
    fn_logger = getattr(self, 'log', None) or get_logger()
    fn_logger.info('[timer] {0} took {1:.3f} seconds.'.format(
        str(fn), float(time.time() - start_time)))
    return ret

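# For context, a hedged sketch of the enclosing decorator this wrapper
# plausibly belongs to (the original only shows the inner function); the
# name `timed` and the use of functools.wraps are illustrative assumptions.
import functools
import time

def timed(fn):
    @functools.wraps(fn)
    def timed_fn(self, *args, **kwargs):
        start_time = time.time()
        ret = fn(self, *args, **kwargs)
        fn_logger = getattr(self, 'log', None) or get_logger()
        fn_logger.info('[timer] {0} took {1:.3f} seconds.'.format(
            str(fn), float(time.time() - start_time)))
        return ret
    return timed_fn
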
def test_handle_disconnect(monkeypatch, smtp_port):
    def simulate_disconnect(self):
        raise smtplib.SMTPServerDisconnected()

    monkeypatch.setattr("smtplib.SMTP.rset", simulate_disconnect)
    monkeypatch.setattr("smtplib.SMTP.mail", lambda *args: (550, "NOPE"))

    conn = SMTPConnection(
        account_id=1,
        email_address="*****@*****.**",
        smtp_username="******",
        auth_type="password",
        auth_token="secret_password",
        smtp_endpoint=("smtp.gmail.com", smtp_port),
        log=get_logger(),
    )
    with pytest.raises(smtplib.SMTPSenderRefused):
        conn.sendmail(["*****@*****.**"], "hello there")

def __init__(self, max_blocking_time=MAX_BLOCKING_TIME,
             sampling_interval=GREENLET_SAMPLING_INTERVAL,
             logging_interval=LOGGING_INTERVAL):
    self.max_blocking_time = max_blocking_time
    self.sampling_interval = sampling_interval
    self.logging_interval = logging_interval
    self.time_spent_by_context = collections.defaultdict(float)
    self.total_switches = 0
    self._last_switch_time = None
    self._switch_flag = False
    self._active_greenlet = None
    self._main_thread_id = gevent._threading.get_ident()
    self._hub = gevent.hub.get_hub()
    self.loadavg_1 = 0
    self.loadavg_5 = 0
    self.loadavg_15 = 0
    self.log = get_logger()

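# A minimal sketch (assumed, not from the original source) of how a tracer
# like this hooks greenlet switches: greenlet.settrace installs a callback
# invoked on every switch/throw, which is where fields like
# _active_greenlet and _switch_flag would be updated.
import greenlet

def install_tracer(tracer):
    # greenlet invokes the callback with ('switch', (origin, target)) or
    # ('throw', (origin, target)) on every greenlet transition.
    def on_switch(event, args):
        if event in ('switch', 'throw'):
            origin, target = args
            tracer._active_greenlet = target
            tracer._switch_flag = True
            tracer.total_switches += 1
    greenlet.settrace(on_switch)
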
def __init__(self, account):
    self.account_id = account.id
    self.log = get_logger()
    self.log.bind(account_id=account.id)
    self.email_address = account.email_address
    self.provider_name = account.provider
    self.sender_name = account.name
    self.smtp_endpoint = account.smtp_endpoint
    self.auth_type = provider_info(self.provider_name,
                                   self.email_address)["auth"]

    if self.auth_type == "oauth2":
        try:
            self.auth_token = token_manager.get_token(account)
        except OAuthError:
            raise SendMailException(
                "Could not authenticate with the SMTP server.", 403)
    else:
        assert self.auth_type == "password"
        self.auth_token = account.password

def __init__(self, process_identifier, process_number,
             poll_interval=SYNC_POLL_INTERVAL):
    self.host = platform.node()
    self.process_number = process_number
    self.process_identifier = process_identifier
    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(process_number=process_number)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    # Randomize the poll_interval so we maintain at least a little fairness
    # when using a timeout while blocking on the redis queues.
    min_poll_interval = 5
    self.poll_interval = int((random.random() *
                              (poll_interval - min_poll_interval)) +
                             min_poll_interval)
    self.semaphore = BoundedSemaphore(1)
    self.zone = config.get('ZONE')

    # Note that we don't partition by zone for the private queues. There's
    # not really a reason to, since there's one queue per machine anyway.
    # Also, if you really want to send an Account to a mailsync machine in
    # another zone you can do so.
    self.private_queue = EventQueue(
        SYNC_EVENT_QUEUE_NAME.format(self.process_identifier))
    self.queue_group = EventQueueGroup([
        shared_sync_event_queue_for_zone(self.zone),
        self.private_queue,
    ])

    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self._pending_avgs_provider = None
    self.last_unloaded_account = time.time()

def __init__(self, cpu_id, total_cpus, poll_interval=1):
    self.keep_running = True
    self.host = platform.node()
    self.cpu_id = cpu_id
    self.total_cpus = total_cpus
    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(cpu_id=cpu_id)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval

def callback(e):
    is_transient = isinstance(e, TRANSIENT_NETWORK_ERRS)
    mysql_error = None

    log = logger or get_logger()

    if isinstance(e, _mysql_exceptions.OperationalError):
        mysql_error = e
    elif isinstance(e, StatementError) and isinstance(
            e.orig, _mysql_exceptions.OperationalError):
        mysql_error = e.orig

    if mysql_error:
        for msg in TRANSIENT_MYSQL_MESSAGES:
            if msg in mysql_error.message:
                is_transient = True

    if is_transient:
        occurrences[0] += 1
        if occurrences[0] < 20:
            return
    else:
        occurrences[0] = 1

    if account_id:
        try:
            with session_scope(account_id) as db_session:
                account = db_session.query(Account).get(account_id)
                sync_error = account.sync_error
                if not sync_error or isinstance(sync_error, basestring):
                    account.update_sync_error(e)
                    db_session.commit()
        # Use `except Exception` rather than a bare except so we don't
        # swallow KeyboardInterrupt/SystemExit here.
        except Exception:
            log.error(
                "Error saving sync_error to account object",
                account_id=account_id,
                **create_error_log_context(sys.exc_info())
            )

    log_uncaught_errors(logger, account_id=account_id, provider=provider,
                        occurrences=occurrences[0])

def __init__(self, process_identifier, cpu_id,
             poll_interval=SYNC_POLL_INTERVAL):
    self.host = platform.node()
    self.cpu_id = cpu_id
    self.process_identifier = process_identifier
    self.monitor_cls_for = {mod.PROVIDER: getattr(
        mod, mod.SYNC_MONITOR_CLS) for mod in module_registry.values()
        if hasattr(mod, 'SYNC_MONITOR_CLS')}

    for p_name, p in providers.iteritems():
        if p_name not in self.monitor_cls_for:
            self.monitor_cls_for[p_name] = self.monitor_cls_for["generic"]

    self.log = get_logger()
    self.log.bind(cpu_id=cpu_id)
    self.log.info('starting mail sync process',
                  supported_providers=module_registry.keys())

    self.syncing_accounts = set()
    self.email_sync_monitors = {}
    self.contact_sync_monitors = {}
    self.event_sync_monitors = {}
    self.poll_interval = poll_interval
    self.semaphore = BoundedSemaphore(1)
    self.stealing_enabled = config.get('SYNC_STEAL_ACCOUNTS', True)
    self.zone = config.get('ZONE')
    self.queue_client = QueueClient(self.zone)
    self.rolling_cpu_counts = collections.deque(maxlen=NUM_CPU_SAMPLES)

    # Fill the queue with initial values. Because of the way
    # cpu_percent works, it'll always return 0.0 the first time
    # we call it. See: https://pythonhosted.org/psutil/#psutil.cpu_percent
    # for more details.
    null_cpu_values = psutil.cpu_percent(percpu=True)
    for i in range(NUM_CPU_SAMPLES):
        self.rolling_cpu_counts.append(null_cpu_values)

def __init__(self, account):
    self.account = account
    self.account_id = account.id
    self.log = get_logger().new(account_id=account.id, component="search")

import time
import weakref
from socket import gethostname
from urllib import quote_plus as urlquote
from warnings import filterwarnings

import gevent
from sqlalchemy import create_engine, event

from inbox.sqlalchemy_ext.util import ForceStrictMode
from inbox.config import config
from inbox.util.stats import statsd_client
from nylas.logging import get_logger, find_first_app_frame_and_name

filterwarnings('ignore', message='Invalid utf8mb4 character string')

log = get_logger()

DB_POOL_SIZE = config.get_required('DB_POOL_SIZE')
# Sane default of max overflow=5 if value missing in config.
DB_POOL_MAX_OVERFLOW = config.get('DB_POOL_MAX_OVERFLOW') or 5
DB_POOL_TIMEOUT = config.get('DB_POOL_TIMEOUT') or 60

pool_tracker = weakref.WeakKeyDictionary()


# See
# https://github.com/PyMySQL/mysqlclient-python/blob/master/samples/waiter_gevent.py
def gevent_waiter(fd, hub=gevent.hub.get_hub()):
    hub.wait(hub.loop.io(fd, 1))

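# Hedged sketch of how gevent_waiter would be passed through to mysqlclient
# so blocking DB reads cooperatively yield to the gevent hub; the URI and
# pool wiring here are illustrative assumptions, not the original setup.
engine = create_engine(
    'mysql+mysqldb://user:password@localhost/inbox',  # assumed URI
    pool_size=DB_POOL_SIZE,
    max_overflow=DB_POOL_MAX_OVERFLOW,
    pool_timeout=DB_POOL_TIMEOUT,
    connect_args={'waiter': gevent_waiter},  # mysqlclient polls via gevent
)
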
#!/usr/bin/env python
# We previously didn't store IMAP path separators for generic IMAP accounts.
# This script backfixes the accounts.

import click

from inbox.crispin import connection_pool
from nylas.logging import get_logger, configure_logging
from inbox.models.backends.generic import GenericAccount
from inbox.models.session import (session_scope, global_session_scope,
                                  session_scope_by_shard_id)

configure_logging()
log = get_logger(purpose='separator-backfix')


@click.command()
@click.option('--min-id', type=int, default=None)
@click.option('--max-id', type=int, default=None)
@click.option('--shard-id', type=int, default=None)
def main(min_id, max_id, shard_id):
    generic_accounts = []
    failed = []

    if min_id is not None or max_id is not None:
        # Get the list of running generic IMAP accounts (the original
        # comment said "Gmail accounts", but the query is over
        # GenericAccount).
        with global_session_scope() as db_session:
            generic_accounts = db_session.query(GenericAccount).filter(
                GenericAccount.sync_state == 'running')

            if min_id is not None:

import re
import ssl
import base64
import socket
import itertools
import smtplib

from nylas.logging import get_logger
log = get_logger()

from inbox.models.session import session_scope
from inbox.models.backends.imap import ImapAccount
from inbox.models.backends.oauth import token_manager as default_token_manager
from inbox.models.backends.gmail import g_token_manager
from inbox.models.backends.generic import GenericAccount
from inbox.sendmail.base import generate_attachments, SendMailException
from inbox.sendmail.message import create_email
from inbox.basicauth import OAuthError
from inbox.providers import provider_info
from inbox.util.blockstore import get_from_blockstore

# TODO[k]: Other types (LOGIN, XOAUTH, PLAIN-CLIENTTOKEN, CRAM-MD5)
AUTH_EXTNS = {'oauth2': 'XOAUTH2',
              'password': '******'}

SMTP_MAX_RETRIES = 1
# Timeout in seconds for blocking operations. If no timeout is specified,
# attempts to, say, connect to the wrong port may hang forever.
SMTP_TIMEOUT = 45
SMTP_OVER_SSL_PORT = 465
SMTP_OVER_SSL_TEST_PORT = 64465

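# A minimal sketch (an assumption, not this module's actual implementation)
# of the connection policy the constants above and the tests elsewhere in
# this section imply: port 465 speaks SMTP-over-SSL from the first byte,
# while other ports (e.g. 587) open plaintext and upgrade via STARTTLS.
def connect_smtp(host, port):
    if port in (SMTP_OVER_SSL_PORT, SMTP_OVER_SSL_TEST_PORT):
        return smtplib.SMTP_SSL(host, port, timeout=SMTP_TIMEOUT)
    connection = smtplib.SMTP(host, port, timeout=SMTP_TIMEOUT)
    connection.ehlo()
    if connection.has_extn('starttls'):
        connection.starttls()
        connection.ehlo()  # Re-identify over the now-encrypted channel.
    return connection
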
talking to the same database backend things could go really badly.
"""
from collections import defaultdict
from datetime import datetime
import random
import weakref

import gevent
from gevent.event import Event
from gevent.queue import Queue
from gevent.coros import BoundedSemaphore

from nylas.logging import get_logger
from nylas.logging.sentry import log_uncaught_errors
logger = get_logger()

from inbox.crispin import writable_connection_pool
from inbox.ignition import engine_manager
from inbox.util.concurrency import retry_with_logging
from inbox.models.session import session_scope, session_scope_by_shard_id
from inbox.models import ActionLog
from inbox.util.misc import DummyContextManager
from inbox.util.stats import statsd_client
from inbox.actions.base import (mark_unread, mark_starred, move,
                                change_labels, save_draft, update_draft,
                                delete_draft, save_sent_email,

def _monitoring_thread(self):
    # Logger needs to be instantiated in the new thread.
    self.log = get_logger()
    retry_with_logging(self._run_impl, self.log)