Example #1
    def _report_transactions_latency(self, latency):
        metric_names = [
            "inbox-contacts-search.transactions.latency",
        ]

        for metric in metric_names:
            statsd_client.timing(metric, latency)
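All of these snippets assume a module-level statsd_client that exposes timing(), incr(), and gauge(); none of them show how it is constructed. A minimal sketch, assuming the community statsd package (the host and port below are placeholders, not values taken from this code):

import statsd

# Placeholder host/port; the snippets never show the real statsd endpoint.
statsd_client = statsd.StatsClient(host='localhost', port=8125)

# timing() takes a metric name and a latency in milliseconds, which is why
# the snippets multiply seconds by 1000 before reporting.
statsd_client.timing('inbox-contacts-search.transactions.latency', 42)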
Example #2
def _save_to_s3(data_sha256, data):
    assert 'AWS_ACCESS_KEY_ID' in config, 'Need AWS key!'
    assert 'AWS_SECRET_ACCESS_KEY' in config, 'Need AWS secret!'
    assert 'MESSAGE_STORE_BUCKET_NAME' in config, \
        'Need bucket name to store message data!'

    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(config.get('AWS_ACCESS_KEY_ID'),
                        config.get('AWS_SECRET_ACCESS_KEY'))
    bucket = conn.get_bucket(config.get('MESSAGE_STORE_BUCKET_NAME'),
                             validate=False)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing('s3.save_latency', latency_millis)
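The manual start/end arithmetic above could also be written with the timer() context manager that statsd.StatsClient provides. A sketch under that assumption (not code from the project; save_with_timer is a hypothetical name):

from boto.s3.key import Key  # same boto Key class the snippet relies on

def save_with_timer(bucket, data_sha256, data):
    # timer() measures the wrapped block and reports it in milliseconds,
    # replacing the explicit time.time() bookkeeping.
    with statsd_client.timer('s3.save_latency'):
        key = Key(bucket)
        key.key = data_sha256
        key.set_contents_from_string(data)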
Example #3
def _save_to_s3_bucket(data_sha256, bucket_name, data):
    assert 'AWS_ACCESS_KEY_ID' in config, 'Need AWS key!'
    assert 'AWS_SECRET_ACCESS_KEY' in config, 'Need AWS secret!'
    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(
        config.get('AWS_ACCESS_KEY_ID'),
        config.get('AWS_SECRET_ACCESS_KEY'),
        host=config.get('AWS_HOST', 's3.amazonaws.com'),
        port=config.get('AWS_PORT'),
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
        is_secure=config.get('AWS_USE_SSL', True))
    bucket = conn.get_bucket(bucket_name, validate=False)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing('s3_blockstore.save_latency', latency_millis)
Example #4
        def after_transaction_end(session, transaction):
            start_time = transaction_start_map.get(hash(transaction))
            if not start_time:
                return

            latency = int((time.time() - start_time) * 1000)
            statsd_client.timing(metric_name, latency)
            statsd_client.incr(metric_name)
Example #5
    def _report_message_velocity(self, timedelta, num_uids):
        latency = (timedelta).total_seconds() * 1000
        latency_per_uid = float(latency) / num_uids
        metrics = [
            ".".join(["providers", self.provider_name, "message_velocity"]),
            ".".join(["providers", "overall", "message_velocity"]),
        ]
        for metric in metrics:
            statsd_client.timing(metric, latency_per_uid)
Example #6
    def _report_message_velocity(self, timedelta, num_uids):
        latency = (timedelta).total_seconds() * 1000
        latency_per_uid = float(latency) / num_uids
        metrics = [
            '.'.join(['providers', self.provider_name, 'message_velocity']),
            '.'.join(['providers', 'overall', 'message_velocity'])
        ]
        for metric in metrics:
            statsd_client.timing(metric, latency_per_uid)
Example #7
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id) for acc
                         in db_session.query(Account) if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size was computed before the loop and doesn't shrink as
        # accounts are deleted, so subtract deleted_count to report how
        # many accounts are still waiting for deletion.
        statsd_client.gauge('mailsync.{}.account_deletion.queue.length'
                            .format(shard_id),
                            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
Example #8
    def _report_message_velocity(self, timedelta, num_uids):
        latency = (timedelta).total_seconds() * 1000
        latency_per_uid = float(latency) / num_uids
        metrics = [
            '.'.join(['providers', self.provider_name,
                      'message_velocity']),
            '.'.join(['providers', 'overall', 'message_velocity'])
        ]
        for metric in metrics:
            statsd_client.timing(metric, latency_per_uid)
Example #9
    def _log_to_statsd(self, action_log_status, latency=None):
        metric_names = [
            "syncback.overall.{}".format(action_log_status),
            "syncback.providers.{}.{}".format(self.provider, action_log_status)
        ]

        for metric in metric_names:
            statsd_client.incr(metric)
            if latency:
                statsd_client.timing(metric, latency * 1000)
Example #10
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn(
                        'Account NOT marked for deletion. '
                        'Will not delete',
                        account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id,
                                 namespace_id,
                                 throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed',
                             error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully',
             shard_id=shard_id,
             time=end - start,
             count=deleted_count)
Example #11
    def create_message(self, db_session, acct, folder, msg):
        assert acct is not None and acct.namespace is not None

        # Check if we somehow already saved the imapuid (shouldn't happen, but
        # possible due to race condition). If so, don't commit changes.
        existing_imapuid = (db_session.query(ImapUid).filter(
            ImapUid.account_id == acct.id,
            ImapUid.folder_id == folder.id,
            ImapUid.msg_uid == msg.uid,
        ).first())
        if existing_imapuid is not None:
            log.error(
                "Expected to create imapuid, but existing row found",
                remote_msg_uid=msg.uid,
                existing_imapuid=existing_imapuid.id,
            )
            return None

        # Check if the message is valid.
        # https://sentry.nylas.com/sentry/sync-prod/group/3387/
        if msg.body is None:
            log.warning("Server returned a message with an empty body.")
            return None

        new_uid = common.create_imap_message(db_session, acct, folder, msg)
        self.add_message_to_thread(db_session, new_uid.message, msg)

        db_session.flush()

        # We're calling import_attached_events here instead of some more
        # obvious place (like Message.create_from_synced) because the function
        # requires new_uid.message to have been flushed.
        # This is necessary because import_attached_events does db lookups.
        if new_uid.message.has_attached_events:
            with db_session.no_autoflush:
                import_attached_events(db_session, acct, new_uid.message)

        # If we're in the polling state, report the latency between when the
        # message was received and when this record was created.
        if self.state == "poll":
            latency_millis = (datetime.utcnow() - new_uid.message.received_date
                              ).total_seconds() * 1000
            metrics = [
                ".".join(
                    ["mailsync", "providers", "overall", "message_latency"]),
                ".".join([
                    "mailsync", "providers", self.provider_name,
                    "message_latency"
                ]),
            ]
            for metric in metrics:
                statsd_client.timing(metric, latency_millis)

        return new_uid
Example #12
    def _report_message_velocity(self, timedelta, num_uids):
        latency = (timedelta).total_seconds() * 1000
        latency_per_uid = float(latency) / num_uids
        metrics = [
            ".".join([
                "mailsync", "providers", self.provider_name, "message_velocity"
            ]),
            ".".join(["mailsync", "providers", "overall", "message_velocity"]),
        ]
        for metric in metrics:
            statsd_client.timing(metric, latency_per_uid)
Example #13
        def end(session):
            start_time = transaction_start_map.get(session)
            if not start_time:
                return

            del transaction_start_map[session]

            t = time.time()
            latency = int((t - start_time) * 1000)
            statsd_client.timing(metric_name, latency)
            statsd_client.incr(metric_name)
            if latency > MAX_SANE_TRX_TIME_MS:
                log.warning('Long transaction', latency=latency,
                            modname=modname, funcname=funcname)
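Neither this snippet nor the after_transaction_end one above shows how the hooks are registered or how transaction_start_map gets populated. A hedged sketch of how they could be wired up with SQLAlchemy session events; the metric name and the module-level map are illustrative, and statsd_client is assumed as before:

import time

from sqlalchemy import event
from sqlalchemy.orm import Session

transaction_start_map = {}
metric_name = 'database.transaction.latency'  # placeholder metric name

@event.listens_for(Session, 'after_transaction_create')
def note_transaction_start(session, transaction):
    # Remember when each transaction began so the end hook can compute latency.
    transaction_start_map[hash(transaction)] = time.time()

@event.listens_for(Session, 'after_transaction_end')
def report_transaction_latency(session, transaction):
    start_time = transaction_start_map.pop(hash(transaction), None)
    if not start_time:
        return

    latency = int((time.time() - start_time) * 1000)
    statsd_client.timing(metric_name, latency)
    statsd_client.incr(metric_name)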
Example #14
    def _report_first_message(self):
        now = datetime.utcnow()

        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            account_created = account.created_at

        latency = (now - account_created).total_seconds() * 1000
        metrics = [
            '.'.join(['providers', self.provider_name, 'first_message']),
            '.'.join(['providers', 'overall', 'first_message'])
        ]

        for metric in metrics:
            statsd_client.timing(metric, latency)
Example #15
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id) for acc
                         in db_session.query(Account) if acc.is_deleted]

    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
            gevent.sleep(60)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
Example #16
    def create_message(self, db_session, acct, folder, msg):
        assert acct is not None and acct.namespace is not None

        # Check if we somehow already saved the imapuid (shouldn't happen, but
        # possible due to race condition). If so, don't commit changes.
        existing_imapuid = db_session.query(ImapUid).filter(
            ImapUid.account_id == acct.id, ImapUid.folder_id == folder.id,
            ImapUid.msg_uid == msg.uid).first()
        if existing_imapuid is not None:
            log.error('Expected to create imapuid, but existing row found',
                      remote_msg_uid=msg.uid,
                      existing_imapuid=existing_imapuid.id)
            return None

        # Check if the message is valid.
        # https://sentry.nylas.com/sentry/sync-prod/group/3387/
        if msg.body is None:
            log.warning('Server returned a message with an empty body.')
            return None

        new_uid = common.create_imap_message(db_session, acct, folder, msg)
        self.add_message_to_thread(db_session, new_uid.message, msg)

        db_session.flush()

        # We're calling import_attached_events here instead of some more
        # obvious place (like Message.create_from_synced) because the function
        # requires new_uid.message to have been flushed.
        # This is necessary because import_attached_events does db lookups.
        if new_uid.message.has_attached_events:
            with db_session.no_autoflush:
                import_attached_events(db_session, acct, new_uid.message)

        # If we're in the polling state, report the latency between when the
        # message was received and when this record was created.
        if self.state == 'poll':
            latency_millis = (
                datetime.utcnow() - new_uid.message.received_date) \
                .total_seconds() * 1000
            metrics = [
                '.'.join(['accounts', 'overall', 'message_latency']),
                '.'.join(['providers', self.provider_name, 'message_latency']),
            ]
            for metric in metrics:
                statsd_client.timing(metric, latency_millis)

        return new_uid
Example #17
def _delete_from_s3_bucket(data_sha256_hashes, bucket_name):
    data_sha256_hashes = [hash_ for hash_ in data_sha256_hashes if hash_]
    if not data_sha256_hashes:
        return None

    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"
    start = time.time()

    # Boto pools connections at the class level
    bucket = get_s3_bucket(bucket_name)

    bucket.delete_keys([key for key in data_sha256_hashes], quiet=True)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.delete_latency", latency_millis)
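The get_s3_bucket helper used here is not part of the snippet. A plausible reconstruction, pieced together from the S3Connection arguments shown in Example #3 (the body is an assumption, not the project's actual implementation; config is the same settings object the other snippets read):

import boto.s3.connection
from boto.s3.connection import S3Connection

def get_s3_bucket(bucket_name):
    # Hypothetical helper: same connection arguments as in Example #3.
    conn = S3Connection(
        config.get('AWS_ACCESS_KEY_ID'),
        config.get('AWS_SECRET_ACCESS_KEY'),
        host=config.get('AWS_HOST', 's3.amazonaws.com'),
        port=config.get('AWS_PORT'),
        calling_format=boto.s3.connection.OrdinaryCallingFormat(),
        is_secure=config.get('AWS_USE_SSL', True))
    # validate=False avoids an extra round trip to check that the bucket exists.
    return conn.get_bucket(bucket_name, validate=False)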
Example #18
    def create_message(self, db_session, acct, folder, msg):
        assert acct is not None and acct.namespace is not None

        # Check if we somehow already saved the imapuid (shouldn't happen, but
        # possible due to race condition). If so, don't commit changes.
        existing_imapuid = (
            db_session.query(ImapUid)
            .filter(ImapUid.account_id == acct.id, ImapUid.folder_id == folder.id, ImapUid.msg_uid == msg.uid)
            .first()
        )
        if existing_imapuid is not None:
            log.error(
                "Expected to create imapuid, but existing row found",
                remote_msg_uid=msg.uid,
                existing_imapuid=existing_imapuid.id,
            )
            return None

        new_uid = common.create_imap_message(db_session, acct, folder, msg)
        self.add_message_to_thread(db_session, new_uid.message, msg)

        db_session.flush()

        # We're calling import_attached_events here instead of some more
        # obvious place (like Message.create_from_synced) because the function
        # requires new_uid.message to have been flushed.
        # This is necessary because import_attached_events does db lookups.
        if new_uid.message.has_attached_events:
            with db_session.no_autoflush:
                import_attached_events(db_session, acct, new_uid.message)

        # If we're in the polling state, report the latency between when the
        # message was received and when this record was created.
        if self.state == "poll":
            latency_millis = (datetime.utcnow() - new_uid.message.received_date).total_seconds() * 1000
            metrics = [
                ".".join(["accounts", "overall", "message_latency"]),
                ".".join(["accounts", str(acct.id), "message_latency"]),
                ".".join(["providers", self.provider_name, "message_latency"]),
            ]
            for metric in metrics:
                statsd_client.timing(metric, latency_millis)

        return new_uid
Example #19
def _delete_from_s3_bucket(data_sha256_hashes, bucket_name):
    data_sha256_hashes = filter(None, data_sha256_hashes)
    if not data_sha256_hashes:
        return None

    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"
    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(config.get("AWS_ACCESS_KEY_ID"),
                        config.get("AWS_SECRET_ACCESS_KEY"))
    bucket = conn.get_bucket(bucket_name, validate=False)

    bucket.delete_keys([key for key in data_sha256_hashes], quiet=True)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.delete_latency", latency_millis)
Example #20
        def end(session):
            start_time = transaction_start_map.get(session)
            if not start_time:
                return

            del transaction_start_map[session]

            t = time.time()
            latency = int((t - start_time) * 1000)
            if config.get("ENABLE_DB_TXN_METRICS", False):
                statsd_client.timing(metric_name, latency)
                statsd_client.incr(metric_name)
            if latency > MAX_SANE_TRX_TIME_MS:
                log.warning(
                    "Long transaction",
                    latency=latency,
                    modname=modname,
                    funcname=funcname,
                )
Example #21
    def create_message(self, db_session, acct, folder, msg):
        assert acct is not None and acct.namespace is not None

        # Check if we somehow already saved the imapuid (shouldn't happen, but
        # possible due to race condition). If so, don't commit changes.
        existing_imapuid = db_session.query(ImapUid).filter(
            ImapUid.account_id == acct.id, ImapUid.folder_id == folder.id,
            ImapUid.msg_uid == msg.uid).first()
        if existing_imapuid is not None:
            log.error('Expected to create imapuid, but existing row found',
                      remote_msg_uid=msg.uid,
                      existing_imapuid=existing_imapuid.id)
            return None

        new_uid = common.create_imap_message(db_session, acct, folder, msg)
        self.add_message_to_thread(db_session, new_uid.message, msg)

        db_session.flush()

        # We're calling import_attached_events here instead of some more
        # obvious place (like Message.create_from_synced) because the function
        # requires new_uid.message to have been flushed.
        # This is necessary because import_attached_events does db lookups.
        if new_uid.message.has_attached_events:
            with db_session.no_autoflush:
                import_attached_events(db_session, acct, new_uid.message)

        # If we're in the polling state, report the latency between when the
        # message was received and when this record was created.
        if self.state == 'poll':
            latency_millis = (
                datetime.utcnow() - new_uid.message.received_date) \
                .total_seconds() * 1000
            metrics = [
                '.'.join(['accounts', 'overall', 'message_latency']),
                '.'.join(['accounts',
                          str(acct.id), 'message_latency']),
                '.'.join(['providers', self.provider_name, 'message_latency']),
            ]
            for metric in metrics:
                statsd_client.timing(metric, latency_millis)

        return new_uid
Example #22
    def _report_first_message(self):
        # Only record the "time to first message" in the inbox. Because users
        # can add more folders at any time, "initial sync"-style metrics for
        # other folders don't mean much.
        if self.folder_role not in ['inbox', 'all']:
            return

        now = datetime.utcnow()
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            account_created = account.created_at

        latency = (now - account_created).total_seconds() * 1000

        metrics = [
            '.'.join(['mailsync', 'providers', self.provider_name, 'first_message']),
            '.'.join(['mailsync', 'providers', 'overall', 'first_message'])
        ]

        for metric in metrics:
            statsd_client.timing(metric, latency)
Example #23
def _save_to_s3_bucket(data_sha256, bucket_name, data):
    # type: (str, str, bytes) -> None
    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"
    start = time.time()

    # Boto pools connections at the class level
    bucket = get_s3_bucket(bucket_name)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.save_latency", latency_millis)
Example #24
def _save_to_s3_bucket(data_sha256, bucket_name, data):
    assert "AWS_ACCESS_KEY_ID" in config, "Need AWS key!"
    assert "AWS_SECRET_ACCESS_KEY" in config, "Need AWS secret!"
    start = time.time()

    # Boto pools connections at the class level
    conn = S3Connection(config.get("AWS_ACCESS_KEY_ID"), config.get("AWS_SECRET_ACCESS_KEY"))
    bucket = conn.get_bucket(bucket_name, validate=False)

    # See if it already exists; if so, don't recreate.
    key = bucket.get_key(data_sha256)
    if key:
        return

    key = Key(bucket)
    key.key = data_sha256
    key.set_contents_from_string(data)

    end = time.time()
    latency_millis = (end - start) * 1000
    statsd_client.timing("s3_blockstore.save_latency", latency_millis)
Example #25
    def _report_first_message(self):
        # Only record the "time to first message" in the inbox. Because users
        # can add more folders at any time, "initial sync"-style metrics for
        # other folders don't mean much.
        if self.folder_role not in ['inbox', 'all']:
            return

        now = datetime.utcnow()
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            account_created = account.created_at

        latency = (now - account_created).total_seconds() * 1000

        metrics = [
            '.'.join([
                'mailsync', 'providers', self.provider_name, 'first_message'
            ]), '.'.join(['mailsync', 'providers', 'overall', 'first_message'])
        ]

        for metric in metrics:
            statsd_client.timing(metric, latency)
Example #26
def delete_marked_accounts(shard_id, ids_to_delete, throttle=False, dry_run=False):
    start = time.time()

    deleted_count = 0
    for account_id, namespace_id in ids_to_delete:
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical("Account does not exist", account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn("Account NOT marked for deletion. " "Will not delete", account_id=account_id)
                    continue

            log.info("Deleting account", account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info("Deleting database data", account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle, dry_run=dry_run)
            except Exception as e:
                log.critical("Database data deletion failed", error=e, account_id=account_id)
                continue

            # Delete liveness data
            log.debug("Deleting liveness data", account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.timing("mailsync.account_deletion.queue.deleted", time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info("All data deleted successfully", shard_id=shard_id, time=end - start, count=deleted_count)
Example #27
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with message if there are problems
    """

    with session_scope(namespace_id) as db_session:
        try:
            account = (db_session.query(Account).join(Namespace).filter(
                Namespace.id == namespace_id).one())
        except NoResultFound:
            raise AccountDeletionErrror("Could not find account in database")

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                "Account is_marked_for_deletion is False. "
                "Change this to proceed with deletion.")
        account_id = account.id
        account_discriminator = account.discriminator

    log.info("Deleting account", account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete.
    # NOTE: ImapFolderInfo doesn't really fall into this category, but
    # we include it here for simplicity anyway.

    filters = OrderedDict()
    for table in [
            "message",
            "block",
            "thread",
            "transaction",
            "actionlog",
            "event",
            "contact",
            "dataprocessingcache",
    ]:
        filters[table] = ("namespace_id", namespace_id)

    if account_discriminator == "easaccount":
        filters["easuid"] = ("easaccount_id", account_id)
        filters["easfoldersyncstatus"] = ("account_id", account_id)
    else:
        filters["imapuid"] = ("account_id", account_id)
        filters["imapfoldersyncstatus"] = ("account_id", account_id)
        filters["imapfolderinfo"] = ("account_id", account_id)

    from inbox.ignition import engine_manager

    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine,
                      cls,
                      filters[cls],
                      account_id,
                      throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = "DELETE FROM {} WHERE {}={};"

    filters = OrderedDict()
    for table in ("category", "calendar"):
        filters[table] = ("namespace_id", namespace_id)
    for table in ("folder", "label"):
        filters[table] = ("account_id", account_id)
    filters["namespace"] = ("id", namespace_id)

    for table, (column, id_) in iteritems(filters):
        log.info("Performing bulk deletion", table=table)
        start = time.time()

        if throttle:
            bulk_throttle()

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info("Completed bulk deletion", table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats)
    log.debug("Deleting liveness data", account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing("mailsync.account_deletion.queue.deleted",
                         time.time() - start_time)
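_batch_delete itself does not appear in this listing. A heavily hedged sketch of the chunked deletion the comment above describes; the chunk size, the exact signature, and the use of bulk_throttle and log are assumptions:

CHUNK_SIZE = 1000  # assumed batch size

def _batch_delete(engine, table, column_id_pair, account_id,
                  throttle=False, dry_run=False):
    # Delete rows in small chunks so a single huge DELETE doesn't hold
    # locks long enough to block concurrent writers.
    column, id_ = column_id_pair
    query = 'DELETE FROM {} WHERE {}={} LIMIT {};'.format(
        table, column, id_, CHUNK_SIZE)
    log.info('Batch deleting', table=table, account_id=account_id)

    if dry_run:
        log.debug(query)
        return

    while True:
        if throttle:
            bulk_throttle()

        result = engine.execute(query)
        # Stop once a chunk deletes fewer rows than the limit.
        if result.rowcount < CHUNK_SIZE:
            return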
Example #28
def delete_namespace(namespace_id, throttle=False, dry_run=False):
    """
    Delete all the data associated with a namespace from the database.
    USE WITH CAUTION.

    NOTE: This function is only called from bin/delete-account-data.
    It prints to stdout.

    Raises AccountDeletionErrror with message if there are problems
    """

    with session_scope(namespace_id) as db_session:
        try:
            account = db_session.query(Account).join(Namespace).filter(Namespace.id == namespace_id).one()
        except NoResultFound:
            raise AccountDeletionErrror(
                'Could not find account in database')

        if not account.is_marked_for_deletion:
            raise AccountDeletionErrror(
                'Account is_marked_for_deletion is False. '
                'Change this to proceed with deletion.')
        account_id = account.id
        account_discriminator = account.discriminator

    log.info('Deleting account', account_id=account_id)
    start_time = time.time()

    # These filters are used to configure batch deletion in chunks for
    # specific tables that are prone to transaction blocking during
    # large concurrent write volume. See _batch_delete.
    # NOTE: ImapFolderInfo doesn't really fall into this category, but
    # we include it here for simplicity anyway.

    filters = OrderedDict()
    for table in ['message', 'block', 'thread', 'transaction', 'actionlog',
                  'contact', 'event', 'dataprocessingcache']:
        filters[table] = ('namespace_id', namespace_id)

    if account_discriminator == 'easaccount':
        filters['easuid'] = ('easaccount_id', account_id)
        filters['easfoldersyncstatus'] = ('account_id', account_id)
    else:
        filters['imapuid'] = ('account_id', account_id)
        filters['imapfoldersyncstatus'] = ('account_id', account_id)
        filters['imapfolderinfo'] = ('account_id', account_id)

    from inbox.ignition import engine_manager
    # Bypass the ORM for performant bulk deletion;
    # we do /not/ want Transaction records created for these deletions,
    # so this is okay.
    engine = engine_manager.get_for_id(namespace_id)

    for cls in filters:
        _batch_delete(engine, cls, filters[cls], throttle=throttle,
                      dry_run=dry_run)

    # Use a single delete for the other tables. Rows from tables which contain
    # cascade-deleted foreign keys to other tables deleted here (or above)
    # are also not always explicitly deleted, except where needed for
    # performance.
    #
    # NOTE: Namespace, Account are deleted at the end too.

    query = 'DELETE FROM {} WHERE {}={};'

    filters = OrderedDict()
    for table in ('category', 'calendar'):
        filters[table] = ('namespace_id', namespace_id)
    for table in ('folder', 'label'):
        filters[table] = ('account_id', account_id)
    filters['namespace'] = ('id', namespace_id)

    for table, (column, id_) in filters.iteritems():
        log.info('Performing bulk deletion', table=table)
        start = time.time()

        if throttle and check_throttle():
            log.info("Throttling deletion")
            gevent.sleep(60)

        if not dry_run:
            engine.execute(query.format(table, column, id_))
        else:
            log.debug(query.format(table, column, id_))

        end = time.time()
        log.info('Completed bulk deletion', table=table, time=end - start)

    # Delete the account object manually to get rid of the various objects
    # associated with it (e.g: secrets, tokens, etc.)
    with session_scope(account_id) as db_session:
        account = db_session.query(Account).get(account_id)
        if dry_run is False:
            db_session.delete(account)
            db_session.commit()

    # Delete liveness data (heartbeats)
    log.debug('Deleting liveness data', account_id=account_id)
    clear_heartbeat_status(account_id)

    statsd_client.timing('mailsync.account_deletion.queue.deleted',
                         time.time() - start_time)
Example #29
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size was computed before the loop and doesn't shrink as
        # accounts are deleted, so subtract deleted_count to report how
        # many accounts are still waiting for deletion.
        statsd_client.gauge(
            'mailsync.{}.account_deletion.queue.length'.format(shard_id),
            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn(
                        'Account NOT marked for deletion. '
                        'Will not delete',
                        account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id,
                                 namespace_id,
                                 throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed',
                             error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully',
             shard_id=shard_id,
             time=end - start,
             count=deleted_count)