Example #1
    def _publish_heartbeat(self):
        metric_names = [
            "inbox-contacts-search.heartbeat",
        ]

        for metric in metric_names:
            statsd_client.incr(metric)
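All of these snippets call a module-level `statsd_client`. For orientation, here is a minimal sketch of how such a client could be created with the `statsd` Python package; the host, port, and prefix are assumptions for illustration, not the project's actual configuration.

    import statsd

    # Hypothetical setup; the real codebase presumably configures this elsewhere.
    statsd_client = statsd.StatsClient(host="localhost", port=8125, prefix="inbox")

    statsd_client.incr("inbox-contacts-search.heartbeat")     # bump a counter
    statsd_client.gauge("syncqueue.queue.zone-a.length", 12)  # report a gauge value
    statsd_client.timing("db.transaction", 35)                # record a latency in milliseconds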
Example #2
    def _report_batch_upload(self):
        metric_names = [
            "inbox-contacts-search.transactions.batch_upload",
        ]

        for metric in metric_names:
            statsd_client.incr(metric)
Example #3
 def _run_impl(self):
     current_time = time.time()
     timeout = event_queue.SOCKET_TIMEOUT - 2  # Minus 2 to give us some leeway.
     next_deferral = self._try_get_next_deferral()
     while next_deferral is not None:
         if next_deferral.deadline >= current_time:
             timeout = int(
                 min(max(next_deferral.deadline - current_time, 1), timeout)
             )
             log.info(
                 "Next deferral deadline is in the future, sleeping",
                 deferral_id=next_deferral.id,
                 deadline=next_deferral.deadline,
                 desired_host=next_deferral.desired_host,
                 account_id=next_deferral.account_id,
                 timeout=timeout,
             )
             break
         log.info(
             "Executing deferral",
             deferral_id=next_deferral.id,
             deadline=next_deferral.deadline,
             desired_host=next_deferral.desired_host,
             account_id=next_deferral.account_id,
         )
         next_deferral.execute(self.redis)
         self.redis.zrem(DEFERRED_ACCOUNT_MIGRATION_PQUEUE, next_deferral.id)
         next_deferral = self._try_get_next_deferral()
     self.event_queue.receive_event(timeout=timeout)
     statsd_client.incr("migrator.heartbeat")
Example #4
    def _publish_heartbeat(self):
        metric_names = [
            "contacts_search_index.heartbeat",
        ]

        for metric in metric_names:
            statsd_client.incr(metric)
Example #5
        def after_transaction_end(session, transaction):
            start_time = transaction_start_map.get(hash(transaction))
            if not start_time:
                return

            latency = int((time.time() - start_time) * 1000)
            statsd_client.timing(metric_name, latency)
            statsd_client.incr(metric_name)
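The `after_transaction_end` hook above has the signature of SQLAlchemy's session event of the same name. A minimal sketch of how it could be registered through `sqlalchemy.event` follows; `instrument_transactions`, the `metric_name` default, and the in-memory `transaction_start_map` are assumptions standing in for whatever the surrounding code actually defines.

    import time

    import statsd
    from sqlalchemy import event

    statsd_client = statsd.StatsClient()  # assumed client, as in the sketch above

    def instrument_transactions(session, metric_name="db.transaction"):
        # Hypothetical helper: time every transaction on this session and report
        # the latency (in ms) plus a counter to statsd when it ends.
        transaction_start_map = {}

        @event.listens_for(session, "after_transaction_create")
        def after_transaction_create(session, transaction):
            transaction_start_map[hash(transaction)] = time.time()

        @event.listens_for(session, "after_transaction_end")
        def after_transaction_end(session, transaction):
            start_time = transaction_start_map.pop(hash(transaction), None)
            if start_time is None:
                return
            latency = int((time.time() - start_time) * 1000)
            statsd_client.timing(metric_name, latency)
            statsd_client.incr(metric_name)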
Example #6
 def _run_impl(self):
     self.enqueue_new_accounts()
     self.unassign_disabled_accounts()
     statsd_client.gauge('syncqueue.queue.{}.length'.format(self.zone),
                         self.queue_client.qsize())
     statsd_client.incr('syncqueue.service.{}.heartbeat'.
                        format(self.zone))
     gevent.sleep(self.poll_interval)
Example #7
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id) for acc
                         in db_session.query(Account) if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size = length of queue
        # deleted_count = number of accounts deleted during loop iteration
        # this is necessary because the length of ids_to_delete doesn't
        # change during loop iteration
        statsd_client.gauge('mailsync.{}.account_deletion.queue.length'
                            .format(shard_id),
                            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
Example #8
 def _run_impl(self):
     log.info('Queueing accounts', zone=self.zone, shards=self.shards)
     while True:
         self.enqueue_new_accounts()
         self.unassign_disabled_accounts()
         statsd_client.gauge('syncqueue.queue.{}.length'.format(self.zone),
                             self.queue_client.qsize())
         statsd_client.incr('syncqueue.service.{}.heartbeat'.format(
             self.zone))
         gevent.sleep(self.poll_interval)
Example #9
 def _run_impl(self):
     log.info('Queueing accounts', zone=self.zone, shards=self.shards)
     while True:
         self.enqueue_new_accounts()
         self.unassign_disabled_accounts()
         statsd_client.gauge('syncqueue.queue.{}.length'.format(self.zone),
                             self.queue_client.qsize())
         statsd_client.incr('syncqueue.service.{}.heartbeat'.
                            format(self.zone))
         gevent.sleep(self.poll_interval)
Example #10
    def _log_to_statsd(self, action_log_status, latency=None):
        metric_names = [
            "syncback.overall.{}".format(action_log_status),
            "syncback.providers.{}.{}".format(self.provider, action_log_status)
        ]

        for metric in metric_names:
            statsd_client.incr(metric)
            if latency:
                statsd_client.timing(metric, latency * 1000)
Example #11
        def end(session):
            start_time = transaction_start_map.get(session)
            if not start_time:
                return

            del transaction_start_map[session]

            t = time.time()
            latency = int((t - start_time) * 1000)
            statsd_client.timing(metric_name, latency)
            statsd_client.incr(metric_name)
            if latency > MAX_SANE_TRX_TIME_MS:
                log.warning('Long transaction', latency=latency,
                            modname=modname, funcname=funcname)
Example #12
        def end(session):
            start_time = transaction_start_map.get(session)
            if not start_time:
                return

            del transaction_start_map[session]

            t = time.time()
            latency = int((t - start_time) * 1000)
            if config.get("ENABLE_DB_TXN_METRICS", False):
                statsd_client.timing(metric_name, latency)
                statsd_client.incr(metric_name)
            if latency > MAX_SANE_TRX_TIME_MS:
                log.warning(
                    "Long transaction",
                    latency=latency,
                    modname=modname,
                    funcname=funcname,
                )
Example #13
 def _run_impl(self):
     current_time = time.time()
     timeout = event_queue.SOCKET_TIMEOUT - 2    # Minus 2 to give us some leeway.
     next_deferral = self._try_get_next_deferral()
     while next_deferral is not None:
         if next_deferral.deadline >= current_time:
             timeout = int(min(max(next_deferral.deadline - current_time, 1), timeout))
             log.info('Next deferral deadline is in the future, sleeping',
                      deferral_id=next_deferral.id,
                      deadline=next_deferral.deadline,
                      desired_host=next_deferral.desired_host,
                      account_id=next_deferral.account_id,
                      timeout=timeout)
             break
         log.info('Executing deferral',
                  deferral_id=next_deferral.id,
                  deadline=next_deferral.deadline,
                  desired_host=next_deferral.desired_host,
                  account_id=next_deferral.account_id)
         next_deferral.execute(self.redis)
         self.redis.zrem(DEFERRED_ACCOUNT_MIGRATION_PQUEUE, next_deferral.id)
         next_deferral = self._try_get_next_deferral()
     self.event_queue.receive_event(timeout=timeout)
     statsd_client.incr("migrator.heartbeat")
Example #14
    def _batch_log_entries(self, db_session, log_entries):
        """
        Batch action log entries together and return a batch task after
        verifying we can process them. All actions must belong to the same
        account.
        """
        valid_log_entries = []
        account_id = None

        has_more = len(log_entries) == self.fetch_batch_size

        for log_entry in log_entries:
            if log_entry is None:
                self.log.error("Got no action, skipping")
                continue

            if log_entry.id in self.running_action_ids:
                self.log.debug("Skipping already running action",
                               action_log_id=log_entry.id)
                # We're already running an action for this account, so don't
                # queue up any additional actions for this account until the
                # previous batch has finished.
                return None

            namespace = log_entry.namespace
            if account_id is None:
                account_id = namespace.account.id
            else:
                assert account_id == namespace.account.id

            if namespace.account.sync_state in ("invalid", "stopped"):
                sync_state = namespace.account.sync_state
                self.log.warning(
                    "Skipping action for {} account".format(sync_state),
                    account_id=account_id,
                    action_log_id=log_entry.id,
                    action=log_entry.action,
                )

                action_age = (datetime.utcnow() -
                              log_entry.created_at).total_seconds()

                if action_age > INVALID_ACCOUNT_GRACE_PERIOD:
                    log_entry.status = "failed"
                    db_session.commit()
                    self.log.warning(
                        "Marking action as failed for {} account, older than grace period"
                        .format(sync_state),
                        account_id=account_id,
                        action_log_id=log_entry.id,
                        action=log_entry.action,
                    )
                    statsd_client.incr(
                        "syncback.{}_failed.total".format(sync_state))
                    statsd_client.incr("syncback.{}_failed.{}".format(
                        sync_state, account_id))
                continue

            # If there is a recently failed action, don't execute any actions
            # for this account.
            if log_entry.retries > 0:
                action_updated_age = (datetime.utcnow() -
                                      log_entry.updated_at).total_seconds()

                # TODO(T6974): We might want to do some kind of exponential
                # backoff with jitter to avoid the thundering herd problem if a
                # provider suddenly starts having issues for a short period of
                # time.
                if action_updated_age < self.retry_interval:
                    self.log.info(
                        "Skipping tasks due to recently failed action",
                        account_id=account_id,
                        action_log_id=log_entry.id,
                        retries=log_entry.retries,
                    )
                    return

            valid_log_entries.append(log_entry)

        batch_task = self._get_batch_task(db_session, valid_log_entries,
                                          has_more)
        if not batch_task:
            return
        for task in batch_task.tasks:
            self.running_action_ids.update(task.action_log_ids)
            self.log.debug(
                "Syncback added task",
                process=self.process_number,
                account_id=account_id,
                action_log_ids=task.action_log_ids,
                num_actions=len(task.action_log_ids),
                msg=task.action_name,
                task_count=self.task_queue.qsize(),
                extra_args=task.extra_args,
            )
        return batch_task
Example #15
 def _run_impl(self):
     self.enqueue_new_accounts()
     self.unassign_disabled_accounts()
     statsd_client.gauge("syncqueue.queue.{}.length".format(self.zone), self.queue_client.qsize())
     statsd_client.incr("syncqueue.service.{}.heartbeat".format(self.zone))
     gevent.sleep(self.poll_interval)
Example #16
    def _batch_log_entries(self, db_session, log_entries):
        tasks = []
        semaphore = None
        account_id = None
        for log_entry in log_entries:
            if log_entry is None:
                self.log.error('Got no action, skipping')
                continue

            if log_entry.id in self.running_action_ids:
                self.log.info('Skipping already running action',
                              action_id=log_entry.id)
                # We're already running an action for this account, so don't
                # queue up any additional actions for this account until the
                # previous batch has finished.
                return None

            namespace = log_entry.namespace
            if account_id is None:
                account_id = namespace.account.id
            else:
                assert account_id == namespace.account.id

            if namespace.account.sync_state == 'invalid':
                self.log.warning('Skipping action for invalid account',
                                 account_id=account_id,
                                 action_id=log_entry.id,
                                 action=log_entry.action)

                action_age = (datetime.utcnow() -
                              log_entry.created_at).total_seconds()

                if action_age > INVALID_ACCOUNT_GRACE_PERIOD:
                    log_entry.status = 'failed'
                    db_session.commit()
                    self.log.warning(
                        'Marking action as failed for '
                        'invalid account, older than '
                        'grace period',
                        account_id=account_id,
                        action_id=log_entry.id,
                        action=log_entry.action)
                    statsd_client.incr('syncback.invalid_failed.total')
                    statsd_client.incr(
                        'syncback.invalid_failed.{}'.format(account_id))
                continue

            if semaphore is None:
                semaphore = self.account_semaphores[account_id]
            else:
                assert semaphore is self.account_semaphores[account_id]
            tasks.append(
                SyncbackTask(action_name=log_entry.action,
                             semaphore=semaphore,
                             action_log_id=log_entry.id,
                             record_id=log_entry.record_id,
                             account_id=account_id,
                             provider=namespace.account.verbose_provider,
                             service=self,
                             retry_interval=self.retry_interval,
                             extra_args=log_entry.extra_args))
        if len(tasks) == 0:
            return None

        for task in tasks:
            self.running_action_ids.add(task.action_log_id)
            self.log.info('Syncback added task',
                          process=self.process_number,
                          action_id=task.action_log_id,
                          msg=task.action_name,
                          task_count=self.task_queue.qsize())
        return SyncbackBatchTask(semaphore, tasks, account_id)
Example #17
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size = length of queue
        # deleted_count = number of accounts deleted during loop iteration
        # this is necessary because the length of ids_to_delete doesn't
        # change during loop iteration
        statsd_client.gauge(
            'mailsync.{}.account_deletion.queue.length'.format(shard_id),
            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn(
                        'Account NOT marked for deletion. '
                        'Will not delete',
                        account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id,
                                 namespace_id,
                                 throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed',
                             error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully',
             shard_id=shard_id,
             time=end - start,
             count=deleted_count)
Example #18
    def _batch_log_entries(self, db_session, log_entries):
        tasks = []
        semaphore = None
        account_id = None
        last_task = None
        for log_entry in log_entries:
            if log_entry is None:
                self.log.error('Got no action, skipping')
                continue

            if log_entry.id in self.running_action_ids:
                self.log.debug('Skipping already running action',
                               action_log_id=log_entry.id)
                # We're already running an action for this account, so don't
                # queue up any additional actions for this account until the
                # previous batch has finished.
                return None

            namespace = log_entry.namespace
            if account_id is None:
                account_id = namespace.account.id
            else:
                assert account_id == namespace.account.id

            if namespace.account.sync_state in ('invalid', 'stopped'):
                sync_state = namespace.account.sync_state
                self.log.warning(
                    'Skipping action for {} account'.format(sync_state),
                    account_id=account_id,
                    action_log_id=log_entry.id,
                    action=log_entry.action)

                action_age = (datetime.utcnow() -
                              log_entry.created_at).total_seconds()

                if action_age > INVALID_ACCOUNT_GRACE_PERIOD:
                    log_entry.status = 'failed'
                    db_session.commit()
                    self.log.warning(
                        'Marking action as failed for {} account, older than grace period'
                        .format(sync_state),
                        account_id=account_id,
                        action_log_id=log_entry.id,
                        action=log_entry.action)
                    statsd_client.incr(
                        'syncback.{}_failed.total'.format(sync_state))
                    statsd_client.incr('syncback.{}_failed.{}'.format(
                        sync_state, account_id))
                continue

            if semaphore is None:
                semaphore = self.account_semaphores[account_id]
            else:
                assert semaphore is self.account_semaphores[account_id]
            task = SyncbackTask(action_name=log_entry.action,
                                semaphore=semaphore,
                                action_log_ids=[log_entry.id],
                                record_ids=[log_entry.record_id],
                                account_id=account_id,
                                provider=namespace.account.verbose_provider,
                                service=self,
                                retry_interval=self.retry_interval,
                                extra_args=log_entry.extra_args)
            if last_task is None:
                last_task = task
            else:
                merged_task = last_task.try_merge_with(task)
                if merged_task is None:
                    tasks.append(last_task)
                    last_task = task
                else:
                    last_task = merged_task
        if last_task is not None:
            assert len(tasks) == 0 or last_task != tasks[-1]
            tasks.append(last_task)

        if len(tasks) == 0:
            return None

        for task in tasks:
            self.running_action_ids.update(task.action_log_ids)
            self.log.debug('Syncback added task',
                           process=self.process_number,
                           action_log_ids=task.action_log_ids,
                           num_actions=len(task.action_log_ids),
                           msg=task.action_name,
                           task_count=self.task_queue.qsize())
        return SyncbackBatchTask(semaphore, tasks, account_id)
Example #19
    def _process_log(self):
        for key in self.keys:
            with session_scope_by_shard_id(key) as db_session:

                # Get the list of namespace ids with pending actions
                namespace_ids = [
                    ns_id[0]
                    for ns_id in db_session.query(ActionLog.namespace_id).
                    filter(ActionLog.discriminator == 'actionlog',
                           ActionLog.status == 'pending').distinct()
                ]

                running_action_ids = {
                    worker.action_log_id
                    for worker in self.workers
                }

                # Pick NUM_PARALLEL_ACCOUNTS randomly to make sure we're
                # executing actions equally for each namespace_id --- we
                # don't want a single account with 100k actions hogging
                # the action log.
                namespaces_to_process = []
                if len(namespace_ids) <= NUM_PARALLEL_ACCOUNTS:
                    namespaces_to_process = namespace_ids
                else:
                    namespaces_to_process = random.sample(
                        namespace_ids, NUM_PARALLEL_ACCOUNTS)

                self.log.info('Number of actively running syncback workers',
                              workers=len(self.workers),
                              shard_id=key)

                for ns_id in namespaces_to_process:
                    # The discriminator filter restricts actions to IMAP. EAS
                    # uses a different system.
                    query = db_session.query(ActionLog).filter(
                        ActionLog.discriminator == 'actionlog',
                        ActionLog.status == 'pending',
                        ActionLog.namespace_id == ns_id).order_by(ActionLog.id).\
                        limit(1)

                    log_entry = query.first()

                    if log_entry is None:
                        self.log.error('Got a non-existing action, skipping')
                        continue

                    if log_entry.id in running_action_ids:
                        self.log.info('Skipping already running action',
                                      action_id=log_entry.id)
                        continue

                    namespace = log_entry.namespace
                    if namespace.account.sync_state == 'invalid':
                        self.log.warning('Skipping action for invalid account',
                                         account_id=namespace.account.id,
                                         action_id=log_entry.id,
                                         action=log_entry.action)

                        action_age = (datetime.utcnow() -
                                      log_entry.created_at).total_seconds()

                        if action_age > INVALID_ACCOUNT_GRACE_PERIOD:
                            log_entry.status = 'failed'
                            db_session.commit()
                            self.log.warning(
                                'Marking action as failed for '
                                'invalid account, older than '
                                'grace period',
                                account_id=namespace.account.id,
                                action_id=log_entry.id,
                                action=log_entry.action)
                            statsd_client.incr('syncback.invalid_failed.total')
                            statsd_client.incr(
                                'syncback.invalid_failed.{}'.format(
                                    namespace.account.id))
                        continue

                    self.log.info('delegating action',
                                  action_id=log_entry.id,
                                  msg=log_entry.action)

                    semaphore = self.account_semaphores[namespace.account_id]
                    worker = SyncbackWorker(
                        action_name=log_entry.action,
                        semaphore=semaphore,
                        action_log_id=log_entry.id,
                        record_id=log_entry.record_id,
                        account_id=namespace.account_id,
                        provider=namespace.account.verbose_provider,
                        retry_interval=self.retry_interval,
                        extra_args=log_entry.extra_args)
                    self.workers.add(worker)
                    worker.start()
Example #20
    def _batch_log_entries(self, db_session, log_entries):
        tasks = []
        semaphore = None
        account_id = None
        for log_entry in log_entries:
            if log_entry is None:
                self.log.error('Got no action, skipping')
                continue

            if log_entry.id in self.running_action_ids:
                self.log.info('Skipping already running action',
                              action_id=log_entry.id)
                # We're already running an action for this account, so don't
                # queue up any additional actions for this account until the
                # previous batch has finished.
                return None

            namespace = log_entry.namespace
            if account_id is None:
                account_id = namespace.account.id
            else:
                assert account_id == namespace.account.id

            if namespace.account.sync_state == 'invalid':
                self.log.warning('Skipping action for invalid account',
                                 account_id=account_id,
                                 action_id=log_entry.id,
                                 action=log_entry.action)

                action_age = (datetime.utcnow() -
                              log_entry.created_at).total_seconds()

                if action_age > INVALID_ACCOUNT_GRACE_PERIOD:
                    log_entry.status = 'failed'
                    db_session.commit()
                    self.log.warning('Marking action as failed for '
                                     'invalid account, older than '
                                     'grace period',
                                     account_id=account_id,
                                     action_id=log_entry.id,
                                     action=log_entry.action)
                    statsd_client.incr('syncback.invalid_failed.total')
                    statsd_client.incr('syncback.invalid_failed.{}'.
                                       format(account_id))
                continue

            if semaphore is None:
                semaphore = self.account_semaphores[account_id]
            else:
                assert semaphore is self.account_semaphores[account_id]
            tasks.append(
                SyncbackTask(action_name=log_entry.action,
                             semaphore=semaphore,
                             action_log_id=log_entry.id,
                             record_id=log_entry.record_id,
                             account_id=account_id,
                             provider=namespace.account.verbose_provider,
                             service=self,
                             retry_interval=self.retry_interval,
                             extra_args=log_entry.extra_args))
        if len(tasks) == 0:
            return None

        for task in tasks:
            self.running_action_ids.add(task.action_log_id)
            self.log.info('Syncback added task',
                          process=self.process_number,
                          action_id=task.action_log_id,
                          msg=task.action_name,
                          task_count=self.task_queue.qsize())
        return SyncbackBatchTask(semaphore, tasks, account_id)
Example #21
    def _process_log(self):
        before = datetime.utcnow()
        for key in self.keys:
            with session_scope_by_shard_id(key) as db_session:

                # Get the list of namespace ids with pending actions
                namespace_ids = [ns_id[0] for ns_id in db_session.query(ActionLog.namespace_id).filter(
                    ActionLog.discriminator == 'actionlog',
                    ActionLog.status == 'pending').distinct()]

                running_action_ids = {worker.action_log_id for worker in
                                      self.workers}

                # Pick NUM_PARALLEL_ACCOUNTS randomly to make sure we're
                # executing actions equally for each namespace_id --- we
                # don't want a single account with 100k actions hogging
                # the action log.
                namespaces_to_process = []
                if len(namespace_ids) <= NUM_PARALLEL_ACCOUNTS:
                    namespaces_to_process = namespace_ids
                else:
                    namespaces_to_process = random.sample(namespace_ids,
                                                          NUM_PARALLEL_ACCOUNTS)
                self.log.info('Syncback namespace_ids count', shard_id=key,
                              process=self.process_number,
                              num_namespace_ids=len(namespace_ids))

                for ns_id in namespaces_to_process:
                    # The discriminator filter restricts actions to IMAP. EAS
                    # uses a different system.
                    query = db_session.query(ActionLog).filter(
                        ActionLog.discriminator == 'actionlog',
                        ActionLog.status == 'pending',
                        ActionLog.namespace_id == ns_id).order_by(ActionLog.id).\
                        limit(1)

                    log_entry = query.first()

                    if log_entry is None:
                        self.log.error('Got a non-existing action, skipping')
                        continue

                    if log_entry.id in running_action_ids:
                        self.log.info('Skipping already running action',
                                      action_id=log_entry.id)
                        continue

                    namespace = log_entry.namespace
                    if namespace.account.sync_state == 'invalid':
                        self.log.warning('Skipping action for invalid account',
                                         account_id=namespace.account.id,
                                         action_id=log_entry.id,
                                         action=log_entry.action)

                        action_age = (datetime.utcnow() -
                                      log_entry.created_at).total_seconds()

                        if action_age > INVALID_ACCOUNT_GRACE_PERIOD:
                            log_entry.status = 'failed'
                            db_session.commit()
                            self.log.warning('Marking action as failed for '
                                             'invalid account, older than '
                                             'grace period',
                                             account_id=namespace.account.id,
                                             action_id=log_entry.id,
                                             action=log_entry.action)
                            statsd_client.incr('syncback.invalid_failed.total')
                            statsd_client.incr('syncback.invalid_failed.{}'.
                                               format(namespace.account.id))
                        continue

                    self.log.info('delegating action',
                                  action_id=log_entry.id,
                                  msg=log_entry.action)

                    semaphore = self.account_semaphores[namespace.account_id]
                    worker = SyncbackWorker(action_name=log_entry.action,
                                            semaphore=semaphore,
                                            action_log_id=log_entry.id,
                                            record_id=log_entry.record_id,
                                            account_id=namespace.account_id,
                                            provider=namespace.account.
                                            verbose_provider,
                                            service=self,
                                            retry_interval=self.retry_interval,
                                            extra_args=log_entry.extra_args)
                    self.workers.add(worker)
                    self.log.info('Syncback added worker',
                                  process=self.process_number,
                                  worker_count=len(self.workers))
                    worker.start()
        after = datetime.utcnow()
        self.log.info('Syncback completed one iteration',
                      process=self.process_number,
                      duration=(after - before).total_seconds())
Example #22
    def data(self):
        if self.size == 0:
            log.warning('Block size is 0')
            return ''
        elif hasattr(self, '_data'):
            # On initial download we temporarily store data in memory
            value = self._data
        else:
            value = blockstore.get_from_blockstore(self.data_sha256)

        if value is None:
            log.warning("Couldn't find data on S3 for block",
                        sha_hash=self.data_sha256)

            from inbox.models.block import Block
            if isinstance(self, Block):
                if self.parts:
                    # This block is an attachment of a message that was
                    # deleted. We will attempt to fetch the raw
                    # message and parse out the needed attachment.

                    message = self.parts[0].message  # only grab one
                    account = message.namespace.account

                    statsd_string = 'api.direct_fetching.{}.{}'.format(
                        account.provider, account.id)

                    # Try to fetch the message from S3 first.
                    with statsd_client.timer('{}.blockstore_latency'.format(
                                             statsd_string)):
                        raw_mime = blockstore.get_from_blockstore(message.data_sha256)

                    # If it's not there, get it from the provider.
                    if raw_mime is None:
                        statsd_client.incr('{}.cache_misses'.format(statsd_string))

                        with statsd_client.timer('{}.provider_latency'.format(
                                                 statsd_string)):
                            raw_mime = get_raw_from_provider(message)

                        msg_sha256 = sha256(raw_mime).hexdigest()

                        # Cache the raw message in the blockstore so that
                        # we don't have to fetch it over and over.

                        with statsd_client.timer('{}.blockstore_save_latency'.format(
                                                 statsd_string)):
                            blockstore.save_to_blockstore(msg_sha256, raw_mime)
                    else:
                        # We found it in the blockstore --- report this.
                        statsd_client.incr('{}.cache_hits'.format(statsd_string))

                    # If we couldn't find it there, give up.
                    if raw_mime is None:
                        log.error("Don't have raw message for hash {}"
                                  .format(message.data_sha256))
                        return None

                    parsed = mime.from_string(raw_mime)
                    if parsed is not None:
                        for mimepart in parsed.walk(
                                with_self=parsed.content_type.is_singlepart()):
                            if mimepart.content_type.is_multipart():
                                continue  # TODO should we store relations?

                            data = mimepart.body

                            if isinstance(data, unicode):
                                data = data.encode('utf-8', 'strict')

                            if data is None:
                                continue

                            # Found it!
                            if sha256(data).hexdigest() == self.data_sha256:
                                log.info('Found subpart with hash {}'.format(
                                    self.data_sha256))

                                with statsd_client.timer('{}.blockstore_save_latency'.format(
                                                         statsd_string)):
                                    blockstore.save_to_blockstore(self.data_sha256, data)
                                    return data
                    log.error("Couldn't find the attachment in the raw message", message_id=message.id)

            log.error('No data returned!')
            return value

        assert self.data_sha256 == sha256(value).hexdigest(), \
            "Returned data doesn't match stored hash!"
        return value
Example #23
    def _batch_log_entries(self, db_session, log_entries):
        tasks = []
        semaphore = None
        account_id = None
        last_task = None
        for log_entry in log_entries:
            if log_entry is None:
                self.log.error('Got no action, skipping')
                continue

            if log_entry.id in self.running_action_ids:
                self.log.debug('Skipping already running action',
                               action_log_id=log_entry.id)
                # We're already running an action for this account, so don't
                # queue up any additional actions for this account until the
                # previous batch has finished.
                return None

            namespace = log_entry.namespace
            if account_id is None:
                account_id = namespace.account.id
            else:
                assert account_id == namespace.account.id

            if namespace.account.sync_state in ('invalid', 'stopped'):
                sync_state = namespace.account.sync_state
                self.log.warning('Skipping action for {} account'.format(sync_state),
                                 account_id=account_id,
                                 action_log_id=log_entry.id,
                                 action=log_entry.action)

                action_age = (datetime.utcnow() -
                              log_entry.created_at).total_seconds()

                if action_age > INVALID_ACCOUNT_GRACE_PERIOD:
                    log_entry.status = 'failed'
                    db_session.commit()
                    self.log.warning('Marking action as failed for {} account, older than grace period'.format(sync_state),
                                     account_id=account_id,
                                     action_log_id=log_entry.id,
                                     action=log_entry.action)
                    statsd_client.incr('syncback.{}_failed.total'.format(sync_state))
                    statsd_client.incr('syncback.{}_failed.{}'.format(sync_state, account_id))
                continue

            if semaphore is None:
                semaphore = self.account_semaphores[account_id]
            else:
                assert semaphore is self.account_semaphores[account_id]
            task = SyncbackTask(action_name=log_entry.action,
                                semaphore=semaphore,
                                action_log_ids=[log_entry.id],
                                record_ids=[log_entry.record_id],
                                account_id=account_id,
                                provider=namespace.account.verbose_provider,
                                service=self,
                                retry_interval=self.retry_interval,
                                extra_args=log_entry.extra_args)
            if last_task is None:
                last_task = task
            else:
                merged_task = last_task.try_merge_with(task)
                if merged_task is None:
                    tasks.append(last_task)
                    last_task = task
                else:
                    last_task = merged_task
        if last_task is not None:
            assert len(tasks) == 0 or last_task != tasks[-1]
            tasks.append(last_task)

        if len(tasks) == 0:
            return None

        for task in tasks:
            self.running_action_ids.update(task.action_log_ids)
            self.log.debug('Syncback added task',
                           process=self.process_number,
                           action_log_ids=task.action_log_ids,
                           num_actions=len(task.action_log_ids),
                           msg=task.action_name,
                           task_count=self.task_queue.qsize())
        return SyncbackBatchTask(semaphore, tasks, account_id)
Example #24
    def data(self):
        if self.size == 0:
            log.warning('Block size is 0')
            return ''
        elif hasattr(self, '_data'):
            # On initial download we temporarily store data in memory
            value = self._data
        else:
            value = blockstore.get_from_blockstore(self.data_sha256)

        if value is None:
            log.warning("Couldn't find data on S3 for block",
                        sha_hash=self.data_sha256)

            from inbox.models.block import Block
            if isinstance(self, Block):
                if self.parts:
                    # This block is an attachment of a message that was
                    # deleted. We will attempt to fetch the raw
                    # message and parse out the needed attachment.

                    message = self.parts[0].message  # only grab one
                    account = message.namespace.account

                    statsd_string = 'api.direct_fetching.{}.{}'.format(
                        account.provider, account.id)

                    # Try to fetch the message from S3 first.
                    with statsd_client.timer(
                            '{}.blockstore_latency'.format(statsd_string)):
                        raw_mime = blockstore.get_from_blockstore(
                            message.data_sha256)

                    # If it's not there, get it from the provider.
                    if raw_mime is None:
                        statsd_client.incr(
                            '{}.cache_misses'.format(statsd_string))

                        with statsd_client.timer(
                                '{}.provider_latency'.format(statsd_string)):
                            raw_mime = get_raw_from_provider(message)

                        msg_sha256 = sha256(raw_mime).hexdigest()

                        # Cache the raw message in the blockstore so that
                        # we don't have to fetch it over and over.

                        with statsd_client.timer(
                                '{}.blockstore_save_latency'.format(
                                    statsd_string)):
                            blockstore.save_to_blockstore(msg_sha256, raw_mime)
                    else:
                        # We found it in the blockstore --- report this.
                        statsd_client.incr(
                            '{}.cache_hits'.format(statsd_string))

                    # If we couldn't find it there, give up.
                    if raw_mime is None:
                        log.error("Don't have raw message for hash {}".format(
                            message.data_sha256))
                        return None

                    parsed = mime.from_string(raw_mime)
                    if parsed is not None:
                        for mimepart in parsed.walk(
                                with_self=parsed.content_type.is_singlepart()):
                            if mimepart.content_type.is_multipart():
                                continue  # TODO should we store relations?

                            data = mimepart.body

                            if isinstance(data, unicode):
                                data = data.encode('utf-8', 'strict')

                            if data is None:
                                continue

                            # Found it!
                            if sha256(data).hexdigest() == self.data_sha256:
                                log.info('Found subpart with hash {}'.format(
                                    self.data_sha256))

                                with statsd_client.timer(
                                        '{}.blockstore_save_latency'.format(
                                            statsd_string)):
                                    blockstore.save_to_blockstore(
                                        self.data_sha256, data)
                                    return data
                    log.error(
                        "Couldn't find the attachment in the raw message",
                        message_id=message.id)

            log.error('No data returned!')
            return value

        assert self.data_sha256 == sha256(value).hexdigest(), \
            "Returned data doesn't match stored hash!"
        return value