Example #1
0
def trigger_replication(auth_db_rev=None, transactional=False):
  """Schedules a task queue task that pushes AuthDB to replicas.

  Args:
    auth_db_rev: revision to push. If the current revision is different by the
        time the task executes, the task is skipped. Defaults to the revision
        observed at the moment 'trigger_replication' is called.
    transactional: if True, the task is enqueued transactionally.

  Raises:
    ReplicationTriggerError if the task could not be enqueued.
  """
  rev = auth_db_rev
  if rev is None:
    rev = model.get_replication_state().auth_db_rev

  # The task is enqueued explicitly (not via the 'deferred' module) so that it
  # is routed through the WSGI app set up in backend/handlers.py, which has the
  # global state correctly configured (ereporter config, etc). The 'deferred'
  # module uses its own WSGI app. A task posted to
  # '/internal/taskqueue/replication/<rev>' translates to a call to
  # 'update_replicas_task(<rev>)'.
  task_url = '/internal/taskqueue/replication/%d' % rev
  enqueued = utils.enqueue_task(
      url=task_url,
      queue_name='replication',
      transactional=transactional)
  if not enqueued:
    raise ReplicationTriggerError()
Example #2
0
def trigger_replication(auth_db_rev=None, transactional=False):
    """Enqueues the task that pushes a given AuthDB revision to replicas.

    Args:
      auth_db_rev: revision to push. The task is skipped if, when it runs, the
          current revision is a different one. When omitted, the revision
          current at the time of this call is used.
      transactional: if True, enqueue the task transactionally.

    Raises:
      ReplicationTriggerError if enqueuing the task fails.
    """
    if auth_db_rev is None:
        rev_to_push = model.get_replication_state().auth_db_rev
    else:
        rev_to_push = auth_db_rev

    # An explicit task queue call is used instead of the 'deferred' module:
    # this routes the task through the WSGI app from backend/handlers.py that
    # has global state correctly configured (ereporter config, etc), while
    # 'deferred' uses its own WSGI app. The task URL
    # '/internal/taskqueue/replication/<rev>' translates to a call to
    # 'update_replicas_task(<rev>)'.
    ok = utils.enqueue_task(
        url='/internal/taskqueue/replication/%d' % rev_to_push,
        queue_name='replication',
        transactional=transactional)
    if ok:
        return
    raise ReplicationTriggerError()
Example #3
0
def refresh_replicated_authdb():
  """Re-triggers AuthDB replication if the last revision is older than 24h.

  Intended to be called periodically as a cron job. When the last AuthDB
  revision was produced more than 24h ago, the AuthDB revision number is bumped
  and the replication mechanism is triggered; the actual contents of AuthDB are
  not changed.

  This matters for low traffic servers: it keeps the AuthDB replication
  configuration from rotting and keeps the signature of the exported AuthDB
  blob relatively fresh.

  On busy servers this is effectively a noop, since replication is triggered
  naturally as part of normal AuthDB updates and AuthDB is never stale.
  """
  max_age = datetime.timedelta(hours=24)

  observed = model.get_replication_state()
  if not observed:
    logging.warning('AuthDB is not initialized yet')
    return

  age = utils.utcnow() - observed.modified_ts
  if age < max_age:
    logging.info('Replicated AuthDB is fresh: %s < %s', age, max_age)
    return
  logging.warning('Refreshing replicated AuthDB: %s > %s', age, max_age)

  @ndb.transactional
  def bump_if_unchanged():
    # Re-read the state inside the transaction and only trigger replication if
    # the revision is still the one observed above.
    latest = model.get_replication_state()
    if latest.auth_db_rev != observed.auth_db_rev:
      return
    model.replicate_auth_db()

  bump_if_unchanged()
Example #4
0
 def get(self):
   """Responds with replica states and the current AuthDB revision info."""
   # All AuthReplicaState entities under the replicas root, ordered by key id.
   query = replication.AuthReplicaState.query(
       ancestor=replication.replicas_root_key())
   replicas = sorted(query, key=lambda entity: entity.key.id())
   state = model.get_replication_state()
   payload = {
     'services': [
       entity.to_serializable_dict(with_id_as='app_id')
       for entity in replicas
     ],
     'auth_code_version': version.__version__,
     'auth_db_rev': {
       'primary_id': state.primary_id,
       'rev': state.auth_db_rev,
       'ts': utils.datetime_to_timestamp(state.modified_ts),
     },
     'now': utils.datetime_to_timestamp(utils.utcnow()),
   }
   self.send_response(payload)
 def get(self):
     """Sends the list of known replicas plus the current AuthDB revision."""
     # Replica state entities live under a single root key; order them by id.
     replica_states = list(
         replication.AuthReplicaState.query(
             ancestor=replication.replicas_root_key()))
     replica_states.sort(key=lambda state: state.key.id())
     current = model.get_replication_state()

     # Pieces of the response are computed in the same order as they appear in
     # the resulting dict.
     services = [
         state.to_serializable_dict(with_id_as='app_id')
         for state in replica_states
     ]
     code_version = version.__version__
     rev_info = {
         'primary_id': current.primary_id,
         'rev': current.auth_db_rev,
         'ts': utils.datetime_to_timestamp(current.modified_ts),
     }
     now_ts = utils.datetime_to_timestamp(utils.utcnow())

     self.send_response({
         'services': services,
         'auth_code_version': code_version,
         'auth_db_rev': rev_info,
         'now': now_ts,
     })
Example #6
0
def update_replicas_task(auth_db_rev):
  """Packs AuthDB and pushes it to all out-of-date Replicas.

  Called via /internal/taskqueue/replication/<auth_db_rev> task (see
  backend/handlers.py) enqueued by 'trigger_replication'.

  Will check that AuthReplicationState.auth_db_rev is still equal to
  |auth_db_rev| before doing anything.

  Args:
    auth_db_rev: AuthDB revision this task was enqueued for.

  Returns:
    True if all replicas are up-to-date now, False if task should be retried.
  """
  # Check that the task is not stale before doing any heavy lifting.
  replication_state = model.get_replication_state()
  if replication_state.auth_db_rev != auth_db_rev:
    logging.info(
        'Skipping stale task, current rev is %d, task was enqueued for rev %d)',
        replication_state.auth_db_rev, auth_db_rev)
    return True

  # Grab last known replicas state and push only to replicas that are behind.
  stale_replicas = [
    entity for entity in AuthReplicaState.query(ancestor=replicas_root_key())
    if entity.auth_db_rev is None or entity.auth_db_rev < auth_db_rev
  ]
  if not stale_replicas:
    logging.info('All replicas are up-to-date.')
    return True

  # Pack an entire AuthDB into a blob to be pushed to Replicas.
  auth_db_blob, key_name, sig = pack_auth_db()

  # Push the blob to all out-of-date replicas, in parallel. The dict maps each
  # pending push future to the replica entity it is pushing to.
  push_started_ts = utils.utcnow()
  futures = {
    push_to_replica(
        replica.replica_url, auth_db_blob, key_name, sig): replica
    for replica in stale_replicas
  }

  # Wait for all attempts to complete. 'retry' is a list of replicas whose
  # update should be attempted again; a non-empty list fails the task.
  retry = []
  while futures:
    completed = ndb.Future.wait_any(futures)
    replica = futures.pop(completed)

    exception = completed.get_exception()
    success = exception is None

    current_revision = None
    auth_code_version = None
    if success:
      current_revision, auth_code_version = completed.get_result()

    if not success:
      logging.error(
          'Error when pushing update to replica: %s (%s).\nReplica id is %s.',
          exception.__class__.__name__, exception, replica.key.id())
      # Give up only on explicit fatal error, retry on any other exception.
      if not isinstance(exception, FatalReplicaUpdateError):
        retry.append(replica)

    # Eagerly update known replica state in local DB as soon as response is
    # received. That way if 'update_replicas_task' is killed midway, at least
    # the state of some replicas will be updated. Note that this transaction is
    # modifying a single entity group (replicas_root_key()) and thus can't be
    # called very often (due to 1 QPS limit on entity group updates).
    # If contention here becomes an issue, adding simple time.sleep(X) before
    # the transaction is totally fine (since 'update_replicas_task' is executed
    # on background task queue).
    try:
      if success:
        stored_rev = _update_state_on_success(
            key=replica.key,
            started_ts=push_started_ts,
            finished_ts=utils.utcnow(),
            current_revision=current_revision,
            auth_code_version=auth_code_version)
        logging.info(
            'Replica %s is updated to rev %d', replica.key.id(), stored_rev)
      else:
        stored_rev = _update_state_on_fail(
            key=replica.key,
            started_ts=push_started_ts,
            finished_ts=utils.utcnow(),
            old_auth_db_rev=replica.auth_db_rev,
            exc=exception)
        # If current push failed, but some other concurrent push (if any)
        # succeeded (and so replica is up-to-date), do not retry current push.
        # NOTE(review): the comparison is strict, so stored_rev == auth_db_rev
        # still results in a retry - confirm whether '>=' was intended.
        if stored_rev is None or stored_rev > auth_db_rev:
          if replica in retry:
            retry.remove(replica)
    except (
        datastore_errors.InternalError,
        datastore_errors.Timeout,
        datastore_errors.TransactionFailedError) as exc:
      logging.exception(
          'Datastore error when updating replica state: %s.\n'
          'Replica id is %s.', exc.__class__.__name__, replica.key.id())
      # Should retry the task because of this. 'retry' is a list (it has no
      # 'add' method), and the replica may already be in it if the push itself
      # failed with a retryable error, so append only when it is missing.
      if replica not in retry:
        retry.append(replica)

  # Retry the task if at least one replica reported a retryable error.
  return not retry
Example #7
0
def update_replicas_task(auth_db_rev):
    """Packs AuthDB and pushes it to all out-of-date Replicas.

    Called via /internal/taskqueue/replication/<auth_db_rev> task (see
    backend/handlers.py) enqueued by 'trigger_replication'.

    Will check that AuthReplicationState.auth_db_rev is still equal to
    |auth_db_rev| before doing anything.

    Besides pushing to replicas, stores the packed AuthDB snapshot in the
    datastore and publishes a PubSub notification about it.

    Args:
      auth_db_rev: AuthDB revision this task was enqueued for.

    Returns:
      True if all replicas are up-to-date now, False if task should be retried.
    """
    # Check that the task is not stale before doing any heavy lifting.
    replication_state = model.get_replication_state()
    if replication_state.auth_db_rev != auth_db_rev:
        logging.info(
            'Skipping stale task, current rev is %d, task was enqueued for rev %d)',
            replication_state.auth_db_rev, auth_db_rev)
        return True

    # Pack an entire AuthDB into a blob to be stored in the datastore and
    # pushed to Replicas.
    replication_state, auth_db_blob = pack_auth_db()

    # Put the blob into datastore. Also updates pointer to the latest stored blob.
    store_auth_db_snapshot(replication_state, auth_db_blob)

    # Notify PubSub subscribers that new snapshot is available.
    pubsub.publish_authdb_change(replication_state)

    # Grab last known replicas state and push only to replicas that are behind.
    stale_replicas = [
        entity
        for entity in AuthReplicaState.query(ancestor=replicas_root_key())
        if entity.auth_db_rev is None or entity.auth_db_rev < auth_db_rev
    ]
    if not stale_replicas:
        logging.info('All replicas are up-to-date.')
        return True

    # Sign the blob, replicas check the signature.
    key_name, sig = sign_auth_db_blob(auth_db_blob)

    # Push the blob to all out-of-date replicas, in parallel. The dict maps
    # each pending push future to the replica entity it is pushing to.
    push_started_ts = utils.utcnow()
    futures = {
        push_to_replica(replica.replica_url, auth_db_blob, key_name, sig):
        replica
        for replica in stale_replicas
    }

    # Wait for all attempts to complete. 'retry' is a list of replicas whose
    # update should be attempted again; a non-empty list fails the task.
    retry = []
    while futures:
        completed = ndb.Future.wait_any(futures)
        replica = futures.pop(completed)

        exception = completed.get_exception()
        success = exception is None

        current_revision = None
        auth_code_version = None
        if success:
            current_revision, auth_code_version = completed.get_result()

        if not success:
            logging.error(
                'Error when pushing update to replica: %s (%s).\nReplica id is %s.',
                exception.__class__.__name__, exception, replica.key.id())
            # Give up only on explicit fatal error, retry on any other exception.
            if not isinstance(exception, FatalReplicaUpdateError):
                retry.append(replica)

        # Eagerly update known replica state in local DB as soon as response is
        # received. That way if 'update_replicas_task' is killed midway, at least
        # the state of some replicas will be updated. Note that this transaction is
        # modifying a single entity group (replicas_root_key()) and thus can't be
        # called very often (due to 1 QPS limit on entity group updates).
        # If contention here becomes an issue, adding simple time.sleep(X) before
        # the transaction is totally fine (since 'update_replicas_task' is executed
        # on background task queue).
        try:
            if success:
                stored_rev = _update_state_on_success(
                    key=replica.key,
                    started_ts=push_started_ts,
                    finished_ts=utils.utcnow(),
                    current_revision=current_revision,
                    auth_code_version=auth_code_version)
                logging.info('Replica %s is updated to rev %d',
                             replica.key.id(), stored_rev)
            else:
                stored_rev = _update_state_on_fail(
                    key=replica.key,
                    started_ts=push_started_ts,
                    finished_ts=utils.utcnow(),
                    old_auth_db_rev=replica.auth_db_rev,
                    exc=exception)
                # If current push failed, but some other concurrent push (if any)
                # succeeded (and so replica is up-to-date), do not retry current push.
                # NOTE(review): the comparison is strict, so
                # stored_rev == auth_db_rev still results in a retry - confirm
                # whether '>=' was intended.
                if stored_rev is None or stored_rev > auth_db_rev:
                    if replica in retry:
                        retry.remove(replica)
        except (datastore_errors.InternalError, datastore_errors.Timeout,
                datastore_errors.TransactionFailedError) as exc:
            logging.exception(
                'Datastore error when updating replica state: %s.\n'
                'Replica id is %s.', exc.__class__.__name__, replica.key.id())
            # Should retry the task because of this. 'retry' is a list (it has
            # no 'add' method), and the replica may already be in it if the
            # push itself failed with a retryable error, so append only when
            # it is missing.
            if replica not in retry:
                retry.append(replica)

    # Retry the task if at least one replica reported a retryable error.
    return not retry
Example #8
0
 def trigger():
   # Re-read the replication state and replicate only if the revision still
   # matches the one captured in 'state' by the enclosing scope.
   latest = model.get_replication_state()
   if latest.auth_db_rev != state.auth_db_rev:
     return
   model.replicate_auth_db()