Example #1
    def _ProcessPostRequest(self):
        """Triggers backup mapper jobs and returns their ids."""
        backup = self.request.get('backup_name').strip()
        if not backup:
            return [('error', 'Unspecified Backup name.')]

        if BackupInformation.name_exists(backup):
            return [('error', 'Backup "%s" already exists.' % backup)]

        kinds = self.request.get('kind', allow_multiple=True)
        queue = self.request.get('queue')
        job_name = 'datastore_backup_%s_%%(kind)s' % re.sub(
            r'[^\w]', '_', backup)
        job_operation = None
        try:
            job_operation = utils.StartOperation('Backup: %s' % backup)
            backup_info = BackupInformation(parent=job_operation)
            backup_info.name = backup
            backup_info.kinds = kinds
            backup_info.put(config=datastore_rpc.Configuration(
                force_writes=True))
            mapreduce_params = {
                'done_callback_handler': self.BACKUP_COMPLETE_HANDLER,
                'backup_info_pk': str(backup_info.key()),
                'force_ops_writes': True
            }
            if len(kinds) <= 10:
                return [('job', job) for job in _run_map_jobs(
                    job_operation.key(), backup_info.key(), kinds, job_name,
                    self.BACKUP_HANDLER, self.INPUT_READER, self.OUTPUT_WRITER,
                    self._GetBasicMapperParams(), mapreduce_params, queue)]
            else:
                retry_options = taskqueue.TaskRetryOptions(task_retry_limit=1)
                return [
                    ('task',
                     deferred.defer(_run_map_jobs,
                                    job_operation.key(),
                                    backup_info.key(),
                                    kinds,
                                    job_name,
                                    self.BACKUP_HANDLER,
                                    self.INPUT_READER,
                                    self.OUTPUT_WRITER,
                                    self._GetBasicMapperParams(),
                                    mapreduce_params,
                                    queue,
                                    _queue=queue,
                                    _url=utils.ConfigDefaults.DEFERRED_PATH,
                                    _retry_options=retry_options).name)
                ]
        except Exception:
            logging.exception(
                'Failed to start a datastore backup job[s] for "%s".',
                job_name)
            if job_operation:
                job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED
                job_operation.put(config=datastore_rpc.Configuration(
                    force_writes=True))
            raise
Example #2
def create_datastore_write_config(mapreduce_spec):
  """Creates datastore config to use in write operations.

  Args:
    mapreduce_spec: current mapreduce specification as MapreduceSpec.

  Returns:
    an instance of datastore_rpc.Configuration to use for all write
    operations in the mapreduce.
  """
  force_writes = parse_bool(mapreduce_spec.params.get("force_writes", "false"))
  if force_writes:
    return datastore_rpc.Configuration(force_writes=force_writes)
  else:
    return datastore_rpc.Configuration()
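A minimal usage sketch for the helper above (not part of the original examples): _FakeSpec is a hypothetical stand-in that only mimics the params attribute create_datastore_write_config reads, and parse_bool is assumed to be importable alongside the helper.

class _FakeSpec(object):
  """Hypothetical stand-in exposing only the .params mapping."""
  def __init__(self, params):
    self.params = params

# With force_writes set to "true", the returned configuration carries
# force_writes=True and can be passed to any datastore write call, e.g.
# entity.put(config=write_config).
write_config = create_datastore_write_config(_FakeSpec({'force_writes': 'true'}))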
Example #3
 def testCallHooks_Pending_CallbackAddsMore(self):
     self.SetUpCallHooks()
     conf = datastore_rpc.Configuration(on_completion=self.NastyCallback)
     key = model.Key(flat=['Expando', 1])
     self.conn.async_get(conf, [key])
     self.conn.wait_for_all_pending_rpcs()
     self.assertEqual(self.conn.get_pending_rpcs(), set())
Example #4
def finalize_backup_info(backup_info_key):
    backup_info = BackupInformation.get(backup_info_key)
    backup_info.complete_time = datetime.datetime.now()

    backup_info.blob_files = drop_empty_files(backup_info.blob_files)
    backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
    logging.info('Backup %s completed', backup_info.name)
Example #5
    def _apply_jobs(self):
        """Apply all jobs in current key range."""
        while True:
            # Creates an unapplied query and fetches unapplied jobs in the result
            # range.
            unapplied_query = self._current_key_range.make_ascending_datastore_query(
                kind=None, keys_only=True)
            unapplied_query[
                ConsistentKeyReader.UNAPPLIED_LOG_FILTER] = self.start_time_us
            unapplied_jobs = unapplied_query.Get(limit=self._batch_size)

            if not unapplied_jobs:
                return

            # There were some unapplied jobs. Roll them forward.
            keys_to_apply = []
            for key in unapplied_jobs:
                # To apply the entity group we need to read something from it.
                # We use dummy kind and id because we don't actually need any data.
                path = key.to_path() + [
                    ConsistentKeyReader.DUMMY_KIND,
                    ConsistentKeyReader.DUMMY_ID
                ]
                keys_to_apply.append(
                    db.Key.from_path(_app=key.app(),
                                     namespace=key.namespace(),
                                     *path))
            db.get(keys_to_apply,
                   config=datastore_rpc.Configuration(
                       deadline=10,
                       read_policy=datastore_rpc.Configuration.
                       APPLY_ALL_JOBS_CONSISTENCY))
Example #6
    def _apply_jobs(self, k_range):
        """Apply all jobs in the given key range."""
        while True:
            unapplied_query = k_range.make_ascending_datastore_query(
                kind=None, keys_only=True)
            unapplied_query[
                ConsistentKeyReader.UNAPPLIED_LOG_FILTER] = self.start_time_us
            unapplied_jobs = unapplied_query.Get(limit=self._batch_size)

            if not unapplied_jobs:
                return

            keys_to_apply = []
            for key in unapplied_jobs:
                path = key.to_path() + [
                    ConsistentKeyReader.DUMMY_KIND,
                    ConsistentKeyReader.DUMMY_ID
                ]
                keys_to_apply.append(
                    db.Key.from_path(_app=key.app(),
                                     namespace=key.namespace(),
                                     *path))
            db.get(keys_to_apply,
                   config=datastore_rpc.Configuration(
                       deadline=10,
                       read_policy=datastore_rpc.Configuration.
                       APPLY_ALL_JOBS_CONSISTENCY))
Example #7
 def _apply_jobs(self, unapplied_jobs):
   """Apply all jobs implied by the given keys."""
   # There were some unapplied jobs. Roll them forward.
   keys_to_apply = []
   for key in unapplied_jobs:
     # To apply the entity group we need to read something from it.
     # We use dummy kind and id because we don't actually need any data.
     path = key.to_path() + [ConsistentKeyReader.DUMMY_KIND,
                             ConsistentKeyReader.DUMMY_ID]
     keys_to_apply.append(
         db.Key.from_path(_app=key.app(), namespace=key.namespace(), *path))
   db.get(keys_to_apply, config=datastore_rpc.Configuration(
       deadline=10,
       read_policy=datastore_rpc.Configuration.APPLY_ALL_JOBS_CONSISTENCY))
Example #8
    def testRunWithRpcs(self):
        record = []

        def foo(arg):
            record.append(arg)

        eventloop.queue_call(0.1, foo, 42)
        config = datastore_rpc.Configuration(on_completion=foo)
        rpc = self.conn.async_get(config, [])
        self.assertEqual(len(rpc.rpcs), 1)
        eventloop.queue_rpc(rpc)
        eventloop.run()
        self.assertEqual(record, [rpc.rpcs[0], 42])
        self.assertEqual(rpc.state, 2)  # TODO: Use apiproxy_rpc.RPC.FINISHING.
Example #9
 def testRunWithRpcs(self):
   apiproxy_stub_map.apiproxy = apiproxy_stub_map.APIProxyStubMap()
   stub = datastore_file_stub.DatastoreFileStub('_', None)
   apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', stub)
   record = []
   def foo(arg):
     record.append(arg)
   eventloop.queue_call(0.1, foo, 42)
   conn = datastore_rpc.Connection()
   config = datastore_rpc.Configuration(on_completion=foo)
   rpc = conn.async_get(config, [])
   self.assertEqual(len(rpc.rpcs), 1)
   eventloop.queue_rpc(rpc)
   eventloop.run()
   self.assertEqual(record, [rpc.rpcs[0], 42])
   self.assertEqual(rpc.state, 2)  # TODO: Use apiproxy_rpc.RPC.FINISHING.
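The two tests above drive the on_completion hook through the ndb event loop. Outside a test, the same hook can be exercised by simply waiting on the RPC returned by async_get; the sketch below is an illustration under assumptions (the stub setup mirrors Example #9, and log_done is a hypothetical callback).

def log_done(rpc):
  # Called with each underlying UserRPC once it completes.
  logging.info('get RPC finished: %r', rpc)

conn = datastore_rpc.Connection()
config = datastore_rpc.Configuration(on_completion=log_done)
rpc = conn.async_get(config, [])
rpc.get_result()  # blocks until completion; log_done should run for each RPC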
Example #10
def _create_and_save_state(job_config, mapreduce_spec):
    """Save mapreduce state to datastore.

  Save state to datastore so that UI can see it immediately.

  Args:
    job_config: map_job.JobConfig.
    mapreduce_spec: model.MapreduceSpec,
  """
    state = model.MapreduceState.create_new(job_config.job_id)
    state.mapreduce_spec = mapreduce_spec
    state.active = True
    state.active_shards = 0
    state.app_id = job_config._app
    config = datastore_rpc.Configuration(force_writes=job_config._force_writes)
    state.put(config=config)
Example #11
 def testContext_MultiRpc(self):
   # This test really tests the proper handling of MultiRpc by
   # queue_rpc() in eventloop.py.  It's easier to test from here, and
   # gives more assurance that it works.
   config = datastore_rpc.Configuration(max_get_keys=3, max_put_entities=3)
   self.ctx._conn = model.make_connection(config, default_model=model.Expando)
   @tasklets.tasklet
   def foo():
     ents = [model.Expando() for _ in range(10)]
     futs = [self.ctx.put(ent) for ent in ents]
     keys = yield futs
     futs = [self.ctx.get(key) for key in keys]
     ents2 = yield futs
     self.assertEqual(ents2, ents)
     raise tasklets.Return(keys)
   keys = foo().get_result()
   self.assertEqual(len(keys), 10)
Example #12
def _run_map_jobs(job_operation_key, backup_info_key, kinds, job_name,
                  backup_handler, input_reader, output_writer, mapper_params,
                  mapreduce_params, queue):
    backup_info = BackupInformation.get(backup_info_key)
    if not backup_info:
        return []
    jobs = utils.RunMapForKinds(job_operation_key,
                                kinds,
                                job_name,
                                backup_handler,
                                input_reader,
                                output_writer,
                                mapper_params,
                                mapreduce_params,
                                queue_name=queue)
    backup_info.active_jobs = jobs
    backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
    return jobs
Example #13
def BackupCompleteHandler(operation, job_id, mapreduce_state):
    """Updates BackupInformation record for a completed mapper job."""
    mapreduce_spec = mapreduce_state.mapreduce_spec
    backup_info = BackupInformation.get(
        mapreduce_spec.params['backup_info_pk'])
    if backup_info:
        filenames = mapreduce_state.writer_state['filenames']

        backup_info.blob_files = list(set(backup_info.blob_files + filenames))
        if job_id in backup_info.active_jobs:
            backup_info.active_jobs.remove(job_id)
            backup_info.completed_jobs = list(
                set(backup_info.completed_jobs + [job_id]))
        backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
        if operation.status == utils.DatastoreAdminOperation.STATUS_COMPLETED:
            deferred.defer(finalize_backup_info,
                           backup_info.key(),
                           _url=utils.ConfigDefaults.DEFERRED_PATH)
    else:
        logging.warn('BackupInfo was not found for %s',
                     mapreduce_spec.params['backup_info_pk'])
Example #14
def BackupCompleteHandler(operation, job_id, mapreduce_state):
  """Updates BackupInformation record for a completed mapper job."""
  mapreduce_spec = mapreduce_state.mapreduce_spec
  backup_info = BackupInformation.get(mapreduce_spec.params['backup_info_pk'])
  if backup_info:
    filenames = mapreduce_state.writer_state['filenames']

    if backup_info.filesystem == files.BLOBSTORE_FILESYSTEM:
      filenames = drop_empty_files(filenames)
    backup_info.blob_files = list(set(backup_info.blob_files + filenames))
    if job_id in backup_info.active_jobs:
      backup_info.active_jobs.remove(job_id)
      backup_info.completed_jobs = list(
          set(backup_info.completed_jobs + [job_id]))
    backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
    if operation.status == utils.DatastoreAdminOperation.STATUS_COMPLETED:
      finalize_backup_info(backup_info)
  else:
    logging.warn('BackupInfo was not found for %s',
                 mapreduce_spec.params['backup_info_pk'])
Example #15
def populate_pathological(start, end):
    """Populates both PhotoA and PhotoB with entities that produce worst case
  runtime when zigzaging between:
    coloration = Photo.COLORATION_BLACK_AND_WHITE
  and
    aspect = Photo.ASPECT_PANORAMIC

  This function is designed to be immutable.

  Args:
    count: The number of entities to create
  """
    entities = []
    for i in xrange(start, end):
        # Creating identical entities for both PhotoA and PhotoB

        if i % 2:  # perfectly interweave in key order
            coloration = COLORATION_BLACK_AND_WHITE
            aspect = random.choice(ASPECTS[:-1])
            key_name = 'path%dA' % (i / 2)
        else:
            coloration = random.choice(COLORATIONS[1:])
            aspect = ASPECT_PANORAMIC
            key_name = 'path%dB' % (i / 2)

        seed = random.random()
        photoA = randomly_populate_photo(PhotoA(key_name=key_name), seed)
        photoB = randomly_populate_photo(PhotoB(key_name=key_name), seed)

        photoA.coloration = coloration
        photoB.coloration = coloration
        photoA.aspect = aspect
        photoB.aspect = aspect

        entities.append(photoA)
        entities.append(photoB)

    # Putting all entities in parallel.
    config = datastore_rpc.Configuration(max_entity_groups_per_rpc=10)
    finish_rpcs([db.put_async(entities, config=config)])
Example #16
    def _ProcessPostRequest(self):
        """Triggers backup restore mapper jobs and returns their ids."""
        backup_id = self.request.get('backup_id')
        if not backup_id:
            return [('error', 'Unspecified Backup.')]

        backup = db.get(db.Key(backup_id))
        if not backup:
            return [('error', 'Invalid Backup id.')]

        queue = self.request.get('queue')
        job_name = 'datastore_backup_%s' % re.sub(r'[^\w]', '_', backup.name)
        job_operation = None
        try:
            job_operation = utils.StartOperation('Restore from backup: %s' %
                                                 backup.name)
            mapper_params = self._GetBasicMapperParams()
            mapper_params['files'] = backup.blob_files
            mapreduce_params = {
                'backup_name': backup.name,
                'force_ops_writes': True
            }
            return [('job',
                     utils.StartMap(job_operation.key(),
                                    job_name,
                                    self.BACKUP_RESTORE_HANDLER,
                                    self.INPUT_READER,
                                    None,
                                    mapper_params,
                                    mapreduce_params,
                                    queue_name=queue))]
        except Exception:
            logging.exception(
                'Failed to start a restore from backup job "%s".', job_name)
            if job_operation:
                job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED
                job_operation.put(config=datastore_rpc.Configuration(
                    force_writes=True))
            raise
Example #17
def _CreateDatastoreConfig():
    """Create datastore config for use during datastore admin operations."""
    return datastore_rpc.Configuration(force_writes=True)
Example #18
 def flush(self):
     """Flush all puts to datastore."""
     if self.puts:
         datastore_rpc.Connection(config=datastore_rpc.Configuration(
             deadline=60)).put(self.puts)
     self.puts = []
Example #19
def finalize_backup_info(backup_info):
  backup_info.complete_time = datetime.datetime.now()

  backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
  logging.info('Backup %s completed', backup_info.name)
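Taken together, the examples on this page cover a handful of Configuration options: force_writes, deadline, read_policy, on_completion, max_get_keys, max_put_entities and max_entity_groups_per_rpc. As a closing recap, here is a small consolidated sketch (not from any single example; keys and entities are hypothetical) of the two read/write shapes that recur above.

# Read with a longer deadline and APPLY_ALL_JOBS_CONSISTENCY, which rolls
# forward any unapplied entity-group jobs before returning results.
read_config = datastore_rpc.Configuration(
    deadline=10,
    read_policy=datastore_rpc.Configuration.APPLY_ALL_JOBS_CONSISTENCY)
entities = db.get(keys, config=read_config)

# Write with force_writes=True so the put succeeds even while datastore
# writes are otherwise disabled for the application.
write_config = datastore_rpc.Configuration(force_writes=True)
db.put(entities, config=write_config)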