Ejemplo n.º 1
0
  def setUp(self):
    """Create a test operation and resolve the input reader's dotted path."""
    super(RunMapForKindsTest, self).setUp()

    self.operation = utils.StartOperation('test operation')
    self.reader_class = input_readers.DatastoreKeyInputReader
    # Fully-qualified "module.ClassName" spec string for the reader.
    self.reader_class_spec = '.'.join(
        [self.reader_class.__module__, self.reader_class.__name__])
Ejemplo n.º 2
0
    def post(self):
        """Handler for post requests to datastore_admin/delete.do.

    Jobs are executed and user is redirected to the get handler.
    """
        namespace = self.request.get('namespace')
        kinds = self.request.get('kind', allow_multiple=True)
        (namespace_str, kinds_str) = utils.GetPrintableStrs(namespace, kinds)
        token = self.request.get('xsrf_token')

        jobs = []
        # Only start delete jobs for requests carrying a valid XSRF token.
        if utils.ValidateXsrfToken(token, XSRF_ACTION):
            try:
                op = utils.StartOperation('Deleting %s%s' %
                                          (kinds_str, namespace_str))
                name_template = 'Delete all %(kind)s objects%(namespace)s'
                jobs = utils.RunMapForKinds(op.key(), kinds, name_template,
                                            self.DELETE_HANDLER,
                                            self.INPUT_READER, None, {})
                error = ''

            # BUG FIX: Python 2-only "except Exception, e" syntax replaced
            # with the portable "as" form used elsewhere in this file.
            except Exception as e:
                error = self._HandleException(e)

            # Report each started job id back to the redirect target.
            parameters = [('job', job) for job in jobs]
            if error:
                parameters.append(('error', error))
Ejemplo n.º 3
0
  def testSuccessfulJob(self):
    """Verify that with appropriate request parameters form is constructed."""
    # Seed the datastore with one entity so the map job has work to do.
    TestEntity().put()
    admin_operation = utils.StartOperation("Test Operation")
    self.mapreduce_id = utils.StartMap(
        admin_operation,
        'test_job',
        '__main__.foo',
        ('google.appengine.ext.mapreduce.input_readers.'
         'DatastoreKeyInputReader'),
        {'entity_kind': 'TestEntity'})
    testutil.execute_all_tasks(self.taskqueue)
    self.assertObjectsExist()

    # Drain the task queue so the mapreduce runs to completion.
    testutil.execute_until_empty(self.taskqueue)

    # Simulate the done-callback POST that mapreduce issues on completion.
    self.handler.request.headers['Mapreduce-Id'] = self.mapreduce_id
    self.handler.post()

    # The done handler must have removed the mapreduce bookkeeping state.
    self.assertIsNone(model.MapreduceState.get_by_key_name(self.mapreduce_id))
    self.assertListEqual(
        [],
        model.ShardState.find_by_mapreduce_id(self.mapreduce_id))
    # Re-fetch the operation entity and check its job accounting was updated.
    admin_operation = admin_operation.get(admin_operation.key())
    self.assertEqual(0, admin_operation.active_jobs)
    self.assertEqual(1, admin_operation.completed_jobs)
    self.assertEqual('Completed', admin_operation.status)
Ejemplo n.º 4
0
    def _ProcessPostRequest(self):
        """Triggers backup mapper jobs and returns their ids.

        Returns:
          A list of (name, value) request parameters for the redirect:
          ('job', id) / ('task', name) entries on success, or a single
          ('error', message) entry when validation fails.
        """
        backup = self.request.get('backup_name').strip()
        if not backup:
            return [('error', 'Unspecified Backup name.')]

        if BackupInformation.name_exists(backup):
            return [('error', 'Backup "%s" already exists.' % backup)]

        kinds = self.request.get('kind', allow_multiple=True)
        queue = self.request.get('queue')
        job_name = 'datastore_backup_%s_%%(kind)s' % re.sub(
            r'[^\w]', '_', backup)
        # BUG FIX: job_operation must be bound before the try block; the
        # except handler below references it, and without this initialization
        # a failure inside utils.StartOperation would raise NameError instead
        # of the original exception.
        job_operation = None
        try:
            job_operation = utils.StartOperation('Backup: %s' % backup)
            backup_info = BackupInformation(parent=job_operation)
            backup_info.name = backup
            backup_info.kinds = kinds
            backup_info.put(config=datastore_rpc.Configuration(
                force_writes=True))
            mapreduce_params = {
                'done_callback_handler': self.BACKUP_COMPLETE_HANDLER,
                'backup_info_pk': str(backup_info.key()),
                'force_ops_writes': True
            }
            # Few kinds: start the map jobs inline and return their ids.
            if len(kinds) <= 10:
                return [('job', job) for job in _run_map_jobs(
                    job_operation.key(), backup_info.key(), kinds, job_name,
                    self.BACKUP_HANDLER, self.INPUT_READER, self.OUTPUT_WRITER,
                    self._GetBasicMapperParams(), mapreduce_params, queue)]
            else:
                # Many kinds: defer job creation to a task so this request
                # does not time out; a single retry is allowed.
                retry_options = taskqueue.TaskRetryOptions(task_retry_limit=1)
                return [
                    ('task',
                     deferred.defer(_run_map_jobs,
                                    job_operation.key(),
                                    backup_info.key(),
                                    kinds,
                                    job_name,
                                    self.BACKUP_HANDLER,
                                    self.INPUT_READER,
                                    self.OUTPUT_WRITER,
                                    self._GetBasicMapperParams(),
                                    mapreduce_params,
                                    queue,
                                    _queue=queue,
                                    _url=utils.ConfigDefaults.DEFERRED_PATH,
                                    _retry_options=retry_options).name)
                ]
        except Exception:
            logging.exception(
                'Failed to start a datastore backup job[s] for "%s".',
                job_name)
            # Mark the operation failed so it is not left dangling as active.
            if job_operation:
                job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED
                job_operation.put(config=datastore_rpc.Configuration(
                    force_writes=True))
            raise
Ejemplo n.º 5
0
  def _ProcessPostRequest(self):
    """Triggers backup restore mapper jobs and returns their ids."""
    backup_id = self.request.get('backup_id')
    if not backup_id:
      return [('error', 'Unspecified Backup.')]

    backup = db.get(db.Key(backup_id))
    if not backup:
      return [('error', 'Invalid Backup id.')]

    queue = self.request.get('queue')
    job_name = 'datastore_backup_restore_%s' % re.sub(r'[^\w]', '_',
                                                      backup.name)
    job_operation = None
    kinds = set(self.request.get_all('kind'))
    if not (backup.blob_files or kinds):
      return [('error', 'No kinds were selected')]
    # Requested kinds must be a subset of what the backup actually contains.
    backup_kinds = set(backup.kinds)
    difference = kinds.difference(backup_kinds)
    if difference:
      return [('error', 'Backup does not have kind[s] %s' %
               ', '.join(difference))]
    # An empty list means "restore everything"; only pass an explicit filter
    # when the user selected a strict subset of the backup's kinds.
    kinds = list(kinds) if len(backup_kinds) != len(kinds) else []
    try:
      operation_name = 'Restoring %s from backup: %s' % (
          ', '.join(kinds) if kinds else 'all', backup.name)
      job_operation = utils.StartOperation(operation_name)
      mapper_params = self._GetBasicMapperParams()
      mapper_params['files'] = get_backup_files(backup, kinds)
      mapper_params['kind_filter'] = kinds
      mapper_params['original_app'] = backup.original_app
      mapreduce_params = {
          'backup_name': backup.name,
          'force_ops_writes': True
      }
      # Scale shards with the number of backup files, clamped to the
      # configured [MAPREDUCE_MIN_SHARDS, MAPREDUCE_MAX_SHARDS] range.
      shard_count = min(max(utils.MAPREDUCE_MIN_SHARDS,
                            len(mapper_params['files'])),
                        utils.MAPREDUCE_MAX_SHARDS)
      job = utils.StartMap(job_operation.key(), job_name,
                           self.BACKUP_RESTORE_HANDLER, self.INPUT_READER, None,
                           mapper_params, mapreduce_params, queue_name=queue,
                           shard_count=shard_count)
      return [('job', job)]
    except Exception:
      logging.exception('Failed to start a restore from backup job "%s".',
                        job_name)
      # Mark the operation failed so it is not left dangling as active.
      if job_operation:
        job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED
        job_operation.put(force_writes=True)
      raise
    finally:

      # NOTE(review): this runs on BOTH success and failure paths (finally),
      # so a failed restore start can still delete the backup metadata when
      # the flag is set — confirm that is intended.
      if self.request.get('delete_backup_after_restore', '').lower() == 'true':
        delete_backup_info(backup, delete_files=False)
Ejemplo n.º 6
0
  def post(self):
    """Handler for post requests to datastore_admin/copy.do.

    Jobs are executed and user is redirected to the get handler.
    """
    namespace = self.request.get('namespace')
    kinds = self.request.get_all('kind')
    (namespace_str, kinds_str) = utils.GetPrintableStrs(namespace, kinds)
    token = self.request.get('xsrf_token')
    remote_url = self.request.get('remote_url')
    extra_header = self.request.get('extra_header')

    jobs = []
    if not remote_url:
      parameters = [('error', 'Unspecified remote URL.')]
    elif not utils.ValidateXsrfToken(token, XSRF_ACTION):
      parameters = [('xsrf_error', '1')]
    else:
      try:
        # An extra header may be supplied as a single "Name: value" string;
        # split once on ':' to build the headers dict.
        if extra_header:
          extra_headers = dict([extra_header.split(':', 1)])
        else:
          extra_headers = None
        target_app = remote_api_put_stub.get_remote_appid(remote_url,
                                                          extra_headers)
        op = utils.StartOperation(
            'Copying %s%s to %s' % (kinds_str, namespace_str, target_app))
        name_template = 'Copy all %(kind)s objects%(namespace)s'
        mapper_params = {
            'target_app': target_app,
            'remote_url': remote_url,
            'extra_header': extra_header,
        }
        jobs = utils.RunMapForKinds(
            op.key(),
            kinds,
            name_template,
            self.COPY_HANDLER,
            self.INPUT_READER,
            None,
            mapper_params)

        error = ''

      # BUG FIX: Python 2-only "except Exception, e" syntax replaced with
      # the portable "as" form used elsewhere in this file.
      except Exception as e:
        logging.exception('Handling exception.')
        error = self._HandleException(e)

      # Report each started job id back to the redirect target.
      parameters = [('job', job) for job in jobs]
      if error:
        parameters.append(('error', error))
Ejemplo n.º 7
0
    def post(self):
        """Handler for post requests to datastore_admin/delete.do.

    Jobs are executed and user is redirected to the get handler.
    """
        namespace = self.request.get('namespace')
        kinds = self.request.get_all('kind')
        (namespace_str, kinds_str) = utils.GetPrintableStrs(namespace, kinds)
        token = self.request.get('xsrf_token')
        readonly_warning = self.request.get('readonly_warning')

        jobs = []

        # If the app is in read-only mode, the user must explicitly confirm
        # the delete before any jobs are started.
        if (readonly_warning
                == 'True') and not self.request.get('confirm_readonly_delete'):
            parameters = [('noconfirm_error', '1')]
        else:
            if utils.ValidateXsrfToken(token, XSRF_ACTION):
                try:
                    op = utils.StartOperation('Deleting %s%s' %
                                              (kinds_str, namespace_str))
                    name_template = 'Delete all %(kind)s objects%(namespace)s'
                    mapreduce_params = {'force_ops_writes': True}
                    queue = self.request.get('queue')
                    queue = queue or os.environ.get(
                        'HTTP_X_APPENGINE_QUEUENAME', 'default')
                    # Reserved queues start with '_' and cannot be used
                    # directly; fall back to the default queue.
                    # BUG FIX: startswith() instead of queue[0] avoids an
                    # IndexError when the queue name is an empty string.
                    if queue.startswith('_'):
                        queue = 'default'
                    jobs = utils.RunMapForKinds(
                        op.key(),
                        kinds,
                        name_template,
                        self.DELETE_HANDLER,
                        self.INPUT_READER,
                        None, {},
                        mapreduce_params=mapreduce_params,
                        queue_name=queue,
                        max_shard_count=utils.MAPREDUCE_DEFAULT_SHARDS)
                    error = ''

                except Exception as e:
                    error = self._HandleException(e)

                parameters = [('job', job) for job in jobs]
                if error:
                    parameters.append(('error', error))
            else:
                parameters = [('xsrf_error', '1')]

        query = urllib.parse.urlencode(parameters)

        self.redirect('%s/%s?%s' % (config.BASE_PATH, self.SUFFIX, query))
Ejemplo n.º 8
0
    def _ProcessPostRequest(self):
        """Triggers backup restore mapper jobs and returns their ids."""
        backup_id = self.request.get('backup_id')
        if not backup_id:
            return [('error', 'Unspecified Backup.')]

        backup = db.get(db.Key(backup_id))
        if not backup:
            return [('error', 'Invalid Backup id.')]

        queue = self.request.get('queue')
        job_name = 'datastore_backup_%s' % re.sub(r'[^\w]', '_', backup.name)
        job_operation = None
        try:
            job_operation = utils.StartOperation('Restore from backup: %s' %
                                                 backup.name)
            # Restore mapper reads directly from the backup's blob files.
            mapper_params = self._GetBasicMapperParams()
            mapper_params['files'] = backup.blob_files
            mapreduce_params = dict(backup_name=backup.name,
                                    force_ops_writes=True)
            job_id = utils.StartMap(job_operation.key(),
                                    job_name,
                                    self.BACKUP_RESTORE_HANDLER,
                                    self.INPUT_READER,
                                    None,
                                    mapper_params,
                                    mapreduce_params,
                                    queue_name=queue)
            return [('job', job_id)]
        except Exception:
            logging.exception(
                'Failed to start a restore from backup job "%s".', job_name)
            # Flag the operation as failed before re-raising.
            if job_operation:
                job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED
                write_config = datastore_rpc.Configuration(force_writes=True)
                job_operation.put(config=write_config)
            raise
Ejemplo n.º 9
0
    def post(self):
        """Handler for post requests to datastore_admin/delete.do.

    Jobs are executed and user is redirected to the get handler.
    """
        namespace = self.request.get('namespace')
        kinds = self.request.get_all('kind')
        (namespace_str, kinds_str) = utils.GetPrintableStrs(namespace, kinds)
        token = self.request.get('xsrf_token')

        jobs = []
        if utils.ValidateXsrfToken(token, XSRF_ACTION):
            try:
                op = utils.StartOperation('Deleting %s%s' %
                                          (kinds_str, namespace_str))
                name_template = 'Delete all %(kind)s objects%(namespace)s'
                queue = self.request.get('queue')
                queue = queue or os.environ.get('HTTP_X_APPENGINE_QUEUENAME',
                                                'default')
                # Reserved queues start with '_' and cannot be used directly;
                # fall back to the default queue.
                # BUG FIX: startswith() instead of queue[0] avoids an
                # IndexError when the queue name is an empty string.
                if queue.startswith('_'):
                    queue = 'default'
                jobs = utils.RunMapForKinds(
                    op.key(),
                    kinds,
                    name_template,
                    self.DELETE_HANDLER,
                    self.INPUT_READER,
                    None, {},
                    queue_name=queue,
                    max_shard_count=utils.MAPREDUCE_DEFAULT_SHARDS)
                error = ''

            # BUG FIX: Python 2-only "except Exception, e" syntax replaced
            # with the portable "as" form used elsewhere in this file.
            except Exception as e:
                error = self._HandleException(e)

            parameters = [('job', job) for job in jobs]
            if error:
                parameters.append(('error', error))
Ejemplo n.º 10
0
def _perform_backup(kinds, filesystem, gs_bucket_name, backup, queue,
                    mapper_params, max_jobs):
    """Triggers backup mapper jobs.

  Args:
    kinds: a sequence of kind names
    filesystem: files.BLOBSTORE_FILESYSTEM or files.GS_FILESYSTEM
        or None to default to blobstore
    gs_bucket_name: the GS file system bucket in which to store the backup
        when using the GS file system, and otherwise ignored
    backup: the backup name
    queue: the task queue for the backup task
    mapper_params: the mapper parameters
    max_jobs: if backup needs more jobs than this, defer them

  Returns:
    The job or task ids.

  Raises:
    BackupValidationException: On validation error.
    Exception: On other error.
  """
    BACKUP_COMPLETE_HANDLER = __name__ + '.BackupCompleteHandler'
    BACKUP_HANDLER = __name__ + '.BackupEntity.map'
    INPUT_READER = input_readers.__name__ + '.DatastoreEntityInputReader'
    OUTPUT_WRITER = output_writers.__name__ + '.FileRecordsOutputWriter'

    if not filesystem:
        filesystem = files.BLOBSTORE_FILESYSTEM
    if filesystem == files.GS_FILESYSTEM:
        # GS backups need an accessible bucket before any jobs are started.
        if not gs_bucket_name:
            raise BackupValidationException('Bucket name missing.')
        bucket_name = gs_bucket_name.split('/', 1)[0]
        validate_gs_bucket_name(bucket_name)
        if not is_accessible_bucket_name(bucket_name):
            raise BackupValidationException('Bucket "%s" is not accessible' %
                                            bucket_name)
    elif filesystem == files.BLOBSTORE_FILESYSTEM:
        pass
    else:
        raise BackupValidationException('Unknown filesystem "%s".' %
                                        filesystem)

    job_name = 'datastore_backup_%s_%%(kind)s' % re.sub(r'[^\w]', '_', backup)
    # BUG FIX: both names must be bound before the try block; the except
    # handler below references them, and without this initialization a
    # failure inside utils.StartOperation would raise NameError instead of
    # the original exception.
    job_operation = None
    backup_info = None
    try:
        job_operation = utils.StartOperation('Backup: %s' % backup)
        backup_info = BackupInformation(parent=job_operation)
        backup_info.filesystem = filesystem
        backup_info.name = backup
        backup_info.kinds = kinds
        backup_info.put(force_writes=True)
        mapreduce_params = {
            'done_callback_handler': BACKUP_COMPLETE_HANDLER,
            'backup_info_pk': str(backup_info.key()),
            'force_ops_writes': True,
        }
        # Copy before mutating so the caller's dict is left untouched.
        mapper_params = dict(mapper_params)
        mapper_params['filesystem'] = filesystem
        if filesystem == files.GS_FILESYSTEM:
            mapper_params['gs_bucket_name'] = gs_bucket_name
        if len(kinds) <= max_jobs:
            # Few kinds: start the map jobs inline and return their ids.
            return [('job', job) for job in _run_map_jobs(
                job_operation.key(), backup_info.key(), kinds, job_name,
                BACKUP_HANDLER, INPUT_READER, OUTPUT_WRITER, mapper_params,
                mapreduce_params, queue)]
        else:
            # Many kinds: defer job creation to a task so this request does
            # not time out; a single retry is allowed.
            retry_options = taskqueue.TaskRetryOptions(task_retry_limit=1)
            deferred_task = deferred.defer(
                _run_map_jobs,
                job_operation.key(),
                backup_info.key(),
                kinds,
                job_name,
                BACKUP_HANDLER,
                INPUT_READER,
                OUTPUT_WRITER,
                mapper_params,
                mapreduce_params,
                queue,
                _queue=queue,
                _url=utils.ConfigDefaults.DEFERRED_PATH,
                _retry_options=retry_options)
            return [('task', deferred_task.name)]
    except Exception:
        logging.exception(
            'Failed to start a datastore backup job[s] for "%s".', job_name)
        # Roll back partially-created state: drop the backup record and mark
        # the operation failed so it is not left dangling as active.
        if backup_info:
            delete_backup_info(backup_info)
        if job_operation:
            job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED
            job_operation.put(force_writes=True)
        raise