def _ProcessPostRequest(self):
  """Triggers backup mapper jobs and returns their ids."""
  backup = self.request.get('backup_name').strip()
  if not backup:
    return [('error', 'Unspecified Backup name.')]
  if BackupInformation.name_exists(backup):
    return [('error', 'Backup "%s" already exists.' % backup)]
  kinds = self.request.get('kind', allow_multiple=True)
  queue = self.request.get('queue')
  job_name = 'datastore_backup_%s_%%(kind)s' % re.sub(r'[^\w]', '_', backup)
  # Initialized before the try block so the except handler can check it safely.
  job_operation = None
  try:
    job_operation = utils.StartOperation('Backup: %s' % backup)
    backup_info = BackupInformation(parent=job_operation)
    backup_info.name = backup
    backup_info.kinds = kinds
    backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
    mapreduce_params = {
        'done_callback_handler': self.BACKUP_COMPLETE_HANDLER,
        'backup_info_pk': str(backup_info.key()),
        'force_ops_writes': True
    }
    if len(kinds) <= 10:
      return [('job', job) for job in _run_map_jobs(
          job_operation.key(), backup_info.key(), kinds, job_name,
          self.BACKUP_HANDLER, self.INPUT_READER, self.OUTPUT_WRITER,
          self._GetBasicMapperParams(), mapreduce_params, queue)]
    else:
      retry_options = taskqueue.TaskRetryOptions(task_retry_limit=1)
      return [('task', deferred.defer(
          _run_map_jobs, job_operation.key(), backup_info.key(), kinds,
          job_name, self.BACKUP_HANDLER, self.INPUT_READER,
          self.OUTPUT_WRITER, self._GetBasicMapperParams(), mapreduce_params,
          queue, _queue=queue, _url=utils.ConfigDefaults.DEFERRED_PATH,
          _retry_options=retry_options).name)]
  except Exception:
    logging.exception(
        'Failed to start a datastore backup job[s] for "%s".', job_name)
    if job_operation:
      job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED
      job_operation.put(config=datastore_rpc.Configuration(force_writes=True))
    raise
def create_datastore_write_config(mapreduce_spec):
  """Creates datastore config to use in write operations.

  Args:
    mapreduce_spec: current mapreduce specification as MapreduceSpec.

  Returns:
    an instance of datastore_rpc.Configuration to use for all write
    operations in the mapreduce.
  """
  force_writes = parse_bool(mapreduce_spec.params.get("force_writes", "false"))
  if force_writes:
    return datastore_rpc.Configuration(force_writes=force_writes)
  else:
    return datastore_rpc.Configuration()
def testCallHooks_Pending_CallbackAddsMore(self):
  self.SetUpCallHooks()
  conf = datastore_rpc.Configuration(on_completion=self.NastyCallback)
  key = model.Key(flat=['Expando', 1])
  self.conn.async_get(conf, [key])
  self.conn.wait_for_all_pending_rpcs()
  self.assertEqual(self.conn.get_pending_rpcs(), set())
def finalize_backup_info(backup_info_key):
  backup_info = BackupInformation.get(backup_info_key)
  backup_info.complete_time = datetime.datetime.now()
  backup_info.blob_files = drop_empty_files(backup_info.blob_files)
  backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
  logging.info('Backup %s completed', backup_info.name)
def _apply_jobs(self):
  """Apply all jobs in current key range."""
  while True:
    # Creates an unapplied query and fetches unapplied jobs in the result
    # range.
    unapplied_query = self._current_key_range.make_ascending_datastore_query(
        kind=None, keys_only=True)
    unapplied_query[
        ConsistentKeyReader.UNAPPLIED_LOG_FILTER] = self.start_time_us
    unapplied_jobs = unapplied_query.Get(limit=self._batch_size)
    if not unapplied_jobs:
      return
    # There were some unapplied jobs. Roll them forward.
    keys_to_apply = []
    for key in unapplied_jobs:
      # To apply the entity group we need to read something from it.
      # We use dummy kind and id because we don't actually need any data.
      path = key.to_path() + [ConsistentKeyReader.DUMMY_KIND,
                              ConsistentKeyReader.DUMMY_ID]
      keys_to_apply.append(
          db.Key.from_path(_app=key.app(), namespace=key.namespace(), *path))
    db.get(keys_to_apply, config=datastore_rpc.Configuration(
        deadline=10,
        read_policy=datastore_rpc.Configuration.APPLY_ALL_JOBS_CONSISTENCY))
def _apply_jobs(self, k_range):
  """Apply all jobs in the given key range."""
  while True:
    unapplied_query = k_range.make_ascending_datastore_query(
        kind=None, keys_only=True)
    unapplied_query[
        ConsistentKeyReader.UNAPPLIED_LOG_FILTER] = self.start_time_us
    unapplied_jobs = unapplied_query.Get(limit=self._batch_size)
    if not unapplied_jobs:
      return
    keys_to_apply = []
    for key in unapplied_jobs:
      path = key.to_path() + [ConsistentKeyReader.DUMMY_KIND,
                              ConsistentKeyReader.DUMMY_ID]
      keys_to_apply.append(
          db.Key.from_path(_app=key.app(), namespace=key.namespace(), *path))
    db.get(keys_to_apply, config=datastore_rpc.Configuration(
        deadline=10,
        read_policy=datastore_rpc.Configuration.APPLY_ALL_JOBS_CONSISTENCY))
def _apply_jobs(self, unapplied_jobs):
  """Apply all jobs implied by the given keys."""
  # There were some unapplied jobs. Roll them forward.
  keys_to_apply = []
  for key in unapplied_jobs:
    # To apply the entity group we need to read something from it.
    # We use dummy kind and id because we don't actually need any data.
    path = key.to_path() + [ConsistentKeyReader.DUMMY_KIND,
                            ConsistentKeyReader.DUMMY_ID]
    keys_to_apply.append(
        db.Key.from_path(_app=key.app(), namespace=key.namespace(), *path))
  db.get(keys_to_apply, config=datastore_rpc.Configuration(
      deadline=10,
      read_policy=datastore_rpc.Configuration.APPLY_ALL_JOBS_CONSISTENCY))
def testRunWithRpcs(self):
  record = []

  def foo(arg):
    record.append(arg)

  eventloop.queue_call(0.1, foo, 42)
  config = datastore_rpc.Configuration(on_completion=foo)
  rpc = self.conn.async_get(config, [])
  self.assertEqual(len(rpc.rpcs), 1)
  eventloop.queue_rpc(rpc)
  eventloop.run()
  self.assertEqual(record, [rpc.rpcs[0], 42])
  self.assertEqual(rpc.state, 2)  # TODO: Use apiproxy_rpc.RPC.FINISHING.
def testRunWithRpcs(self):
  apiproxy_stub_map.apiproxy = apiproxy_stub_map.APIProxyStubMap()
  stub = datastore_file_stub.DatastoreFileStub('_', None)
  apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', stub)
  record = []

  def foo(arg):
    record.append(arg)

  eventloop.queue_call(0.1, foo, 42)
  conn = datastore_rpc.Connection()
  config = datastore_rpc.Configuration(on_completion=foo)
  rpc = conn.async_get(config, [])
  self.assertEqual(len(rpc.rpcs), 1)
  eventloop.queue_rpc(rpc)
  eventloop.run()
  self.assertEqual(record, [rpc.rpcs[0], 42])
  self.assertEqual(rpc.state, 2)  # TODO: Use apiproxy_rpc.RPC.FINISHING.
def _create_and_save_state(job_config, mapreduce_spec):
  """Save mapreduce state to datastore.

  Save state to datastore so that UI can see it immediately.

  Args:
    job_config: map_job.JobConfig.
    mapreduce_spec: model.MapreduceSpec.
  """
  state = model.MapreduceState.create_new(job_config.job_id)
  state.mapreduce_spec = mapreduce_spec
  state.active = True
  state.active_shards = 0
  state.app_id = job_config._app
  config = datastore_rpc.Configuration(force_writes=job_config._force_writes)
  state.put(config=config)
def testContext_MultiRpc(self):
  # This test really tests the proper handling of MultiRpc by
  # queue_rpc() in eventloop.py.  It's easier to test from here, and
  # gives more assurance that it works.
  config = datastore_rpc.Configuration(max_get_keys=3, max_put_entities=3)
  self.ctx._conn = model.make_connection(config, default_model=model.Expando)

  @tasklets.tasklet
  def foo():
    ents = [model.Expando() for _ in range(10)]
    futs = [self.ctx.put(ent) for ent in ents]
    keys = yield futs
    futs = [self.ctx.get(key) for key in keys]
    ents2 = yield futs
    self.assertEqual(ents2, ents)
    raise tasklets.Return(keys)

  keys = foo().get_result()
  self.assertEqual(len(keys), 10)
def _run_map_jobs(job_operation_key, backup_info_key, kinds, job_name,
                  backup_handler, input_reader, output_writer, mapper_params,
                  mapreduce_params, queue):
  backup_info = BackupInformation.get(backup_info_key)
  if not backup_info:
    return []
  jobs = utils.RunMapForKinds(job_operation_key, kinds, job_name,
                              backup_handler, input_reader, output_writer,
                              mapper_params, mapreduce_params,
                              queue_name=queue)
  backup_info.active_jobs = jobs
  backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
  return jobs
def BackupCompleteHandler(operation, job_id, mapreduce_state): """Updates BackupInformation record for a completed mapper job.""" mapreduce_spec = mapreduce_state.mapreduce_spec backup_info = BackupInformation.get( mapreduce_spec.params['backup_info_pk']) if backup_info: filenames = mapreduce_state.writer_state['filenames'] backup_info.blob_files = list(set(backup_info.blob_files + filenames)) if job_id in backup_info.active_jobs: backup_info.active_jobs.remove(job_id) backup_info.completed_jobs = list( set(backup_info.completed_jobs + [job_id])) backup_info.put(config=datastore_rpc.Configuration(force_writes=True)) if operation.status == utils.DatastoreAdminOperation.STATUS_COMPLETED: deferred.defer(finalize_backup_info, backup_info.key(), _url=utils.ConfigDefaults.DEFERRED_PATH) else: logging.warn('BackupInfo was not found for %s', mapreduce_spec.params['backup_info_pk'])
def BackupCompleteHandler(operation, job_id, mapreduce_state): """Updates BackupInformation record for a completed mapper job.""" mapreduce_spec = mapreduce_state.mapreduce_spec backup_info = BackupInformation.get(mapreduce_spec.params['backup_info_pk']) if backup_info: filenames = mapreduce_state.writer_state['filenames'] if backup_info.filesystem == files.BLOBSTORE_FILESYSTEM: filenames = drop_empty_files(filenames) backup_info.blob_files = list(set(backup_info.blob_files + filenames)) if job_id in backup_info.active_jobs: backup_info.active_jobs.remove(job_id) backup_info.completed_jobs = list( set(backup_info.completed_jobs + [job_id])) backup_info.put(config=datastore_rpc.Configuration(force_writes=True)) if operation.status == utils.DatastoreAdminOperation.STATUS_COMPLETED: finalize_backup_info(backup_info) else: logging.warn('BackupInfo was not found for %s', mapreduce_spec.params['backup_info_pk'])
def populate_pathological(start, end):
  """Populates both PhotoA and PhotoB with entities that produce worst case
  runtime when zigzagging between:
    coloration = Photo.COLORATION_BLACK_AND_WHITE
  and
    aspect = Photo.ASPECT_PANORAMIC

  This function is designed to be immutable.

  Args:
    start: Index of the first entity to create.
    end: Index one past the last entity to create.
  """
  entities = []
  for i in xrange(start, end):
    # Creating identical entities for both PhotoA and PhotoB.
    if i % 2:
      # Perfectly interweave in key order.
      coloration = COLORATION_BLACK_AND_WHITE
      aspect = random.choice(ASPECTS[:-1])
      key_name = 'path%dA' % (i / 2)
    else:
      coloration = random.choice(COLORATIONS[1:])
      aspect = ASPECT_PANORAMIC
      key_name = 'path%dB' % (i / 2)
    seed = random.random()
    photoA = randomly_populate_photo(PhotoA(key_name=key_name), seed)
    photoB = randomly_populate_photo(PhotoB(key_name=key_name), seed)
    photoA.coloration = coloration
    photoB.coloration = coloration
    photoA.aspect = aspect
    photoB.aspect = aspect
    entities.append(photoA)
    entities.append(photoB)
  # Putting all entities in parallel.
  config = datastore_rpc.Configuration(max_entity_groups_per_rpc=10)
  finish_rpcs([db.put_async(entities, config=config)])
def _ProcessPostRequest(self): """Triggers backup restore mapper jobs and returns their ids.""" backup_id = self.request.get('backup_id') if not backup_id: return [('error', 'Unspecified Backup.')] backup = db.get(db.Key(backup_id)) if not backup: return [('error', 'Invalid Backup id.')] queue = self.request.get('queue') job_name = 'datastore_backup_%s' % re.sub(r'[^\w]', '_', backup.name) job_operation = None try: job_operation = utils.StartOperation('Restore from backup: %s' % backup.name) mapper_params = self._GetBasicMapperParams() mapper_params['files'] = backup.blob_files mapreduce_params = { 'backup_name': backup.name, 'force_ops_writes': True } return [('job', utils.StartMap(job_operation.key(), job_name, self.BACKUP_RESTORE_HANDLER, self.INPUT_READER, None, mapper_params, mapreduce_params, queue_name=queue))] except Exception: logging.exception( 'Failed to start a restore from backup job "%s".', job_name) if job_operation: job_operation.status = utils.DatastoreAdminOperation.STATUS_FAILED job_operation.put(config=datastore_rpc.Configuration( force_writes=True)) raise
def _CreateDatastoreConfig():
  """Create datastore config for use during datastore admin operations."""
  return datastore_rpc.Configuration(force_writes=True)
def flush(self):
  """Flush all puts to datastore."""
  if self.puts:
    datastore_rpc.Connection(
        config=datastore_rpc.Configuration(deadline=60)).put(self.puts)
    self.puts = []
def finalize_backup_info(backup_info):
  backup_info.complete_time = datetime.datetime.now()
  backup_info.put(config=datastore_rpc.Configuration(force_writes=True))
  logging.info('Backup %s completed', backup_info.name)