def delete_entry_and_gs_entry(keys_to_delete):
  """Synchronously deletes a list of ContentEntry entities and their GS files.

  It deletes the ContentEntry first, then the files in GS. The worst case is
  that the GS files are left behind and will be reaped by a lost GS task
  queue. The reverse is much worse: a ContentEntry pointing to a deleted GS
  entry would lead to lookup failures.
  """
  # Always delete ContentEntry first.
  ndb.delete_multi(keys_to_delete)
  # Note that some content entries may NOT have corresponding GS files. That
  # happens for small entries stored inline in the datastore or memcache.
  # Since this function operates only on keys, it can't distinguish "large"
  # entries stored in GS from "small" ones stored inline. So instead it tries
  # to delete all corresponding GS files, silently skipping ones that are not
  # there.
  gcs.delete_files(
      config.settings().gs_bucket,
      (i.id() for i in keys_to_delete),
      ignore_missing=True)
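
# Hedged usage sketch (not part of the original module): delete_entry_and_gs_entry
# is synchronous, so a caller walking over many keys would typically chunk them
# before calling it. The helper name, `expired_keys` and the batch size below
# are illustrative placeholders, not existing code.
def _delete_in_batches(expired_keys, batch_size=100):
  batch = []
  for key in expired_keys:
    batch.append(key)
    if len(batch) == batch_size:
      delete_entry_and_gs_entry(batch)
      batch = []
  if batch:
    delete_entry_and_gs_entry(batch)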
def post(self):
  logging.info('Deleting ContentEntry')
  incremental_delete(
      model.ContentEntry.query().iter(keys_only=True),
      ndb.delete_multi_async)

  gs_bucket = config.settings().gs_bucket
  logging.info('Deleting GS bucket %s', gs_bucket)
  incremental_delete(
      (i[0] for i in gcs.list_files(gs_bucket)),
      lambda filenames: gcs.delete_files(gs_bucket, filenames))

  logging.info('Flushing memcache')
  # High priority (.isolated files) are cached explicitly. Make sure ghosts
  # are zapped too.
  memcache.flush_all()
  logging.info('Finally done!')
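
# Minimal sketch of the incremental_delete() contract implied by its call
# sites above and below (not the actual implementation): it consumes an
# iterator, hands fixed-size batches to the provided delete callback and
# returns the total number of items processed. The real helper in this
# codebase may additionally wait on futures returned by async callbacks such
# as ndb.delete_multi_async; the batch size here is an assumption.
def _incremental_delete_sketch(items, delete, batch_size=100):
  total = 0
  batch = []
  for item in items:
    batch.append(item)
    if len(batch) == batch_size:
      delete(batch)
      total += len(batch)
      batch = []
  if batch:
    delete(batch)
    total += len(batch)
  return total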
def post(self): """Enumerates all GS files and delete those that do not have an associated ContentEntry. """ gs_bucket = config.settings().gs_bucket def filter_missing(): futures = {} cutoff = time.time() - 60*60 for filepath, filestats in gcs.list_files(gs_bucket): # If the file was uploaded in the last hour, ignore it. if filestats.st_ctime >= cutoff: continue # This must match the logic in model.entry_key(). Since this request # will in practice touch every item, do not use memcache since it'll # mess it up by loading every items in it. # TODO(maruel): Batch requests to use get_multi_async() similar to # datastore_utils.page_queries(). future = model.entry_key_from_id(filepath).get_async( use_cache=False, use_memcache=False) futures[future] = filepath if len(futures) > 20: future = ndb.Future.wait_any(futures) filepath = futures.pop(future) if future.get_result(): continue yield filepath while futures: future = ndb.Future.wait_any(futures) filepath = futures.pop(future) if future.get_result(): continue yield filepath gs_delete = lambda filenames: gcs.delete_files(gs_bucket, filenames) total = incremental_delete(filter_missing(), gs_delete) logging.info('Deleted %d lost GS files', total)
def post(self): """Enumerates all GS files and delete those that do not have an associated ContentEntry. """ gs_bucket = config.settings().gs_bucket def filter_missing(): futures = {} cutoff = time.time() - 60 * 60 for filepath, filestats in gcs.list_files(gs_bucket): # If the file was uploaded in the last hour, ignore it. if filestats.st_ctime >= cutoff: continue # This must match the logic in model.get_entry_key(). Since this request # will in practice touch every item, do not use memcache since it'll # mess it up by loading every items in it. # TODO(maruel): Batch requests to use get_multi_async() similar to # datastore_utils.page_queries(). future = model.entry_key_from_id(filepath).get_async( use_cache=False, use_memcache=False) futures[future] = filepath if len(futures) > 20: future = ndb.Future.wait_any(futures) filepath = futures.pop(future) if future.get_result(): continue yield filepath while futures: future = ndb.Future.wait_any(futures) filepath = futures.pop(future) if future.get_result(): continue yield filepath gs_delete = lambda filenames: gcs.delete_files(gs_bucket, filenames) total = incremental_delete(filter_missing(), gs_delete) logging.info('Deleted %d lost GS files', total)