def s3_cleanup(glob_path, time_old, dry_run=False, conf_paths=None):
    """Remove S3 keys found under *glob_path* whose age exceeds *time_old*.

    Every path yielded by ``runner.ls(glob_path)`` is split into a bucket
    name and a key name; the bucket is then listed with that key name and
    each listed key's ``last_modified`` stamp is compared with the current
    UTC time.

    :param glob_path: S3 URI (may be a glob) handed to the runner's ``ls()``.
    :param time_old: age threshold, compared against
                     ``datetime.utcnow() - last_modified`` (presumably a
                     ``timedelta`` -- confirm with callers).
    :param dry_run: when true, only log the keys that would be deleted.
    :param conf_paths: forwarded unchanged to ``EMRJobRunner``.
    """
    emr_runner = EMRJobRunner(conf_paths=conf_paths)

    banner_args = (glob_path, time_old)
    log.info('Deleting all files in %s that are older than %s' % banner_args)

    for uri in emr_runner.ls(glob_path):
        bucket_name, key_name = parse_s3_uri(uri)
        s3_bucket = emr_runner.fs.get_bucket(bucket_name)

        # NOTE(review): the key name is passed to bucket.list(); if that is
        # a prefix listing, sibling keys sharing the prefix are examined
        # too -- confirm this is intended.
        for s3_key in s3_bucket.list(key_name):
            modified_at = iso8601_to_datetime(s3_key.last_modified)
            now = datetime.utcnow()
            age = now - modified_at

            is_expired = age > time_old
            if not is_expired:
                continue

            # Log in both modes so a dry run reports the same candidates
            log.info('Deleting %s; is %s old' % (s3_key.name, age))
            if dry_run:
                continue
            s3_key.delete()
def s3_cleanup(glob_path, time_old, dry_run=False, conf_path=None):
    """Remove S3 keys found under *glob_path* whose age exceeds *time_old*.

    An S3 connection is obtained from the runner via ``make_s3_conn()``.
    Every path yielded by ``runner.ls(glob_path)`` is split into a bucket
    name and a key name; the bucket is then listed with that key name and
    each listed key's ``last_modified`` stamp is compared with the current
    UTC time.

    :param glob_path: S3 URI (may be a glob) handed to the runner's ``ls()``.
    :param time_old: age threshold, compared against
                     ``datetime.utcnow() - last_modified`` (presumably a
                     ``timedelta`` -- confirm with callers).
    :param dry_run: when true, only log the keys that would be deleted.
    :param conf_path: forwarded unchanged to ``EMRJobRunner``.
    """
    emr_runner = EMRJobRunner(conf_path=conf_path)
    connection = emr_runner.make_s3_conn()

    banner_args = (glob_path, time_old)
    log.info("Deleting all files in %s that are older than %s" % banner_args)

    for uri in emr_runner.ls(glob_path):
        bucket_name, key_name = parse_s3_uri(uri)
        s3_bucket = connection.get_bucket(bucket_name)

        # NOTE(review): the key name is passed to bucket.list(); if that is
        # a prefix listing, sibling keys sharing the prefix are examined
        # too -- confirm this is intended.
        for s3_key in s3_bucket.list(key_name):
            modified_at = iso8601_to_datetime(s3_key.last_modified)
            now = datetime.utcnow()
            age = now - modified_at

            is_expired = age > time_old
            if not is_expired:
                continue

            # Log in both modes so a dry run reports the same candidates
            log.info("Deleting %s; is %s old" % (s3_key.name, age))
            if dry_run:
                continue
            s3_key.delete()
def _s3_cleanup(glob_path, time_old, dry_run=False, **runner_kwargs):
    """Remove S3 keys found under *glob_path* whose age exceeds *time_old*.

    Every path yielded by ``runner.fs.ls(glob_path)`` is split into a
    bucket name and a key name; the bucket is then listed with that key
    name and each listed key's ``last_modified`` stamp is compared with
    the current UTC time.

    :param glob_path: S3 URI (may be a glob) handed to the filesystem's
                      ``ls()``.
    :param time_old: age threshold, compared against
                     ``datetime.utcnow() - last_modified`` (presumably a
                     ``timedelta`` -- confirm with callers).
    :param dry_run: when true, only log the keys that would be deleted.
    :param runner_kwargs: keyword arguments forwarded unchanged to
                          ``EMRJobRunner``.
    """
    emr_runner = EMRJobRunner(**runner_kwargs)

    banner_args = (glob_path, time_old)
    log.info('Deleting all files in %s that are older than %s' % banner_args)

    for uri in emr_runner.fs.ls(glob_path):
        bucket_name, key_name = parse_s3_uri(uri)
        s3_bucket = emr_runner.fs.get_bucket(bucket_name)

        # NOTE(review): the key name is passed to bucket.list(); if that is
        # a prefix listing, sibling keys sharing the prefix are examined
        # too -- confirm this is intended.
        for s3_key in s3_bucket.list(key_name):
            modified_at = iso8601_to_datetime(s3_key.last_modified)
            now = datetime.utcnow()
            age = now - modified_at

            is_expired = age > time_old
            if not is_expired:
                continue

            # Log in both modes so a dry run reports the same candidates
            log.info('Deleting %s; is %s old' % (s3_key.name, age))
            if dry_run:
                continue
            s3_key.delete()