コード例 #1
0
ファイル: s3_tmpwatch.py プロジェクト: tianhuil/mrjob
def s3_cleanup(glob_path, time_old, dry_run=False, conf_paths=None):
    """Delete all S3 keys found under *glob_path* that are older than
    *time_old*.

    :param glob_path: S3 URI (possibly containing globs) handed to the
                      runner's ``ls()`` to enumerate candidate paths.
    :param time_old: age threshold. It is compared against
                     ``datetime.utcnow() - last_modified``, so it is
                     presumably a :py:class:`datetime.timedelta` --
                     confirm against the caller.
    :param dry_run: if true, just log the keys that would be deleted
                    without actually deleting them.
    :param conf_paths: passed through to :py:class:`EMRJobRunner`.
    """
    runner = EMRJobRunner(conf_paths=conf_paths)

    # Pass the values as logging args so formatting is deferred to the
    # logging framework; the emitted text is unchanged.
    log.info('Deleting all files in %s that are older than %s',
             glob_path, time_old)

    # Look each bucket up once rather than once per matched path.
    buckets = {}

    for path in runner.ls(glob_path):
        bucket_name, key_name = parse_s3_uri(path)
        if bucket_name not in buckets:
            buckets[bucket_name] = runner.fs.get_bucket(bucket_name)
        bucket = buckets[bucket_name]

        # NOTE(review): key_name is passed as the first positional
        # argument of bucket.list(); if that is a prefix (as in boto),
        # sibling keys sharing the prefix are visited too -- confirm
        # this is intended.
        for key in bucket.list(key_name):
            last_modified = iso8601_to_datetime(key.last_modified)
            # Age is measured against the current UTC time.
            age = datetime.utcnow() - last_modified
            if age > time_old:
                # Logged in dry-run mode as well, so the output shows
                # what a real run would remove.
                log.info('Deleting %s; is %s old', key.name, age)
                if not dry_run:
                    key.delete()
コード例 #2
0
ファイル: s3_tmpwatch.py プロジェクト: ealliaume/mrjob
def s3_cleanup(glob_path, time_old, dry_run=False, conf_path=None):
    """Delete all S3 keys found under *glob_path* that are older than
    *time_old*.

    :param glob_path: S3 URI (possibly containing globs) handed to the
                      runner's ``ls()`` to enumerate candidate paths.
    :param time_old: age threshold. It is compared against
                     ``datetime.utcnow() - last_modified``, so it is
                     presumably a :py:class:`datetime.timedelta` --
                     confirm against the caller.
    :param dry_run: if true, just log the keys that would be deleted
                    without actually deleting them.
    :param conf_path: passed through to :py:class:`EMRJobRunner`.
    """
    runner = EMRJobRunner(conf_path=conf_path)
    s3_conn = runner.make_s3_conn()

    # Pass the values as logging args so formatting is deferred to the
    # logging framework; the emitted text is unchanged.
    log.info("Deleting all files in %s that are older than %s",
             glob_path, time_old)

    # Look each bucket up once rather than once per matched path.
    buckets = {}

    for path in runner.ls(glob_path):
        bucket_name, key_name = parse_s3_uri(path)
        if bucket_name not in buckets:
            buckets[bucket_name] = s3_conn.get_bucket(bucket_name)
        bucket = buckets[bucket_name]

        # NOTE(review): key_name is passed as the first positional
        # argument of bucket.list(); if that is a prefix (as in boto),
        # sibling keys sharing the prefix are visited too -- confirm
        # this is intended.
        for key in bucket.list(key_name):
            last_modified = iso8601_to_datetime(key.last_modified)
            # Age is measured against the current UTC time.
            age = datetime.utcnow() - last_modified
            if age > time_old:
                # Logged in dry-run mode as well, so the output shows
                # what a real run would remove.
                log.info("Deleting %s; is %s old", key.name, age)
                if not dry_run:
                    key.delete()
コード例 #3
0
ファイル: s3_tmpwatch.py プロジェクト: Dean838/mrjob
def _s3_cleanup(glob_path, time_old, dry_run=False, **runner_kwargs):
    """Delete all S3 keys found under *glob_path* that are older than
    *time_old*.

    :param glob_path: S3 URI (possibly containing globs) handed to the
                      runner filesystem's ``ls()`` to enumerate
                      candidate paths.
    :param time_old: age threshold. It is compared against
                     ``datetime.utcnow() - last_modified``, so it is
                     presumably a :py:class:`datetime.timedelta` --
                     confirm against the caller.
    :param dry_run: if true, just log the keys that would be deleted
                    without actually deleting them.
    :param runner_kwargs: keyword arguments passed through to
                          :py:class:`EMRJobRunner`.
    """
    runner = EMRJobRunner(**runner_kwargs)

    # Pass the values as logging args so formatting is deferred to the
    # logging framework; the emitted text is unchanged.
    log.info('Deleting all files in %s that are older than %s',
             glob_path, time_old)

    # Look each bucket up once rather than once per matched path.
    buckets = {}

    for path in runner.fs.ls(glob_path):
        bucket_name, key_name = parse_s3_uri(path)
        if bucket_name not in buckets:
            buckets[bucket_name] = runner.fs.get_bucket(bucket_name)
        bucket = buckets[bucket_name]

        # NOTE(review): key_name is passed as the first positional
        # argument of bucket.list(); if that is a prefix (as in boto),
        # sibling keys sharing the prefix are visited too -- confirm
        # this is intended.
        for key in bucket.list(key_name):
            last_modified = iso8601_to_datetime(key.last_modified)
            # Age is measured against the current UTC time.
            age = datetime.utcnow() - last_modified
            if age > time_old:
                # Logged in dry-run mode as well, so the output shows
                # what a real run would remove.
                log.info('Deleting %s; is %s old', key.name, age)
                if not dry_run:
                    key.delete()