예제 #1
0
def record_set(session_factory, bucket, key_prefix, start_date):
    """Collect the records stored under a policy output prefix in S3.

    Lists the objects of ``bucket`` below ``key_prefix``, passing a marker
    key built from ``start_date`` (hour ``00`` of that day) to the listing
    call, and gathers whatever ``get_records`` returns for every listed key
    that ends in ``resources.json.gz``.

    Returns one flat list holding the records of all matching files.
    """
    client = local_session(session_factory).client('s3')

    prefix = key_prefix.strip('/')
    day_path = start_date.strftime('%Y/%m/%d/00')
    # Key handed to the listing call as ``Marker``.
    first_key = '/'.join([prefix, day_path, 'resources.json.gz'])

    pages = client.get_paginator('list_objects').paginate(
        Bucket=bucket, Prefix=prefix + '/', Marker=first_key)

    collected = []
    file_total = 0

    with ThreadPoolExecutor(max_workers=20) as pool:
        for page in pages:
            # A page without 'Contents' carries no objects at all.
            if 'Contents' not in page:
                continue

            matching = []
            for obj in page['Contents']:
                if obj['Key'].endswith('resources.json.gz'):
                    matching.append(obj)
            file_total += len(matching)

            # One download task per matching key; drain them as they finish.
            pending = [
                pool.submit(get_records, bucket, obj, session_factory)
                for obj in matching
            ]
            for done in as_completed(pending):
                collected.extend(done.result())

    log.info(
        "Fetched %d records across %d files" % (len(collected), file_total))
    return collected
예제 #2
0
def index_account_trails(config, account, region, date, directory):
    """Index the trail databases of one account and region.

    Lists ``trail.db.bz2`` objects under
    ``accounts/<account name>/<region>/traildb`` in the account's bucket,
    keeps the keys that ``valid_date`` accepts for ``date``, fetches each of
    them with ``get_traildb`` on a thread pool, and passes the events read
    from the resulting local SQLite file to ``index_events``. Each local
    file is deleted once it has been processed.

    Args:
        config: Passed to ``get_es_client`` and ``fetch_events``.
        account: Mapping with ``name`` and ``bucket`` keys and optional
            ``profile`` and ``role`` keys.
        region: Region used for the session and for the key prefix.
        date: Interpolated into the listing start key and passed to
            ``valid_date``.
        directory: Passed through to ``get_traildb``.

    Raises:
        Whatever the download, SQLite or indexing calls raise. The SQLite
        connection is closed and the local file removed before the
        exception propagates.
    """
    def session_factory():
        # Single definition of the session recipe, shared by the listing
        # client below and by every download task.
        return SessionFactory(
            region,
            profile=account.get('profile'),
            assume_role=account.get('role'))()

    es_client = get_es_client(config)

    s3 = local_session(session_factory).client('s3')

    bucket = account['bucket']
    key_prefix = "accounts/{}/{}/traildb".format(account['name'], region)
    marker = "{}/{}/trail.db.bz2".format(key_prefix, date)

    p = s3.get_paginator('list_objects_v2').paginate(
        Bucket=bucket,
        Prefix=key_prefix,
        StartAfter=marker,
    )

    with ThreadPoolExecutor(max_workers=20) as w:
        for key_set in p:
            if 'Contents' not in key_set:
                continue
            keys = [
                k for k in key_set['Contents']
                if k['Key'].endswith('trail.db.bz2')
                and valid_date(k['Key'], date)
            ]

            futures = [
                w.submit(get_traildb, bucket, k, session_factory, directory)
                for k in keys
            ]

            for f in as_completed(futures):
                local_db_file = f.result()
                try:
                    connection = sqlite3.connect(local_db_file)
                    try:
                        connection.row_factory = dict_factory
                        cursor = connection.cursor()
                        index_events(
                            es_client,
                            fetch_events(cursor, config, account['name']))
                    finally:
                        # Release the database handle even when indexing
                        # fails part way through.
                        connection.close()
                finally:
                    # Best-effort cleanup: a failed delete is only logged,
                    # but interrupts and programming errors are no longer
                    # swallowed.
                    try:
                        os.remove(local_db_file)
                    except OSError:
                        log.warning(
                            "Failed to remove temporary file: {}".format(
                                local_db_file))
예제 #3
0
def log_entries_from_s3(session_factory, output, start, end):
    """Gather log records from ``custodian-run.log.gz`` objects in S3.

    ``start`` and ``end`` are converted with ``_timestamp_from_string`` and
    treated as millisecond timestamps. ``start`` only determines the key
    the listing begins after (hour ``00`` of that day); ``end`` is compared
    against each object's ``LastModified``. Listing stops at the first page
    that contains log files but none modified before ``end``.

    Returns one flat list of whatever ``get_records`` yields for the
    selected keys.
    """
    s3 = local_session(session_factory).client('s3')
    prefix = output.key_prefix.strip('/')
    zone = tz.tzlocal()

    start = datetime.fromtimestamp(_timestamp_from_string(start) / 1000)
    # ``end`` is made timezone-aware because it is compared with
    # ``LastModified`` below.
    end_naive = datetime.fromtimestamp(_timestamp_from_string(end) / 1000)
    end = end_naive.replace(tzinfo=zone)

    log_filename = 'custodian-run.log.gz'
    first_key = '/'.join(
        [prefix, start.strftime('%Y/%m/%d/00'), log_filename])

    pages = s3.get_paginator('list_objects_v2').paginate(
        Bucket=output.bucket,
        Prefix=prefix + '/',
        StartAfter=first_key,
    )

    entries = []
    file_total = 0

    with ThreadPoolExecutor(max_workers=20) as pool:
        for page in pages:
            if 'Contents' not in page:
                continue

            candidates = [
                obj for obj in page['Contents']
                if obj['Key'].endswith(log_filename)
            ]
            in_range = [
                obj for obj in candidates if obj['LastModified'] < end
            ]
            if candidates and not in_range:
                # Logs exist on this page, but all are past the end date.
                break

            file_total += len(in_range)
            pending = [
                pool.submit(get_records, output.bucket, obj, session_factory)
                for obj in in_range
            ]
            for done in as_completed(pending):
                entries.extend(done.result())

    summary = 'Fetched {} records across {} files'.format(
        len(entries), file_total)
    log.info(summary)
    return entries