Example #1
def test_collector(historical_role, buckets, mock_lambda_environment,
                   swag_accounts, current_s3_table):
    from historical.s3.models import CurrentS3Model
    from historical.s3.collector import handler

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             source="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    # Verify that the tags are duplicated in the top level and configuration:
    assert len(result[0].Tags.attribute_values) == len(
        result[0].configuration.attribute_values["Tags"]) == 1
    assert result[0].Tags.attribute_values["theBucketName"] == \
           result[0].configuration.attribute_values["Tags"]["theBucketName"] == "testbucket1"  # noqa

    # Polling (make sure the date is included):
    polling_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={
            "bucketName": "testbucket1",
            "creationDate": now
        },
                             source="aws.s3",
                             eventName="DescribeBucket",
                             eventTime=now))
    data = json.dumps(polling_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    assert CurrentS3Model.count() == 1

    # Load the config and verify the polling timestamp is in there:
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert result[0].configuration["CreationDate"] == now.isoformat() + "Z"

    # And deletion:
    delete_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             source="aws.s3",
                             eventName="DeleteBucket",
                             eventTime=now))
    data = json.dumps(delete_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)
    handler(data, None)
    assert CurrentS3Model.count() == 0
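
Every step above passes default=serialize to json.dumps so that the datetime values (eventTime, creationDate) can be encoded. A minimal sketch of such a hook, assuming datetimes are the only non-JSON-native values involved -- the real helper in historical may cover more types, and whether the helper or the collector appends the trailing "Z" asserted on above is an assumption:

from datetime import datetime


def serialize(obj):
    """JSON `default` hook: encode datetimes as ISO-8601 strings."""
    if isinstance(obj, datetime):
        # Assumption: produces the "<isoformat>Z" value the tests assert on.
        return obj.isoformat() + 'Z'
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")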
Example #2
def test_collector(historical_role, buckets, mock_lambda_environment,
                   swag_accounts, current_s3_table):
    """Test the Collector."""
    from historical.s3.models import CurrentS3Model
    from historical.s3.collector import handler

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    assert result[0].Tags.attribute_values["theBucketName"] == "testbucket1"
    assert result[0].eventSource == "aws.s3"

    # Polling (make sure the date is included):
    polling_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={
            "bucketName": "testbucket1",
            "creationDate": now
        },
                             eventSource="historical.s3.poller",
                             eventName="PollS3",
                             eventTime=now))
    data = json.dumps(polling_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    assert CurrentS3Model.count() == 1

    # Load the config and verify the polling timestamp is in there:
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert result[0].configuration["CreationDate"] == now.isoformat() + "Z"
    assert result[0].eventSource == "historical.s3.poller"

    # And deletion:
    delete_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="DeleteBucket",
                             eventTime=now))
    data = json.dumps(delete_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)
    handler(data, mock_lambda_environment)
    assert CurrentS3Model.count() == 0
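
The serialize/wrap/parse sequence (json.dumps with default=serialize, RecordsFactory plus SQSDataFactory, json.dumps again, json.loads) repeats for every event in Example #2. A hypothetical helper -- not part of the original tests -- that collapses those four lines:

def make_sqs_records(*events):
    """Hypothetical helper: serialize factory-built events and wrap them as the
    parsed SQS Records payload that the collector handler receives."""
    bodies = [json.dumps(event, default=serialize) for event in events]
    records = RecordsFactory(records=[SQSDataFactory(body=body) for body in bodies])
    return json.loads(json.dumps(records, default=serialize))

With it, each step of the test reduces to handler(make_sqs_records(create_event), mock_lambda_environment).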
Example #3
def test_current_table(current_s3_table):
    from historical.s3.models import CurrentS3Model

    CurrentS3Model(**S3_BUCKET).save()

    items = list(CurrentS3Model.query('arn:aws:s3:::testbucket1'))

    assert len(items) == 1
    assert isinstance(items[0].ttl, int)
    assert items[0].ttl > 0
Example #4
def test_current_table(current_s3_table):  # pylint: disable=W0613
    """Tests for the Current PynamoDB model."""
    from historical.s3.models import CurrentS3Model

    CurrentS3Model(**S3_BUCKET).save()

    items = list(CurrentS3Model.query('arn:aws:s3:::testbucket1'))

    assert len(items) == 1
    assert isinstance(items[0].ttl, int)
    assert items[0].ttl > 0
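
Examples #3 and #4 only check that the model populated a positive integer ttl. Example #6 below builds the same value by hand as int(time.time() + TTL_EXPIRY); assuming the model's default follows that rule, two extra lines could tighten the assertion (import time and TTL_EXPIRY are assumed to be in scope, as they are in Example #6):

    # Assumption: the default ttl is roughly "now + TTL_EXPIRY seconds".
    expected_ttl = int(time.time() + TTL_EXPIRY)
    assert abs(items[0].ttl - expected_ttl) < 60  # tolerate up to a minute of skew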
Example #5
def process_durable_event(record, s3_report):
    """Processes a group of Historical Durable Table events."""
    if record.get(EVENT_TOO_BIG_FLAG):
        result = list(CurrentS3Model.query(record['arn']))

        # Is the record too big and also not found in the Current Table? Then delete it:
        if not result:
            record['item'] = {'configuration': {}, 'BucketName': record['arn'].split('arn:aws:s3:::')[1]}

        else:
            record['item'] = dict(result[0])

    if not record['item']['configuration']:
        log.debug(f"[ ] Processing deletion for: {record['item']['BucketName']}")
        s3_report["buckets"].pop(record['item']['BucketName'], None)
    else:
        log.debug(f"[ ] Processing: {record['item']['BucketName']}")
        s3_report["all_buckets"].append(record['item'])
Example #6
def test_snsproxy_dynamodb_differ(historical_role, current_s3_table,
                                  durable_s3_table, mock_lambda_environment,
                                  buckets):
    """
    This mostly checks that the differ is able to properly load the reduced dataset from the SNSProxy.
    """
    # Create the item in the current table:
    from historical.s3.collector import handler as current_handler
    from historical.s3.differ import handler as diff_handler
    from historical.s3.models import CurrentS3Model, DurableS3Model
    from historical.common.sns import shrink_sns_blob

    # Mock out the loggers:
    import historical.common.dynamodb
    old_logger = historical.common.dynamodb.log
    mocked_logger = MagicMock()
    historical.common.dynamodb.log = mocked_logger

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    current_handler(data, mock_lambda_environment)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1

    # Mock out the DDB Stream for this creation and for an item that is NOT in the current table:
    ttl = int(time.time() + TTL_EXPIRY)
    new_bucket = S3_BUCKET.copy()
    new_bucket['eventTime'] = datetime(
        year=2017, month=5, day=12, hour=10, minute=30,
        second=0).isoformat() + 'Z'
    new_bucket['ttl'] = ttl
    ddb_existing_item = DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
        NewImage=new_bucket,
        Keys={'arn': new_bucket['arn']},
        OldImage=new_bucket),
                                              eventName='INSERT')

    missing_bucket = S3_BUCKET.copy()
    missing_bucket['eventTime'] = datetime(
        year=2017, month=5, day=12, hour=10, minute=30,
        second=0).isoformat() + 'Z'
    missing_bucket['ttl'] = ttl
    missing_bucket['BucketName'] = 'notinthecurrenttable'
    missing_bucket['arn'] = 'arn:aws:s3:::notinthecurrenttable'
    missing_bucket['configuration']['Name'] = 'notinthecurrenttable'
    ddb_missing_item = DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
        NewImage=missing_bucket,
        Keys={'arn': 'arn:aws:s3:::notinthecurrenttable'},
        OldImage=new_bucket),
                                             eventName='INSERT')

    # Get the shrunken blob:
    shrunken_existing = json.dumps(
        shrink_sns_blob(
            json.loads(json.dumps(ddb_existing_item, default=serialize))))
    shrunken_missing = json.dumps(
        shrink_sns_blob(
            json.loads(json.dumps(ddb_missing_item, default=serialize))))

    records = RecordsFactory(records=[
        SQSDataFactory(body=json.dumps(
            SnsDataFactory(Message=shrunken_existing), default=serialize)),
        SQSDataFactory(body=json.dumps(
            SnsDataFactory(Message=shrunken_missing), default=serialize))
    ])
    records_event = json.loads(json.dumps(records, default=serialize))

    # Run the differ:
    diff_handler(records_event, mock_lambda_environment)

    # Verify that the existing bucket in the Current table is in the Durable table with the correct configuration:
    result = list(DurableS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    assert result[0].configuration.attribute_values['Name'] == 'testbucket1'

    # Verify that the missing bucket is ignored -- it will presumably be processed later:
    result = list(DurableS3Model.query("arn:aws:s3:::notinthecurrenttable"))
    assert not result

    # Verify that the proper log statements were reached:
    assert mocked_logger.debug.called
    assert mocked_logger.error.called
    debug_calls = [
        '[-->] Item with ARN: arn:aws:s3:::notinthecurrenttable was too big for SNS '
        '-- fetching it from the Current table...',
        '[+] Saving new revision to durable table.',
        '[-->] Item with ARN: arn:aws:s3:::testbucket1 was too big for SNS -- fetching it from the Current table...'
    ]
    for dc in debug_calls:
        mocked_logger.debug.assert_any_call(dc)

    mocked_logger.error.assert_called_once_with(
        '[?] Received item too big for SNS, and was not able to '
        'find the original item with ARN: arn:aws:s3:::notinthecurrenttable')

    # Unmock the logger:
    historical.common.dynamodb.log = old_logger
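
Example #6 swaps historical.common.dynamodb.log for a MagicMock by hand and only restores it on the last line, so a failing assertion would leave the mock in place. An alternative sketch -- not what the original test does -- using unittest.mock.patch, which restores the logger automatically:

from unittest.mock import MagicMock, patch

with patch('historical.common.dynamodb.log', new=MagicMock()) as mocked_logger:
    diff_handler(records_event, mock_lambda_environment)
    mocked_logger.debug.assert_any_call('[+] Saving new revision to durable table.')
# The original logger is restored when the block exits, even if an assertion fails.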