Example #1
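This and the following snippets are excerpted from the historical project's pytest suite, so their shared imports are not shown. A plausible preamble is sketched below; the module paths are inferred from that project's layout rather than copied from the source, so treat them as assumptions:

# Assumed preamble -- inferred, not copied from the source module:
import json
import os
import time
from datetime import datetime

import boto3
from botocore.exceptions import ClientError

# Inferred import paths -- verify against your version of historical:
from historical.s3.models import CurrentS3Model, DurableS3Model
from historical.tests.factories import (
    CloudwatchEventFactory, DetailFactory, DynamoDBDataFactory,
    DynamoDBRecordFactory, DynamoDBRecordsFactory, KinesisDataFactory,
    KinesisRecordFactory, KinesisRecordsFactory, RecordsFactory,
    SQSDataFactory, UserIdentityFactory, serialize)
# Helpers such as get_queue_url, s3_polling_schema / S3_POLLING_SCHEMA, and the
# S3_BUCKET constant live in the test module or historical.s3.models,
# depending on the version.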
def test_collector_on_deleted_bucket(historical_role, buckets, mock_lambda_environment, swag_accounts,
                                     current_s3_table):
    """Test that the collector skips events for buckets that no longer exist."""
    from historical.s3.collector import handler

    # If an event arrives for a bucket that has since been deleted, the collector
    # should skip it and wait for the corresponding deletion event to arrive.
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(
            requestParameters={
                "bucketName": "not-a-bucket"
            },
            source="aws.s3",
            eventName="PutBucketPolicy",
        )
    )
    create_event_data = json.dumps(create_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[
            KinesisRecordFactory(
                kinesis=KinesisDataFactory(data=create_event_data))
        ]
    )
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    assert CurrentS3Model.count() == 0
Example #2
def test_poller(historical_role, buckets, mock_lambda_environment,
                historical_sqs, swag_accounts):
    """Test the S3 poller end-to-end against the SQS-based setup."""
    from historical.s3.poller import handler
    handler({}, None)

    # Need to ensure that 51 total buckets were added into SQS:
    sqs = boto3.client("sqs", region_name="us-east-1")
    queue_url = get_queue_url(os.environ['POLLER_QUEUE_NAME'])

    all_buckets = {"SWAG": True}
    for i in range(0, 50):
        all_buckets["testbucket{}".format(i)] = True

    # Loop through the queue and make sure all buckets are accounted for:
    for _ in range(6):
        messages = sqs.receive_message(QueueUrl=queue_url,
                                       MaxNumberOfMessages=10)['Messages']
        message_ids = []

        for m in messages:
            message_ids.append({
                "Id": m['MessageId'],
                "ReceiptHandle": m['ReceiptHandle']
            })
            data = s3_polling_schema.loads(m['Body']).data
            bucket_name = data["detail"]["request_parameters"]["bucket_name"]

            assert all_buckets[bucket_name]
            assert datetime.strptime(
                data["detail"]["request_parameters"]["creation_date"],
                '%Y-%m-%dT%H:%M:%SZ')
            assert data["detail"]["event_source"] == "historical.s3.poller"

            # Remove from the dict (at the end, there should be 0 items left):
            del all_buckets[bucket_name]

        sqs.delete_message_batch(QueueUrl=queue_url, Entries=message_ids)

    assert len(all_buckets) == 0

    # Check that an exception raised doesn't break things:
    import historical.s3.poller

    def mocked_poller(account, stream):
        raise ClientError({"Error": {
            "Message": "",
            "Code": "AccessDenied"
        }}, "sts:AssumeRole")

    old_method = historical.s3.poller.produce_events  # For pytest inter-test issues...
    historical.s3.poller.produce_events = mocked_poller
    handler({}, None)
    historical.s3.poller.produce_events = old_method
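The save/patch/restore pattern above (repeated in Examples #3 and #8 below) can also be expressed with the standard library's unittest.mock.patch.object, which restores the original attribute automatically even if the handler or an assertion raises; a minimal sketch under the same setup:

from unittest import mock

import historical.s3.poller

def _denied(account, stream):
    # Simulate the AssumeRole failure used in the test above:
    raise ClientError({"Error": {"Message": "", "Code": "AccessDenied"}},
                      "sts:AssumeRole")

# The patch is undone when the with-block exits, raising or not:
with mock.patch.object(historical.s3.poller, "produce_events", _denied):
    handler({}, None)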
Example #3
def test_poller(historical_role, buckets, mock_lambda_environment,
                historical_kinesis, swag_accounts):
    """Test the S3 poller end-to-end against the Kinesis-based setup."""
    from historical.s3.poller import handler
    os.environ["MAX_BUCKET_BATCH"] = "4"
    handler({}, None)

    # Need to ensure that all 51 buckets were added to the stream:
    kinesis = boto3.client("kinesis", region_name="us-east-1")

    all_buckets = {"SWAG": True}
    for i in range(0, 50):
        all_buckets["testbucket{}".format(i)] = True

    # Loop through the stream and make sure all buckets are accounted for:
    shard_id = kinesis.describe_stream(
        StreamName="historicalstream"
    )["StreamDescription"]["Shards"][0]["ShardId"]
    iterator = kinesis.get_shard_iterator(
        StreamName="historicalstream",
        ShardId=shard_id,
        ShardIteratorType="AT_SEQUENCE_NUMBER",
        StartingSequenceNumber="0")
    records = kinesis.get_records(ShardIterator=iterator["ShardIterator"])
    for r in records["Records"]:
        data = s3_polling_schema.loads(r["Data"]).data

        assert all_buckets[data["detail"]["request_parameters"]["bucket_name"]]
        assert datetime.strptime(
            data["detail"]["request_parameters"]["creation_date"],
            '%Y-%m-%dT%H:%M:%SZ')

        # Remove from the dict (at the end, there should be 0 items left)
        del all_buckets[data["detail"]["request_parameters"]["bucket_name"]]

    assert len(all_buckets) == 0

    # Check that an exception raised doesn't break things:
    import historical.s3.poller

    def mocked_poller(account, stream):
        raise ClientError({"Error": {
            "Message": "",
            "Code": "AccessDenied"
        }}, "sts:AssumeRole")

    old_method = historical.s3.poller.create_polling_event  # For pytest inter-test issues...
    historical.s3.poller.create_polling_event = mocked_poller
    handler({}, None)
    historical.s3.poller.create_polling_event = old_method
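One caveat in this example: MAX_BUCKET_BATCH is written into os.environ and never removed, so it can leak into subsequent tests. With pytest, the built-in monkeypatch fixture scopes the change to this test; a sketch of the signature change:

def test_poller(historical_role, buckets, mock_lambda_environment,
                historical_kinesis, swag_accounts, monkeypatch):
    from historical.s3.poller import handler
    monkeypatch.setenv("MAX_BUCKET_BATCH", "4")  # reverted at teardown
    handler({}, None)
    # ... assertions as above ...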
Example #4
def make_poller_events():
    """A sort-of fixture to make polling events for tests."""
    from historical.s3.poller import poller_tasker_handler as handler
    handler({}, None)

    # Need to ensure that all of the accounts and regions were properly tasked (only 1 region for S3):
    sqs = boto3.client("sqs", region_name="us-east-1")
    queue_url = get_queue_url(os.environ['POLLER_TASKER_QUEUE_NAME'])
    messages = sqs.receive_message(QueueUrl=queue_url,
                                   MaxNumberOfMessages=10)['Messages']

    # 'Body' needs to be made into 'body' for proper parsing later:
    for m in messages:
        m['body'] = m.pop('Body')

    return messages
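Example #8 below shows the intended usage of this helper: the returned messages are wrapped into an SQS-style Records event and fed to the poller's processor handler:

messages = make_poller_events()
event = json.loads(
    json.dumps(RecordsFactory(records=messages), default=serialize))
handler(event, None)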
Example #5
def test_collector(historical_role, buckets, mock_lambda_environment,
                   swag_accounts, current_s3_table):
    """Test the collector through the full create/poll/delete lifecycle."""
    from historical.s3.collector import handler

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             source="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    # Verify that the tags are duplicated in the top level and configuration:
    assert len(result[0].Tags.attribute_values) == len(
        result[0].configuration.attribute_values["Tags"]) == 1
    assert result[0].Tags.attribute_values["theBucketName"] == \
           result[0].configuration.attribute_values["Tags"]["theBucketName"] == "testbucket1"  # noqa

    # Polling (make sure the date is included):
    polling_event = CloudwatchEventFactory(
        detail=DetailFactory(
            requestParameters={
                "bucketName": "testbucket1",
                "creationDate": now
            },
            source="aws.s3",
            eventName="DescribeBucket",
            eventTime=now))
    data = json.dumps(polling_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, None)
    assert CurrentS3Model.count() == 1

    # Load the config and verify the polling timestamp is in there:
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert result[0].configuration["CreationDate"] == now.isoformat() + "Z"

    # And deletion:
    delete_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             source="aws.s3",
                             eventName="DeleteBucket",
                             eventTime=now))
    data = json.dumps(delete_event, default=serialize)
    data = KinesisRecordsFactory(
        records=[KinesisRecordFactory(kinesis=KinesisDataFactory(data=data))])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)
    handler(data, None)
    assert CurrentS3Model.count() == 0
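Example #6 below walks through the same create/poll/delete lifecycle, but against what appears to be a later revision of the library: events arrive as SQS records instead of Kinesis records, the poll event uses eventSource="historical.s3.poller" with a PollS3 event name rather than a synthetic DescribeBucket call, and the stored model exposes an eventSource attribute.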
Example #6
def test_collector(historical_role, buckets, mock_lambda_environment,
                   swag_accounts, current_s3_table):
    """Test the Collector."""
    from historical.s3.models import CurrentS3Model
    from historical.s3.collector import handler

    now = datetime.utcnow().replace(tzinfo=None, microsecond=0)
    create_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="CreateBucket",
                             eventTime=now))
    data = json.dumps(create_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(result) == 1
    assert result[0].Tags.attribute_values["theBucketName"] == "testbucket1"
    assert result[0].eventSource == "aws.s3"

    # Polling (make sure the date is included):
    polling_event = CloudwatchEventFactory(
        detail=DetailFactory(
            requestParameters={
                "bucketName": "testbucket1",
                "creationDate": now
            },
            eventSource="historical.s3.poller",
            eventName="PollS3",
            eventTime=now))
    data = json.dumps(polling_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)

    handler(data, mock_lambda_environment)
    assert CurrentS3Model.count() == 1

    # Load the config and verify the polling timestamp is in there:
    result = list(CurrentS3Model.query("arn:aws:s3:::testbucket1"))
    assert result[0].configuration["CreationDate"] == now.isoformat() + "Z"
    assert result[0].eventSource == "historical.s3.poller"

    # And deletion:
    delete_event = CloudwatchEventFactory(
        detail=DetailFactory(requestParameters={"bucketName": "testbucket1"},
                             eventSource="aws.s3",
                             eventName="DeleteBucket",
                             eventTime=now))
    data = json.dumps(delete_event, default=serialize)
    data = RecordsFactory(records=[SQSDataFactory(body=data)])
    data = json.dumps(data, default=serialize)
    data = json.loads(data)
    handler(data, mock_lambda_environment)
    assert CurrentS3Model.count() == 0
Example #7
def test_differ(durable_s3_table, mock_lambda_environment):
    """Test the differ across insert, duplicate, ephemeral, update, and delete events."""
    from historical.s3.models import DurableS3Model
    from historical.s3.differ import handler
    from historical.models import TTL_EXPIRY

    ttl = int(time.time() + TTL_EXPIRY)
    new_bucket = S3_BUCKET.copy()
    new_bucket['eventTime'] = datetime(
        year=2017, month=5, day=12, hour=10, minute=30,
        second=0).isoformat() + 'Z'
    new_bucket["ttl"] = ttl
    new_item = DynamoDBRecordsFactory(records=[
        DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
            NewImage=new_bucket, Keys={'arn': new_bucket['arn']}),
                              eventName='INSERT')
    ])
    data = json.loads(json.dumps(new_item, default=serialize))
    handler(data, None)
    assert DurableS3Model.count() == 1

    # Test duplicates don't change anything:
    data = json.loads(json.dumps(new_item, default=serialize))
    handler(data, None)
    assert DurableS3Model.count() == 1

    # Test ephemeral changes don't add new models:
    ephemeral_changes = S3_BUCKET.copy()
    ephemeral_changes["eventTime"] = \
        datetime(year=2017, month=5, day=12, hour=11, minute=30, second=0).isoformat() + 'Z'
    ephemeral_changes["configuration"]["_version"] = 99999
    ephemeral_changes["ttl"] = ttl

    data = DynamoDBRecordsFactory(records=[
        DynamoDBRecordFactory(
            dynamodb=DynamoDBDataFactory(
                NewImage=ephemeral_changes,
                Keys={'arn': ephemeral_changes['arn']}),
            eventName='MODIFY')
    ])
    data = json.loads(json.dumps(data, default=serialize))
    handler(data, None)
    assert DurableS3Model.count() == 1

    # Add an update:
    new_changes = S3_BUCKET.copy()
    new_date = datetime(
        year=2017, month=5, day=12, hour=11, minute=30,
        second=0).isoformat() + 'Z'
    new_changes["eventTime"] = new_date
    new_changes["Tags"] = {"ANew": "Tag"}
    new_changes["configuration"]["Tags"] = {"ANew": "Tag"}
    new_changes["ttl"] = ttl
    data = DynamoDBRecordsFactory(records=[
        DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
            NewImage=new_changes, Keys={'arn': new_changes['arn']}),
                              eventName='MODIFY')
    ])
    data = json.loads(json.dumps(data, default=serialize))
    handler(data, None)
    results = list(DurableS3Model.query("arn:aws:s3:::testbucket1"))
    assert len(results) == 2
    assert results[1].Tags["ANew"] == \
        results[1].configuration.attribute_values["Tags"]["ANew"] == "Tag"
    assert results[1].eventTime == new_date

    # And deletion (ensure new record -- testing TTL):
    delete_bucket = S3_BUCKET.copy()
    delete_bucket["eventTime"] = datetime(
        year=2017, month=5, day=12, hour=12, minute=30,
        second=0).isoformat() + 'Z'
    delete_bucket["ttl"] = ttl
    data = DynamoDBRecordsFactory(records=[
        DynamoDBRecordFactory(dynamodb=DynamoDBDataFactory(
            OldImage=delete_bucket, Keys={'arn': delete_bucket['arn']}),
                              eventName='REMOVE',
                              userIdentity=UserIdentityFactory(
                                  type='Service',
                                  principalId='dynamodb.amazonaws.com'))
    ])
    data = json.loads(json.dumps(data, default=serialize))
    handler(data, None)
    assert DurableS3Model.count() == 3
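The final count of 3 is one durable revision per effective change: the initial INSERT, the MODIFY that actually altered the Tags, and the REMOVE (recognized as a DynamoDB TTL deletion via the Service/dynamodb.amazonaws.com user identity) each produce a record, while the duplicate event and the ephemeral _version change add none.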
Example #8
def test_poller_processor_handler(historical_role, buckets,
                                  mock_lambda_environment, historical_sqs,
                                  swag_accounts):
    """Test the Poller's processing component that tasks the collector."""
    from historical.s3.poller import poller_processor_handler as handler

    # Create the events and SQS records:
    messages = make_poller_events()
    event = json.loads(
        json.dumps(RecordsFactory(records=messages), default=serialize))

    # Run the collector:
    handler(event, None)

    # Need to ensure that 51 total buckets were added into SQS:
    sqs = boto3.client("sqs", region_name="us-east-1")
    queue_url = get_queue_url(os.environ['POLLER_QUEUE_NAME'])

    all_buckets = {"SWAG": True}
    for i in range(0, 50):
        all_buckets[f"testbucket{i}"] = True

    # Loop through the queue and make sure all buckets are accounted for:
    for _ in range(6):
        messages = sqs.receive_message(QueueUrl=queue_url,
                                       MaxNumberOfMessages=10)['Messages']
        message_ids = []

        for msg in messages:
            message_ids.append({
                "Id": msg['MessageId'],
                "ReceiptHandle": msg['ReceiptHandle']
            })
            data = S3_POLLING_SCHEMA.loads(msg['Body']).data
            bucket_name = data["detail"]["request_parameters"]["bucket_name"]

            assert all_buckets[bucket_name]
            assert datetime.strptime(
                data["detail"]["request_parameters"]["creation_date"],
                '%Y-%m-%dT%H:%M:%SZ')
            assert data["detail"]["event_source"] == "historical.s3.poller"

            # Remove from the dict (at the end, there should be 0 items left):
            del all_buckets[bucket_name]

        sqs.delete_message_batch(QueueUrl=queue_url, Entries=message_ids)

    assert not all_buckets

    # Check that an exception raised doesn't break things:
    import historical.s3.poller

    def mocked_poller(account, stream, randomize_delay=0):  # pylint: disable=W0613
        raise ClientError({"Error": {
            "Message": "",
            "Code": "AccessDenied"
        }}, "sts:AssumeRole")

    old_method = historical.s3.poller.produce_events  # For pytest inter-test issues...
    historical.s3.poller.produce_events = mocked_poller
    handler(event, None)
    historical.s3.poller.produce_events = old_method