def test_find_new_partitions(mocker):
    """find_recent_partitions should return only today's partition.

    Yesterday's us-west-2 partition is handed in as already existing, and
    ``boto3.client`` is patched to return the stub client, which is primed
    with one ``list_objects_v2`` request/response pair. The expected result
    is exactly one partition: us-west-2 for today's UTC date.
    """
    date_part = GroupedDatePartitioner(s3_location="s3://nowhere")
    today = datetime.utcfromtimestamp(time.time()).date()
    yesterday = today - timedelta(days=1)
    # str(date) is ISO formatted (YYYY-MM-DD), so splitting on '-' yields the
    # [year, month, day] partition values.
    existing_part = ['us-west-2'] + str(yesterday).split('-')

    s3_stub = S3Stubber.for_single_request('list_objects_v2',
                                           today_objects_request(),
                                           today_objects())
    # mocker.patch() applies the patch as soon as it is called and pytest-mock
    # undoes it at test teardown. Using it in a ``with`` statement is not
    # supported by pytest-mock, so it is called directly.
    mocker.patch('boto3.client', return_value=s3_stub.client)
    with s3_stub.stubber:
        new_partitions = date_part.find_recent_partitions([existing_part])

    assert len(new_partitions) == 1
    assert new_partitions == [['us-west-2'] + str(today).split('-')]
def test_partition_scanner(mocker):
    """build_partitions_from_s3 should span first sample key through today.

    The stub is primed with one response for the delimiter (prefix listing)
    request, followed by one object-listing response for each of the first
    two common prefixes. ``boto3.client`` is patched to return the stub
    client while the partitions are built.
    """
    date_part = GroupedDatePartitioner(s3_location="s3://nowhere/some_logs")
    today = datetime.utcfromtimestamp(time.time()).date()

    s3_stub = S3Stubber('list_objects_v2')

    s3_stub.add_response(response_with_prefixes(), delimiter_request())
    # Add in a response for each region. The fixture data is fetched once
    # up front instead of being rebuilt on every iteration.
    region_keys = region_s3_keys()
    common_prefixes = response_with_prefixes()['CommonPrefixes']
    # range() instead of xrange() so the test also runs on Python 3.
    for i in range(2):
        s3_stub.add_response(
            response_for_objects(region_keys[i]),
            prefix_request(common_prefixes[i]['Prefix']))

    # mocker.patch() applies the patch as soon as it is called and pytest-mock
    # undoes it at test teardown. Using it in a ``with`` statement is not
    # supported by pytest-mock, so it is called directly.
    mocker.patch('boto3.client', return_value=s3_stub.client)
    with s3_stub.stubber:
        new_tuples = date_part.build_partitions_from_s3()

    # The first partition will be the first key in the sample data
    # The last partition will be today in the region of the last key
    assert new_tuples[0] == ['us-west-2', '2017', '08', '11']
    assert new_tuples[-1] == ['us-east-1'] + str(today).split('-')
# Example no. 3
# 0
 def get_partitioner(self):
     """Return a hive-compatible GroupedDatePartitioner for self.s3_location."""
     partitioner = GroupedDatePartitioner(
         s3_location=self.s3_location,
         hive_compatible=True,
     )
     return partitioner
def test_partition_key():
    """The partition keys must include one whose 'Name' is 'region'."""
    partitioner = GroupedDatePartitioner(s3_location="s3://nowhere")
    partition_keys = partitioner.partition_keys()
    # Collect every key's name up front so each entry's 'Name' is read.
    names = tuple(entry['Name'] for entry in partition_keys)
    assert 'region' in names