def test_find_new_partitions(mocker):
    """Check that find_recent_partitions reports exactly one new partition.

    Yesterday's 'us-west-2' partition is passed in as already existing, and
    the single stubbed ``list_objects_v2`` request is answered with
    ``today_objects()``.  The only partition expected back is today's
    'us-west-2' partition.
    """
    date_part = GroupedDatePartitioner(s3_location="s3://nowhere")
    today = datetime.utcfromtimestamp(time.time()).date()
    yesterday = today - timedelta(days=1)

    # Partition values are the region followed by the dash-separated parts of
    # the date's string form (year, month, day).
    existing_part = ['us-west-2'] + str(yesterday).split('-')

    s3_stub = S3Stubber.for_single_request(
        'list_objects_v2', today_objects_request(), today_objects())

    # mocker.patch activates the patch as soon as it is called and pytest-mock
    # reverts it at test teardown, so it must not be used as a context manager.
    mocker.patch('boto3.client', return_value=s3_stub.client)
    with s3_stub.stubber:
        new_partitions = date_part.find_recent_partitions([existing_part])

    assert len(new_partitions) == 1
    assert new_partitions == [['us-west-2'] + str(today).split('-')]
def test_partition_scanner(mocker):
    """Check the partitions build_partitions_from_s3 derives from stubbed S3.

    The stub first answers the delimiter listing with the common prefixes,
    then answers one prefix listing for each of the first two regions.  The
    resulting partition list must start with the first key of the sample data
    and end with today's date in the region of the last key.
    """
    date_part = GroupedDatePartitioner(s3_location="s3://nowhere/some_logs")
    today = datetime.utcfromtimestamp(time.time()).date()

    s3_stub = S3Stubber('list_objects_v2')
    s3_stub.add_response(response_with_prefixes(), delimiter_request())

    # Add in a response for each region.  The fixture data is fetched once
    # rather than on every iteration; this assumes the helpers return the
    # same data on each call.
    region_keys = region_s3_keys()
    common_prefixes = response_with_prefixes()['CommonPrefixes']
    # range (not xrange) so the test runs on both Python 2 and Python 3.
    for i in range(2):
        s3_stub.add_response(
            response_for_objects(region_keys[i]),
            prefix_request(common_prefixes[i]['Prefix']))

    # mocker.patch activates the patch as soon as it is called and pytest-mock
    # reverts it at test teardown, so it must not be used as a context manager.
    mocker.patch('boto3.client', return_value=s3_stub.client)
    with s3_stub.stubber:
        new_tuples = date_part.build_partitions_from_s3()

    # The first partition will be the first key in the sample data
    # The last partition will be today in the region of the last key
    assert new_tuples[0] == ['us-west-2', '2017', '08', '11']
    assert new_tuples[-1] == ['us-east-1'] + str(today).split('-')
def get_partitioner(self):
    """Build the partitioner for this object's S3 location.

    Returns:
        A GroupedDatePartitioner created from ``self.s3_location`` with
        ``hive_compatible=True``.
    """
    partitioner = GroupedDatePartitioner(
        s3_location=self.s3_location,
        hive_compatible=True,
    )
    return partitioner
def test_partition_key():
    """The partition keys must include one named 'region'."""
    partitioner = GroupedDatePartitioner(s3_location="s3://nowhere")
    partition_keys = partitioner.partition_keys()

    # Collect every key's 'Name' entry, then check membership.
    names = [key['Name'] for key in partition_keys]

    assert 'region' in names