def test_partition_scanner(mocker):
    """Build partitions from a stubbed S3 listing and check both ends.

    ``boto3.client`` is patched to return the stubbed client, and the single
    ``list_objects_v2`` request is answered with ``[basic_s3_key()]``. The
    first partition must be 2017/08/11 and the last must be the current UTC
    date, each expressed as ``[year, month, day]`` strings.
    """
    date_part = DatePartitioner(s3_location="s3://nowhere")
    # Named ``utc_today`` so the local does not shadow the ``today()`` helper
    # used elsewhere in this module.
    utc_today = datetime.utcfromtimestamp(time.time()).date()
    s3_stub = S3Stubber.for_single_request(
        'list_objects_v2', request_params(), [basic_s3_key()])

    # The mocker fixture undoes the patch at teardown; pytest-mock does not
    # support using the returned mock as a context manager, so no ``with``.
    mocker.patch('boto3.client', return_value=s3_stub.client)
    with s3_stub.stubber:
        new_tuples = date_part.build_partitions_from_s3()

    assert new_tuples[0] == ['2017', '08', '11']
    assert new_tuples[-1] == str(utc_today).split('-')
def test_find_new_partitions(mocker):
    """Only today's partition is reported when yesterday's already exists.

    ``boto3.client`` is patched to return the stubbed client, and the single
    ``list_objects_v2`` request (``today_objects_request()``) is answered
    with ``today_objects()``.
    """
    date_part = DatePartitioner(s3_location="s3://nowhere")
    existing_part = str(yesterday()).split('-')
    s3_stub = S3Stubber.for_single_request(
        'list_objects_v2', today_objects_request(), today_objects())

    # The mocker fixture undoes the patch at teardown; pytest-mock does not
    # support using the returned mock as a context manager, so no ``with``.
    mocker.patch('boto3.client', return_value=s3_stub.client)
    with s3_stub.stubber:
        new_partitions = date_part.find_recent_partitions([existing_part])

    assert len(new_partitions) == 1
    assert new_partitions == [str(today()).split('-')]
def test_find_all_new_partitions(mocker):
    """With no existing partitions, every recent day that has data is found.

    One ``list_objects_v2`` request is stubbed per day in the
    ``DatePartitioner.MAX_RECENT_DAYS`` window. Only the first two requests
    (today and yesterday) are answered with objects; the rest get empty
    listings, so exactly two partitions are expected, newest first.
    """
    date_part = DatePartitioner(s3_location="s3://nowhere")

    # One request per day, walking backwards from today (inclusive) for
    # MAX_RECENT_DAYS days.
    requests = [
        list_request_for_ts(today() - timedelta(days=i))
        for i in range(DatePartitioner.MAX_RECENT_DAYS)
    ]
    responses = (
        [today_objects(), yesterday_objects()]
        + [[]] * (DatePartitioner.MAX_RECENT_DAYS - 2)
    )
    s3_stub = S3Stubber.for_multiple_requests(
        'list_objects_v2', requests, responses)

    # The mocker fixture undoes the patch at teardown; pytest-mock does not
    # support using the returned mock as a context manager, so no ``with``.
    mocker.patch('boto3.client', return_value=s3_stub.client)
    with s3_stub.stubber:
        new_partitions = date_part.find_recent_partitions([])

    # Only 2 days have data
    assert len(new_partitions) == 2
    assert new_partitions == [
        str(today()).split('-'),
        str(yesterday()).split('-'),
    ]
def get_partitioner(self):
    """Return a DatePartitioner for ``self.s3_location``.

    The partitioner is created with ``hive_compatible=True``.
    """
    partitioner = DatePartitioner(
        s3_location=self.s3_location,
        hive_compatible=True,
    )
    return partitioner
def test_partition_key_order():
    """The ``Name`` of each partition key comes back as year, month, day."""
    expected_names = ['year', 'month', 'day']
    partitioner = DatePartitioner(s3_location="s3://nowhere")

    actual_names = []
    for key in partitioner.partition_keys():
        actual_names.append(key['Name'])

    assert actual_names == expected_names
def test_partition_builder():
    """A [year, month, day] partition is joined onto the S3 location."""
    partition_values = ['2017', '08', '11']
    expected_path = 's3://nowhere/2017/08/11'

    partitioner = DatePartitioner(s3_location="s3://nowhere")
    built_path = partitioner.build_partitioned_path(partition_values)

    assert built_path == expected_path