Example No. 1
def coalesce_partition(bucket, partition, use_localstack: bool,
                       manifests: bool):
    # Coalesce every batch in the partition with a freshly built S3 client,
    # returning one result per batch.
    client = s3_client(use_localstack)
    s3 = S3(client)
    return [
        coalesce_batch(s3, bucket, batch, manifests) for batch in partition
    ]
Example No. 2
def test_get_full_prefix_when_not_set_with_end_slash(self):
    # A date_to_add of "NOT_SET" should leave a prefix that already ends
    # in a slash unchanged.
    prefix = f"{test_prefix}/"
    expected = f"{test_prefix}/"
    date_to_add = "NOT_SET"
    today = datetime.strptime("2020-09-01", date_format)
    client = s3_client(True)
    s3 = S3(client)
    actual = s3.get_full_s3_prefix(prefix, date_to_add, today)
    self.assertEqual(expected, actual)
Example No. 3
def test_batches_are_deleted(self):
    # A batch small enough to fit one request should produce a single
    # delete_objects call containing every key.
    batch = [{'object_key': f'prefix/{i}'} for i in range(5)]
    deletes = [{'Key': x['object_key']} for x in batch]
    calls = [call(Bucket=self.bucket, Delete={'Objects': deletes})]
    client = s3_client(True)
    client.delete_objects = MagicMock(return_value={})
    s3 = S3(client)
    s3.delete_batch(self.bucket, batch)
    client.delete_objects.assert_has_calls(calls)
Example No. 4
def test_get_full_prefix_when_set_to_yesterday(self):
    # A date_to_add of "yesterday" should append yesterday's date to the
    # prefix as /YYYY/MM/DD.
    prefix = test_prefix
    expected = f"{test_prefix}/2020/08/31"
    date_to_add = "yesterday"
    today = datetime.strptime("2020-09-01", date_format)
    client = s3_client(True)
    s3 = S3(client)
    actual = s3.get_full_s3_prefix(prefix, date_to_add, today)
    self.assertEqual(expected, actual)
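Examples 2 and 4 between them pin down the two behaviours of get_full_s3_prefix exercised here: "NOT_SET" leaves the prefix alone, while "yesterday" appends the previous day's date. A minimal sketch consistent with those tests follows; the method body, the timedelta arithmetic, and the handling of other date_to_add values are assumptions, not the project's actual implementation.

from datetime import timedelta

def get_full_s3_prefix(self, prefix, date_to_add, today):
    # Hypothetical sketch, inferred only from the tests above: "NOT_SET"
    # returns the prefix untouched, "yesterday" appends /YYYY/MM/DD for the
    # day before `today`.
    if date_to_add == "yesterday":
        yesterday = today - timedelta(days=1)
        return f"{prefix}/{yesterday.strftime('%Y/%m/%d')}"
    return prefix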
Example No. 5
def test_object_summaries(self):
    # object_summaries should page through list_objects_v2 responses and
    # yield every object summary until a page reports IsTruncated == False.
    client = s3_client(True)
    s3 = S3(client)
    objects = [self.__summaries(x) for x in range(10)]
    contents = [x['Contents'] for x in objects]
    expected = reduce(lambda acc, xs: acc + xs, contents)
    objects[-1]['IsTruncated'] = False
    client.list_objects_v2 = Mock(side_effect=objects)
    actual = []
    for sub_batch in s3.object_summaries(self.bucket, 'prefix', 5):
        actual += sub_batch
    self.assertEqual(expected, actual)
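Example No. 5 drives a paginated listing. One way object_summaries could satisfy it is sketched below; the _client attribute, the mapping of the third argument to MaxKeys, and the continuation-token handling are assumptions rather than the project's confirmed implementation.

def object_summaries(self, bucket, prefix, max_keys):
    # Hypothetical sketch: yield one sub-batch of object summaries per
    # list_objects_v2 page, following continuation tokens until a page
    # reports IsTruncated == False.
    kwargs = {"Bucket": bucket, "Prefix": prefix, "MaxKeys": max_keys}
    while True:
        page = self._client.list_objects_v2(**kwargs)
        yield page.get("Contents", [])
        if not page.get("IsTruncated"):
            return
        kwargs["ContinuationToken"] = page["NextContinuationToken"]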
Example No. 6
def test_batches_are_deleted_in_chunks(self):
    # S3's DeleteObjects API accepts at most 1,000 keys per request, so a
    # batch of 3,500 keys should be split across four delete_objects calls.
    batch = [{'object_key': f'prefix/{i}'} for i in range(3500)]
    sub_batch1 = [{'Key': f'prefix/{i}'} for i in range(1000)]
    sub_batch2 = [{'Key': f'prefix/{i}'} for i in range(1000, 2000)]
    sub_batch3 = [{'Key': f'prefix/{i}'} for i in range(2000, 3000)]
    sub_batch4 = [{'Key': f'prefix/{i}'} for i in range(3000, 3500)]
    calls = [call(Bucket=self.bucket, Delete={'Objects': x}) for x in
             [sub_batch1, sub_batch2, sub_batch3, sub_batch4]]
    client = s3_client(True)
    client.delete_objects = MagicMock(return_value={})
    s3 = S3(client)
    s3.delete_batch(self.bucket, batch)
    client.delete_objects.assert_has_calls(calls)
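Examples 3 and 6 together suggest the shape of delete_batch: map each record's object_key to a Delete entry and issue one delete_objects call per chunk of at most 1,000 keys, the DeleteObjects per-request limit. A sketch along those lines, with the _client attribute as an assumption:

def delete_batch(self, bucket, batch):
    # Hypothetical sketch: chunk the keys so no single request exceeds
    # S3's 1,000-key DeleteObjects limit.
    keys = [{"Key": item["object_key"]} for item in batch]
    for start in range(0, len(keys), 1000):
        self._client.delete_objects(
            Bucket=bucket,
            Delete={"Objects": keys[start:start + 1000]})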
Example No. 7
def main():
    start = timer()
    args = command_line_args()
    client = s3_client(args.localstack)
    s3 = S3(client)
    print(
        f"Bucket: '{args.bucket}', prefix: '{args.prefix}', partition: {args.partition}, "
        f"threads: {args.threads}, multiprocessor: {args.multiprocessor}, manifests: {args.manifests}."
    )
    # Coalesce one tranche per page of object summaries; the script exits
    # non-zero below if any tranche fails.
    results = [
        coalesce_tranche(args, summaries) for summaries in s3.object_summaries(
            args.bucket, args.prefix, args.summaries)
    ]
    end = timer()
    print(f"Total time taken: {end - start:.2f} seconds.")
    exit(0 if all(results) else 1)
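main() reads several options from command_line_args(), which the excerpt does not show. The attributes used above imply an argument parser roughly like the one below; the option names come from the code, while the types, defaults, and requiredness are assumptions.

import argparse

def command_line_args():
    # Hypothetical sketch covering only the attributes main() reads above;
    # types and defaults are illustrative, not the project's actual parser.
    parser = argparse.ArgumentParser(description="Coalesce S3 objects.")
    parser.add_argument("--bucket", required=True)
    parser.add_argument("--prefix", required=True)
    parser.add_argument("--partition", type=int)
    parser.add_argument("--threads", type=int, default=10)
    parser.add_argument("--summaries", type=int, default=1000)
    parser.add_argument("--manifests", action="store_true")
    parser.add_argument("--multiprocessor", action="store_true")
    parser.add_argument("--localstack", action="store_true")
    return parser.parse_args()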
Example No. 8
def __client(self):
    # Build a stubbed client: get_object returns one canned object body per
    # call and upload_fileobj is a Mock that records its calls.
    objects = [self.__s3_object_with_body(i) for i in range(1000)]
    client = s3_client(True)
    client.get_object = Mock(side_effect=objects)
    client.upload_fileobj = Mock()
    return client
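The stub above depends on a __s3_object_with_body helper that the excerpt does not show. A plausible stand-in, assuming each canned object only needs to mimic a get_object response with a readable Body:

import io

def __s3_object_with_body(self, index):
    # Hypothetical helper: a minimal get_object-style response whose Body
    # can be read like boto3's StreamingBody.
    return {"Body": io.BytesIO(f"object body {index}\n".encode())}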
Example No. 9
def coalesce_batch_parallel(bucket, batch, manifests,
                            use_localstack: bool) -> bool:
    # Build the client inside the call so each parallel worker gets its own
    # S3 client instead of sharing one across threads or processes.
    return coalesce_batch(S3(s3_client(use_localstack)), bucket, batch,
                          manifests)
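Because coalesce_batch_parallel constructs its own client, it can be handed to worker processes directly; boto3 clients are generally not safe to share or pickle across processes. A hypothetical driver, not part of the excerpt, could fan batches out like this:

from functools import partial
from multiprocessing import Pool

def coalesce_tranche_multiprocessed(args, batches):
    # Hypothetical driver: each worker calls coalesce_batch_parallel, which
    # builds its own client; the function name and pool sizing are
    # illustrative only.
    worker = partial(coalesce_batch_parallel, args.bucket,
                     manifests=args.manifests,
                     use_localstack=args.localstack)
    with Pool(args.threads) as pool:
        return all(pool.map(worker, batches))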