Example #1
0
    def test_find_moved_partitions(self):
        """Partitions registered under an old location are reported as moved.

        Every partition returned by the helper is registered in Glue with a
        location derived from ``old_location``; ``find_moved_partitions()``
        is then expected to return exactly those partitions.
        """
        old_location = "s3://old-bucket/table/"
        # NOTE(review): old_location already ends in "/", so this renders as
        # ".../table//data/". Kept as-is; confirm the double slash is intended.
        stale_location = f"{old_location}/data/"

        self.s3.create_bucket(Bucket=self.bucket)
        self.helper.make_database_and_table()

        expected = sorted(self.helper.create_many_partitions(count=15))

        self.glue.batch_create_partition(
            DatabaseName=self.database,
            TableName=self.table,
            PartitionInputList=[
                {
                    "Values": item.values,
                    "StorageDescriptor": {"Location": stale_location},
                }
                for item in expected
            ],
        )

        partitioner = Partitioner(
            self.database,
            self.table,
            aws_region=self.region,
        )
        found = partitioner.find_moved_partitions()

        found.should.have.length_of(len(expected))

        # Order both sides the same way, then compare position by position.
        found.sort()
        expected.sort()
        for actual, wanted in zip(found, expected):
            actual.should.equal(wanted)
Example #2
0
    def test_update_partition_locations(self):
        """Each moved partition triggers one ``update_partition`` call.

        Partitions from the helper are registered in Glue with a location
        derived from ``old_location``. The Glue client's ``update_partition``
        is replaced by a mock, and the test checks that
        ``update_partition_locations()`` returns no errors and calls the mock
        once per partition, in any order.
        """
        old_location = "s3://old-bucket/table/"
        # NOTE(review): old_location already ends in "/", so this renders as
        # ".../table//data/". Kept as-is; confirm the double slash is intended.
        stale_location = f"{old_location}/data/"

        self.s3.create_bucket(Bucket=self.bucket)
        self.helper.make_database_and_table()

        partitions = sorted(self.helper.create_many_partitions(count=15))

        partition_inputs = [
            {
                "Values": item.values,
                "StorageDescriptor": {"Location": stale_location},
            }
            for item in partitions
        ]
        # PartitionInput is matched with ANY: only the targeted database,
        # table and partition values are pinned by this test.
        expected_calls = [
            call(
                DatabaseName=self.database,
                TableName=self.table,
                PartitionValueList=item.values,
                PartitionInput=ANY,
            )
            for item in partitions
        ]

        self.glue.batch_create_partition(
            DatabaseName=self.database,
            TableName=self.table,
            PartitionInputList=partition_inputs,
        )

        partitioner = Partitioner(
            self.database,
            self.table,
            aws_region=self.region,
        )
        update_stub = MagicMock()
        partitioner.glue.update_partition = update_stub

        to_update = partitioner.find_moved_partitions()
        errors = partitioner.update_partition_locations(to_update)

        errors.should.be.empty
        update_stub.assert_has_calls(expected_calls, any_order=True)
Example #3
0
    def test_find_moved_partitions_with_missing_partitions(self):
        """A lone catalog partition yields no moved partitions.

        A single partition is created directly in Glue (the helper's
        partition-creation routine is not used here), and
        ``find_moved_partitions()`` is expected to return an empty result.
        """
        old_location = "s3://old-bucket/table/"

        self.s3.create_bucket(Bucket=self.bucket)
        self.helper.make_database_and_table()

        # NOTE(review): old_location already ends in "/", so the location
        # renders as ".../table//data/". Kept as-is; confirm it is intended.
        catalog_only_partition = {
            "Values": ["2019", "01", "01", "01"],
            "StorageDescriptor": {"Location": f"{old_location}/data/"},
        }
        self.glue.create_partition(
            DatabaseName=self.database,
            TableName=self.table,
            PartitionInput=catalog_only_partition,
        )

        partitioner = Partitioner(
            self.database,
            self.table,
            aws_region=self.region,
        )
        # NOTE(review): this stub is installed but never asserted on;
        # presumably it guards against real update calls -- confirm whether
        # an assert_not_called() check was intended.
        update_stub = MagicMock()
        partitioner.glue.update_partition = update_stub

        moved = partitioner.find_moved_partitions()
        moved.should.be.empty