def test_update_partitions(self):
    """Check that ``update-partitions`` reports and relocates moved partitions.

    Ten partitions are created, the first five (in sorted order) are pointed
    at a location under ``s3://old-bucket/old-table/``, and then the CLI
    output and the resulting partition locations are verified.
    """
    self.s3.create_bucket(Bucket=self.bucket)
    self.helper.make_database_and_table()
    cli = Cli()

    all_partitions = sorted(self.helper.create_many_partitions(10))

    partitioner = Partitioner(
        self.database, self.table, aws_region=self.region)
    partitioner.create_partitions(all_partitions)

    # Point the first five partitions at the old bucket and collect the
    # lines the CLI is expected to print for them.
    moved = all_partitions[:5]
    report_lines = ["Found 5 moved partitions"]
    for stale in moved:
        key_path = "/".join(stale.values)
        stale.location = f"s3://old-bucket/old-table/{key_path}/"
        report_lines.append(f"\t{stale}")
    expected_output = "\n".join(report_lines)

    partitioner.update_partition_locations(moved)

    out, _ = self.get_cmd_output(
        cli, ["update-partitions", self.database, self.table])
    out.should.equal(expected_output)

    # After the command runs, every moved partition must live under the
    # table's own bucket/prefix again.
    relocated_prefix = f"s3://{self.bucket}/{self.table}/"
    found_map = PartitionMap(partitioner.existing_partitions())
    for stale in moved:
        matching = found_map.get(stale)
        matching.should_not.be.false
        matching.location.startswith(relocated_prefix).should.be.true
def test_update_partitions_error_output(self):
    """Check the CLI output and exit code when updating a partition fails.

    ``update_partition_locations`` is replaced with a mock that returns a
    single ``PartitionNotFound`` error, and the CLI is handed that
    partitioner through a mocked ``get_partitioner``.
    """
    self.s3.create_bucket(Bucket=self.bucket)
    self.helper.make_database_and_table()
    cli = Cli()

    partitioner = Partitioner(
        self.database, self.table, aws_region=self.region)

    partition = self.helper.create_partition_data()
    partition.location = "s3://old-bucket/old-table/"
    partitioner.create_partitions([partition])

    # Force the update step to report a failure for the only partition.
    failure = {
        "PartitionValues": partition.values,
        "ErrorDetail": {
            "ErrorCode": "PartitionNotFound",
            "ErrorMessage": "Partition not found",
        },
    }
    partitioner.update_partition_locations = MagicMock(
        return_value=[failure])

    # Make the CLI use the partitioner carrying the mocked method.
    cli.get_partitioner = MagicMock(return_value=partitioner)

    expected_output = (
        f"Found 1 moved partitions\n\t{partition}\n"
        "One or more errors occurred when attempting to update partitions\n"
        f"Error on {partition.values}: PartitionNotFound"
    )

    out, _ = self.get_cmd_output(
        cli, ["update-partitions", self.database, self.table])
    out.should.equal(expected_output)

    self.exit_mock.assert_called_with(1)
def test_update_partition_locations_with_non_existent_partition(self):
    """Check that a partition never created is reported as an error.

    The test expects exactly one error, carrying the partition's values,
    and expects ``glue.update_partition`` not to be called at all.
    """
    self.helper.make_database_and_table()

    missing = Partition(["2019", "01", "01", "01"], "s3://who/cares/")

    partitioner = Partitioner(
        self.database, self.table, aws_region=self.region)
    update_spy = MagicMock()
    partitioner.glue.update_partition = update_spy

    errors = partitioner.update_partition_locations([missing])

    errors.should.have.length_of(1)
    errors[0]["Partition"].should.equal(missing.values)
    update_spy.assert_not_called()
def test_update_partition_locations_with_mix_of_good_and_bad(self):
    """Check a mixed batch: one partition that exists and one that does not.

    The existing partition is expected to be updated to its new location,
    while the one never created is expected to be the only reported error.
    """
    self.helper.make_database_and_table()

    good_old_location = "s3://old-bucket/table/data1/"
    good_new_location = f"s3://{self.bucket}/{self.table}/2019-01-01-01/"

    good_partition = Partition(["2019", "01", "01", "01"], good_old_location)
    bad_partition = Partition(
        ["2018", "02", "02", "02"], "s3://old-bucket/table/data2/")

    # Only the good partition is created, still at its old location.
    existing_input = {
        "Values": good_partition.values,
        "StorageDescriptor": {"Location": good_partition.location},
    }
    self.glue.create_partition(
        DatabaseName=self.database,
        TableName=self.table,
        PartitionInput=existing_input)

    good_partition.location = good_new_location

    partitioner = Partitioner(
        self.database, self.table, aws_region=self.region)
    update_spy = MagicMock()
    partitioner.glue.update_partition = update_spy

    errors = partitioner.update_partition_locations(
        [bad_partition, good_partition])

    expected_input = {
        "Values": good_partition.values,
        "StorageDescriptor": {"Location": good_new_location},
    }
    update_spy.assert_called_with(
        DatabaseName=self.database,
        TableName=self.table,
        PartitionValueList=good_partition.values,
        PartitionInput=expected_input)

    errors.should.have.length_of(1)
    errors[0]["Partition"].should.equal(bad_partition.values)
def test_update_partition_locations(self):
    """Check that every moved partition gets a ``glue.update_partition`` call.

    Fifteen partitions are registered at an old location, then whatever
    ``find_moved_partitions`` returns is passed to
    ``update_partition_locations``. No errors are expected, and one update
    call per partition is expected, in any order.
    """
    old_location = "s3://old-bucket/table/"
    # NOTE: old_location already ends with "/", so this contains "//".
    stale_location = f"{old_location}/data/"

    self.s3.create_bucket(Bucket=self.bucket)
    self.helper.make_database_and_table()

    partitions = list(self.helper.create_many_partitions(count=15))
    partitions.sort()

    batch_input = [
        {
            "Values": partition.values,
            "StorageDescriptor": {"Location": stale_location},
        }
        for partition in partitions
    ]
    expected_calls = [
        call(
            DatabaseName=self.database,
            TableName=self.table,
            PartitionValueList=partition.values,
            PartitionInput=ANY)
        for partition in partitions
    ]

    self.glue.batch_create_partition(
        DatabaseName=self.database,
        TableName=self.table,
        PartitionInputList=batch_input)

    partitioner = Partitioner(
        self.database, self.table, aws_region=self.region)
    update_spy = MagicMock()
    partitioner.glue.update_partition = update_spy

    moved = partitioner.find_moved_partitions()
    errors = partitioner.update_partition_locations(moved)

    errors.should.be.empty
    update_spy.assert_has_calls(expected_calls, any_order=True)