コード例 #1
0
    def test_update_partitions(self):
        """Check that ``update-partitions`` lists and relocates moved partitions.

        Ten partitions are created and the first five (in sorted order) are
        pointed at ``s3://old-bucket/old-table/...``. The CLI command is then
        expected to print exactly those five, after which each of them must
        have a location starting with ``s3://<bucket>/<table>/``.
        """
        self.s3.create_bucket(Bucket=self.bucket)
        self.helper.make_database_and_table()
        cli = Cli()

        all_partitions = self.helper.create_many_partitions(10)
        all_partitions.sort()
        partitioner = Partitioner(
            self.database, self.table, aws_region=self.region)
        partitioner.create_partitions(all_partitions)

        moved = all_partitions[:5]
        output_parts = ["Found 5 moved partitions"]
        for moved_partition in moved:
            joined_values = "/".join(moved_partition.values)
            moved_partition.location = (
                f"s3://old-bucket/old-table/{joined_values}/")
            # Format the partition only after its location has been changed.
            output_parts.append(f"\n\t{moved_partition}")
        expected_output = "".join(output_parts)

        partitioner.update_partition_locations(moved)

        out, _err = self.get_cmd_output(
            cli, ["update-partitions", self.database, self.table])
        out.should.equal(expected_output)

        stored = PartitionMap(partitioner.existing_partitions())
        expected_prefix = f"s3://{self.bucket}/{self.table}/"
        for moved_partition in moved:
            current = stored.get(moved_partition)
            current.should_not.be.false
            current.location.startswith(expected_prefix).should.be.true
コード例 #2
0
    def test_update_partitions_error_output(self):
        """Check the CLI output and exit code when a partition update fails.

        ``update_partition_locations`` is replaced with a mock that returns a
        single ``PartitionNotFound`` error, and the CLI is handed that
        partitioner through a mocked ``get_partitioner``. The command output
        must include the error line and the exit mock must be called with 1.
        """
        self.s3.create_bucket(Bucket=self.bucket)
        self.helper.make_database_and_table()
        cli = Cli()

        partitioner = Partitioner(
            self.database, self.table, aws_region=self.region)

        stale = self.helper.create_partition_data()
        stale.location = "s3://old-bucket/old-table/"
        partitioner.create_partitions([stale])

        # Canned failure returned in place of a real update attempt.
        failure = {
            "PartitionValues": stale.values,
            "ErrorDetail": {
                "ErrorCode": "PartitionNotFound",
                "ErrorMessage": "Partition not found",
            },
        }
        partitioner.update_partition_locations = MagicMock(
            return_value=[failure])

        # Make the CLI use the doctored partitioner instead of building one.
        cli.get_partitioner = MagicMock(return_value=partitioner)

        expected_output = (
            f"Found 1 moved partitions\n\t{stale}\n"
            "One or more errors occurred when attempting to update partitions"
            f"\nError on {stale.values}: PartitionNotFound"
        )
        out, _err = self.get_cmd_output(
            cli, ["update-partitions", self.database, self.table])
        out.should.equal(expected_output)

        self.exit_mock.assert_called_with(1)
コード例 #3
0
ファイル: partitioner_test.py プロジェクト: Journera/glutil
    def test_update_partition_locations_with_non_existent_partition(self):
        """Check that updating a partition that was never created reports an error.

        The partition is only constructed locally, never registered. The call
        must return one error carrying its values, and the mocked
        ``glue.update_partition`` must not be invoked.
        """
        self.helper.make_database_and_table()
        missing = Partition(["2019", "01", "01", "01"], "s3://who/cares/")

        partitioner = Partitioner(
            self.database, self.table, aws_region=self.region)
        update_spy = MagicMock()
        partitioner.glue.update_partition = update_spy

        errors = partitioner.update_partition_locations([missing])

        errors.should.have.length_of(1)
        errors[0]["Partition"].should.equal(missing.values)
        update_spy.assert_not_called()
コード例 #4
0
ファイル: partitioner_test.py プロジェクト: Journera/glutil
    def test_update_partition_locations_with_mix_of_good_and_bad(self):
        """Check a batch holding one registered and one unregistered partition.

        Only the first partition is created through the Glue client. After its
        location is changed, the update call must hit the mocked
        ``glue.update_partition`` with the new location, and must return a
        single error naming the unregistered partition's values.
        """
        self.helper.make_database_and_table()

        old_location = "s3://old-bucket/table/data1/"
        new_location = f"s3://{self.bucket}/{self.table}/2019-01-01-01/"
        existing = Partition(["2019", "01", "01", "01"], old_location)
        missing = Partition(["2018", "02", "02", "02"],
                            "s3://old-bucket/table/data2/")

        # Register only the "existing" partition, still at its old location.
        creation_input = {
            "Values": existing.values,
            "StorageDescriptor": {"Location": existing.location},
        }
        self.glue.create_partition(DatabaseName=self.database,
                                   TableName=self.table,
                                   PartitionInput=creation_input)

        existing.location = new_location

        partitioner = Partitioner(
            self.database, self.table, aws_region=self.region)
        update_spy = MagicMock()
        partitioner.glue.update_partition = update_spy

        errors = partitioner.update_partition_locations([missing, existing])

        expected_update_input = {
            "Values": existing.values,
            "StorageDescriptor": {"Location": new_location},
        }
        update_spy.assert_called_with(DatabaseName=self.database,
                                      TableName=self.table,
                                      PartitionValueList=existing.values,
                                      PartitionInput=expected_update_input)

        errors.should.have.length_of(1)
        errors[0]["Partition"].should.equal(missing.values)
コード例 #5
0
ファイル: partitioner_test.py プロジェクト: Journera/glutil
    def test_update_partition_locations(self):
        """Check that every moved partition found is passed to the Glue update call.

        Fifteen partitions are batch-created with a location under the old
        bucket. The partitions returned by ``find_moved_partitions`` are then
        updated; no errors may be returned, and the mocked
        ``glue.update_partition`` must have been called once per partition's
        values (in any order).
        """
        old_location = "s3://old-bucket/table/"

        self.s3.create_bucket(Bucket=self.bucket)
        self.helper.make_database_and_table()

        partitions = sorted(self.helper.create_many_partitions(count=15))

        # old_location already ends with "/", so this evaluates to
        # "s3://old-bucket/table//data/"; every partition shares it.
        stale_location = f"{old_location}/data/"
        batch_input = [
            {
                "Values": partition.values,
                "StorageDescriptor": {"Location": stale_location},
            }
            for partition in partitions
        ]
        # The PartitionInput payload is not checked here, only the target.
        expected_calls = [
            call(DatabaseName=self.database,
                 TableName=self.table,
                 PartitionValueList=partition.values,
                 PartitionInput=ANY)
            for partition in partitions
        ]

        self.glue.batch_create_partition(DatabaseName=self.database,
                                         TableName=self.table,
                                         PartitionInputList=batch_input)

        partitioner = Partitioner(
            self.database, self.table, aws_region=self.region)
        update_spy = MagicMock()
        partitioner.glue.update_partition = update_spy

        moved = partitioner.find_moved_partitions()
        errors = partitioner.update_partition_locations(moved)

        errors.should.be.empty
        update_spy.assert_has_calls(expected_calls, any_order=True)