Example #1
0
def test_get_partition_difference_datetime():
    """Check datetime partition diffs in both directions with S3 access mocked.

    Four of five random timestamps are exposed as partition values through
    the mocked file listing. ``get_diff_partition_values`` is then expected to
    report the one exposed value missing from the comparison list and, with
    ``reverse=True``, the one comparison value that was never exposed.
    """
    bucket = 'safebucket'
    key = 'dataset'
    partition = 'burgertime'

    # Five timestamps, each less than 100 days old and truncated to whole
    # seconds, matching the second-level precision of the path format below.
    rando_values = []
    for _ in range(5):
        now = datetime.datetime.now()
        age = datetime.timedelta(seconds=random.randrange(100 * 24 * 60 * 60))
        rando_values.append((now - age).replace(microsecond=0))

    # Every value except the last one appears as a partition in the listing.
    s3_paths = []
    for ts in rando_values[:-1]:
        s3_paths.append(
            f"{key}/{partition}={ts.strftime('%Y-%m-%d %H:%M:%S')}/12345.parquet"
        )

    with patch("s3parq.fetch_parq.get_all_files_list") as mock_list_files, \
            patch("s3parq.fetch_parq._get_partitions_and_types") as mock_types:
        mock_list_files.return_value = s3_paths
        mock_types.return_value = {"burgertime": "datetime"}

        # Partition values that are absent from the supplied list.
        missing_from_list = fetch_parq.get_diff_partition_values(
            bucket, key, partition, rando_values[:-2])
        assert missing_from_list == [rando_values[-2]]

        # Supplied values that are absent from the partition values.
        missing_from_partitions = fetch_parq.get_diff_partition_values(
            bucket, key, partition, rando_values, reverse=True)
        assert missing_from_partitions == [rando_values[-1]]
Example #2
0
def test_get_partition_difference_int_comparison_none():
    """Check integer partition diffs when the comparison list is empty.

    With nothing to compare against, the forward diff is expected to contain
    every partition value in the mocked listing, and the reversed diff is
    expected to be empty.
    """
    bucket = 'safebucket'
    key = 'dataset'
    partition = 'hamburger'
    rando_values = sorted([1, 2, 3, 4, 5])

    # Every value except the last one appears as a partition in the listing.
    s3_paths = []
    for x in rando_values[:-1]:
        s3_paths.append(f"{key}/{partition}={x}/12345.parquet")

    with patch("s3parq.fetch_parq.get_all_files_list") as mock_list_files, \
            patch("s3parq.fetch_parq._get_partitions_and_types") as mock_types:
        mock_list_files.return_value = s3_paths
        mock_types.return_value = {"hamburger": "integer"}

        # No comparison values sent: all listed partition values come back.
        forward = fetch_parq.get_diff_partition_values(
            bucket, key, partition, [])
        forward.sort()
        assert forward == rando_values[:-1]

        # No comparison values sent, reversed: nothing can be missing.
        backward = fetch_parq.get_diff_partition_values(
            bucket, key, partition, [], True)
        backward.sort()
        assert backward == []
Example #3
0
    def test_get_partition_difference_int_when_none(self):
        """Check integer partition diffs when the mocked listing has no files.

        The forward diff is expected to be empty, and the reversed diff is
        expected to return every comparison value that was sent.
        """
        bucket = 'safebucket'
        key = 'dataset'
        partition = 'hamburger'
        rando_values = sorted([1, 2, 3, 4, 5])
        no_files = []

        with patch("s3parq.fetch_parq._get_all_files_list") as mock_list_files, \
                patch("s3parq.fetch_parq._get_partitions_and_types") as mock_types:
            mock_list_files.return_value = no_files
            # Arbitrary non-mapping placeholder. Presumably the partition types
            # are never consulted when no files are listed -- TODO confirm.
            mock_types.return_value = 777.09

            # No bucket data: there are no partition values to report.
            forward = fetch_parq.get_diff_partition_values(
                bucket, key, partition, rando_values[:-2])
            forward.sort()
            assert forward == []

            # No bucket data, reversed: every sent value is unmatched.
            backward = fetch_parq.get_diff_partition_values(
                bucket, key, partition, rando_values[:-2], True)
            backward.sort()
            assert backward == rando_values[:-2]
Example #4
0
    def test_get_partition_difference_string(self):
        """Check string partition diffs in both directions with S3 access mocked.

        Nine of ten random strings are exposed as partition values through the
        mocked file listing. The diff is expected to report the one exposed
        value missing from the comparison list and, when reversed, the one
        comparison value that was never exposed.
        """
        bucket = 'safebucket'
        key = 'dataset'
        partition = 'hamburger'

        rando_values = []
        for _ in range(10):
            rando_values.append(self.rand_string())

        # Every value except the last one appears as a partition in the listing.
        s3_paths = []
        for x in rando_values[:-1]:
            s3_paths.append(f"{key}/{partition}={x}/12345.parquet")

        with patch("s3parq.fetch_parq._get_all_files_list") as mock_list_files, \
                patch("s3parq.fetch_parq._get_partitions_and_types") as mock_types:
            mock_list_files.return_value = s3_paths
            mock_types.return_value = {"hamburger": "string"}

            # Partition values that are absent from the supplied list.
            missing_from_list = fetch_parq.get_diff_partition_values(
                bucket, key, partition, rando_values[:-2])
            assert missing_from_list == [rando_values[-2]]

            # Supplied values that are absent from the partition values.
            missing_from_partitions = fetch_parq.get_diff_partition_values(
                bucket, key, partition, rando_values, True)
            assert missing_from_partitions == [rando_values[-1]]
Example #5
0
def test_get_partition_difference_string_when_none():
    """Check string partition diffs when the mocked listing has no files.

    Only the file listing is patched here. The forward diff is expected to be
    empty, and the reversed diff is expected to return every comparison value
    that was sent.
    """
    bucket = 'safebucket'
    key = 'dataset'
    partition = 'hamburger'
    rando_values = sorted(setup_random_string() for _ in range(10))
    no_files = []

    with patch("s3parq.fetch_parq.get_all_files_list") as mock_list_files:
        mock_list_files.return_value = no_files

        # No bucket data: there are no partition values to report.
        forward = fetch_parq.get_diff_partition_values(
            bucket, key, partition, rando_values[:-2])
        forward.sort()
        assert forward == []

        # No bucket data, reversed: every sent value is unmatched.
        backward = fetch_parq.get_diff_partition_values(
            bucket, key, partition, rando_values[:-2], True)
        backward.sort()
        assert backward == rando_values[:-2]