def fill_from_path(spec, partition_path, reuse):
    """Fill partition data from a ``name=value/name=value`` partition path.

    Args:
        spec: Partition spec. ``spec.fields`` is walked in order and each
            field's ``name`` must equal the name in the matching segment.
        partition_path: "/"-separated ``name=value`` segments, exactly one
            per entry of ``spec.fields``.
        reuse: Partition data object to fill in place, or ``None`` to have
            one created with ``DataFiles.new_partition_data(spec)``.

    Returns:
        The filled partition data object (``reuse`` itself when provided).

    Raises:
        RuntimeError: If the number of segments differs from the number of
            spec fields, if a segment has no "=", or if a segment's name
            does not match the field at the same position.
    """
    data = reuse
    if data is None:
        data = DataFiles.new_partition_data(spec)

    partitions = partition_path.split("/")
    expected = len(spec.fields)
    if len(partitions) > expected:
        raise RuntimeError(
            "Invalid partition data, too many fields (expecting %s): %s" %
            (expected, partition_path))

    if len(partitions) < expected:
        raise RuntimeError(
            "Invalid partition data, not enough fields(expecting %s): %s" %
            (expected, partition_path))

    for i, (field, part) in enumerate(zip(spec.fields, partitions)):
        # Split on the FIRST "=" only: the value itself may contain "=",
        # and splitting on every occurrence would reject such segments.
        name, separator, value = part.partition("=")
        if not separator or name != field.name:
            raise RuntimeError("Invalid partition: %s" % part)

        data.set(
            i, Conversions.from_partition_string(data.get_type(i), value))

    return data
def strict_file():
    """Build the ``file.avro`` TestDataFile fixture with bounds on ids 1 and 7.

    The four mappings are passed positionally in the order value counts,
    null value counts, lower bounds, upper bounds. Keys are integer ids
    (presumably field ids -- confirm against TestDataFile). Id 7 has the
    same lower and upper bound (5); id 1 ranges from 30 to 79.
    """
    def int_bound(value):
        # Serialize an integer bound using the integer type.
        return Conversions.to_byte_buffer(IntegerType.get(), value)

    partition = TestHelpers.Row.of()
    value_counts = dict.fromkeys((4, 5, 6), 50)
    null_value_counts = {4: 50, 5: 10, 6: 0}
    lower_bounds = {1: int_bound(30), 7: int_bound(5)}
    upper_bounds = {1: int_bound(79), 7: int_bound(5)}

    return TestDataFile("file.avro", partition, 50, value_counts,
                        null_value_counts, lower_bounds, upper_bounds)
def file():
    """Build the ``file.avro`` TestDataFile fixture with bounds on id 1 only.

    The four mappings are passed positionally in the order value counts,
    null value counts, lower bounds, upper bounds. Keys are integer ids
    (presumably field ids -- confirm against TestDataFile).
    """
    def int_bound(value):
        # Serialize an integer bound using the integer type.
        return Conversions.to_byte_buffer(IntegerType.get(), value)

    partition = TestHelpers.Row.of()
    value_counts = dict.fromkeys((4, 5, 6), 50)
    null_value_counts = {4: 50, 5: 10, 6: 0}
    lower_bounds = {1: int_bound(30)}
    upper_bounds = {1: int_bound(79)}

    return TestDataFile("file.avro", partition, 50, value_counts,
                        null_value_counts, lower_bounds, upper_bounds)
# Example #4
# 0
def inc_man_file():
    """Build the ``manifest-list.avro`` TestManifestFile fixture.

    The fourth positional argument is the current wall-clock time in
    milliseconds. The last argument is a tuple of four TestFieldSummary
    objects: one with integer bounds 30..79, one with no bounds, and two
    with string bounds 'a'..'z' that differ only in their leading flag.
    The meaning of the remaining numeric arguments is defined by
    TestManifestFile, which is not visible here.
    """
    now_millis = int(time.time() * 1000)

    def int_buf(value):
        # Serialize an integer bound.
        return Conversions.to_byte_buffer(IntegerType.get(), value)

    def str_buf(value):
        # Serialize a string bound.
        return Conversions.to_byte_buffer(StringType.get(), value)

    # Leading flag presumably means "contains null" -- confirm against
    # TestFieldSummary.
    field_summaries = (
        TestFieldSummary(False, int_buf(30), int_buf(79)),
        TestFieldSummary(True, None, None),
        TestFieldSummary(True, str_buf('a'), str_buf('z')),
        TestFieldSummary(False, str_buf('a'), str_buf('z')),
    )

    return TestManifestFile("manifest-list.avro", 1024, 0, now_millis, 5,
                            10, 0, field_summaries)