Exemple #1
0
def test_read_stripe(striped_orc_data):
    """Check that ``Reader.read_stripe`` returns stripes and rejects bad indexes.

    The fixture is asked for 655350 rows; the assertions below expect
    stripe indexes 0 and 9 to be readable and 10 and 11 to be out of range.
    """
    orc_reader = Reader(striped_orc_data(655350))

    first = orc_reader.read_stripe(0)
    assert isinstance(first, Stripe)

    # An index well past the end is rejected.
    with pytest.raises(IndexError):
        orc_reader.read_stripe(11)

    # A negative index is reported as a TypeError, not an IndexError.
    with pytest.raises(TypeError):
        orc_reader.read_stripe(-1)

    # Boundary check: 10 is one past the last stripe, 9 is still valid.
    with pytest.raises(IndexError):
        orc_reader.read_stripe(10)

    last = orc_reader.read_stripe(9)
    assert isinstance(last, Stripe)
Exemple #2
0
def test_statistics_array_int(striped_orc_data):
    """Compare list/int column statistics with values recomputed from the rows.

    Column 1 is expected to be the list column and column 2 its int
    element column; the int statistics are checked against aggregates
    computed in Python, first for stripe 0 and then for the whole file.
    """
    rows = (
        ([offset + base for offset in range(30)], ) for base in range(100000)
    )
    data = striped_orc_data("struct<list:array<int>>", rows)
    reader = Reader(data)

    def first_stripe_items():
        # Re-read stripe 0 on every call so each aggregate below consumes
        # its own fresh iterator.
        return (
            item for row in reader.read_stripe(0) for item in row[0]
        )

    stripe = reader.read_stripe(0)
    element_stat = stripe[2].statistics
    assert stripe[1].statistics["kind"] == TypeKind.LIST
    assert element_stat["kind"] == TypeKind.INT
    assert sum(first_stripe_items()) == element_stat["sum"]
    assert min(first_stripe_items()) == element_stat["minimum"]
    assert max(first_stripe_items()) == element_stat["maximum"]

    # File-level statistics: recompute the maximum over every row.
    file_stat = reader[2].statistics
    overall_max = max(item for row in reader for item in row[0])
    assert overall_max == file_stat["maximum"]
Exemple #3
0
def test_seek_and_read(striped_orc_data):
    """Exercise ``Stripe.seek`` with each ``whence`` value, then ``Stripe.read``.

    The file holds 100000 rows ``(i, "Test <i + 1>")``. The assertions
    expect stripe 0 to cover file rows 0..65534 and stripe 1 to cover
    rows 65535..99999, with seek offsets counted within the stripe.
    """
    data = striped_orc_data(
        "struct<col0:int,col1:string>",
        ((i, "Test {}".format(i + 1)) for i in range(100000)),
    )
    reader = Reader(data)
    # The first row yielded by stripe 1 is expected to be file row 65535.
    stripe = reader.read_stripe(1)
    assert next(stripe) == (65535, "Test 65536")
    # Default whence: offset 10000 is expected to be relative to the start
    # of this stripe, i.e. file row 65535 + 10000.
    stripe.seek(10000)
    assert next(stripe) == (75535, "Test 75536")
    # whence=2: offset -1 is expected to land on the stripe's last row.
    stripe.seek(-1, 2)
    assert next(stripe) == (99999, "Test 100000")
    # Same end-relative seek on stripe 0, whose last row should be 65534.
    stripe = reader.read_stripe(0)
    stripe.seek(-1, 2)
    assert next(stripe) == (65534, "Test 65535")
    # Rewind, consume one row, then skip 10000 rows from the current
    # position (whence=1); the next row is expected to be row 10001.
    stripe.seek(0)
    next(stripe)
    stripe.seek(10000, 1)
    assert next(stripe) == (10001, "Test 10002")
    # read() on the stripe is expected to return only the rows after the
    # current position, up to the end of stripe 0.
    expected = reader.read()
    result = stripe.read()
    assert result == expected[10002:65535]
    # A freshly opened stripe 1 should read everything from row 65535 on.
    stripe = reader.read_stripe(1)
    assert stripe.read() == expected[65535:]
Exemple #4
0
def test_statistics_int(striped_orc_data):
    """Verify int column statistics for stripe 0, the whole file and stripe 1.

    The file contains the integers 0..99999; the first stripe is expected
    to hold 65535 of them.
    """
    total_rows = 100000
    data = striped_orc_data("int", (i for i in range(total_rows)))
    reader = Reader(data)
    first_stripe = Stripe(reader, 0)

    # Stripe-level statistics of the only column.
    stripe_stat = first_stripe[0].statistics
    assert stripe_stat["has_null"] is False
    assert stripe_stat["number_of_values"] == 65535
    assert stripe_stat["kind"] == TypeKind.INT
    assert stripe_stat["minimum"] == 0
    assert stripe_stat["maximum"] == 65534
    assert stripe_stat["sum"] == sum(range(len(first_stripe)))

    # File-level statistics cover every row.
    file_stat = reader[0].statistics
    assert file_stat["minimum"] == 0
    assert file_stat["maximum"] == 99999
    assert file_stat["sum"] == sum(range(total_rows))

    # The second stripe is expected to start right after the first one.
    second_stripe = reader.read_stripe(1)
    assert second_stripe[0].statistics["minimum"] == 65535
Exemple #5
0
def test_statistics_string(striped_orc_data):
    """Verify string column statistics for stripe 0, the whole file and stripe 1.

    The file contains "Test String 1" .. "Test String 100000"; minimum and
    maximum follow string ordering, not numeric ordering.
    """
    values = ("Test String {0}".format(i + 1) for i in range(100000))
    data = striped_orc_data("string", values)
    reader = Reader(data)
    first_stripe = Stripe(reader, 0)

    stripe_stat = first_stripe[0].statistics
    assert stripe_stat["has_null"] is False
    assert stripe_stat["kind"] == TypeKind.STRING
    assert stripe_stat["number_of_values"] == 65535
    assert stripe_stat["total_length"] == sum(map(len, first_stripe))
    assert stripe_stat["minimum"] == "Test String 1"
    # A new Stripe is built here rather than reusing the one that was
    # already iterated for the total_length check above.
    assert stripe_stat["maximum"] == max(Stripe(reader, 0))

    file_stat = reader[0].statistics
    assert file_stat["maximum"] == max(reader)

    second_stripe = reader.read_stripe(1)
    assert second_stripe[0].statistics["minimum"] == "Test String 100000"
Exemple #6
0
def test_statistics_double(striped_orc_data):
    """Verify double column statistics for stripe 0, the whole file and stripe 1.

    The file contains ``i * 0.1`` for ``i`` in 0..99999. Every expected
    value that comes out of floating-point arithmetic is compared with
    ``math.isclose`` rather than ``==``: an exact match on an accumulated
    sum only holds if the writer adds the values in exactly the same order
    as the Python expression, which is not something this test should
    depend on.
    """
    data = striped_orc_data("double", (i * 0.1 for i in range(100000)))
    reader = Reader(data)
    stripe = Stripe(reader, 0)

    # Stripe-level statistics of the only column.
    stat = stripe[0].statistics
    assert stat["has_null"] is False
    assert stat["number_of_values"] == 65535
    assert stat["kind"] == TypeKind.DOUBLE
    # 0 * 0.1 is exactly 0.0, so an exact comparison is safe here.
    assert stat["minimum"] == 0
    assert math.isclose(stat["maximum"], 6553.4)
    assert math.isclose(
        stat["sum"], sum(i * 0.1 for i in range(len(stripe)))
    )

    # File-level statistics cover every row.
    stat = reader[0].statistics
    assert stat["minimum"] == 0
    assert math.isclose(stat["maximum"], 9999.9)
    assert math.isclose(stat["sum"], sum(i * 0.1 for i in range(100000)))

    # The second stripe is expected to start at 65535 * 0.1.
    assert math.isclose(
        reader.read_stripe(1)[0].statistics["minimum"], 6553.5
    )
Exemple #7
0
def test_statistics_decimal(striped_orc_data):
    """Verify decimal(10,3) column statistics for stripe 0, the file and stripe 1.

    Expected sums are recomputed in Python from the same value expression
    that feeds the fixture, then quantized to three decimal places.
    """

    def value_at(index):
        # Same expression the fixture data is generated from.
        return Decimal("1000.1") + Decimal((index + 100) * 0.1)

    def expected_sum(count):
        # Sum of the first ``count`` values, rounded to the column's scale.
        total = sum(value_at(index) for index in range(count))
        return total.quantize(Decimal("1.000"))

    data = striped_orc_data(
        "decimal(10,3)",
        (value_at(i) for i in range(100000)),
    )
    reader = Reader(data)
    stripe = Stripe(reader, 0)

    stripe_stat = stripe[0].statistics
    assert stripe_stat["kind"] == TypeKind.DECIMAL
    assert stripe_stat["has_null"] is False
    assert stripe_stat["number_of_values"] == len(stripe)
    assert stripe_stat["minimum"] == Decimal("1010.100")
    assert stripe_stat["maximum"] == Decimal("7563.500")
    assert stripe_stat["sum"] == expected_sum(len(stripe))

    file_stat = reader[0].statistics
    assert file_stat["sum"] == expected_sum(100000)

    second_stripe_stat = reader.read_stripe(1)[0].statistics
    assert second_stripe_stat["minimum"] == Decimal("7563.600")