def test_null_handling():
    """Check how a binary literal compares with None and with itself after conversion.

    The last assertion compares the results of converting the same one-byte
    value to two different fixed lengths; presumably both conversions yield
    the same "not convertible" result -- confirm against ``Literal.to``.
    """
    single_byte = bytearray([0x01])

    # None sorts before a non-null literal, whichever side it appears on.
    assert None < Literal.of(single_byte)
    assert Literal.of(single_byte) > None

    as_fixed3 = Literal.of(single_byte).to(FixedType.of_length(3))
    as_fixed4 = Literal.of(single_byte).to(FixedType.of_length(4))
    assert as_fixed3 == as_fixed4
Esempio n. 2
0
    def add_timestamp_expression(self, timestamp, expr_func):
        """Register a filter on ``"timestamp_ms"`` built by ``expr_func``.

        A ``str`` timestamp is first converted through a timezone-less
        timestamp literal and its value divided by 1000 (presumably
        microseconds to milliseconds -- confirm); any other value is passed
        through unchanged.

        Returns ``self`` so calls can be chained.
        """
        ts_value = timestamp
        if isinstance(ts_value, str):
            as_literal = Literal.of(ts_value).to(
                TimestampType.without_timezone())
            ts_value = as_literal.value / 1000

        predicate = expr_func("timestamp_ms", ts_value)
        self.add_timestamp_filter(predicate)
        return self
Esempio n. 3
0
def test_date_human_string():
    """The identity transform renders a date value back to its source string."""
    expected_text = "2017-12-01"
    date_type = DateType.get()
    identity_transform = Transforms.identity(date_type)

    literal = Literal.of(expected_text).to(date_type)
    rendered = identity_transform.to_human_string(literal.value)
    assert rendered == expected_text
def test_string_to_date_literal():
    """A date string converted to DateType matches the "int-date" Avro conversion."""
    converted = Literal.of("2017-08-18").to(DateType.get())

    to_avro_date = avro_conversion["int-date"]
    parsed_day = datetime.strptime("2017-08-18", "%Y-%m-%d")
    expected = to_avro_date(parsed_day, None)
    assert expected == converted.value
Esempio n. 5
0
def test_timestamp_without_zone_human_string():
    """The identity transform renders a zone-less timestamp back to its source string."""
    expected_text = "2017-12-01T10:12:55.038194"
    zoneless_type = TimestampType.without_timezone()
    identity_transform = Transforms.identity(zoneless_type)

    literal = Literal.of(expected_text).to(zoneless_type)
    rendered = identity_transform.to_human_string(literal.value)
    assert rendered == expected_text
Esempio n. 6
0
def test_time_human_string():
    """The identity transform renders a time value back to its source string."""
    expected_text = "10:12:55.038194"
    time_type = TimeType.get()
    identity_transform = Transforms.identity(time_type)

    literal = Literal.of(expected_text).to(time_type)
    rendered = identity_transform.to_human_string(literal.value)
    assert rendered == expected_text
Esempio n. 7
0
def test_date_to_human_string(transform_gran, expected):
    """Apply a date transform to 2017-12-01 and check its human-readable form.

    ``transform_gran`` is called with the date type to build the transform;
    ``expected`` is the string the transformed value should render to.
    """
    date_type = DateType.get()
    literal = Literal.of("2017-12-01").to(date_type)

    # Two separate transform instances are built, as in the original test:
    # one renders, the other produces the transformed value.
    renderer = transform_gran(DateType.get())
    applier = transform_gran(DateType.get())
    rendered = renderer.to_human_string(applier.apply(literal.value))
    assert rendered == expected
def test_string_to_time_literal():
    """A time string converted to TimeType matches the "long-time-micros" Avro conversion."""
    converted = Literal.of("14:21:01.919").to(TimeType.get())

    to_avro_time = avro_conversion["long-time-micros"]
    parsed_clock = datetime.strptime("14:21:01.919", "%H:%M:%S.%f").time()
    expected = to_avro_time(parsed_clock, None)

    assert expected == converted.value
Esempio n. 9
0
def test_decimal_to_decimal_conversion():
    """Decimal literals convert only to decimal types with the same scale."""
    source = Literal.of(Decimal("34.11").quantize(Decimal(".01")))

    # Same scale (2): the value is preserved whatever the precision.
    for precision in (9, 11):
        converted = source.to(DecimalType.of(precision, 2))
        assert source.value.as_tuple() == converted.value.as_tuple()

    # Any other scale: the conversion yields None.
    for scale in (0, 1, 3):
        assert source.to(DecimalType.of(9, scale)) is None
def test_string_to_timestamp_literal():
    """Timestamp strings convert to the same value as the Avro micros conversion."""

    def avro_micros(text):
        # Expected value from the "long-timestamp-micros" Avro conversion.
        return avro_conversion["long-timestamp-micros"](
            dateutil.parser.parse(text), None)

    # Explicit UTC offset, converted to a timestamp with timezone.
    with_zone = Literal.of("2017-08-18T14:21:01.919+00:00").to(
        TimestampType.with_timezone())
    expected_utc = avro_micros("2017-08-18T14:21:01.919+00:00")
    assert expected_utc == with_zone.value

    # No offset, converted to a timestamp without timezone: same expected value.
    without_zone = Literal.of("2017-08-18T14:21:01.919").to(
        TimestampType.without_timezone())
    assert expected_utc == without_zone.value

    # -07:00 offset: compared against the equivalent instant written in UTC.
    shifted = Literal.of("2017-08-18T14:21:01.919-07:00").to(
        TimestampType.with_timezone())
    expected_shifted = avro_micros("2017-08-18T21:21:01.919+00:00")
    assert expected_shifted == shifted.value
def test_string_to_decimal_literal():
    """A decimal string converts only to a decimal type whose scale matches it."""
    source = Literal.of("34.560")
    exact = source.to(DecimalType.of(9, 3))
    exact_tuple = exact.value.as_tuple()

    # Scale 3 is kept, including the trailing zero.
    assert 3 == abs(exact_tuple.exponent)
    assert Decimal("34.560").as_tuple() == exact_tuple

    # A target scale different from the string's scale yields None.
    for wrong_scale in (2, 4):
        assert source.to(DecimalType.of(9, wrong_scale)) is None
Esempio n. 12
0
def test_long_to_float_conversion():
    """Converting a long literal to float keeps (approximately) the same value."""
    as_long = Literal.of(34).to(LongType.get())
    as_float = as_long.to(FloatType.get())

    assert math.isclose(as_long.value, as_float.value)
Esempio n. 13
0
def test_long_to_double_conversion():
    """Converting a long literal to double keeps (approximately) the same value."""
    as_long = Literal.of(34).to(LongType.get())
    as_double = as_long.to(DoubleType.get())

    assert math.isclose(as_long.value, as_double.value)
Esempio n. 14
0
def test_integer_to_decimal_conversion(type_val_tuples):
    """Convert the integer literal 34 to a decimal type and compare with the expected value.

    ``type_val_tuples`` is indexed as ``[0]`` for the target type and ``[1]``
    for the value passed to ``Decimal`` to build the expectation.
    """
    source = Literal.of(34)

    converted_tuple = source.to(type_val_tuples[0]).value.as_tuple()
    expected_tuple = Decimal(type_val_tuples[1]).as_tuple()
    assert converted_tuple == expected_tuple
Esempio n. 15
0
def test_long_to_integer():
    """Converting a long literal to integer keeps the same value."""
    as_long = Literal.of(34).to(LongType.get())
    as_int = as_long.to(IntegerType.get())

    assert as_long.value == as_int.value
Esempio n. 16
0
 def test_to_bytes(self):
     """Check the bytes returned by ``to_byte_buffer`` for each kind of literal.

     Each assertion compares a hard-coded byte string with the serialized
     form of a literal, optionally converted to another type first.
     """
     # Booleans serialize to a single byte (0x00 / 0x01), matching the other
     # byte-buffer tests for this API; the previous two-byte expectation
     # contradicted them.
     self.assertEqual(b'\x00', Literal.of(False).to_byte_buffer())
     self.assertEqual(b'\x01', Literal.of(True).to_byte_buffer())
     # 1234 as a 4-byte, then an 8-byte, little-endian integer.
     self.assertEqual(b'\xd2\x04\x00\x00',
                      Literal.of(1234).to_byte_buffer())
     self.assertEqual(b'\xd2\x04\x00\x00\x00\x00\x00\x00',
                      Literal.of(1234).to(LongType.get()).to_byte_buffer())
     # 1.2345 as a 4-byte float, then as an 8-byte double.
     self.assertEqual(b'\x19\x04\x9e?', Literal.of(1.2345).to_byte_buffer())
     self.assertEqual(
         b'\x8d\x97\x6e\x12\x83\xc0\xf3\x3f',
         Literal.of(1.2345).to(DoubleType.get()).to_byte_buffer())
     # Dates use the same 4-byte layout as integers.
     self.assertEqual(b'\xd2\x04\x00\x00',
                      Literal.of(1234).to(DateType.get()).to_byte_buffer())
     # Time and both timestamp flavours share one 8-byte layout.
     self.assertEqual(
         b'\x00\xe8vH\x17\x00\x00\x00',
         Literal.of(100000000000).to(TimeType.get()).to_byte_buffer())
     self.assertEqual(
         b'\x00\xe8vH\x17\x00\x00\x00',
         Literal.of(100000000000).to(
             TimestampType.with_timezone()).to_byte_buffer())
     self.assertEqual(
         b'\x00\xe8vH\x17\x00\x00\x00',
         Literal.of(100000000000).to(
             TimestampType.without_timezone()).to_byte_buffer())
     # Strings, UUIDs and raw byte values.
     self.assertEqual(b'foo', Literal.of("foo").to_byte_buffer())
     self.assertEqual(
         b'\xf7\x9c>\tg|K\xbd\xa4y?4\x9c\xb7\x85\xe7',
         Literal.of(uuid.UUID(
             "f79c3e09-677c-4bbd-a479-3f349cb785e7")).to_byte_buffer())
     self.assertEqual(b'foo', Literal.of(bytes(b'foo')).to_byte_buffer())
     self.assertEqual(b'foo',
                      Literal.of(bytearray(b'foo')).to_byte_buffer())
Esempio n. 17
0
def test_integer_to_long_conversion():
    """Converting an integer literal to long keeps the same value."""
    as_int = Literal.of(34)
    as_long = as_int.to(LongType.get())

    assert as_int.value == as_long.value
def test_string_to_string_literal():
    """Converting a string literal to the string type yields an equal literal."""
    original = Literal.of("abc")
    converted = Literal.of("abc").to(StringType.get())
    assert original == converted
Esempio n. 19
0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from iceberg.api.expressions import (Literal, StringLiteral)
import pytest


@pytest.mark.parametrize(
    "input_vals",
    [
        # a plain and a u-prefixed string literal with the same text
        (Literal.of("abc"), Literal.of(u'abc')),
        # two string literals that both wrap None
        (StringLiteral(None), StringLiteral(None)),
    ])
def test_special_equality(input_vals):
    """Both members of each parametrized pair must compare equal."""
    lhs = input_vals[0]
    rhs = input_vals[1]
    assert lhs == rhs


@pytest.mark.parametrize(
    "input_vals",
    [
        (Literal.of("abc"),
         Literal.of('abcd')),  # test_seq_length, longer is greater
        (Literal.of('abcd'), Literal.of("adc")
         ),  # test_char_order, first difference takes precedence over length
        (None, Literal.of('abc')
         )  # test_null_handling, null comes before non-null
Esempio n. 20
0

@pytest.mark.parametrize(
    "test_input,test_type,expected",
    [
        (1, BucketFloat(100), -142385009),
        (1, BucketDouble(100), -142385009),
    ])
def test_spec_values_dbl(test_input, test_type, expected):
    """The float and double bucket transforms hash the input to the expected value."""
    actual_hash = test_type.hash(test_input)
    assert actual_hash == expected


@pytest.mark.parametrize(
    "test_input,test_type,scale_factor,expected",
    [(Decimal("14.20"), DecimalType.of(9, 2), Decimal(10)**-2, -500754589),
     (Decimal("137302769811943318102518958871258.37580"), DecimalType.of(
         38, 5), Decimal(10)**-5, -32334285)])
def test_spec_values_dec(test_input, test_type, scale_factor, expected):
    """The decimal bucket transform hashes a quantized decimal to the expected value.

    ``test_input`` is quantized to ``scale_factor`` and hashed by a
    100-bucket transform for ``test_type``; the result must equal
    ``expected``.
    """
    from decimal import localcontext

    # Precision 38 is needed so the 38-digit case can be quantized. It is set
    # on a local context so the change is undone when the block exits instead
    # of leaking into every test that runs afterwards.
    with localcontext() as ctx:
        ctx.prec = 38
        assert Bucket.get(test_type,
                          100).hash(test_input.quantize(scale_factor)) == expected


@pytest.mark.parametrize(
    "test_input,test_type,expected",
    [
        (Literal.of("2017-11-16").to(DateType.get()),
         DateType.get(),
         -653330422),
        (Literal.of("22:31:08").to(TimeType.get()),
         TimeType.get(),
         -662762989),
        (Literal.of("2017-11-16T22:31:08").to(
            TimestampType.without_timezone()),
         TimestampType.without_timezone(),
         -2047944441),
        (Literal.of("2017-11-16T14:31:08-08:00").to(
            TimestampType.with_timezone()),
         TimestampType.with_timezone(),
         -2047944441),
    ])
def test_spec_values_datetime_uuid(test_input, test_type, expected):
    """A 100-bucket transform for the type hashes the literal's value as expected."""
    bucket_transform = Bucket.get(test_type, 100)
    actual_hash = bucket_transform.hash(test_input.value)
    assert actual_hash == expected
Esempio n. 21
0
def test_float_to_decimal_conversion(float_type_val_tuples):
    """Convert the float literal 34.56 to a decimal type and compare with the expected value.

    ``float_type_val_tuples`` is indexed as ``[0]`` for the target type and
    ``[1]`` for the value passed to ``Decimal`` to build the expectation.
    """
    source = Literal.of(34.56)

    converted_tuple = source.to(float_type_val_tuples[0]).value.as_tuple()
    expected_tuple = Decimal(float_type_val_tuples[1]).as_tuple()
    assert converted_tuple == expected_tuple
Esempio n. 22
0
def test_long_to_decimal_conversion(type_val_tuples):
    """Convert the long literal 34 to a decimal type and compare with the expected value.

    ``type_val_tuples`` is indexed as ``[0]`` for the target type and ``[1]``
    for the value passed to ``Decimal`` to build the expectation.
    """
    source = Literal.of(34).to(LongType.get())

    converted_tuple = source.to(type_val_tuples[0]).value.as_tuple()
    expected_tuple = Decimal(type_val_tuples[1]).as_tuple()
    assert converted_tuple == expected_tuple
def test_string_to_uuid_literal():
    """A UUID rendered as a string converts back to the same UUID."""
    original_uuid = uuid.uuid4()
    text_literal = Literal.of(str(original_uuid))
    converted = text_literal.to(UUIDType.get())

    assert original_uuid == converted.value
def test_timestamp_without_zone_with_zone_in_literal():
    """Converting a string with a UTC offset to a zone-less timestamp raises RuntimeError."""
    # Build the literal outside the ``raises`` block so that only the
    # conversion is allowed to raise; an error while constructing the string
    # literal must fail the test rather than satisfy it.
    timestamp_str = Literal.of("2017-08-18T14:21:01.919+07:00")
    with raises(RuntimeError):
        timestamp_str.to(TimestampType.without_timezone())
Esempio n. 25
0
def test_float_to_double():
    """Converting a float literal to double keeps (approximately) the same value."""
    as_float = Literal.of(34.56)
    as_double = as_float.to(DoubleType.get())

    assert math.isclose(as_float.value, as_double.value)
Esempio n. 26
0
    def test_byte_buffer_conversions(self):
        """Check the byte encoding of a single value for each type.

        Every section makes two kinds of checks against the same expected
        bytes: one through ``self.assertConversion`` (a helper defined outside
        this block -- presumably it checks the value/type/bytes conversion in
        both directions; confirm) and one through ``Literal...to_byte_buffer()``.
        """
        # booleans are stored as 0x00 for 'false' and a non-zero byte for 'true'
        self.assertConversion(False, BooleanType.get(), b'\x00')
        self.assertConversion(True, BooleanType.get(), b'\x01')
        self.assertEqual(b'\x00', Literal.of(False).to_byte_buffer())
        self.assertEqual(b'\x01', Literal.of(True).to_byte_buffer())

        # integers are stored as 4 bytes in little-endian order
        # 84202 is 0...01|01001000|11101010 in binary
        # 11101010 -> 234 (-22), 01001000 -> 72, 00000001 -> 1, 00000000 -> 0
        self.assertConversion(84202, IntegerType.get(), bytes([234, 72, 1, 0]))
        self.assertEqual(bytes([234, 72, 1, 0]),
                         Literal.of(84202).to_byte_buffer())

        # longs are stored as 8 bytes in little-endian order
        # 200L is 0...0|11001000 in binary
        # 11001000 -> 200 (-56), 00000000 -> 0, ... , 00000000 -> 0
        self.assertConversion(200, LongType.get(),
                              bytes([200, 0, 0, 0, 0, 0, 0, 0]))
        self.assertEqual(bytes([200, 0, 0, 0, 0, 0, 0, 0]),
                         Literal.of(200).to(LongType.get()).to_byte_buffer())

        # floats are stored as 4 bytes in little-endian order
        # floating point numbers are represented as sign * 2^exponent * mantissa
        # -4.5F is -1 * 2^2 * 1.125 and encoded as 11000000|10010000|0...0 in binary
        # 00000000 -> 0, 00000000 -> 0, 10010000 -> 144 (-112), 11000000 -> 192 (-64),
        self.assertConversion(-4.5, FloatType.get(), bytes([0, 0, 144, 192]))
        self.assertEqual(bytes([0, 0, 144, 192]),
                         Literal.of(-4.5).to_byte_buffer())

        # doubles are stored as 8 bytes in little-endian order
        # floating point numbers are represented as sign * 2^exponent * mantissa
        # 6.0 is 1 * 2^2 * 1.5 and encoded as 01000000|00011000|0...0
        # 00000000 -> 0, ... , 00011000 -> 24, 01000000 -> 64
        self.assertConversion(6.0, DoubleType.get(),
                              bytes([0, 0, 0, 0, 0, 0, 24, 64]))
        self.assertEqual(bytes([0, 0, 0, 0, 0, 0, 24, 64]),
                         Literal.of(6.0).to(DoubleType.get()).to_byte_buffer())

        # dates are stored as days from 1970-01-01 in a 4-byte little-endian int
        # 1000 is 0...0|00000011|11101000 in binary
        # 11101000 -> 232 (-24), 00000011 -> 3, ... , 00000000 -> 0
        self.assertConversion(1000, DateType.get(), bytes([232, 3, 0, 0]))
        self.assertEqual(bytes([232, 3, 0, 0]),
                         Literal.of(1000).to(DateType.get()).to_byte_buffer())

        # time is stored as microseconds from midnight in an 8-byte little-endian long
        # 10000L is 0...0|00100111|00010000 in binary
        # 00010000 -> 16, 00100111 -> 39, ... , 00000000 -> 0
        self.assertConversion(10000, TimeType.get(),
                              bytes([16, 39, 0, 0, 0, 0, 0, 0]))
        self.assertEqual(
            bytes([16, 39, 0, 0, 0, 0, 0, 0]),
            Literal.of(10000).to(LongType.get()).to(
                TimeType.get()).to_byte_buffer())

        # timestamps are stored as microseconds from 1970-01-01 00:00:00.000000 in an 8-byte little-endian long
        # 400000L is 0...110|00011010|10000000 in binary
        # 10000000 -> 128 (-128), 00011010 -> 26, 00000110 -> 6, ... , 00000000 -> 0
        self.assertConversion(400000, TimestampType.without_timezone(),
                              bytes([128, 26, 6, 0, 0, 0, 0, 0]))
        self.assertConversion(400000, TimestampType.with_timezone(),
                              bytes([128, 26, 6, 0, 0, 0, 0, 0]))
        self.assertEqual(
            bytes([128, 26, 6, 0, 0, 0, 0, 0]),
            Literal.of(400000).to(LongType.get()).to(
                TimestampType.without_timezone()).to_byte_buffer())
        self.assertEqual(
            bytes([128, 26, 6, 0, 0, 0, 0, 0]),
            Literal.of(400000).to(LongType.get()).to(
                TimestampType.with_timezone()).to_byte_buffer())

        # strings are stored as UTF-8 bytes (without length)
        # 'A' -> 65, 'B' -> 66, 'C' -> 67
        self.assertConversion("ABC", StringType.get(), bytes([65, 66, 67]))
        self.assertEqual(bytes([65, 66, 67]),
                         Literal.of("ABC").to_byte_buffer())

        # uuids are stored as 16-byte big-endian values
        # f79c3e09-677c-4bbd-a479-3f349cb785e7 is encoded as F7 9C 3E 09 67 7C 4B BD A4 79 3F 34 9C B7 85 E7
        # 0xF7 -> 11110111 -> 247 (-9), 0x9C -> 10011100 -> 156 (-100), 0x3E -> 00111110 -> 62,
        # 0x09 -> 00001001 -> 9, 0x67 -> 01100111 -> 103, 0x7C -> 01111100 -> 124,
        # 0x4B -> 01001011 -> 75, 0xBD -> 10111101 -> 189 (-67), 0xA4 -> 10100100 -> 164 (-92),
        # 0x79 -> 01111001 -> 121, 0x3F -> 00111111 -> 63, 0x34 -> 00110100 -> 52,
        # 0x9C -> 10011100 -> 156 (-100), 0xB7 -> 10110111 -> 183 (-73), 0x85 -> 10000101 -> 133 (-123),
        # 0xE7 -> 11100111 -> 231 (-25)
        self.assertConversion(
            uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7"), UUIDType.get(),
            bytes([
                247, 156, 62, 9, 103, 124, 75, 189, 164, 121, 63, 52, 156, 183,
                133, 231
            ]))
        self.assertEqual(
            bytes([
                247, 156, 62, 9, 103, 124, 75, 189, 164, 121, 63, 52, 156, 183,
                133, 231
            ]),
            Literal.of(uuid.UUID(
                "f79c3e09-677c-4bbd-a479-3f349cb785e7")).to_byte_buffer())

        # fixed values are stored directly
        # 'a' -> 97, 'b' -> 98
        self.assertConversion(bytes("ab", "utf8"), FixedType.of_length(2),
                              bytes([97, 98]))
        self.assertEqual(bytes([97, 98]),
                         Literal.of(bytes("ab", "utf8")).to_byte_buffer())

        # binary values are stored directly
        # 'Z' -> 90
        self.assertConversion(bytearray("Z", "utf8"), BinaryType.get(),
                              bytes([90]))
        self.assertEqual(bytes([90]),
                         Literal.of(bytearray("Z", "utf8")).to_byte_buffer())

        # decimals are stored as unscaled values in the form of two's-complement big-endian binary,
        # using the minimum number of bytes for the values
        # 345 is 0...1|01011001 in binary
        # 00000001 -> 1, 01011001 -> 89
        self.assertConversion(
            Decimal(3.45).quantize(Decimal(".01")), DecimalType.of(3, 2),
            bytes([1, 89]))
        self.assertEqual(
            bytes([1, 89]),
            Literal.of(3.45).to(DecimalType.of(3, 2)).to_byte_buffer())

        # decimal on 3-bytes to test that we use the minimum number of bytes and not a power of 2
        # 1234567 is 00010010|11010110|10000111 in binary
        # 00010010 -> 18, 11010110 -> 214, 10000111 -> 135
        self.assertConversion(
            Decimal(123.4567).quantize(Decimal(".0001")), DecimalType.of(7, 4),
            bytes([18, 214, 135]))
        self.assertEqual(
            bytes([18, 214, 135]),
            Literal.of(123.4567).to(DecimalType.of(7, 4)).to_byte_buffer())

        # negative decimal to test two's complement
        # -1234567 is 11101101|00101001|01111001 in binary
        # 11101101 -> 237, 00101001 -> 41, 01111001 -> 121
        self.assertConversion(
            Decimal(-123.4567).quantize(Decimal(".0001")),
            DecimalType.of(7, 4), bytes([237, 41, 121]))
        self.assertEqual(
            bytes([237, 41, 121]),
            Literal.of(-123.4567).to(DecimalType.of(7, 4)).to_byte_buffer())

        # test empty byte in decimal
        # 11 is 00001011 in binary
        # 00001011 -> 11
        self.assertConversion(
            Decimal(0.011).quantize(Decimal(".001")), DecimalType.of(10, 3),
            bytes([11]))
        self.assertEqual(
            bytes([11]),
            Literal.of(0.011).to(DecimalType.of(10, 3)).to_byte_buffer())
Esempio n. 27
0
def test_double_to_float():
    """Converting a double literal to float keeps (approximately) the same value."""
    as_double = Literal.of(34.56).to(DoubleType.get())
    as_float = as_double.to(FloatType.get())

    assert math.isclose(as_double.value, as_float.value)
Esempio n. 28
0
                params=[
                    Expressions.equal("id", 5),
                    Expressions.equal("id", 29),
                    Expressions.equal("id", 30),
                    Expressions.equal("id", 75),
                    Expressions.equal("id", 79),
                    Expressions.equal("id", 80),
                    Expressions.equal("id", 85)
                ])
def not_eq_rewrite(request):
    """Yield the parameter carried by ``request``."""
    selected = request.param
    yield selected


@pytest.fixture(scope="session",
                params=[
                    Literal.of(False),
                    Literal.of(34),
                    Literal.of(35),
                    Literal.of(36.75),
                    Literal.of(8.75),
                    Literal.of("2017-11-29").to(DateType.get()),
                    Literal.of("11:30:0").to(TimeType.get()),
                    Literal.of("2017-11-29T11:30:07.123").to(
                        TimestampType.without_timezone()),
                    Literal.of("2017-11-29T11:30:07.123+01:00").to(
                        TimestampType.with_timezone()),
                    Literal.of("abc"),
                    Literal.of(uuid.uuid4()),
                    Literal.of(bytes([0x01, 0x02,
                                      0x03])).to(FixedType.of_length(3)),
                    Literal.of(bytes([0x03, 0x04, 0x05,
Esempio n. 29
0
 def test_to_bytes(self):
     """Check the bytes returned by ``to_byte_buffer`` for each kind of literal."""
     expect = self.assertEqual

     # Booleans: one byte each.
     expect(b'\x00', Literal.of(False).to_byte_buffer())
     expect(b'\x01', Literal.of(True).to_byte_buffer())

     # 1234 as a plain integer literal and after conversion to long.
     int_bytes = Literal.of(1234).to_byte_buffer()
     expect(b'\xd2\x04\x00\x00', int_bytes)
     long_bytes = Literal.of(1234).to(LongType.get()).to_byte_buffer()
     expect(b'\xd2\x04\x00\x00\x00\x00\x00\x00', long_bytes)

     # 1.2345 as a plain float literal and after conversion to double.
     float_bytes = Literal.of(1.2345).to_byte_buffer()
     expect(b'\x19\x04\x9e?', float_bytes)
     double_bytes = Literal.of(1.2345).to(DoubleType.get()).to_byte_buffer()
     expect(b'\x8d\x97\x6e\x12\x83\xc0\xf3\x3f', double_bytes)

     # A date built from 1234 has the same bytes as the integer.
     date_bytes = Literal.of(1234).to(DateType.get()).to_byte_buffer()
     expect(b'\xd2\x04\x00\x00', date_bytes)

     # Time and both timestamp flavours produce identical bytes for the
     # same number; each type is created just before it is checked.
     for make_type in (TimeType.get,
                       TimestampType.with_timezone,
                       TimestampType.without_timezone):
         expect(b'\x00\xe8vH\x17\x00\x00\x00',
                Literal.of(100000000000).to(make_type()).to_byte_buffer())

     # Strings, UUIDs and raw byte containers.
     expect(b'foo', Literal.of("foo").to_byte_buffer())
     uuid_value = uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")
     expect(b'\xf7\x9c>\tg|K\xbd\xa4y?4\x9c\xb7\x85\xe7',
            Literal.of(uuid_value).to_byte_buffer())
     expect(b'foo', Literal.of(bytes(b'foo')).to_byte_buffer())
     expect(b'foo', Literal.of(bytearray(b'foo')).to_byte_buffer())

     # Decimal on 2-bytes
     two_byte_decimal = Literal.of(123.45).to(DecimalType.of(5, 2))
     expect(b'\x30\x39', two_byte_decimal.to_byte_buffer())
     # Decimal on 3-bytes to test that we use the minimum number of bytes
     three_byte_decimal = Literal.of(123.4567).to(DecimalType.of(7, 4))
     expect(b'\x12\xd6\x87', three_byte_decimal.to_byte_buffer())
     # Negative decimal to test two's complement
     negative_decimal = Literal.of(-123.4567).to(DecimalType.of(7, 4))
     expect(b'\xed\x29\x79', negative_decimal.to_byte_buffer())
def test_natural_order_eq(input_val):
    """Two literals built from the same input value compare equal."""
    first = Literal.of(input_val)
    second = Literal.of(input_val)
    assert first == second