Example #1
0
    def apply_expr(self, lhs, operator_str, rhs):
        """Evaluate one filter expression, using column statistics if present.

        If ``lhs`` is a ``ColumnChunk`` it is the column and ``rhs`` the value
        it is compared with; otherwise, if ``rhs`` is a ``ColumnChunk``, the
        roles are swapped.  Two plain booleans are combined directly through
        ``self.expr_operator_map[operator_str]`` and that result is returned.

        Returns ``False`` only when ``doesnt_satisfy_filter`` reports that the
        chunk's decoded min/max statistics rule the expression out, and
        ``True`` otherwise (including when the chunk has no statistics).

        Raises ``ValueError`` when neither operand is a column reference and
        the operands are not both booleans.
        """
        if isinstance(lhs, ColumnChunk):
            column, val = lhs, rhs
        elif isinstance(rhs, ColumnChunk):
            # NOTE(review): the operand order is discarded here while
            # operator_str is passed on unchanged -- confirm this is intended
            # for non-commutative operators such as '<' or '>'.
            column, val = rhs, lhs
        elif isinstance(lhs, bool) and isinstance(rhs, bool):
            return self.expr_operator_map[operator_str](lhs, rhs)
        else:
            raise ValueError(
                'Either lhs or rhs should be a column reference, not lhs:%s rhs:%s'
                % (lhs, rhs))

        meta = column.meta_data
        element = self.schema.schema_element('.'.join(meta.path_in_schema))
        stats = meta.statistics
        if stats is None:
            # Without statistics the chunk cannot be ruled out.
            return True

        def decode(raw):
            # Read a single plain-encoded value and apply the schema's
            # converted type, if one is declared.
            value = encoding.read_plain(ensure_bytes(raw), meta.type, 1)
            if element.converted_type is not None:
                value = converted_types.convert(value, element)
            return value

        vmax = decode(stats.max) if stats.max is not None else None
        vmin = decode(stats.min) if stats.min is not None else None
        return not doesnt_satisfy_filter(operator_str, val, vmin, vmax)
def test_utf8():
    """A UTF8-annotated text value should come back from convert as-is."""
    element = pt.SchemaElement(
        type=pt.Type.BYTE_ARRAY,
        name="test",
        converted_type=pt.ConvertedType.UTF8,
    )
    # conversion now happens on read, so the input here is already text
    text = u"Ördög"
    decoded = convert(pd.Series([text]), element)
    assert decoded[0] == u"Ördög"
def test_utf8():
    """UTF8-annotated BYTE_ARRAY bytes should decode to the matching text."""
    element = pt.SchemaElement(
        type=pt.Type.BYTE_ARRAY,
        name="test",
        converted_type=pt.ConvertedType.UTF8,
    )
    raw = b'\xc3\x96rd\xc3\xb6g'
    decoded = convert(pd.Series([raw]), element)
    assert decoded[0] == u"Ördög"
def test_uint32():
    """A negative INT32 annotated UINT_32 should wrap modulo 2**32."""
    element = pt.SchemaElement(type=pt.Type.INT32,
                               name="test",
                               converted_type=pt.ConvertedType.UINT_32)
    signed = pd.Series([-6884376])
    # 2**32 - 6884376 == 4288082920
    unsigned = convert(signed, element)
    assert unsigned[0] == 4288082920
def test_uint64():
    """A negative INT64 annotated UINT_64 should wrap modulo 2**64."""
    element = pt.SchemaElement(type=pt.Type.INT64,
                               name="test",
                               converted_type=pt.ConvertedType.UINT_64)
    signed = pd.Series([-6884376])
    # 2**64 - 6884376 == 18446744073702667240
    unsigned = convert(signed, element)
    assert unsigned[0] == 18446744073702667240
def test_uint32():
    """Check the unsigned 32-bit reading of a negative INT32 value."""
    uint32_element = pt.SchemaElement(
        type=pt.Type.INT32,
        name="test",
        converted_type=pt.ConvertedType.UINT_32,
    )
    converted = convert(pd.Series([-6884376]), uint32_element)
    # -6884376 reinterpreted as unsigned 32-bit is 2**32 - 6884376.
    expected = 4288082920
    assert converted[0] == expected
def test_uint16():
    """A negative INT32 annotated UINT_16 should wrap modulo 2**16."""
    element = pt.SchemaElement(type=pt.Type.INT32,
                               name="test",
                               converted_type=pt.ConvertedType.UINT_16)
    signed = pd.Series([-3])
    # 2**16 - 3 == 65533
    unsigned = convert(signed, element)
    assert unsigned[0] == 65533
def test_uint16():
    """Check the unsigned 16-bit reading of a negative INT32 value."""
    uint16_element = pt.SchemaElement(
        type=pt.Type.INT32,
        name="test",
        converted_type=pt.ConvertedType.UINT_16,
    )
    converted = convert(pd.Series([-3]), uint16_element)
    # -3 reinterpreted as unsigned 16-bit is 2**16 - 3.
    expected = 65533
    assert converted[0] == expected
def test_uint64():
    """Check the unsigned 64-bit reading of a negative INT64 value."""
    uint64_element = pt.SchemaElement(
        type=pt.Type.INT64,
        name="test",
        converted_type=pt.ConvertedType.UINT_64,
    )
    converted = convert(pd.Series([-6884376]), uint64_element)
    # -6884376 reinterpreted as unsigned 64-bit is 2**64 - 6884376.
    expected = 18446744073702667240
    assert converted[0] == expected
Example #10
0
def test_time_millis():
    """TIME_MILLIS on INT32 should yield a timedelta of that many ms."""
    element = pt.SchemaElement(type=pt.Type.INT32,
                               name="test",
                               converted_type=pt.ConvertedType.TIME_MILLIS)
    millis = pd.Series([731888])
    expected = datetime.timedelta(milliseconds=731888)
    converted = convert(millis, element)
    assert converted[0] == expected
def test_json():
    """JSON-annotated bytes should be parsed into Python objects."""
    element = pt.SchemaElement(
        type=pt.Type.BYTE_ARRAY,
        name="test",
        converted_type=pt.ConvertedType.JSON,
    )
    # The payload spells the emoji as an escaped surrogate pair.
    payload = pd.Series([b'{"foo": ["bar", "\\ud83d\\udc7e"]}'])
    parsed = convert(payload, element)
    assert parsed[0] == {'foo': ['bar', u'👾']}
def test_time_millis():
    """TIME_MILLIS on an int32 array should match a timedelta64[ms] value."""
    element = pt.SchemaElement(type=pt.Type.INT32,
                               name="test",
                               converted_type=pt.ConvertedType.TIME_MILLIS)
    raw = np.array([731888], dtype='int32')
    converted = convert(raw, element)
    # Compared against a one-element array, as in the original check.
    expected = np.array([731888], dtype='timedelta64[ms]')
    assert (converted[0] == expected)
def test_json():
    """Check that a JSON document stored as bytes is decoded by convert."""
    json_element = pt.SchemaElement(type=pt.Type.BYTE_ARRAY,
                                    name="test",
                                    converted_type=pt.ConvertedType.JSON)
    document = b'{"foo": ["bar", "\\ud83d\\udc7e"]}'
    expected = {'foo': ['bar', u'👾']}
    assert convert(pd.Series([document]), json_element)[0] == expected
def test_int32():
    """Test decimal data stored as int32.

    The converted value must lie within 0.01 of the expected decimal on
    either side.
    """
    schema = pt.SchemaElement(type=pt.Type.INT32,
                              name="test",
                              converted_type=pt.ConvertedType.DECIMAL,
                              scale=10,
                              precision=9)

    # A DECIMAL is unscaled_value * 10**-scale, so 9876543210 with scale=10
    # is 0.987654321.  The previous check compared against 9.87654321 without
    # abs(), so it passed for any result smaller than the expected value.
    # NOTE(review): the literal exceeds the int32 range and precision < scale;
    # confirm whether the fixture itself should be tightened.
    result = convert(pd.Series([9876543210]), schema)[0]
    assert abs(result - 0.987654321) < 0.01
Example #15
0
def test_date():
    """DATE on INT32: the value 731888 should convert to 2004-11-03."""
    element = pt.SchemaElement(type=pt.Type.INT32,
                               name="test",
                               converted_type=pt.ConvertedType.DATE)
    # 731888 equals datetime.date(2004, 11, 3).toordinal().
    converted = convert(pd.Series([731888]), element)
    expected = pd.to_datetime([datetime.date(2004, 11, 3)])
    assert (converted[0] == expected)
def test_utf8():
    """Check that UTF-8 encoded bytes are decoded into a text string."""
    utf8_element = pt.SchemaElement(type=pt.Type.BYTE_ARRAY,
                                    name="test",
                                    converted_type=pt.ConvertedType.UTF8)
    encoded = b'\xc3\x96rd\xc3\xb6g'
    expected = u"Ördög"
    assert convert(pd.Series([encoded]), utf8_element)[0] == expected
def test_time_millis():
    """Check int32 milliseconds against the same count as timedelta64[ms]."""
    millis_element = pt.SchemaElement(
        type=pt.Type.INT32,
        name="test",
        converted_type=pt.ConvertedType.TIME_MILLIS,
    )
    first = convert(np.array([731888], dtype='int32'), millis_element)[0]
    # The reference is a one-element array, so the comparison broadcasts.
    reference = np.array([731888], dtype='timedelta64[ms]')
    assert (first == reference)
Example #18
0
def test_timestamp_millis():
    """TIMESTAMP_MILLIS on INT64 should convert epoch ms to a datetime."""
    element = pt.SchemaElement(
        type=pt.Type.INT64,
        name="test",
        converted_type=pt.ConvertedType.TIMESTAMP_MILLIS)
    # 1099511625014 ms after 1970-01-01 is 2004-11-03 19:53:45.014.
    expected = datetime.datetime(2004, 11, 3, 19, 53, 45, 14 * 1000)
    converted = convert(pd.Series([1099511625014]), element)
    assert converted[0] == expected
def test_timestamp_millis():
    """TIMESTAMP_MILLIS on an int64 array should match a datetime64[ns]."""
    element = pt.SchemaElement(
        type=pt.Type.INT64,
        name="test",
        converted_type=pt.ConvertedType.TIMESTAMP_MILLIS)
    raw = np.array([1099511625014], dtype='int64')
    converted = convert(raw, element)
    # The same instant, built from a datetime as a 0-d datetime64[ns] array.
    moment = datetime.datetime(2004, 11, 3, 19, 53, 45, 14 * 1000)
    expected = np.array(moment, dtype='datetime64[ns]')
    assert (converted[0] == expected)
def test_date():
    """DATE on INT32: days since 1970-01-01 should convert to that date."""
    element = pt.SchemaElement(type=pt.Type.INT32,
                               name="test",
                               converted_type=pt.ConvertedType.DATE)
    target = datetime.date(2004, 11, 3)
    elapsed = target - datetime.date(1970, 1, 1)
    converted = convert(pd.Series([elapsed.days]), element)
    expected = pd.to_datetime([datetime.date(2004, 11, 3)])
    assert (converted[0] == expected)
def test_timestamp_millis():
    """Check int64 epoch milliseconds against the equivalent datetime64."""
    ts_element = pt.SchemaElement(type=pt.Type.INT64,
                                  name="test",
                                  converted_type=pt.ConvertedType.TIMESTAMP_MILLIS)
    first = convert(np.array([1099511625014], dtype='int64'), ts_element)[0]
    # Reference instant: 2004-11-03 19:53:45 plus 14 ms (14 * 1000 us).
    reference = np.array(
        datetime.datetime(2004, 11, 3, 19, 53, 45, 14 * 1000),
        dtype='datetime64[ns]',
    )
    assert (first == reference)
def test_int32():
    """Test decimal data stored as int32.

    The converted value must lie within 0.01 of the expected decimal on
    either side.
    """
    schema = pt.SchemaElement(
        type=pt.Type.INT32,
        name="test",
        converted_type=pt.ConvertedType.DECIMAL,
        scale=10,
        precision=9
    )

    # A DECIMAL is unscaled_value * 10**-scale, so 9876543210 with scale=10
    # is 0.987654321.  The previous check compared against 9.87654321 without
    # abs(), so it passed for any result smaller than the expected value.
    # NOTE(review): the literal exceeds the int32 range and precision < scale;
    # confirm whether the fixture itself should be tightened.
    result = convert(pd.Series([9876543210]), schema)[0]
    assert abs(result - 0.987654321) < 0.01
def test_bson():
    """BSON-annotated bytes should be decoded into Python objects.

    The test is skipped when the ``bson`` module cannot be imported.
    """
    pytest.importorskip('bson')
    element = pt.SchemaElement(type=pt.Type.BYTE_ARRAY,
                               name="test",
                               converted_type=pt.ConvertedType.BSON)
    # One BSON document, written as two adjacent bytes literals.
    document = (
        b'&\x00\x00\x00\x04foo\x00\x1c\x00\x00\x00\x020'
        b'\x00\x04\x00\x00\x00bar\x00\x021\x00\x05\x00\x00\x00\xf0\x9f\x91\xbe\x00\x00\x00'
    )
    decoded = convert(pd.Series([document]), element)
    assert decoded[0] == {'foo': ['bar', '👾']}
def test_date():
    """DATE conversion should work on a read-only array of epoch days."""
    element = pt.SchemaElement(type=pt.Type.INT32,
                               name="test",
                               converted_type=pt.ConvertedType.DATE)
    elapsed = datetime.date(2004, 11, 3) - datetime.date(1970, 1, 1)
    frozen = pd.Series([elapsed.days]).to_numpy()
    # Mark the input read-only before handing it to convert.
    frozen.flags.writeable = False
    converted = convert(frozen, element)
    expected = pd.to_datetime([datetime.date(2004, 11, 3)])
    assert (converted[0] == expected)
def test_bson():
    """Check that a BSON document stored as bytes is decoded by convert.

    Skipped via ``pytest.importorskip`` when ``bson`` is unavailable.
    """
    pytest.importorskip('bson')
    bson_element = pt.SchemaElement(
        type=pt.Type.BYTE_ARRAY,
        name="test",
        converted_type=pt.ConvertedType.BSON,
    )
    payload = pd.Series([
        b'&\x00\x00\x00\x04foo\x00\x1c\x00\x00\x00\x020'
        b'\x00\x04\x00\x00\x00bar\x00\x021\x00\x05\x00\x00\x00\xf0\x9f\x91\xbe\x00\x00\x00'
    ])
    expected = {'foo': ['bar', '👾']}
    assert convert(payload, bson_element)[0] == expected
def test_big_decimal():
    """DECIMAL on FIXED_LEN_BYTE_ARRAY should decode to floats (scale=1)."""
    element = pt.SchemaElement(
        type=pt.Type.FIXED_LEN_BYTE_ARRAY,
        name="test",
        converted_type=pt.ConvertedType.DECIMAL,
        type_length=32,
        scale=1,
        precision=38,
    )
    # Five fixed-width byte strings; the first one is empty.
    raw = np.array(
        [
            b'',
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1e\\',
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1d\\',
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\r{',
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19)',
        ],
        dtype='|S32',
    )
    decoded = convert(raw, element)
    expected = np.array([0., 777.2, 751.6, 345.1, 644.1])
    assert np.isclose(decoded, expected).all()
def test_big_decimal():
    """Check decimals stored as 32-byte fixed-length values against floats."""
    decimal_element = pt.SchemaElement(type=pt.Type.FIXED_LEN_BYTE_ARRAY,
                                       name="test",
                                       converted_type=pt.ConvertedType.DECIMAL,
                                       type_length=32,
                                       scale=1,
                                       precision=38)
    encoded_values = [
        b'',
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1e\\',
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1d\\',
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\r{',
        b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19)',
    ]
    data = np.array(encoded_values, dtype='|S32')
    # Every converted element must be close to its expected float.
    matches = np.isclose(convert(data, decimal_element),
                         np.array([0., 777.2, 751.6, 345.1, 644.1]))
    assert matches.all()