Ejemplo n.º 1
0
    def _test_scalar_type(self, spark_type, numpy_type, bits):
        """Round-trip the extreme signed values of a ``bits``-wide integer through a ScalarCodec.

        :param spark_type: Callable invoked with no arguments; its result is handed to ``ScalarCodec``.
        :param numpy_type: Used as the field's ``numpy_dtype`` and to wrap each value before encoding.
        :param bits: Width from which the range ``[-2**(bits-1), 2**(bits-1) - 1]`` is derived.
        """
        scalar_codec = ScalarCodec(spark_type())
        int_field = UnischemaField(
            name='field_int', numpy_dtype=numpy_type, shape=(), codec=scalar_codec, nullable=False)

        def round_trip(raw):
            # Encode the value wrapped in ``numpy_type`` and immediately decode it back.
            return scalar_codec.decode(int_field, scalar_codec.encode(int_field, numpy_type(raw)))

        half_range = 2 ** (bits - 1)
        lowest, highest = -half_range, half_range - 1

        self.assertEqual(round_trip(lowest), lowest)
        self.assertEqual(round_trip(highest), highest)
        self.assertNotEqual(round_trip(lowest), lowest - 1)
Ejemplo n.º 2
0
def test_bad_unischema_field_shape():
    """Encoding through a field declared with shape ``(1, )`` must raise ``ValueError``."""
    int_codec = ScalarCodec(IntegerType())
    shaped_field = UnischemaField(
        name='field_int', numpy_dtype=np.int32, shape=(1, ), codec=int_codec, nullable=False)
    value = np.int32(1)

    with pytest.raises(ValueError, match='must be an empty tuple'):
        int_codec.encode(shaped_field, value)
Ejemplo n.º 3
0
def test_encode_non_scalar_type_is_passed(non_scalar_value):
    """Encoding the supplied ``non_scalar_value`` must raise ``TypeError`` ('Expected a scalar')."""
    float_codec = ScalarCodec(FloatType())
    float_field = UnischemaField(
        name='field_float', numpy_dtype=np.float32, shape=(), codec=float_codec, nullable=False)

    with pytest.raises(TypeError, match='Expected a scalar'):
        float_codec.encode(float_field, non_scalar_value)
Ejemplo n.º 4
0
def test_unicode():
    """Strings, including the empty string, must survive an encode/decode round trip."""
    codec = ScalarCodec(StringType())
    # np.str_ is used instead of np.unicode_: the latter alias was removed in NumPy 2.0,
    # while np.str_ names the same scalar type on Python 3 in earlier releases too.
    field = UnischemaField(name='field_string',
                           numpy_dtype=np.str_,
                           shape=(),
                           codec=codec,
                           nullable=False)

    assert codec.decode(field, codec.encode(field, 'abc')) == 'abc'
    assert codec.decode(field, codec.encode(field, '')) == ''
Ejemplo n.º 5
0
def test_encode_scalar_bool():
    """Encoding NumPy booleans with a BooleanType codec must yield native ``bool`` values."""
    codec = ScalarCodec(BooleanType())
    # np.bool_ rather than the bare np.bool alias: the alias is absent from NumPy 1.24-1.26,
    # so referencing it makes the test fail with AttributeError on those releases.
    field = UnischemaField(name='field_bool', numpy_dtype=np.bool_, shape=(), codec=codec, nullable=False)

    encoded = codec.encode(field, np.bool_(True))
    # Inspect the encoded value itself; re-encoding it would only test encode() on a Python bool.
    assert isinstance(encoded, bool)
    assert encoded

    encoded = codec.encode(field, np.bool_(False))
    assert isinstance(encoded, bool)
    assert not encoded
Ejemplo n.º 6
0
    def test_scalar_codec_unicode(self):
        """Strings, including the empty string, must survive an encode/decode round trip."""
        codec = ScalarCodec(StringType())
        # np.str_ is used instead of np.unicode_: the latter alias was removed in NumPy 2.0,
        # while np.str_ names the same scalar type on Python 3 in earlier releases too.
        field = UnischemaField(name='field_string',
                               numpy_dtype=np.str_,
                               shape=(),
                               codec=codec,
                               nullable=False)

        self.assertEqual(codec.decode(field, codec.encode(field, 'abc')),
                         'abc')
        self.assertEqual(codec.decode(field, codec.encode(field, '')), '')
Ejemplo n.º 7
0
def test_numeric_types(spark_numpy_types):
    """Round-trip the ``np.iinfo`` minimum and maximum of the given integer type through a codec.

    :param spark_numpy_types: Pair ``(spark_type, numpy_type)``; ``spark_type`` is called with no
        arguments to build the codec, ``numpy_type`` is the field dtype and wraps each test value.
    """
    make_spark_type, np_type = spark_numpy_types

    scalar_codec = ScalarCodec(make_spark_type())
    int_field = UnischemaField(
        name='field_int', numpy_dtype=np_type, shape=(), codec=scalar_codec, nullable=False)

    # Lower bound first, then upper bound.
    bounds = (np.iinfo(np_type).min, np.iinfo(np_type).max)
    for bound in bounds:
        encoded = scalar_codec.encode(int_field, np_type(bound))
        assert scalar_codec.decode(int_field, encoded) == bound
Ejemplo n.º 8
0
def test_encode_scalar_string():
    """Encoding a string with a StringType codec must return an equal ``str``."""
    codec = ScalarCodec(StringType())
    expected = 'surprise'
    # np.str_ is used instead of np.unicode_: the latter alias was removed in NumPy 2.0,
    # while np.str_ names the same scalar type on Python 3 in earlier releases too.
    field = UnischemaField(name='field_string', numpy_dtype=np.str_, shape=(), codec=codec, nullable=False)
    encoded = codec.encode(field, expected)
    assert isinstance(encoded, str)
    assert expected == encoded
Ejemplo n.º 9
0
def test_encode_scalar_float():
    """Encoding a random float64 value with a FloatType codec must return an equal ``float``."""
    float_codec = ScalarCodec(FloatType())
    # Unseeded random draw with an empty shape, cast to float64.
    sample = np.random.random(()).astype(np.float64)
    float_field = UnischemaField(
        name='field_float', numpy_dtype=np.float32, shape=(), codec=float_codec, nullable=False)

    result = float_codec.encode(float_field, sample)

    assert isinstance(result, float)
    assert sample == result
Ejemplo n.º 10
0
def test_bad_encoded_data_shape():
    """Passing a two-element array through encode/decode of a scalar field must raise ``TypeError``."""
    int_codec = ScalarCodec(IntegerType())
    int_field = UnischemaField(
        name='field_int', numpy_dtype=np.int32, shape=(), codec=int_codec, nullable=False)

    # Both the encode and the decode call sit inside the raises block, as either may raise.
    with pytest.raises(TypeError):
        encoded = int_codec.encode(int_field, np.asarray([10, 10]))
        int_codec.decode(int_field, encoded)
Ejemplo n.º 11
0
def test_scalar_codec_decimal():
    """A ``Decimal`` value must compare equal to itself after an encode/decode round trip."""
    decimal_codec = ScalarCodec(DecimalType(4, 3))
    decimal_field = UnischemaField(
        name='field_decimal', numpy_dtype=Decimal, shape=(), codec=decimal_codec, nullable=False)

    # NOTE(review): the literal below has more digits than DecimalType(4, 3) is declared
    # with - confirm the mismatch is intentional.
    original = Decimal('123.4567')
    encoded = decimal_codec.encode(decimal_field, original)
    assert decimal_codec.decode(decimal_field, encoded) == original
Ejemplo n.º 12
0
def test_encode_scalar_int():
    """Encoding ``np.int32(42)`` with an IntegerType codec must return the native ``int`` 42."""
    int_codec = ScalarCodec(IntegerType())
    int_field = UnischemaField(
        name='field_int', numpy_dtype=np.int32, shape=(), codec=int_codec, nullable=False)

    result = int_codec.encode(int_field, np.int32(42))

    assert isinstance(result, int)
    assert 42 == result