def test_verify_type_ok_nullable(self):
    """
    Checks that verifiers built for types constructed with default arguments accept ``None``.

    A :class:`TypeError` or :class:`ValueError` raised while building or invoking a
    verifier is reported as a test failure naming the offending type.
    """
    null_value = None
    candidate_types = (
        DataTypes.INT(),
        DataTypes.FLOAT(),
        DataTypes.VARCHAR(),
        DataTypes.ROW([]),
    )
    for candidate in candidate_types:
        try:
            # Verifier construction stays inside the try block so that an error
            # raised while building it is reported the same way as a rejection.
            verify = _create_type_verifier(candidate)
            verify(null_value)
        except (TypeError, ValueError):
            self.fail("verify_type(%s, %s, nullable=True)" % (null_value, candidate))
def test_verify_type_exception_msg(self):
    """
    Checks the exception types raised by type verifiers for two invalid inputs.

    * ``None`` checked against a non-nullable VARCHAR must raise :class:`ValueError`.
    * A nested list checked against a row-in-row schema must raise :class:`TypeError`.

    NOTE(review): despite the test name, only the exception types are asserted here,
    not the message text.
    """
    with self.assertRaises(ValueError):
        verify_varchar = _create_type_verifier(
            DataTypes.VARCHAR(nullable=False), name="test_name")
        verify_varchar(None)

    # Field 'a' is itself a row with a single INT field 'b'.
    inner_row = DataTypes.ROW([DataTypes.FIELD('b', DataTypes.INT())])
    schema = DataTypes.ROW([DataTypes.FIELD('a', inner_row)])
    with self.assertRaises(TypeError):
        verify_row = _create_type_verifier(schema)
        verify_row([["data"]])
def test_verify_type_exception_msg(self):
    """
    Checks the exception types raised by type verifiers for two invalid inputs.

    * ``None`` checked against a non-nullable STRING must raise :class:`ValueError`.
    * A nested list checked against a row-in-row schema must raise :class:`TypeError`.

    NOTE(review): despite the test name, only the exception types are asserted here,
    not the message text.
    """
    with self.assertRaises(ValueError):
        verify_string = _create_type_verifier(
            DataTypes.STRING(nullable=False), name="test_name")
        verify_string(None)

    # Field 'a' is itself a row with a single INT field 'b'.
    inner_row = DataTypes.ROW([DataTypes.FIELD('b', DataTypes.INT())])
    schema = DataTypes.ROW([DataTypes.FIELD('a', inner_row)])
    with self.assertRaises(TypeError):
        verify_row = _create_type_verifier(schema)
        verify_row([["data"]])
def test_verify_type_ok_nullable(self):
    """
    Checks that verifiers built for types constructed with default arguments accept ``None``.

    A :class:`TypeError` or :class:`ValueError` raised while building or invoking a
    verifier is reported as a test failure naming the offending type.
    """
    null_value = None
    candidate_types = (
        DataTypes.INT(),
        DataTypes.FLOAT(),
        DataTypes.STRING(),
        DataTypes.ROW([]),
    )
    for candidate in candidate_types:
        try:
            # Verifier construction stays inside the try block so that an error
            # raised while building it is reported the same way as a rejection.
            verify = _create_type_verifier(candidate)
            verify(null_value)
        except (TypeError, ValueError):
            self.fail("verify_type(%s, %s, nullable=True)" % (null_value, candidate))
def test_udt(self):
    """
    Checks type inference and verification for the two user-defined point types.

    For each (value class, UDT class) pair the test asserts that:

    * inferring the type of a point instance yields an instance equal to the UDT,
    * the UDT verifier accepts a point instance,
    * the UDT verifier rejects a plain list of coordinates with :class:`ValueError`.
    """
    udt_pairs = (
        (ExamplePoint, ExamplePointUDT),
        (PythonOnlyPoint, PythonOnlyUDT),
    )
    for point_cls, udt_cls in udt_pairs:
        sample = point_cls(1.0, 2.0)
        self.assertEqual(_infer_type(sample), udt_cls())

        # A fresh point instance must pass verification without raising.
        accept = _create_type_verifier(udt_cls())
        accept(point_cls(1.0, 2.0))

        with self.assertRaises(ValueError):
            reject = _create_type_verifier(udt_cls())
            reject([1.0, 2.0])
def test_verify_type_not_nullable(self):
    """
    Table-driven check of type verifiers built from ``data_type.not_null()``.

    ``success_spec`` lists ``(obj, data_type)`` pairs the verifier must accept;
    ``failure_spec`` lists ``(obj, data_type, exception class)`` triples for which
    the verifier must raise exactly the given exception class.
    """
    import array
    import datetime
    import decimal

    # Row schema shared by the "Struct" cases: 's' is declared non-nullable,
    # 'i' is an INT constructed with a positional ``True`` argument
    # (presumably the nullable flag -- confirm against DataTypes.INT).
    schema = DataTypes.ROW([
        DataTypes.FIELD('s', DataTypes.VARCHAR(nullable=False)),
        DataTypes.FIELD('i', DataTypes.INT(True))])

    class MyObj:
        """Plain object whose attributes are set from the keyword arguments."""

        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)

    # obj, data_type
    success_spec = [
        # String
        ("", DataTypes.VARCHAR()),
        (u"", DataTypes.VARCHAR()),

        # UDT
        (ExamplePoint(1.0, 2.0), ExamplePointUDT()),

        # Boolean
        (True, DataTypes.BOOLEAN()),

        # TinyInt (both ends of the signed 8-bit range)
        (-(2 ** 7), DataTypes.TINYINT()),
        (2 ** 7 - 1, DataTypes.TINYINT()),

        # SmallInt (both ends of the signed 16-bit range)
        (-(2 ** 15), DataTypes.SMALLINT()),
        (2 ** 15 - 1, DataTypes.SMALLINT()),

        # Int (both ends of the signed 32-bit range)
        (-(2 ** 31), DataTypes.INT()),
        (2 ** 31 - 1, DataTypes.INT()),

        # BigInt
        # NOTE(review): 2 ** 64 lies outside the signed 64-bit range yet is listed as a
        # success case -- presumably BIGINT is not range-checked; confirm this is intended.
        (2 ** 64, DataTypes.BIGINT()),

        # Float & Double
        (1.0, DataTypes.FLOAT()),
        (1.0, DataTypes.DOUBLE()),

        # Decimal
        (decimal.Decimal("1.0"), DataTypes.DECIMAL()),

        # Binary
        (bytearray([1]), DataTypes.BINARY()),

        # Date/Time/Timestamp (a datetime is also expected to pass as a DATE)
        (datetime.date(2000, 1, 2), DataTypes.DATE()),
        (datetime.datetime(2000, 1, 2, 3, 4), DataTypes.DATE()),
        (datetime.time(1, 1, 2), DataTypes.TIME()),
        (datetime.datetime(2000, 1, 2, 3, 4), DataTypes.TIMESTAMP()),

        # Array (list, tuple and array.array inputs)
        ([], DataTypes.ARRAY(DataTypes.INT())),
        (["1", None], DataTypes.ARRAY(DataTypes.VARCHAR(nullable=True))),
        ([1, 2], DataTypes.ARRAY(DataTypes.INT())),
        ((1, 2), DataTypes.ARRAY(DataTypes.INT())),
        (array.array('h', [1, 2]), DataTypes.ARRAY(DataTypes.INT())),

        # Map
        ({}, DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.INT())),
        ({"a": 1}, DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.INT())),
        ({"a": None}, DataTypes.MAP(DataTypes.VARCHAR(nullable=False), DataTypes.INT(True))),

        # Struct (dict, Row, list, tuple and plain-object inputs; a dict or object
        # may omit 'i' or carry an extra key 'f' and is still expected to pass)
        ({"s": "a", "i": 1}, schema),
        ({"s": "a", "i": None}, schema),
        ({"s": "a"}, schema),
        ({"s": "a", "f": 1.0}, schema),
        (Row(s="a", i=1), schema),
        (Row(s="a", i=None), schema),
        (Row(s="a", i=1, f=1.0), schema),
        (["a", 1], schema),
        (["a", None], schema),
        (("a", 1), schema),
        (MyObj(s="a", i=1), schema),
        (MyObj(s="a", i=None), schema),
        (MyObj(s="a"), schema),
    ]

    # obj, data_type, exception class
    failure_spec = [
        # Char/VarChar (match anything but None)
        (None, DataTypes.VARCHAR(), ValueError),
        (None, DataTypes.CHAR(), ValueError),

        # VarChar (length exceeds maximum length)
        # NOTE(review): no length is passed, so this relies on the default length
        # being smaller than 3 -- confirm against DataTypes.VARCHAR.
        ("abc", DataTypes.VARCHAR(), ValueError),

        # Char (length exceeds length)
        ("abc", DataTypes.CHAR(), ValueError),

        # UDT (a value of one UDT checked against a different UDT)
        (ExamplePoint(1.0, 2.0), PythonOnlyUDT(), ValueError),

        # Boolean
        (1, DataTypes.BOOLEAN(), TypeError),
        ("True", DataTypes.BOOLEAN(), TypeError),
        ([1], DataTypes.BOOLEAN(), TypeError),

        # TinyInt (one past each end of the range, then wrong Python types)
        (-(2 ** 7) - 1, DataTypes.TINYINT(), ValueError),
        (2 ** 7, DataTypes.TINYINT(), ValueError),
        ("1", DataTypes.TINYINT(), TypeError),
        (1.0, DataTypes.TINYINT(), TypeError),

        # SmallInt (one past each end of the range)
        (-(2 ** 15) - 1, DataTypes.SMALLINT(), ValueError),
        (2 ** 15, DataTypes.SMALLINT(), ValueError),

        # Int (one past each end of the range)
        (-(2 ** 31) - 1, DataTypes.INT(), ValueError),
        (2 ** 31, DataTypes.INT(), ValueError),

        # Float & Double (a Python int is not accepted)
        (1, DataTypes.FLOAT(), TypeError),
        (1, DataTypes.DOUBLE(), TypeError),

        # Decimal (only decimal.Decimal is accepted)
        (1.0, DataTypes.DECIMAL(), TypeError),
        (1, DataTypes.DECIMAL(), TypeError),
        ("1.0", DataTypes.DECIMAL(), TypeError),

        # Binary
        (1, DataTypes.BINARY(), TypeError),

        # VarBinary (length exceeds maximum length)
        (bytearray([1, 2]), DataTypes.VARBINARY(), ValueError),

        # Binary (length exceeds length)
        (bytearray([1, 2]), DataTypes.BINARY(), ValueError),

        # Date/Time/Timestamp (strings and epoch integers are not accepted)
        ("2000-01-02", DataTypes.DATE(), TypeError),
        ("10:01:02", DataTypes.TIME(), TypeError),
        (946811040, DataTypes.TIMESTAMP(), TypeError),

        # Array
        (["1", None], DataTypes.ARRAY(DataTypes.VARCHAR(nullable=False)), ValueError),
        ([1, "2"], DataTypes.ARRAY(DataTypes.INT()), TypeError),

        # Map (wrong key type, wrong value type, null value for a non-null value type)
        ({"a": 1}, DataTypes.MAP(DataTypes.INT(), DataTypes.INT()), TypeError),
        ({"a": "1"}, DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.INT()), TypeError),
        ({"a": None}, DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.INT(False)), ValueError),

        # Struct
        ({"s": "a", "i": "1"}, schema, TypeError),
        (Row(s="a"), schema, ValueError),  # Row can't have missing field
        (Row(s="a", i="1"), schema, TypeError),
        (["a"], schema, ValueError),
        (["a", "1"], schema, TypeError),
        (MyObj(s="a", i="1"), schema, TypeError),
        (MyObj(s=None, i="1"), schema, ValueError),
    ]

    # Check success cases
    for obj, data_type in success_spec:
        try:
            _create_type_verifier(data_type.not_null())(obj)
        except (TypeError, ValueError):
            self.fail("verify_type(%s, %s, nullable=False)" % (obj, data_type))

    # Check failure cases
    for obj, data_type, exp in failure_spec:
        # msg is only shown when the expected exception is NOT raised.
        msg = "verify_type(%s, %s, nullable=False) == %s" % (obj, data_type, exp)
        with self.assertRaises(exp, msg=msg):
            _create_type_verifier(data_type.not_null())(obj)
def test_verify_type_not_nullable(self):
    """
    Table-driven check of type verifiers built from ``data_type.not_null()``.

    ``success_spec`` lists ``(obj, data_type)`` pairs the verifier must accept;
    ``failure_spec`` lists ``(obj, data_type, exception class)`` triples for which
    the verifier must raise exactly the given exception class.
    """
    import array
    import datetime
    import decimal

    # Row schema shared by the "Struct" cases: 's' is declared non-nullable,
    # 'i' is an INT constructed with a positional ``True`` argument
    # (presumably the nullable flag -- confirm against DataTypes.INT).
    schema = DataTypes.ROW([
        DataTypes.FIELD('s', DataTypes.STRING(nullable=False)),
        DataTypes.FIELD('i', DataTypes.INT(True))
    ])

    class MyObj:
        """Plain object whose attributes are set from the keyword arguments."""

        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)

    # obj, data_type
    success_spec = [
        # String
        ("", DataTypes.STRING()),
        (u"", DataTypes.STRING()),

        # UDT
        (ExamplePoint(1.0, 2.0), ExamplePointUDT()),

        # Boolean
        (True, DataTypes.BOOLEAN()),

        # TinyInt (both ends of the signed 8-bit range)
        (-(2**7), DataTypes.TINYINT()),
        (2**7 - 1, DataTypes.TINYINT()),

        # SmallInt (both ends of the signed 16-bit range)
        (-(2**15), DataTypes.SMALLINT()),
        (2**15 - 1, DataTypes.SMALLINT()),

        # Int (both ends of the signed 32-bit range)
        (-(2**31), DataTypes.INT()),
        (2**31 - 1, DataTypes.INT()),

        # BigInt
        # NOTE(review): 2**64 lies outside the signed 64-bit range yet is listed as a
        # success case -- presumably BIGINT is not range-checked; confirm this is intended.
        (2**64, DataTypes.BIGINT()),

        # Float & Double
        (1.0, DataTypes.FLOAT()),
        (1.0, DataTypes.DOUBLE()),

        # Decimal
        (decimal.Decimal("1.0"), DataTypes.DECIMAL(10, 0)),

        # Binary
        (bytearray([1]), DataTypes.BINARY(1)),

        # Date/Time/Timestamp (a datetime is also expected to pass as a DATE)
        (datetime.date(2000, 1, 2), DataTypes.DATE()),
        (datetime.datetime(2000, 1, 2, 3, 4), DataTypes.DATE()),
        (datetime.time(1, 1, 2), DataTypes.TIME()),
        (datetime.datetime(2000, 1, 2, 3, 4), DataTypes.TIMESTAMP()),

        # Array (list, tuple and array.array inputs)
        ([], DataTypes.ARRAY(DataTypes.INT())),
        (["1", None], DataTypes.ARRAY(DataTypes.STRING(nullable=True))),
        ([1, 2], DataTypes.ARRAY(DataTypes.INT())),
        ((1, 2), DataTypes.ARRAY(DataTypes.INT())),
        (array.array('h', [1, 2]), DataTypes.ARRAY(DataTypes.INT())),

        # Map
        ({}, DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
        ({
            "a": 1
        }, DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
        ({
            "a": None
        }, DataTypes.MAP(DataTypes.STRING(nullable=False), DataTypes.INT(True))),

        # Struct (dict, Row, list, tuple and plain-object inputs; a dict or object
        # may omit 'i' or carry an extra key 'f' and is still expected to pass)
        ({
            "s": "a",
            "i": 1
        }, schema),
        ({
            "s": "a",
            "i": None
        }, schema),
        ({
            "s": "a"
        }, schema),
        ({
            "s": "a",
            "f": 1.0
        }, schema),
        (Row(s="a", i=1), schema),
        (Row(s="a", i=None), schema),
        (Row(s="a", i=1, f=1.0), schema),
        (["a", 1], schema),
        (["a", None], schema),
        (("a", 1), schema),
        (MyObj(s="a", i=1), schema),
        (MyObj(s="a", i=None), schema),
        (MyObj(s="a"), schema),
    ]

    # obj, data_type, exception class
    failure_spec = [
        # Char/VarChar (match anything but None)
        (None, DataTypes.VARCHAR(1), ValueError),
        (None, DataTypes.CHAR(1), ValueError),

        # VarChar (length exceeds maximum length)
        ("abc", DataTypes.VARCHAR(1), ValueError),

        # Char (length exceeds length)
        ("abc", DataTypes.CHAR(1), ValueError),

        # UDT (a value of one UDT checked against a different UDT)
        (ExamplePoint(1.0, 2.0), PythonOnlyUDT(), ValueError),

        # Boolean
        (1, DataTypes.BOOLEAN(), TypeError),
        ("True", DataTypes.BOOLEAN(), TypeError),
        ([1], DataTypes.BOOLEAN(), TypeError),

        # TinyInt (one past each end of the range, then wrong Python types)
        (-(2**7) - 1, DataTypes.TINYINT(), ValueError),
        (2**7, DataTypes.TINYINT(), ValueError),
        ("1", DataTypes.TINYINT(), TypeError),
        (1.0, DataTypes.TINYINT(), TypeError),

        # SmallInt (one past each end of the range)
        (-(2**15) - 1, DataTypes.SMALLINT(), ValueError),
        (2**15, DataTypes.SMALLINT(), ValueError),

        # Int (one past each end of the range)
        (-(2**31) - 1, DataTypes.INT(), ValueError),
        (2**31, DataTypes.INT(), ValueError),

        # Float & Double (a Python int is not accepted)
        (1, DataTypes.FLOAT(), TypeError),
        (1, DataTypes.DOUBLE(), TypeError),

        # Decimal (only decimal.Decimal is accepted)
        (1.0, DataTypes.DECIMAL(10, 0), TypeError),
        (1, DataTypes.DECIMAL(10, 0), TypeError),
        ("1.0", DataTypes.DECIMAL(10, 0), TypeError),

        # Binary
        (1, DataTypes.BINARY(1), TypeError),

        # VarBinary (length exceeds maximum length)
        (bytearray([1, 2]), DataTypes.VARBINARY(1), ValueError),

        # Binary (length exceeds length)
        (bytearray([1, 2]), DataTypes.BINARY(1), ValueError),

        # Date/Time/Timestamp (strings and epoch integers are not accepted)
        ("2000-01-02", DataTypes.DATE(), TypeError),
        ("10:01:02", DataTypes.TIME(), TypeError),
        (946811040, DataTypes.TIMESTAMP(), TypeError),

        # Array
        (["1", None], DataTypes.ARRAY(DataTypes.VARCHAR(1, nullable=False)), ValueError),
        ([1, "2"], DataTypes.ARRAY(DataTypes.INT()), TypeError),

        # Map (wrong key type, wrong value type, null value for a non-null value type)
        ({
            "a": 1
        }, DataTypes.MAP(DataTypes.INT(), DataTypes.INT()), TypeError),
        ({
            "a": "1"
        }, DataTypes.MAP(DataTypes.VARCHAR(1), DataTypes.INT()), TypeError),
        ({
            "a": None
        }, DataTypes.MAP(DataTypes.VARCHAR(1), DataTypes.INT(False)), ValueError),

        # Struct
        ({
            "s": "a",
            "i": "1"
        }, schema, TypeError),
        (Row(s="a"), schema, ValueError),  # Row can't have missing field
        (Row(s="a", i="1"), schema, TypeError),
        (["a"], schema, ValueError),
        (["a", "1"], schema, TypeError),
        (MyObj(s="a", i="1"), schema, TypeError),
        (MyObj(s=None, i="1"), schema, ValueError),
    ]

    # Check success cases
    for obj, data_type in success_spec:
        try:
            _create_type_verifier(data_type.not_null())(obj)
        except (TypeError, ValueError):
            self.fail("verify_type(%s, %s, nullable=False)" % (obj, data_type))

    # Check failure cases
    for obj, data_type, exp in failure_spec:
        # msg is only shown when the expected exception is NOT raised.
        msg = "verify_type(%s, %s, nullable=False) == %s" % (
            obj, data_type, exp)
        with self.assertRaises(exp, msg=msg):
            _create_type_verifier(data_type.not_null())(obj)
def from_elements(self, elements, schema=None, verify_schema=True):
    """
    Creates a table from a collection of elements.
    The elements types must be acceptable atomic types or acceptable composite types.
    All elements must be of the same type.
    If the elements types are composite types, the composite types must be strictly equal,
    and its subtypes must also be acceptable types.
    e.g. if the elements are tuples, the length of the tuples must be equal, the element types
    of the tuples must be equal in order.

    The built-in acceptable atomic element types contains:

    **int**, **long**, **str**, **unicode**, **bool**,
    **float**, **bytearray**, **datetime.date**, **datetime.time**, **datetime.datetime**,
    **datetime.timedelta**, **decimal.Decimal**

    The built-in acceptable composite element types contains:

    **list**, **tuple**, **dict**, **array**, :class:`pyflink.table.Row`

    If the element type is a composite type, it will be unboxed.
    e.g. table_env.from_elements([(1, 'Hi'), (2, 'Hello')]) will return a table like:

    +----+-------+
    | _1 | _2    |
    +====+=======+
    | 1  | Hi    |
    +----+-------+
    | 2  | Hello |
    +----+-------+

    "_1" and "_2" are generated field names.

    Example:
    ::

        # use the second parameter to specify custom field names
        >>> table_env.from_elements([(1, 'Hi'), (2, 'Hello')], ['a', 'b'])
        # use the second parameter to specify custom table schema
        >>> table_env.from_elements([(1, 'Hi'), (2, 'Hello')],
        ...                         DataTypes.ROW([DataTypes.FIELD("a", DataTypes.INT()),
        ...                                        DataTypes.FIELD("b", DataTypes.STRING())]))
        # use the third parameter to switch whether to verify the elements against the schema
        >>> table_env.from_elements([(1, 'Hi'), (2, 'Hello')],
        ...                         DataTypes.ROW([DataTypes.FIELD("a", DataTypes.INT()),
        ...                                        DataTypes.FIELD("b", DataTypes.STRING())]),
        ...                         False)

    :param elements: The elements to create a table from. An iterable without
                     ``__len__`` (e.g. a generator) is materialized into a list first.
    :param schema: The schema of the table. One of: a ``RowType``; any other
                   ``DataType``, which is wrapped into a single-field row named
                   ``"value"``; a list or tuple of field names, in which case the field
                   types are inferred from the elements; or ``None`` to infer both names
                   and types.
    :param verify_schema: Whether to verify the elements against the schema. Only has an
                          effect when ``schema`` is a ``RowType`` or ``DataType``.
    :return: The result :class:`Table`.
    :raises TypeError: If ``schema`` is none of the accepted kinds.
    """

    # Prepare the per-element verification hook. It is only applied further below,
    # once the final schema is known.
    if isinstance(schema, RowType):
        verify_func = _create_type_verifier(
            schema) if verify_schema else lambda _: True

        def verify_obj(obj):
            verify_func(obj)
            return obj
    elif isinstance(schema, DataType):
        # Verify against the bare data type, but describe the table as a
        # one-column row named "value".
        data_type = schema
        schema = RowType().add("value", schema)

        verify_func = _create_type_verifier(
            data_type, name="field value") if verify_schema else lambda _: True

        def verify_obj(obj):
            verify_func(obj)
            return obj
    else:
        # Nothing to verify against when the schema is inferred from the data.
        def verify_obj(obj):
            return obj

    # The elements are traversed more than once below (schema inference, then
    # conversion), so one-shot iterables must be materialized.
    if not hasattr(elements, "__len__"):
        elements = list(elements)

    # infers the schema if not specified
    if schema is None or isinstance(schema, (list, tuple)):
        # Keep the caller-supplied names in their own variable: `schema` is rebound to
        # the inferred row type on the next line, which previously made the renaming
        # loop below unreachable.
        field_names = schema
        schema = _infer_schema_from_data(elements, names=field_names)
        converter = _create_converter(schema)
        elements = map(converter, elements)
        if field_names is not None:
            # Apply the requested names on top of the inferred ones. The slice ignores
            # surplus names instead of raising IndexError, matching the previous
            # behavior of silently dropping them.
            # NOTE(review): assumes the inferred schema exposes parallel `fields` and
            # `names` lists, exactly as the original loop did -- confirm.
            for i, name in enumerate(field_names[:len(schema.fields)]):
                schema.fields[i].name = name
                schema.names[i] = name

    elif not isinstance(schema, RowType):
        raise TypeError(
            "schema should be RowType, list, tuple or None, but got: %s" % schema)

    # verifies the elements against the specified schema
    elements = map(verify_obj, elements)
    # converts python data to sql data
    elements = [schema.to_sql_type(element) for element in elements]
    return self._from_elements(elements, schema)
def from_elements(self, elements, schema=None, verify_schema=True):
    """
    Creates a table from a collection of elements.

    Example:
    ::

        >>> table_env.from_elements([(1, 'Hi'), (2, 'Hello')], ['a', 'b'])

    :param elements: The elements to create a table from. An iterable without
                     ``__len__`` (e.g. a generator) is materialized into a list first.
    :param schema: The schema of the table. One of: a ``RowType``; any other
                   ``DataType``, which is wrapped into a single-field row named
                   ``"value"``; a list or tuple of field names, in which case the field
                   types are inferred from the elements; or ``None`` to infer both names
                   and types.
    :param verify_schema: Whether to verify the elements against the schema. Only has an
                          effect when ``schema`` is a ``RowType`` or ``DataType``.
    :return: The result :class:`Table`.
    :raises TypeError: If ``schema`` is none of the accepted kinds.
    """

    # Prepare the per-element verification hook. It is only applied further below,
    # once the final schema is known.
    if isinstance(schema, RowType):
        verify_func = _create_type_verifier(
            schema) if verify_schema else lambda _: True

        def verify_obj(obj):
            verify_func(obj)
            return obj
    elif isinstance(schema, DataType):
        # Verify against the bare data type, but describe the table as a
        # one-column row named "value".
        data_type = schema
        schema = RowType().add("value", schema)

        verify_func = _create_type_verifier(
            data_type, name="field value") if verify_schema else lambda _: True

        def verify_obj(obj):
            verify_func(obj)
            return obj
    else:
        # Nothing to verify against when the schema is inferred from the data.
        def verify_obj(obj):
            return obj

    # The elements are traversed more than once below (schema inference, then
    # conversion), so one-shot iterables must be materialized.
    if not hasattr(elements, "__len__"):
        elements = list(elements)

    # infers the schema if not specified
    if schema is None or isinstance(schema, (list, tuple)):
        # Keep the caller-supplied names in their own variable: `schema` is rebound to
        # the inferred row type on the next line, which previously made the renaming
        # loop below unreachable.
        field_names = schema
        schema = _infer_schema_from_data(elements, names=field_names)
        converter = _create_converter(schema)
        elements = map(converter, elements)
        if field_names is not None:
            # Apply the requested names on top of the inferred ones. The slice ignores
            # surplus names instead of raising IndexError, matching the previous
            # behavior of silently dropping them.
            # NOTE(review): assumes the inferred schema exposes parallel `fields` and
            # `names` lists, exactly as the original loop did -- confirm.
            for i, name in enumerate(field_names[:len(schema.fields)]):
                schema.fields[i].name = name
                schema.names[i] = name

    elif not isinstance(schema, RowType):
        raise TypeError(
            "schema should be RowType, list, tuple or None, but got: %s" % schema)

    # verifies the elements against the specified schema
    elements = map(verify_obj, elements)
    # converts python data to sql data
    elements = [schema.to_sql_type(element) for element in elements]
    return self._from_elements(elements, schema)