def test_schema(self):
    """Check the properties produced by a Schema descriptor built from a TableSchema."""
    columns = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])
    descriptor = Schema().schema(columns)

    actual_properties = descriptor.to_properties()

    # One name / data-type pair is expected per column, keyed by column position.
    expected_properties = {
        'schema.0.name': 'a',
        'schema.0.data-type': 'INT',
        'schema.1.name': 'b',
        'schema.1.data-type': 'VARCHAR(2147483647)',
    }
    self.assertEqual(expected_properties, actual_properties)
def get_schema(self) -> TableSchema:
    """
    Get the schema of the table.

    :return: Schema of the table/view.

    .. note:: Deprecated in 1.14. This method returns the deprecated TableSchema class. The old
              class was a hybrid of resolved and unresolved schema information. It has been
              replaced by the new Schema which is always unresolved and will be resolved by the
              framework later.
    """
    # Fetch the Java-side schema first, then wrap it in the Python TableSchema.
    j_schema = self._j_catalog_base_table.getSchema()
    return TableSchema(j_table_schema=j_schema)
def test_schema():
    """Check the properties produced by a Schema descriptor built from a TableSchema."""
    descriptor = Schema()
    columns = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])
    descriptor = descriptor.schema(columns)

    actual_properties = descriptor.to_properties()

    expected_properties = {
        'schema.0.name': 'a',
        'schema.0.type': 'INT',
        'schema.1.name': 'b',
        'schema.1.type': 'VARCHAR',
    }
    assert actual_properties == expected_properties
def test_get_field_data_type(self):
    """Check data type lookup by field name and by index, including misses and a None key."""
    table_schema = TableSchema(
        ["a", "b", "c"],
        [DataTypes.INT(), DataTypes.BIGINT(), DataTypes.STRING()])

    found_by_name = table_schema.get_field_data_type("b")
    found_by_index = table_schema.get_field_data_type(2)
    missing_name = table_schema.get_field_data_type("d")
    missing_index = table_schema.get_field_data_type(6)

    # A None key is expected to raise TypeError rather than return None.
    with self.assertRaises(TypeError):
        table_schema.get_field_data_type(None)

    self.assertEqual(DataTypes.BIGINT(), found_by_name)
    self.assertEqual(DataTypes.STRING(), found_by_index)
    self.assertIsNone(missing_name)
    self.assertIsNone(missing_index)
def test_get_schema(self):
    """Check that the schema of a grouped and aggregated table is (a: BIGINT, b: STRING)."""
    env = self.t_env
    source = env.from_elements(
        [(1, 'Hi', 'Hello'), (2, 'Hello', 'Hello'), (2, 'Hello', 'Hello')],
        ['a', 'b', 'c'])

    sink_field_names = ["a", "b"]
    sink_field_types = [DataTypes.BIGINT(), DataTypes.STRING()]
    env.register_table_sink(
        "Results",
        sink_field_names,
        sink_field_types,
        source_sink_utils.TestRetractSink())

    aggregated = source.group_by("c").select("a.sum as a, c as b")
    actual_schema = aggregated.get_schema()

    expected_schema = TableSchema(["a", "b"], [DataTypes.BIGINT(), DataTypes.STRING()])
    assert actual_schema == expected_schema
def test_schema(self):
    """Check the properties produced by an OldCsv format configured with a TableSchema."""
    csv_format = OldCsv()
    columns = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])
    csv_format = csv_format.schema(columns)

    actual_properties = csv_format.to_properties()

    # Per-field entries plus the fixed format type and property version.
    expected_properties = {
        'format.fields.0.name': 'a',
        'format.fields.0.data-type': 'INT',
        'format.fields.1.name': 'b',
        'format.fields.1.data-type': 'VARCHAR(2147483647)',
        'format.type': 'csv',
        'format.property-version': '1',
    }
    self.assertEqual(expected_properties, actual_properties)
def test_schema():
    """Check the properties produced by an OldCsv format configured with a TableSchema."""
    csv_format = OldCsv()
    columns = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])
    csv_format = csv_format.schema(columns)

    actual_properties = csv_format.to_properties()

    expected_properties = {
        'format.fields.0.name': 'a',
        'format.fields.0.type': 'INT',
        'format.fields.1.name': 'b',
        'format.fields.1.type': 'VARCHAR',
        'format.type': 'csv',
        'format.property-version': '1',
    }
    assert actual_properties == expected_properties
def table_java_inference():
    """
    Run the Table API inference example end to end.

    Steps, in order:

    1. Prepare the input data and start the test server returned by
       ``start_zk_server()``.
    2. Generate the model.
    3. Register a ``TFRTableSource`` over two TFRecord files and the extract
       function, then project ``image`` / ``org_label`` through a lateral
       table call.
    4. Run ``tensorflow_on_flink_table.inference`` with two workers and write
       the output table to ``LogInferAccSink``.
    5. Execute the stream job.

    The test server is stopped in a ``finally`` clause so that it is released
    even if model generation, job construction or job execution raises.
    """
    prep_data()
    testing_server = start_zk_server()
    try:
        generate_model()
        stream_env = StreamExecutionEnvironment.get_execution_environment()
        table_env = TableEnvironment.get_table_environment(stream_env)
        stream_env.set_parallelism(2)

        # Source: two TFRecord shards read for a single epoch.
        train_data_path = "file://" + get_root_path() + "/examples/target/data/test/"
        paths = [train_data_path + "0.tfrecords", train_data_path + "1.tfrecords"]
        src_row_type = RowType([StringType(), IntegerType()],
                               ["image_raw", "label"])
        table_src = TFRTableSource(
            paths=paths,
            epochs=1,
            out_row_type=src_row_type,
            converters=[ScalarConverter.FIRST, ScalarConverter.ONE_HOT])
        tfr_tbl_name = "tfr_input_table"
        table_env.register_table_source(tfr_tbl_name, table_src)

        # Extraction: apply the registered table function to every source row.
        ext_func_name = "tfr_extract"
        table_env.register_function(ext_func_name, java_inference_extract_func())
        out_cols = 'image,org_label'
        in_cols = ','.join(src_row_type.fields_names)
        extracted = table_env.sql_query(
            'select {} from {}, LATERAL TABLE({}({})) as T({})'.format(
                out_cols, tfr_tbl_name, ext_func_name, in_cols, out_cols))

        # Output schema of the inference job: (real_label, predicted_label).
        builder = TableSchema.Builder()
        builder.column(name='real_label', data_type=LongType()).column(
            name='predicted_label', data_type=LongType())
        output_schema = builder.build()

        props = build_props('0')
        props[TF_INFERENCE_EXPORT_PATH] = export_path
        props[TF_INFERENCE_INPUT_TENSOR_NAMES] = 'image'
        props[TF_INFERENCE_OUTPUT_TENSOR_NAMES] = 'prediction'
        props[TF_INFERENCE_OUTPUT_ROW_FIELDS] = ','.join(
            ['org_label', 'prediction'])

        output_table = tensorflow_on_flink_table.inference(
            num_worker=2,
            properties=props,
            stream_env=stream_env,
            table_env=table_env,
            input_table=extracted,
            output_schema=output_schema)
        output_table.write_to_sink(LogInferAccSink())
        table_env.generate_stream_graph()
        stream_env.execute()
    finally:
        # Always release the test server, including on failure.
        testing_server.stop()
def get_table_schema(self):
    """
    Get the schema of result.

    The schema of DDL, USE, EXPLAIN:
    ::

        +-------------+-------------+----------+
        | column name | column type | comments |
        +-------------+-------------+----------+
        | result      | STRING      |          |
        +-------------+-------------+----------+

    The schema of SHOW:
    ::

        +---------------+-------------+----------+
        |  column name  | column type | comments |
        +---------------+-------------+----------+
        | <object name> | STRING      |          |
        +---------------+-------------+----------+

    The column name of `SHOW CATALOGS` is "catalog name",
    the column name of `SHOW DATABASES` is "database name",
    the column name of `SHOW TABLES` is "table name",
    the column name of `SHOW VIEWS` is "view name",
    the column name of `SHOW FUNCTIONS` is "function name".

    The schema of DESCRIBE:
    ::

        +------------------+-------------+-------------------------------------------------+
        | column name      | column type | comments                                        |
        +------------------+-------------+-------------------------------------------------+
        | name             | STRING      | field name                                      |
        +------------------+-------------+-------------------------------------------------+
        | type             | STRING      | field type expressed as a String                |
        +------------------+-------------+-------------------------------------------------+
        | null             | BOOLEAN     | field nullability: true if a field is nullable, |
        |                  |             | else false                                      |
        +------------------+-------------+-------------------------------------------------+
        | key              | BOOLEAN     | key constraint: 'PRI' for primary keys,         |
        |                  |             | 'UNQ' for unique keys, else null                |
        +------------------+-------------+-------------------------------------------------+
        | computed column  | STRING      | computed column: string expression              |
        |                  |             | if a field is computed column, else null        |
        +------------------+-------------+-------------------------------------------------+
        | watermark        | STRING      | watermark: string expression if a field is      |
        |                  |             | watermark, else null                            |
        +------------------+-------------+-------------------------------------------------+

    The schema of INSERT: (one column per one sink)
    ::

        +----------------------------+-------------+-----------------------+
        | column name                | column type | comments              |
        +----------------------------+-------------+-----------------------+
        | (name of the insert table) | BIGINT      | the insert table name |
        +----------------------------+-------------+-----------------------+

    The schema of SELECT is the selected field names and types.

    :return: The schema of result.
    :rtype: pyflink.table.TableSchema

    .. versionadded:: 1.11.0
    """
    # NOTE(review): the DESCRIBE table above lists "key" as BOOLEAN although its
    # documented values are 'PRI' / 'UNQ' / null - confirm the column type.
    j_schema = self._j_table_result.getTableSchema()
    return TableSchema(j_table_schema=j_schema)
def test_repr(self):
    """Check the tree-style text returned by repr() for a three-column TableSchema."""
    table_schema = TableSchema(
        ["a", "b", "c"],
        [DataTypes.INT(), DataTypes.BIGINT(), DataTypes.STRING()])

    expected_text = "root\n |-- a: INT\n |-- b: BIGINT\n |-- c: STRING\n"

    actual_text = repr(table_schema)
    self.assertEqual(expected_text, actual_text)
def test_get_schema(self):
    """Check that the schema of a grouped and aggregated table is (a: BIGINT, b: STRING)."""
    source = self.t_env.from_elements([(1, 'Hi', 'Hello')], ['a', 'b', 'c'])
    aggregated = source.group_by("c").select("a.sum as a, c as b")

    actual_schema = aggregated.get_schema()

    expected_schema = TableSchema(["a", "b"], [DataTypes.BIGINT(), DataTypes.STRING()])
    assert actual_schema == expected_schema