コード例 #1
0
    def from_elements(self, elements, schema=None, verify_schema=True):
        """
        Creates a table from a collection of elements.

        Example:
        ::

            >>> table_env.from_elements([(1, 'Hi'), (2, 'Hello')], ['a', 'b'])

        :param elements: The elements to create a table from. An iterable without ``__len__``
                         (e.g. a generator) is materialized into a list first.
        :param schema: The schema of the table. May be a :class:`RowType`, any other
                       :class:`DataType` (wrapped into a single-field row named "value"),
                       a list/tuple of field names, or None to infer the schema from the data.
        :param verify_schema: Whether to verify the elements against the schema. Only has an
                              effect when `schema` is a :class:`DataType`.
        :return: The result :class:`Table`.
        :raises TypeError: If `schema` is none of the supported kinds.
        """

        # Build the verifier before `schema` is rebound below. ``None`` means "no
        # verification" (verification disabled, or no explicit type was supplied).
        verify_func = None
        if isinstance(schema, RowType):
            if verify_schema:
                verify_func = _create_type_verifier(schema)
        elif isinstance(schema, DataType):
            # A non-row type describes the single column "value".
            data_type = schema
            schema = RowType().add("value", data_type)
            if verify_schema:
                verify_func = _create_type_verifier(
                    data_type, name="field value")

        def verify_obj(obj):
            # Returns the object unchanged so it can be used inline in the conversion below.
            if verify_func is not None:
                verify_func(obj)
            return obj

        # One-shot iterables are materialized so they can be traversed for schema inference
        # and again for conversion.
        if "__len__" not in dir(elements):
            elements = list(elements)

        # infers the schema if not specified
        if schema is None or isinstance(schema, (list, tuple)):
            # Field names, if given, are forwarded to the inference helper through `names`.
            # NOTE(review): a former rename loop here was guarded by a list/tuple check on the
            # already-inferred schema and therefore never ran; it has been removed.
            schema = _infer_schema_from_data(elements, names=schema)
            converter = _create_converter(schema)
            elements = map(converter, elements)
        elif not isinstance(schema, RowType):
            raise TypeError(
                "schema should be RowType, list, tuple or None, but got: %s" %
                schema)

        # Verify each element as supplied by the caller and only then convert it to sql data;
        # verifying after the conversion would check the converted representation instead.
        elements = [schema.to_sql_type(verify_obj(element)) for element in elements]
        return self._from_elements(elements, schema)
コード例 #2
0
    def test_explain(self):
        """Checks that explaining a simple projection returns a ``str``."""
        row_type = (RowType()
                    .add('a', DataTypes.INT())
                    .add('b', DataTypes.STRING())
                    .add('c', DataTypes.STRING()))
        # An empty table is enough: only the plan is inspected.
        source = self.t_env.from_elements([], row_type)
        projected = source.select(source.a + 1, source.b, source.c)

        plan = projected.explain()

        assert isinstance(plan, str)
コード例 #3
0
    def test_explain_with_extended(self):
        """Checks that the extended explanation of a projection is returned as text."""
        schema = RowType() \
            .add('a', DataTypes.INT()) \
            .add('b', DataTypes.STRING()) \
            .add('c', DataTypes.STRING())
        t_env = self.t_env
        # An empty table is enough: only the plan is inspected.
        t = t_env.from_elements([], schema)
        result = t.select("1 + a, b, c")

        actual = t_env.explain(result, True)

        # The name `unicode` only exists on Python 2, so referencing it directly raises
        # NameError on Python 3 whenever the first check fails. type(u"") is `unicode` on
        # Python 2 and `str` on Python 3, which keeps the check valid on both.
        assert isinstance(actual, (str, type(u"")))
コード例 #4
0
    def test_explain_with_extended(self):
        """Checks that explaining with extra detail flags returns a ``str``."""
        row_type = (RowType()
                    .add('a', DataTypes.INT())
                    .add('b', DataTypes.STRING())
                    .add('c', DataTypes.STRING()))
        # An empty table is enough: only the plan is inspected.
        source = self.t_env.from_elements([], row_type)
        projected = source.select(source.a + 1, source.b, source.c)

        plan = projected.explain(
            ExplainDetail.ESTIMATED_COST,
            ExplainDetail.CHANGELOG_MODE)

        assert isinstance(plan, str)
コード例 #5
0
    def _to_data_type(cls, field_type):
        """
        Converts a field type described by ``flink_fn_execution_pb2.Schema`` into a data type.

        :param field_type: The field type to convert. Its ``type_name`` selects the data type
                           to create and its ``nullable`` flag is forwarded to that type.
        :return: The matching data type instance; array element types and row fields are
                 converted recursively.
        :raises ValueError: If ``type_name`` is not one of the handled types.
        """
        from pyflink.fn_execution import flink_fn_execution_pb2

        proto_schema = flink_fn_execution_pb2.Schema
        type_name = field_type.type_name

        # Types that are constructed from the nullability flag alone.
        nullable_only_types = {
            proto_schema.TINYINT: TinyIntType,
            proto_schema.SMALLINT: SmallIntType,
            proto_schema.INT: IntType,
            proto_schema.BIGINT: BigIntType,
            proto_schema.BOOLEAN: BooleanType,
            proto_schema.FLOAT: FloatType,
            proto_schema.DOUBLE: DoubleType,
            proto_schema.DATE: DateType,
        }
        if type_name in nullable_only_types:
            return nullable_only_types[type_name](field_type.nullable)

        # Both take 0x7fffffff (presumably the length bound) ahead of the nullability flag.
        variable_length_types = {
            proto_schema.VARCHAR: VarCharType,
            proto_schema.VARBINARY: VarBinaryType,
        }
        if type_name in variable_length_types:
            return variable_length_types[type_name](0x7fffffff, field_type.nullable)

        if type_name == proto_schema.DECIMAL:
            decimal_info = field_type.decimal_info
            return DecimalType(
                decimal_info.precision, decimal_info.scale, field_type.nullable)

        if type_name == proto_schema.TIME:
            return TimeType(field_type.time_info.precision, field_type.nullable)

        if type_name == proto_schema.LOCAL_ZONED_TIMESTAMP:
            precision = field_type.local_zoned_timestamp_info.precision
            return LocalZonedTimestampType(precision, field_type.nullable)

        if type_name == proto_schema.TIMESTAMP:
            return TimestampType(
                field_type.timestamp_info.precision, field_type.nullable)

        if type_name == proto_schema.BASIC_ARRAY:
            element_type = cls._to_data_type(field_type.collection_element_type)
            return ArrayType(element_type, field_type.nullable)

        if type_name == proto_schema.TypeName.ROW:
            row_fields = [
                RowField(f.name, cls._to_data_type(f.type), f.description)
                for f in field_type.row_schema.fields
            ]
            return RowType(row_fields, field_type.nullable)

        raise ValueError("field_type %s is not supported." % field_type)
コード例 #6
0
 def _to_row_type(row_schema):
     """Builds a ``RowType`` holding one ``RowField`` per field of ``row_schema``."""
     row_fields = []
     for field in row_schema.fields:
         # Each field keeps its name; its type is converted by `_to_data_type`.
         row_fields.append(RowField(field.name, _to_data_type(field.type)))
     return RowType(row_fields)
コード例 #7
0
    def from_elements(self, elements, schema=None, verify_schema=True):
        """
        Creates a table from a collection of elements.
        The element types must be acceptable atomic types or acceptable composite types.
        All elements must be of the same type.
        If the element types are composite types, the composite types must be strictly equal,
        and their subtypes must also be acceptable types.
        e.g. if the elements are tuples, the lengths of the tuples must be equal and the element
        types of the tuples must be equal in order.

        The built-in acceptable atomic element types are:

        **int**, **long**, **str**, **unicode**, **bool**,
        **float**, **bytearray**, **datetime.date**, **datetime.time**, **datetime.datetime**,
        **datetime.timedelta**, **decimal.Decimal**

        The built-in acceptable composite element types are:

        **list**, **tuple**, **dict**, **array**, :class:`pyflink.table.Row`

        If the element type is a composite type, it will be unboxed.
        e.g. table_env.from_elements([(1, 'Hi'), (2, 'Hello')]) will return a table like:

        +----+-------+
        | _1 |  _2   |
        +====+=======+
        | 1  |  Hi   |
        +----+-------+
        | 2  | Hello |
        +----+-------+

        "_1" and "_2" are generated field names.

        Example:
        ::

            # use the second parameter to specify custom field names
            >>> table_env.from_elements([(1, 'Hi'), (2, 'Hello')], ['a', 'b'])
            # use the second parameter to specify custom table schema
            >>> table_env.from_elements([(1, 'Hi'), (2, 'Hello')],
            ...                         DataTypes.ROW([DataTypes.FIELD("a", DataTypes.INT()),
            ...                                        DataTypes.FIELD("b", DataTypes.STRING())]))
            # use the third parameter to switch whether to verify the elements against the schema
            >>> table_env.from_elements([(1, 'Hi'), (2, 'Hello')],
            ...                         DataTypes.ROW([DataTypes.FIELD("a", DataTypes.INT()),
            ...                                        DataTypes.FIELD("b", DataTypes.STRING())]),
            ...                         False)

        :param elements: The elements to create a table from. An iterable without ``__len__``
                         (e.g. a generator) is materialized into a list first.
        :param schema: The schema of the table. May be a :class:`RowType`, any other
                       :class:`DataType` (wrapped into a single-field row named "value"),
                       a list/tuple of field names, or None to infer the schema from the data.
        :param verify_schema: Whether to verify the elements against the schema. Only has an
                              effect when `schema` is a :class:`DataType`.
        :return: The result :class:`Table`.
        :raises TypeError: If `schema` is none of the supported kinds.
        """

        # Build the verifier before `schema` is rebound below. ``None`` means "no
        # verification" (verification disabled, or no explicit type was supplied).
        verify_func = None
        if isinstance(schema, RowType):
            if verify_schema:
                verify_func = _create_type_verifier(schema)
        elif isinstance(schema, DataType):
            # A non-row type describes the single column "value".
            data_type = schema
            schema = RowType().add("value", data_type)
            if verify_schema:
                verify_func = _create_type_verifier(
                    data_type, name="field value")

        def verify_obj(obj):
            # Returns the object unchanged so it can be used inline in the conversion below.
            if verify_func is not None:
                verify_func(obj)
            return obj

        # One-shot iterables are materialized so they can be traversed for schema inference
        # and again for conversion.
        if "__len__" not in dir(elements):
            elements = list(elements)

        # infers the schema if not specified
        if schema is None or isinstance(schema, (list, tuple)):
            # Field names, if given, are forwarded to the inference helper through `names`.
            # NOTE(review): a former rename loop here was guarded by a list/tuple check on the
            # already-inferred schema and therefore never ran; it has been removed.
            schema = _infer_schema_from_data(elements, names=schema)
            converter = _create_converter(schema)
            elements = map(converter, elements)
        elif not isinstance(schema, RowType):
            raise TypeError(
                "schema should be RowType, list, tuple or None, but got: %s" %
                schema)

        # verifies each element against the specified schema, then converts the python data
        # to sql data (element by element, in a single pass)
        elements = [schema.to_sql_type(verify_obj(element)) for element in elements]
        return self._from_elements(elements, schema)