Ejemplo n.º 1
0
def call(f: Union[str, UserDefinedFunctionWrapper], *args) -> Expression:
    """
    Builds an expression that calls the function `f` with the given arguments.

    `f` is either a function name (``str``) or a Python user-defined function.

    A name is passed to the Java ``Expressions.call`` and the function is looked up in a
    catalog. Two kinds of functions exist:

        - System functions - identified by a one-part name
        - Catalog functions - always identified by a three-part name
            (catalog, database, function)

    Each of them is either temporary or permanent (stored in an external catalog), which
    leads to the following resolution order for the provided name:

        - Temporary system function
        - System function
        - Temporary catalog function
        - Catalog function

    A Python user-defined function is converted to a Java function definition and passed to
    the Java method ``Expressions.apiCall``, which is made accessible and invoked through
    reflection.

    :param f: the path of the function or the Python user-defined function.
    :param args: parameters of the user-defined function.
    :return: the expression wrapping the resulting Java call expression.
    """
    gateway = get_gateway()

    def java_arguments():
        # Translate every Python argument to its Java expression and pack them in an Object[].
        return to_jarray(gateway.jvm.Object, [_get_java_expression(a) for a in args])

    if isinstance(f, str):
        # Name-based call: the function lookup is performed on the Java side.
        j_call = gateway.jvm.Expressions.call(f, java_arguments())
        return Expression(j_call)

    def to_function_definition(udf):
        if not isinstance(udf, UserDefinedTableFunctionWrapper):
            return udf.java_user_defined_function()

        # TypeInference was not supported for TableFunction in the old planner. Use
        # TableFunctionDefinition to work around this issue.
        j_field_types = to_jarray(
            gateway.jvm.TypeInformation, [_to_java_type(t) for t in udf._result_types])
        j_row_type = gateway.jvm.org.apache.flink.api.java.typeutils.RowTypeInfo(j_field_types)
        return gateway.jvm.org.apache.flink.table.functions.TableFunctionDefinition(
            'f', udf.java_user_defined_function(), j_row_type)

    j_expressions_cls = load_java_class("org.apache.flink.table.api.Expressions")
    j_definition_cls = load_java_class('org.apache.flink.table.functions.FunctionDefinition')
    # The Class object of Object[] is taken from an empty array instance.
    j_object_array_cls = to_jarray(gateway.jvm.Object, []).getClass()

    # Look up apiCall(FunctionDefinition, Object[]) and open it up for reflective use.
    j_signature = to_jarray(gateway.jvm.Class, [j_definition_cls, j_object_array_cls])
    j_api_call = j_expressions_cls.getDeclaredMethod("apiCall", j_signature)
    j_api_call.setAccessible(True)

    # No receiver object is supplied to invoke(); the two values below are the method arguments.
    j_invoke_args = to_jarray(
        gateway.jvm.Object, [to_function_definition(f), java_arguments()])
    return Expression(j_api_call.invoke(None, j_invoke_args))
Ejemplo n.º 2
0
    def __init__(self,
                 record_class: str = None,
                 avro_schema_string: str = None):
        """
        Creates AvroSerializationSchema that serializes SpecificRecord using provided schema or
        record class.

        When both arguments are given, ``record_class`` takes precedence and
        ``avro_schema_string`` is not used.

        :param record_class: Avro record class used to serialize Flink's row to Avro's record.
        :param avro_schema_string: Avro schema string to serialize Flink's row to Avro's record.
        :raises TypeError: If neither ``record_class`` nor ``avro_schema_string`` is specified.
        """
        if avro_schema_string is None and record_class is None:
            raise TypeError(
                "record_class or avro_schema_string should be specified.")

        gateway = get_gateway()
        if record_class is not None:
            # Import the record class into the JVM view and pass its Class object on.
            java_import(gateway.jvm, record_class)
            j_schema_source = load_java_class(record_class)
        else:
            # The guard above guarantees that the schema string is set on this path.
            j_schema_source = avro_schema_string

        # Both variants go through the same Java class; only the constructor argument differs.
        JAvroRowSerializationSchema = \
            gateway.jvm.org.apache.flink.formats.avro.AvroRowSerializationSchema
        j_serialization_schema = JAvroRowSerializationSchema(j_schema_source)

        super(AvroRowSerializationSchema,
              self).__init__(j_serialization_schema)
Ejemplo n.º 3
0
    def add_default_kryo_serializer(self, type_class_name, serializer_class_name):
        """
        Adds a new Kryo default serializer to the Runtime.

        Both names are loaded as Java classes and passed to ``addDefaultKryoSerializer`` of the
        wrapped Java stream execution environment.

        Example:
        ::

            >>> env.add_default_kryo_serializer("com.aaa.bbb.TypeClass", "com.aaa.bbb.Serializer")

        :param type_class_name: The fully-qualified Java class name of the types serialized with
                                the given serializer.
        :param serializer_class_name: The fully-qualified Java class name of the serializer to
                                      use.
        """
        j_type_class = load_java_class(type_class_name)
        j_serializer_class = load_java_class(serializer_class_name)
        j_env = self._j_stream_execution_environment
        j_env.addDefaultKryoSerializer(j_type_class, j_serializer_class)
Ejemplo n.º 4
0
    def __init__(self,
                 record_class: str = None,
                 avro_schema_string: str = None):
        """
        Creates an Avro deserialization schema for the given specific record class or Avro schema
        string. Having the concrete Avro record class might improve performance.

        When both arguments are given, ``record_class`` takes precedence and
        ``avro_schema_string`` is not used.

        :param record_class: Avro record class used to deserialize Avro's record to Flink's row.
        :param avro_schema_string: Avro schema string to deserialize Avro's record to Flink's row.
        :raises TypeError: If neither ``record_class`` nor ``avro_schema_string`` is specified.
        """
        if avro_schema_string is None and record_class is None:
            raise TypeError(
                "record_class or avro_schema_string should be specified.")

        gateway = get_gateway()
        if record_class is not None:
            # Import the record class into the JVM view and pass its Class object on.
            java_import(gateway.jvm, record_class)
            j_schema_source = load_java_class(record_class)
        else:
            # The guard above guarantees that the schema string is set on this path.
            j_schema_source = avro_schema_string

        # Both variants go through the same Java class; only the constructor argument differs.
        JAvroRowDeserializationSchema = \
            gateway.jvm.org.apache.flink.formats.avro.AvroRowDeserializationSchema
        j_deserialization_schema = JAvroRowDeserializationSchema(j_schema_source)

        super(AvroRowDeserializationSchema,
              self).__init__(j_deserialization_schema)
Ejemplo n.º 5
0
 def with_bucket_assigner(
         self,
         assigner_class_name: str) -> 'StreamingFileSink.DefaultRowFormatBuilder':
     """
     Sets the bucket assigner of the wrapped Java row format builder.

     :param assigner_class_name: The fully-qualified Java class name of the bucket assigner.
                                 The class must have a default constructor because it is
                                 instantiated here.
     :return: This builder.
     """
     gateway = get_gateway()
     java_import(gateway.jvm, assigner_class_name)
     j_assigner_class = load_java_class(assigner_class_name)
     # The Java builder's withBucketAssigner takes a BucketAssigner instance, not a
     # java.lang.Class, so the loaded class is instantiated before it is handed over.
     self.j_default_row_format_builder.withBucketAssigner(j_assigner_class.newInstance())
     return self
Ejemplo n.º 6
0
    def register_type_with_kryo_serializer(self, type_class_name, serializer_class_name):
        """
        Registers a serializer class for the given type at the KryoSerializer.

        Both names are loaded as Java classes and passed to ``registerTypeWithKryoSerializer``
        of the wrapped Java execution config.

        Example:
        ::

            >>> config.register_type_with_kryo_serializer("com.aaa.bbb.PojoClass",
            ...                                           "com.aaa.bbb.Serializer")

        :param type_class_name: The fully-qualified Java class name of the types serialized with
                                the given serializer.
        :param serializer_class_name: The fully-qualified Java class name of the serializer to
                                      use.
        """
        j_type_class = load_java_class(type_class_name)
        j_serializer_class = load_java_class(serializer_class_name)
        j_config = self._j_execution_config
        j_config.registerTypeWithKryoSerializer(j_type_class, j_serializer_class)
Ejemplo n.º 7
0
    def test_create_custom_state_backend(self):
        """
        Checks that a state backend created by a Java test factory is wrapped as a
        ``CustomStateBackend``.
        """
        gateway = get_gateway()
        j_config = gateway.jvm.org.apache.flink.configuration.Configuration()

        # The factory is a nested Java test class, hence the '$' in its name.
        factory_class_name = ("org.apache.flink.streaming.runtime.tasks."
                              "StreamTaskTest$TestMemoryStateBackendFactory")
        j_factory = load_java_class(factory_class_name).newInstance()
        j_class_loader = gateway.jvm.Thread.currentThread().getContextClassLoader()

        j_state_backend = j_factory.createFromConfig(j_config, j_class_loader)
        state_backend = _from_j_state_backend(j_state_backend)

        self.assertIsInstance(state_backend, CustomStateBackend)
Ejemplo n.º 8
0
    def add_default_kryo_serializer(
            self, type_class_name: str,
            serializer_class_name: str) -> 'ExecutionConfig':
        """
        Adds a new Kryo default serializer to the Runtime.

        Both names are loaded as Java classes and passed to ``addDefaultKryoSerializer`` of the
        wrapped Java execution config.

        Example:
        ::

            >>> config.add_default_kryo_serializer("com.aaa.bbb.PojoClass",
            ...                                    "com.aaa.bbb.Serializer")

        :param type_class_name: The fully-qualified Java class name of the types serialized with
                                the given serializer.
        :param serializer_class_name: The fully-qualified Java class name of the serializer to
                                      use.
        :return: This object.
        """
        j_type_class = load_java_class(type_class_name)
        j_serializer_class = load_java_class(serializer_class_name)
        j_config = self._j_execution_config
        j_config.addDefaultKryoSerializer(j_type_class, j_serializer_class)
        return self
    def register_type(self, type_class_name):
        """
        Registers a type, identified by its Java class name, with the serialization stack.

        A type that is eventually serialized as a POJO is registered with the POJO serializer.
        A type that ends up being serialized with Kryo is registered at Kryo to make sure that
        only tags are written.

        Example:
        ::

            >>> env.register_type("com.aaa.bbb.TypeClass")

        :param type_class_name: The fully-qualified Java class name of the type to register.
        """
        j_type_class = load_java_class(type_class_name)
        j_env = self._j_stream_execution_environment
        j_env.registerType(j_type_class)
Ejemplo n.º 10
0
    def register_kryo_type(self, type_class_name: str) -> 'ExecutionConfig':
        """
        Registers a type, identified by its Java class name, with the serialization stack.

        A type that is eventually serialized as a POJO is registered with the POJO serializer.
        A type that ends up being serialized with Kryo is registered at Kryo to make sure that
        only tags are written.

        Example:
        ::

            >>> config.register_kryo_type("com.aaa.bbb.KryoClass")

        :param type_class_name: The fully-qualified Java class name of the type to register.
        :return: This object.
        """
        j_type_class = load_java_class(type_class_name)
        j_config = self._j_execution_config
        j_config.registerKryoType(j_type_class)
        return self
Ejemplo n.º 11
0
    def set_options(self, options_factory_class_name: str):
        """
        Sets ``org.rocksdb.Options`` for the RocksDB instances.
        The options are not serializable and hold native code references, which is why they
        have to be supplied through a factory.

        The options produced by the factory are applied on top of the pre-defined options
        profile selected via :func:`set_predefined_options`. With the default profile
        (:data:`PredefinedOptions.DEFAULT`) the factory fully controls the RocksDB options.

        :param options_factory_class_name: The fully-qualified class name of the options
                                           factory in Java that lazily creates the RocksDB options.
                                           The options factory must have a default constructor.
        :raises ValueError: If the given class does not implement ``RocksDBOptionsFactory``.
        """
        gateway = get_gateway()
        JRocksDBOptionsFactory = \
            gateway.jvm.org.apache.flink.contrib.streaming.state.RocksDBOptionsFactory
        j_candidate_clz = load_java_class(options_factory_class_name)

        # Reject classes that are not RocksDBOptionsFactory implementations before instantiating.
        j_factory_interface = get_java_class(JRocksDBOptionsFactory)
        if not j_factory_interface.isAssignableFrom(j_candidate_clz):
            raise ValueError("The input class does not implement RocksDBOptionsFactory.")

        j_options_factory = j_candidate_clz.newInstance()
        self._j_rocks_db_state_backend.setRocksDBOptions(j_options_factory)