Ejemplo n.º 1
0
 def setUp(self):
     """Build a legacy (old-planner) batch table environment with parallelism 2.

     Sets ``self.env`` and ``self.t_env`` for the test methods; the Python
     function bundle size is forced to 1 via the table configuration.
     """
     super(PyFlinkOldBatchTableTestCase, self).setUp()
     exec_env = ExecutionEnvironment.get_execution_environment()
     exec_env.set_parallelism(2)
     table_env = BatchTableEnvironment.create(exec_env, TableConfig())
     # Bundle size 1: each Python-function bundle carries a single element.
     table_env.get_config().get_configuration().set_string(
         "python.fn-execution.bundle.size", "1")
     self.env = exec_env
     self.t_env = table_env
Ejemplo n.º 2
0
    def test_table_environment_with_blink_planner(self):
        """Round-trip test on the blink planner: CSV source -> select -> CSV sink.

        Registers a CSV source and sink, runs ``1 + a, b, c`` over the source,
        then reads every file the sink produced and checks the emitted rows.
        """
        t_env = BatchTableEnvironment.create(
            environment_settings=EnvironmentSettings.new_instance(
            ).in_batch_mode().use_blink_planner().build())

        # FIX: os.path.join was called with a single, pre-concatenated
        # argument ("tempdir + '/name'"); pass the components separately so
        # join actually does the joining.
        source_path = os.path.join(self.tempdir, 'streaming.csv')
        sink_path = os.path.join(self.tempdir, 'results')
        field_names = ["a", "b", "c"]
        field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
        data = [(1, 'hi', 'hello'), (2, 'hello', 'hello')]
        csv_source = self.prepare_csv_source(source_path, data, field_types,
                                             field_names)

        t_env.register_table_source("source", csv_source)

        t_env.register_table_sink(
            "sink", CsvTableSink(field_names, field_types, sink_path))
        source = t_env.scan("source")

        result = source.alias("a, b, c").select("1 + a, b, c")

        result.insert_into("sink")

        t_env.execute("blink_test")

        # Collect every line the sink wrote. Iterating a file object yields
        # lines until EOF, which replaces the manual readline()-until-''
        # loop (readline() never returns None, so that check was dead).
        results = []
        for root, dirs, files in os.walk(sink_path):
            for sub_file in files:
                with open(os.path.join(root, sub_file), 'r') as f:
                    results.extend(f)

        self.assert_equals(results, ['2,hi,hello\n', '3,hello,hello\n'])
Ejemplo n.º 3
0
 def setUp(self):
     """Build a batch table environment with parallelism 2 and 80mb of
     task off-heap memory, exposed as ``self.env`` / ``self.t_env``."""
     super(PyFlinkBatchTableTestCase, self).setUp()
     exec_env = ExecutionEnvironment.get_execution_environment()
     exec_env.set_parallelism(2)
     table_env = BatchTableEnvironment.create(exec_env, TableConfig())
     # Reserve off-heap task memory for the tests.
     table_env.get_config().get_configuration().set_string(
         "taskmanager.memory.task.off-heap.size", "80mb")
     self.env = exec_env
     self.t_env = table_env
Ejemplo n.º 4
0
 def test_create_table_environment_with_old_planner(self):
     """Old-planner batch settings must yield the legacy Java
     BatchTableEnvironmentImpl under the hood."""
     settings = EnvironmentSettings.new_instance() \
         .in_batch_mode().use_old_planner().build()
     t_env = BatchTableEnvironment.create(environment_settings=settings)
     impl_class_name = t_env._j_tenv.getClass().getName()
     self.assertEqual(
         impl_class_name,
         "org.apache.flink.table.api.bridge.java.internal.BatchTableEnvironmentImpl")
Ejemplo n.º 5
0
 def setUp(self):
     """Build a blink-planner batch table environment (parallelism 2,
     Python-function bundle size 1) as ``self.t_env``."""
     super(PyFlinkLegacyBlinkBatchTableTestCase, self).setUp()
     settings = EnvironmentSettings.new_instance() \
         .in_batch_mode().use_blink_planner().build()
     table_env = BatchTableEnvironment.create(environment_settings=settings)
     # Parallelism is set through the underlying Java execution environment.
     table_env._j_tenv.getPlanner().getExecEnv().setParallelism(2)
     table_env.get_config().get_configuration().set_string(
         "python.fn-execution.bundle.size", "1")
     self.t_env = table_env
Ejemplo n.º 6
0
 def setUp(self):
     """Build a blink-planner batch table environment with 80mb task
     off-heap memory and parallelism 2, exposed as ``self.t_env``."""
     super(PyFlinkBlinkBatchTableTestCase, self).setUp()
     settings = EnvironmentSettings.new_instance() \
         .in_batch_mode().use_blink_planner().build()
     table_env = BatchTableEnvironment.create(environment_settings=settings)
     table_env.get_config().get_configuration().set_string(
         "taskmanager.memory.task.off-heap.size", "80mb")
     # Parallelism is set through the underlying Java execution environment.
     table_env._j_tenv.getPlanner().getExecEnv().setParallelism(2)
     self.t_env = table_env
Ejemplo n.º 7
0
    def test_create_table_environment_with_blink_planner(self):
        """Blink batch settings must select the blink BatchPlanner."""
        settings = EnvironmentSettings.new_instance() \
            .in_batch_mode().use_blink_planner().build()
        t_env = BatchTableEnvironment.create(environment_settings=settings)

        planner_class_name = t_env._j_tenv.getPlanner().getClass().getName()

        self.assertEqual(
            planner_class_name,
            "org.apache.flink.table.planner.delegation.BatchPlanner")
Ejemplo n.º 8
0
    def get_batch_table_environment(self) -> BatchTableEnvironment:
        """
        Return the BatchTableEnvironment, creating it lazily on first access.

        If no BatchTableEnvironment has been set yet, one is created from the
        default ExecutionEnvironment and cached for subsequent calls.

        :return: the BatchTableEnvironment.

        .. versionadded:: 1.11.0
        """
        env = self._batch_tab_env
        if env is None:
            env = BatchTableEnvironment.create(
                ExecutionEnvironment.get_execution_environment())
            self._batch_tab_env = env
        return env
Ejemplo n.º 9
0
    def test_create_table_environment(self):
        """A TableConfig passed at creation time must be readable back from
        the resulting environment with the same settings."""
        config = TableConfig()
        config.set_max_generated_code_length(32000)
        config.set_null_check(False)
        config.set_local_timezone("Asia/Shanghai")

        exec_env = ExecutionEnvironment.get_execution_environment()
        t_env = BatchTableEnvironment.create(exec_env, config)

        actual_config = t_env.get_config()

        self.assertFalse(actual_config.get_null_check())
        self.assertEqual(actual_config.get_max_generated_code_length(), 32000)
        self.assertEqual(actual_config.get_local_timezone(), "Asia/Shanghai")
Ejemplo n.º 10
0
    def get_default() -> Optional[MLEnvironment]:
        """
        Return the MLEnvironment registered under the default MLEnvironmentId,
        creating and caching it from the JVM-side factory on first use.

        :return: the default MLEnvironment.

        .. versionadded:: 1.11.0
        """
        with MLEnvironmentFactory._lock:
            default_id = MLEnvironmentFactory._default_ml_environment_id
            if MLEnvironmentFactory._map[default_id] is None:
                # Wrap the four Java-side environments from the JVM factory
                # in their Python counterparts and cache the result.
                j_ml_env = get_gateway().\
                    jvm.org.apache.flink.ml.common.MLEnvironmentFactory.getDefault()
                MLEnvironmentFactory._map[default_id] = MLEnvironment(
                    ExecutionEnvironment(j_ml_env.getExecutionEnvironment()),
                    StreamExecutionEnvironment(j_ml_env.getStreamExecutionEnvironment()),
                    BatchTableEnvironment(j_ml_env.getBatchTableEnvironment()),
                    StreamTableEnvironment(j_ml_env.getStreamTableEnvironment()))

            return MLEnvironmentFactory._map[default_id]
Ejemplo n.º 11
0
    def test_explain_with_multi_sinks_with_blink_planner(self):
        """explain(extended=True) over a job with two INSERT statements on the
        blink planner must return a (possibly unicode) string."""
        settings = EnvironmentSettings.new_instance() \
            .in_batch_mode().use_blink_planner().build()
        t_env = BatchTableEnvironment.create(environment_settings=settings)
        source = t_env.from_elements([(1, "Hi", "Hello"),
                                      (2, "Hello", "Hello")], ["a", "b", "c"])
        field_names = ["a", "b", "c"]
        field_types = [
            DataTypes.BIGINT(),
            DataTypes.STRING(),
            DataTypes.STRING()
        ]
        # Two CSV sinks so the explained plan covers multiple sinks.
        for sink_name, sink_file in (("sink1", "path1"), ("sink2", "path2")):
            t_env.register_table_sink(
                sink_name, CsvTableSink(field_names, field_types, sink_file))

        t_env.sql_update("insert into sink1 select * from %s where a > 100" %
                         source)
        t_env.sql_update("insert into sink2 select * from %s where a < 100" %
                         source)

        actual = t_env.explain(extended=True)
        self.assertIsInstance(actual, (str, unicode))
Ejemplo n.º 12
0
 def setUp(self):
     """Build a blink-planner batch table environment with parallelism 2,
     exposed as ``self.t_env``."""
     super(PyFlinkBlinkBatchTableTestCase, self).setUp()
     settings = EnvironmentSettings.new_instance() \
         .in_batch_mode().use_blink_planner().build()
     table_env = BatchTableEnvironment.create(environment_settings=settings)
     # Parallelism is set through the underlying Java execution environment.
     table_env._j_tenv.getPlanner().getExecEnv().setParallelism(2)
     self.t_env = table_env
Ejemplo n.º 13
0
 def setUp(self):
     """Build a batch table environment with parallelism 2, exposed as
     ``self.env`` / ``self.t_env``."""
     super(PyFlinkBatchTableTestCase, self).setUp()
     exec_env = ExecutionEnvironment.get_execution_environment()
     exec_env.set_parallelism(2)
     self.env = exec_env
     self.t_env = BatchTableEnvironment.create(exec_env, TableConfig())