예제 #1
0
    def test_pipeline_from_and_to_java_json(self):
        """Round-trip a pipeline through JSON generated by the Java API.

        The JSON is loaded into a ``Pipeline`` and serialized back with
        ``to_json()``. The first loaded stage is then applied to a
        four-column table, and the sink is expected to receive the rows
        ``"1,2"`` and ``"4,3"``. Finally the re-serialized JSON must equal
        the original Java JSON.
        """
        # JSON generated from the Java API. The literal pieces are left
        # untouched so that the final equality check compares exact text.
        java_json = (
            '[{"stageClassName":"org.apache.flink.ml.pipeline.'
            'UserDefinedPipelineStages$SelectColumnTransformer",'
            '"stageJson":"{\\"selectedCols\\":\\"[\\\\\\"a\\\\\\",'
            '\\\\\\"b\\\\\\"]\\"}"}]'
        )

        # Load the JSON, then serialize the pipeline straight back.
        pipeline = Pipeline()
        pipeline.load_json(java_json)
        python_json = pipeline.to_json()

        ml_env = MLEnvironmentFactory().get_default()
        t_env = ml_env.get_stream_table_environment()

        # Sink with two BIGINT columns named 'a' and 'b'.
        sink = source_sink_utils.TestAppendSink(
            ['a', 'b'],
            [DataTypes.BIGINT(), DataTypes.BIGINT()])
        t_env.register_table_sink("TestJsonResults", sink)

        input_rows = [(1, 2, 3, 4), (4, 3, 2, 1)]
        input_table = t_env.from_elements(input_rows, ['a', 'b', 'c', 'd'])

        # Only the first stage of the loaded pipeline is exercised.
        first_stage = pipeline.get_stages()[0]
        transformed = first_stage.transform(t_env, input_table)
        exec_insert_table(transformed, "TestJsonResults")

        sink_rows = source_sink_utils.results()

        self.assert_equals(sink_rows, ["1,2", "4,3"])
        self.assertEqual(python_json, java_json)
예제 #2
0
 def test_fit_table(self):
     """Fit the stage from ``create_pipeline_stage`` and check ``fitted``.

     A new ML environment id is requested from ``MLEnvironmentFactory``.
     Both the input table and the ``fit`` call use the stream table
     environment obtained from that environment.
     """
     # Avoid the name ``id`` here: it would shadow the builtin ``id()``.
     env_id = MLEnvironmentFactory.get_new_ml_environment_id()
     env = MLEnvironmentFactory.get(env_id)
     table = env.get_stream_table_environment().from_elements([(1, 2, 3)])
     estimator = self.create_pipeline_stage()
     estimator.fit(env.get_stream_table_environment(), table)
     self.assertTrue(estimator.fitted)
    def test_java_transformer(self):
        """Apply ``WrapperTransformer`` with selected columns ``a`` and ``b``.

        The transformed table is inserted into an append sink with two
        BIGINT columns, and the collected rows are expected to be ``"1,2"``
        and ``"4,3"``.
        """
        ml_env = MLEnvironmentFactory().get_default()
        t_env = ml_env.get_stream_table_environment()

        sink = source_sink_utils.TestAppendSink(
            ['a', 'b'], [DataTypes.BIGINT(), DataTypes.BIGINT()])
        t_env.register_table_sink("TransformerResults", sink)

        input_rows = [(1, 2, 3, 4), (4, 3, 2, 1)]
        input_table = t_env.from_elements(input_rows, ['a', 'b', 'c', 'd'])

        transformer = WrapperTransformer(selected_cols=["a", "b"])
        result_table = transformer.transform(t_env, input_table)
        # wait() is called on the insert result before the sink is read.
        result_table.execute_insert("TransformerResults").wait()

        sink_rows = source_sink_utils.results()
        self.assert_equals(sink_rows, ["1,2", "4,3"])
예제 #4
0
    def test_pipeline(self):
        """Fit a transformer + estimator pipeline and check its predictions.

        The pipeline is built from a ``PythonAddTransformer`` followed by a
        ``PythonEstimator``. It is fitted on a training table, applied to a
        serving table, and the result is written to a sink with a single
        BOOLEAN column ``predict_result``.
        """
        ml_env = MLEnvironmentFactory().get_default()
        t_env = ml_env.get_stream_table_environment()

        train_rows = [(1, 2), (1, 4), (1, 0), (10, 2), (10, 4), (10, 0)]
        train_table = t_env.from_elements(train_rows, ['a', 'b'])
        serving_table = t_env.from_elements([(0, 0), (12, 3)], ['a', 'b'])

        sink = source_sink_utils.TestAppendSink(
            ['predict_result'], [DataTypes.BOOLEAN()])
        t_env.register_table_sink("PredictResults", sink)

        # Transformer: outputs a "features" column, the sum of a and b.
        add_stage = PythonAddTransformer(
            selected_cols=["a", "b"], output_col="features")

        # Estimator: reads "features" and writes "predict_result".
        estimator_stage = (
            PythonEstimator()
            .set_vector_col("features")
            .set_prediction_col("predict_result")
        )

        # Pipeline: transformer first, estimator second.
        pipeline = (
            Pipeline()
            .append_stage(add_stage)
            .append_stage(estimator_stage)
        )
        fitted = pipeline.fit(t_env, train_table)
        predictions = fitted.transform(t_env, serving_table)
        predictions.insert_into('PredictResults')

        # Execute the job.
        t_env.execute('PipelineITCase')

        sink_rows = source_sink_utils.results()
        # The first input is false since 0 + 0 is smaller than the max_sum 14.
        # The second input is true since 12 + 3 is bigger than the max_sum 14.
        self.assert_equals(sink_rows, ["false", "true"])