def test_map_function_without_data_types(self):
    """Map a typed row stream through ``MyMapFunction`` and verify the sink output.

    The source is declared as ROW(STRING, BIG_DEC); ``map()`` is called
    without an explicit output type. The results gathered by
    ``self.test_sink`` are compared, order-insensitively, against the
    expected row representations.
    """
    self.env.set_parallelism(1)

    input_rows = [
        ('ab', decimal.Decimal(1)),
        ('bdc', decimal.Decimal(2)),
        ('cfgs', decimal.Decimal(3)),
        ('deeefg', decimal.Decimal(4)),
    ]
    input_type = Types.ROW([Types.STRING(), Types.BIG_DEC()])
    source = self.env.from_collection(input_rows, type_info=input_type)

    mapped = source.map(MyMapFunction())
    mapped.add_sink(self.test_sink)
    self.env.execute('map_function_test')

    # NOTE(review): the positional True presumably asks the sink for
    # Python-object results -- confirm against the sink helper.
    actual = self.test_sink.get_results(True)

    # NOTE(review): the middle field matches the length of the input string,
    # which is presumably what MyMapFunction computes -- confirm.
    wanted = sorted([
        "<Row('ab', 2, Decimal('1'))>",
        "<Row('bdc', 3, Decimal('2'))>",
        "<Row('cfgs', 4, Decimal('3'))>",
        "<Row('deeefg', 6, Decimal('4'))>",
    ])
    actual.sort()
    self.assertEqual(wanted, actual)
Beispiel #2
0
 def test_map_function_without_data_types(self):
     """Map a typed row stream through ``MyMapFunction`` and verify collected output.

     The source is declared as ROW(STRING, BIG_DEC); ``map()`` is called
     without an explicit output type. The mapped stream is gathered with
     ``DataStreamCollectUtil`` and compared, order-insensitively, against
     the expected tuple representations.
     """
     self.env.set_parallelism(1)

     input_rows = [
         ('ab', decimal.Decimal(1)),
         ('bdc', decimal.Decimal(2)),
         ('cfgs', decimal.Decimal(3)),
         ('deeefg', decimal.Decimal(4)),
     ]
     input_type = Types.ROW([Types.STRING(), Types.BIG_DEC()])
     source = self.env.from_collection(input_rows, type_info=input_type)

     mapped = source.map(MyMapFunction())
     collector = DataStreamCollectUtil()
     collector.collect(mapped)
     self.env.execute('map_function_test')

     actual = collector.results()

     # NOTE(review): the middle field matches the length of the input string,
     # which is presumably what MyMapFunction computes -- confirm.
     wanted = sorted([
         "('ab', 2, Decimal('1'))",
         "('bdc', 3, Decimal('2'))",
         "('cfgs', 4, Decimal('3'))",
         "('deeefg', 6, Decimal('4'))",
     ])
     actual.sort()
     self.assertEqual(wanted, actual)
Beispiel #3
0
    def test_from_java_type(self):
        """Check that type infos survive a Python -> Java -> Python round trip.

        For every type info below, the object returned by
        ``get_java_type_info()`` is passed to ``_from_java_type`` and the
        result must compare equal to the original Python type info.
        """

        def assert_round_trip(type_info):
            # Convert to the Java representation and back, then compare.
            self.assertEqual(type_info,
                             _from_java_type(type_info.get_java_type_info()))

        # Basic types.
        assert_round_trip(Types.INT())
        assert_round_trip(Types.SHORT())
        assert_round_trip(Types.LONG())
        assert_round_trip(Types.FLOAT())
        assert_round_trip(Types.DOUBLE())
        assert_round_trip(Types.CHAR())
        assert_round_trip(Types.BYTE())
        assert_round_trip(Types.BIG_INT())
        assert_round_trip(Types.BIG_DEC())

        # SQL date/time types.
        assert_round_trip(Types.SQL_DATE())
        assert_round_trip(Types.SQL_TIME())
        assert_round_trip(Types.SQL_TIMESTAMP())

        # Composite types.
        assert_round_trip(Types.ROW([Types.INT(), Types.STRING()]))
        assert_round_trip(Types.TUPLE([Types.CHAR(), Types.INT()]))

        # Array types.
        assert_round_trip(Types.PRIMITIVE_ARRAY(Types.INT()))
        assert_round_trip(Types.OBJECT_ARRAY(Types.SQL_DATE()))
        assert_round_trip(Types.PICKLED_BYTE_ARRAY())

        # SQL_DATE is checked a second time here, mirroring the original
        # sequence of assertions.
        assert_round_trip(Types.SQL_DATE())

        # Collection types.
        assert_round_trip(Types.MAP(Types.INT(), Types.STRING()))
        assert_round_trip(Types.LIST(Types.INT()))
    def test_from_collection_with_data_types(self):
        """Verify ``from_collection`` output when ``type_info`` is supplied.

        Two jobs are executed against ``self.test_sink``: one over plain
        strings typed as STRING, and one over 16-field tuples typed as a ROW.
        Each job's sink results are compared, order-insensitively, with the
        expected string representations.
        """
        # verify from_collection for the collection with single object.
        greetings = self.env.from_collection(['Hi', 'Hello'], type_info=Types.STRING())
        greetings.add_sink(self.test_sink)
        self.env.execute("test from collection with single object")
        actual = self.test_sink.get_results(False)
        wanted = ['Hello', 'Hi']
        actual.sort()
        wanted.sort()
        self.assertEqual(wanted, actual)

        # verify from_collection for the collection with multiple objects like tuple.
        first_record = (
            1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
            bytearray(b'flink'), 'pyflink', datetime.date(2014, 9, 13),
            datetime.time(hour=12, minute=0, second=0, microsecond=123000),
            datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [1, 2, 3],
            decimal.Decimal('1000000000000000000.05'),
            decimal.Decimal('1000000000000000000.0599999999999'
                            '9999899999999999'),
        )
        # 43878 and 9147483648 are declared as SHORT and INT below; the
        # expected output contains -21658 and 557549056 for those fields.
        second_record = (
            2, None, 2, True, 43878, 9147483648, 9.87, 2.98936,
            bytearray(b'flink'), 'pyflink', datetime.date(2015, 10, 14),
            datetime.time(hour=11, minute=2, second=2, microsecond=234500),
            datetime.datetime(2020, 4, 15, 8, 2, 6, 235000), [2, 4, 6],
            decimal.Decimal('2000000000000000000.74'),
            decimal.Decimal('2000000000000000000.061111111111111'
                            '11111111111111'),
        )
        field_types = [
            Types.LONG(),
            Types.LONG(),
            Types.SHORT(),
            Types.BOOLEAN(),
            Types.SHORT(),
            Types.INT(),
            Types.FLOAT(),
            Types.DOUBLE(),
            Types.PICKLED_BYTE_ARRAY(),
            Types.STRING(),
            Types.SQL_DATE(),
            Types.SQL_TIME(),
            Types.SQL_TIMESTAMP(),
            Types.BASIC_ARRAY(Types.LONG()),
            Types.BIG_DEC(),
            Types.BIG_DEC(),
        ]
        typed_rows = self.env.from_collection([first_record, second_record],
                                              type_info=Types.ROW(field_types))
        typed_rows.add_sink(self.test_sink)
        self.env.execute("test from collection with tuple object")
        actual = self.test_sink.get_results(False)

        # if user specifies data types of input data, the collected result should be in row format.
        wanted = [
            '+I[1, null, 1, true, 32767, -2147483648, 1.23, 1.98932, [102, 108, 105, 110, 107], '
            'pyflink, 2014-09-13, 12:00:00, 2018-03-11 03:00:00.123, [1, 2, 3], '
            '1000000000000000000.05, 1000000000000000000.05999999999999999899999999999]',
            '+I[2, null, 2, true, -21658, 557549056, 9.87, 2.98936, [102, 108, 105, 110, 107], '
            'pyflink, 2015-10-14, 11:02:02, 2020-04-15 08:02:06.235, [2, 4, 6], '
            '2000000000000000000.74, 2000000000000000000.06111111111111111111111111111]'
        ]
        actual.sort()
        wanted.sort()
        self.assertEqual(wanted, actual)