Beispiel #1
0
    def test_basic_array_type_info(self):
        # Sends rows of (int, float array, string array) through an identity map and
        # checks the string form collected by the sink. Every row holds a None in a
        # different array slot; the expected output shows each of them as "null".
        def array_row_type():
            # A fresh type info is built per call: one for the source, one for the map output.
            return Types.ROW([Types.INT(),
                              Types.BASIC_ARRAY(Types.FLOAT()),
                              Types.BASIC_ARRAY(Types.STRING())])

        rows = [
            (1, [1.1, None, 1.3], [None, 'hi', 'flink']),
            (2, [None, 2.2, 2.3], ['hello', None, 'flink']),
            (3, [3.1, 3.2, None], ['hello', 'hi', None]),
        ]
        source = self.env.from_collection(rows, type_info=array_row_type())

        mapped = source.map(lambda row: row, output_type=array_row_type())
        mapped.add_sink(self.test_sink)
        self.env.execute("test basic array type info")

        # Both sides are sorted before comparing, so the check ignores arrival order.
        actual = sorted(self.test_sink.get_results())
        wanted = sorted([
            '1,[1.1, null, 1.3],[null, hi, flink]',
            '2,[null, 2.2, 2.3],[hello, null, flink]',
            '3,[3.1, 3.2, null],[hello, hi, null]',
        ])
        self.assertEqual(wanted, actual)
    def test_from_collection_with_data_types(self):
        # Part one: from_collection over plain (non-tuple) elements with an explicit type.
        words = self.env.from_collection(['Hi', 'Hello'], type_info=Types.STRING())
        words.add_sink(self.test_sink)
        self.env.execute("test from collection with single object")
        actual = sorted(self.test_sink.get_results(False))
        wanted = sorted(['Hello', 'Hi'])
        self.assertEqual(wanted, actual)

        # Part two: from_collection over tuples, one field per entry of the row type below.
        first_record = (
            1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
            bytearray(b'flink'), 'pyflink', datetime.date(2014, 9, 13),
            datetime.time(hour=12, minute=0, second=0, microsecond=123000),
            datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [1, 2, 3],
            decimal.Decimal('1000000000000000000.05'),
            decimal.Decimal('1000000000000000000.0599999999999'
                            '9999899999999999'),
        )
        # 43878 and 9147483648 are declared as SHORT and INT; the expected output
        # below lists them as -21658 and 557549056.
        second_record = (
            2, None, 2, True, 43878, 9147483648, 9.87, 2.98936,
            bytearray(b'flink'), 'pyflink', datetime.date(2015, 10, 14),
            datetime.time(hour=11, minute=2, second=2, microsecond=234500),
            datetime.datetime(2020, 4, 15, 8, 2, 6, 235000), [2, 4, 6],
            decimal.Decimal('2000000000000000000.74'),
            decimal.Decimal('2000000000000000000.061111111111111'
                            '11111111111111'),
        )
        row_type = Types.ROW([
            Types.LONG(), Types.LONG(), Types.SHORT(), Types.BOOLEAN(),
            Types.SHORT(), Types.INT(), Types.FLOAT(), Types.DOUBLE(),
            Types.PICKLED_BYTE_ARRAY(), Types.STRING(),
            Types.SQL_DATE(), Types.SQL_TIME(), Types.SQL_TIMESTAMP(),
            Types.BASIC_ARRAY(Types.LONG()),
            Types.BIG_DEC(), Types.BIG_DEC(),
        ])
        records = self.env.from_collection([first_record, second_record], type_info=row_type)
        records.add_sink(self.test_sink)
        self.env.execute("test from collection with tuple object")

        # With explicit data types the collected results come back in row format
        # ("+I[...]"). The expected strings show the byte array as a list of ints
        # and the time values without their microseconds.
        first_expected = (
            '+I[1, null, 1, true, 32767, -2147483648, 1.23, 1.98932, [102, 108, 105, 110, 107], '
            'pyflink, 2014-09-13, 12:00:00, 2018-03-11 03:00:00.123, [1, 2, 3], '
            '1000000000000000000.05, 1000000000000000000.05999999999999999899999999999]'
        )
        second_expected = (
            '+I[2, null, 2, true, -21658, 557549056, 9.87, 2.98936, [102, 108, 105, 110, 107], '
            'pyflink, 2015-10-14, 11:02:02, 2020-04-15 08:02:06.235, [2, 4, 6], '
            '2000000000000000000.74, 2000000000000000000.06111111111111111111111111111]'
        )
        actual = sorted(self.test_sink.get_results(False))
        wanted = sorted([first_expected, second_expected])
        self.assertEqual(wanted, actual)