Beispiel #1
0
 def test_mixed_with_built_in_functions_without_retract(self):
     """Run the ``concat`` Python aggregate next to built-in aggregates.

     The source view is built directly from in-memory elements and queried
     with a single aggregation that has no GROUP BY. Only the last collected
     row is compared, and it is expected to carry ``RowKind.UPDATE_AFTER``.
     """
     self.t_env.get_config().set("parallelism.default", "1")
     self.t_env.create_temporary_system_function(
         "concat", ConcatAggregateFunction())

     source_rows = [
         ('Hi', 2),
         ('Hi', 4),
         (None, None),
         ('hello2', 8),
         ('hello', 10),
     ]
     source_table = self.t_env.from_elements(source_rows, ['b', 'c'])
     self.t_env.create_temporary_view("source", source_table)

     query = (
         "select concat(b, ',') as a, "
         "FIRST_VALUE(b) as b, "
         "LAST_VALUE(b) as c, "
         "COUNT(c) as d, "
         "COUNT(1) as e, "
         "LISTAGG(b) as f,"
         "LISTAGG(b, '|') as g,"
         "MAX(c) as h,"
         "MAX(cast(c as float) + 1) as i,"
         "MIN(c) as j,"
         "MIN(cast(c as decimal) + 1) as k,"
         "SUM(c) as l,"
         "SUM(cast(c as float) + 1) as m "
         "from source"
     )
     collected = list(self.t_env.sql_query(query).execute().collect())

     expected = Row(
         'Hi,Hi,hello,hello2', 'Hi', 'hello', 4, 5,
         'Hi,Hi,hello2,hello', 'Hi|Hi|hello2|hello',
         10, 11.0, 2, Decimal(3.0), 24, 28.0)
     expected.set_row_kind(RowKind.UPDATE_AFTER)
     # Only the final collected row is checked.
     self.assertEqual(collected[-1], expected)
Beispiel #2
0
 def decode_from_stream(self, in_stream, nested):
     """Decode one row from ``in_stream`` and return it as a ``Row``.

     The raw decoding is delegated to ``_decode_one_row_from_stream``, which
     yields the numeric row kind and the field values. The resulting row is
     labelled with ``self.field_names`` and the matching ``RowKind``.
     """
     kind_value, values = self._decode_one_row_from_stream(in_stream, nested)
     decoded = Row(*values)
     decoded.set_field_names(self.field_names)
     decoded.set_row_kind(RowKind(kind_value))
     return decoded
Beispiel #3
0
class RowDataConverter(DataConverter):
    """Convert between ``Row`` objects and ``[row_kind_value, field_values]``.

    A single ``Row`` and a single external list are allocated up front and
    overwritten on every conversion, so a returned object is only valid until
    the next call on the same converter.
    """

    def __init__(self, field_data_converters: List[DataConverter],
                 field_names: List[str]):
        """Store the per-field converters and allocate the reusable buffers.

        :param field_data_converters: One converter per field, in field order.
        :param field_names: Names assigned to the reused internal ``Row``.
        """
        self._field_data_converters = field_data_converters

        # Reusable internal representation.
        self._reuse_row = Row()
        self._reuse_row.set_field_names(field_names)

        # Reusable external representation: [row kind value, field values].
        self._reuse_external_row_data = [None] * len(field_data_converters)
        self._reuse_external_row = [None, self._reuse_external_row_data]

    def to_internal(self, value) -> IN:
        """Fill the reused ``Row`` from ``value`` and return it.

        ``value[0]`` is the numeric row kind and ``value[1]`` the field
        values; ``None`` is passed through unchanged.
        """
        if value is None:
            return None

        converters = self._field_data_converters
        reused = self._reuse_row
        reused._values = [
            converters[pos].to_internal(field)
            for pos, field in enumerate(value[1])
        ]
        reused.set_row_kind(RowKind(value[0]))
        return reused

    def to_external(self, value: Row) -> OUT:
        """Fill the reused external list from ``value`` and return it.

        Slot 0 receives the numeric value of the row kind, slot 1 is the
        reused list of converted field values. ``None`` is passed through
        unchanged.
        """
        if value is None:
            return None

        self._reuse_external_row[0] = value.get_row_kind().value
        converters = self._field_data_converters
        target = self._reuse_external_row_data
        for pos, field in enumerate(value._values):
            target[pos] = converters[pos].to_external(field)
        return self._reuse_external_row
Beispiel #4
0
 def test_mixed_with_built_in_functions_with_retract(self):
     """Run the ``concat`` Python aggregate on top of a grouped query.

     The aggregated input is the view ``retract_table``, which is itself the
     result of a grouped ``LAST_VALUE`` query over the source view. Only the
     last collected row is compared, and it is expected to carry
     ``RowKind.UPDATE_AFTER``.
     """
     self.env.set_parallelism(1)
     self.t_env.create_temporary_system_function(
         "concat", ConcatAggregateFunction())

     source_rows = [
         (1, 'Hi_', 1), (1, 'Hi', 2),
         (2, 'Hi_', 3), (2, 'Hi', 4),
         (3, None, None), (3, None, None),
         (4, 'hello2_', 7), (4, 'hello2', 8),
         (5, 'hello_', 9), (5, 'hello', 10),
     ]
     source_table = self.t_env.from_elements(source_rows, ['a', 'b', 'c'])
     self.t_env.create_temporary_view("source", source_table)

     grouped_query = (
         "select a, LAST_VALUE(b) as b, LAST_VALUE(c) as c from source group by a")
     grouped_table = self.t_env.sql_query(grouped_query)
     self.t_env.create_temporary_view("retract_table", grouped_table)

     final_query = (
         "select concat(b, ',') as a, "
         "FIRST_VALUE(b) as b"
         " from retract_table"
     )
     collected = list(self.t_env.sql_query(final_query).execute().collect())

     expected = Row('Hi,Hi,hello,hello2', 'Hi')
     expected.set_row_kind(RowKind.UPDATE_AFTER)
     # Only the final collected row is checked.
     self.assertEqual(collected[-1], expected)
Beispiel #5
0
    def to_internal(self, value) -> IN:
        """Build a fresh ``Row`` from ``[row_kind_value, field_values]``.

        Each field value goes through the converter at the same position.
        ``None`` is passed through unchanged.
        """
        if value is not None:
            converters = self._field_data_converters
            converted = Row()
            converted._values = [
                converters[pos].to_internal(field)
                for pos, field in enumerate(value[1])
            ]
            converted.set_field_names(self._field_names)
            converted.set_row_kind(RowKind(value[0]))
            return converted

        return None
Beispiel #6
0
 def test_cython_row_coder(self):
     """Compare the Python and Cython row coders for every row kind.

     The row has 15 named bigint fields; fields at even positions are
     ``None`` and the others hold their own index.
     """
     from pyflink.common import Row, RowKind
     field_count = 15
     field_names = ['f{}'.format(i) for i in range(field_count)]
     named_values = {
         name: (pos if pos % 2 else None)
         for pos, name in enumerate(field_names)
     }
     row = Row(**named_values)
     data = [row]

     python_field_coders = [
         coder_impl.RowCoderImpl(
             [coder_impl.BigIntCoderImpl() for _ in range(field_count)],
             row._fields)
     ]
     cython_field_coders = [
         coder_impl_fast.RowCoderImpl(
             [coder_impl_fast.BigIntCoderImpl() for _ in range(field_count)],
             row._fields)
     ]

     # The same row object is re-tagged and checked once per row kind.
     row_kinds = (RowKind.INSERT, RowKind.UPDATE_BEFORE,
                  RowKind.UPDATE_AFTER, RowKind.DELETE)
     for kind in row_kinds:
         row.set_row_kind(kind)
         self.check_cython_coder(python_field_coders, cython_field_coders, data)
Beispiel #7
0
 def test_cython_row_coder(self):
     """Compare the Python and Cython row coders for every row kind.

     The row has two positional bigint fields: ``None`` at position 0 and
     ``1`` at position 1.
     """
     from pyflink.common import Row, RowKind
     field_count = 2
     values = [(pos if pos % 2 else None) for pos in range(field_count)]
     row = Row(*values)
     data = [row]

     python_field_coders = [
         coder_impl.RowCoderImpl(
             [coder_impl.BigIntCoderImpl() for _ in range(field_count)])
     ]
     cython_field_coders = [
         coder_impl_fast.RowCoderImpl(
             [coder_impl_fast.BigIntCoderImpl() for _ in range(field_count)])
     ]

     # The same row object is re-tagged and checked once per row kind.
     row_kinds = (RowKind.INSERT, RowKind.UPDATE_BEFORE,
                  RowKind.UPDATE_AFTER, RowKind.DELETE)
     for kind in row_kinds:
         row.set_row_kind(kind)
         self.check_cython_coder(python_field_coders, cython_field_coders,
                                 [data])
Beispiel #8
0
    def decode_from_stream(self, in_stream, length=0) -> Row:
        """Decode one ``Row`` from ``in_stream``.

        A mask is read first. Its first ``ROW_KIND_BIT_SIZE`` entries encode
        the row kind (least significant bit first) and the entries after them
        flag which fields are null. ``length`` is not used here.
        """
        mask = self._mask_utils.read_mask(in_stream)

        # Null fields occupy no bytes in the stream, so only the non-null
        # ones are handed to their field coder.
        fields = []
        for idx in range(self._field_count):
            if mask[idx + ROW_KIND_BIT_SIZE]:
                fields.append(None)
            else:
                fields.append(
                    self._field_coders[idx].decode_from_stream(in_stream))

        # compute the row_kind value
        row_kind_value = sum(
            int(mask[bit]) * 2 ** bit for bit in range(ROW_KIND_BIT_SIZE))

        decoded = Row(*fields)
        decoded.set_field_names(self._field_names)
        decoded.set_row_kind(RowKind(row_kind_value))
        return decoded
Beispiel #9
0
 def test_mixed_with_built_in_functions_with_retract(self):
     """Run ``concat`` next to many built-in aggregates on a grouped query.

     The aggregated input is the view ``retract_table``, which is itself the
     result of a grouped ``LAST_VALUE`` query over the source view. Only the
     last collected row is compared, and it is expected to carry
     ``RowKind.UPDATE_AFTER``.
     """
     self.t_env.get_config().get_configuration().set_string("parallelism.default", "1")
     self.t_env.create_temporary_system_function(
         "concat", ConcatAggregateFunction())

     source_rows = [
         (1, 'Hi_', 1), (1, 'Hi', 2),
         (2, 'Hi_', 3), (2, 'Hi', 4),
         (3, None, None), (3, None, None),
         (4, 'hello2_', 7), (4, 'hello2', 8),
         (5, 'hello_', 9), (5, 'hello', 10),
     ]
     source_table = self.t_env.from_elements(source_rows, ['a', 'b', 'c'])
     self.t_env.create_temporary_view("source", source_table)

     grouped_query = (
         "select a, LAST_VALUE(b) as b, LAST_VALUE(c) as c from source group by a")
     grouped_table = self.t_env.sql_query(grouped_query)
     self.t_env.create_temporary_view("retract_table", grouped_table)

     final_query = (
         "select concat(b, ',') as a, "
         "FIRST_VALUE(b) as b, "
         "LAST_VALUE(b) as c, "
         "COUNT(c) as d, "
         "COUNT(1) as e, "
         "LISTAGG(b) as f,"
         "LISTAGG(b, '|') as g,"
         "MAX(c) as h,"
         "MAX(cast(c as float) + 1) as i,"
         "MIN(c) as j,"
         "MIN(cast(c as decimal) + 1) as k,"
         "SUM(c) as l,"
         "SUM(cast(c as float) + 1) as m,"
         "AVG(c) as n,"
         "AVG(cast(c as double) + 1) as o,"
         "STDDEV_POP(cast(c as float)),"
         "STDDEV_SAMP(cast(c as float)),"
         "VAR_POP(cast(c as float)),"
         "VAR_SAMP(cast(c as float))"
         " from retract_table"
     )
     collected = list(self.t_env.sql_query(final_query).execute().collect())

     expected = Row(
         'Hi,Hi,hello,hello2', 'Hi', 'hello', 4, 5,
         'Hi,Hi,hello2,hello', 'Hi|Hi|hello2|hello',
         10, 11.0, 2, Decimal(3.0), 24, 28.0, 6, 7.0,
         3.1622777, 3.6514838, 10.0, 13.333333)
     expected.set_row_kind(RowKind.UPDATE_AFTER)
     # Only the final collected row is checked.
     self.assertEqual(collected[-1], expected)
Beispiel #10
0
 def __next__(self):
     """Return the next record of the wrapped Java iterator as a ``Row``.

     The Java side hands back a sequence whose first element encodes the row
     kind as a big-endian unsigned integer; the remaining elements are the
     per-field payloads. An empty payload becomes ``None``.

     :raises StopIteration: when the Java iterator has no more elements.
     """
     java_iterator = self._j_closeable_iterator
     if not java_iterator.hasNext():
         raise StopIteration("No more data.")

     to_pickled_bytes = get_gateway().jvm.PythonBridgeUtils.getPickledBytesFromRow
     pickled = to_pickled_bytes(java_iterator.next(), self._j_field_data_types)

     kind = RowKind(int.from_bytes(pickled[0], byteorder='big', signed=False))
     payloads = list(pickled[1:])
     values = [
         None if len(payload) == 0
         else pickled_bytes_to_python_converter(payload, data_type)
         for payload, data_type in zip(payloads, self._data_types)
     ]

     next_row = Row(*values)
     next_row.set_row_kind(kind)
     return next_row
Beispiel #11
0
 def test_row_coder(self):
     """Check ``RowCoder`` with a ten-field positional row for every row kind.

     Fields at even positions are ``None``; the others hold their own index.
     """
     from pyflink.common import Row, RowKind
     field_coder = BigIntCoder()
     field_count = 10
     coder = RowCoder([field_coder] * field_count)
     values = [(pos if pos % 2 else None) for pos in range(field_count)]
     v = Row(*values)

     # The same row object is re-tagged and checked once per row kind.
     row_kinds = (RowKind.INSERT, RowKind.UPDATE_BEFORE,
                  RowKind.UPDATE_AFTER, RowKind.DELETE)
     for kind in row_kinds:
         v.set_row_kind(kind)
         self.check_coder(coder, v)
Beispiel #12
0
 def test_row_coder(self):
     """Check ``RowCoder`` with a ten-field named row for every row kind.

     Fields are named ``f0`` .. ``f9``; those at even positions are ``None``
     and the others hold their own index.
     """
     from pyflink.common import Row, RowKind
     field_coder = BigIntCoder()
     field_count = 10
     field_names = ['f{}'.format(i) for i in range(field_count)]
     coder = RowCoder([field_coder] * field_count, field_names)
     named_values = {
         name: (pos if pos % 2 else None)
         for pos, name in enumerate(field_names)
     }
     v = Row(**named_values)

     # The same row object is re-tagged and checked once per row kind.
     row_kinds = (RowKind.INSERT, RowKind.UPDATE_BEFORE,
                  RowKind.UPDATE_AFTER, RowKind.DELETE)
     for kind in row_kinds:
         v.set_row_kind(kind)
         self.check_coder(coder, v)