Beispiel #1
0
    def test_basic_array_type_info(self):
        """Pass rows with float and string basic arrays through an identity map.

        Each array in the input rows contains exactly one ``None``; the strings
        collected from the test sink are expected to show that slot as ``null``.
        """
        def make_row_type():
            # A fresh ROW type is created for the source and for the map output.
            return Types.ROW([
                Types.INT(),
                Types.BASIC_ARRAY(Types.FLOAT()),
                Types.BASIC_ARRAY(Types.STRING())
            ])

        rows = [
            (1, [1.1, None, 1.30], [None, 'hi', 'flink']),
            (2, [None, 2.2, 2.3], ['hello', None, 'flink']),
            (3, [3.1, 3.2, None], ['hello', 'hi', None]),
        ]
        source = self.env.from_collection(rows, type_info=make_row_type())

        mapped = source.map(lambda x: x, output_type=make_row_type())
        mapped.add_sink(self.test_sink)
        self.env.execute("test basic array type info")

        # Both sides are sorted before comparing, so sink output order is irrelevant.
        actual = self.test_sink.get_results()
        actual.sort()
        wanted = sorted([
            '1,[1.1, null, 1.3],[null, hi, flink]',
            '2,[null, 2.2, 2.3],[hello, null, flink]',
            '3,[3.1, 3.2, null],[hello, hi, null]'
        ])
        self.assertEqual(wanted, actual)
Beispiel #2
0
    def test_primitive_array_type_info(self):
        """Pass rows with a primitive float array through an identity map.

        The strings collected from the test sink are compared, after sorting,
        with the expected textual form of the three input rows.
        """
        def make_row_type():
            # A fresh ROW type is created for the source and for the map output.
            return Types.ROW([Types.INT(), Types.PRIMITIVE_ARRAY(Types.FLOAT())])

        rows = [
            (1, [1.1, 1.2, 1.30]),
            (2, [2.1, 2.2, 2.3]),
            (3, [3.1, 3.2, 3.3]),
        ]
        source = self.env.from_collection(rows, type_info=make_row_type())

        mapped = source.map(lambda x: x, output_type=make_row_type())
        mapped.add_sink(self.test_sink)
        self.env.execute("test primitive array type info")

        # Both sides are sorted before comparing, so sink output order is irrelevant.
        actual = self.test_sink.get_results()
        actual.sort()
        wanted = sorted(['1,[1.1, 1.2, 1.3]', '2,[2.1, 2.2, 2.3]', '3,[3.1, 3.2, 3.3]'])
        self.assertEqual(wanted, actual)
Beispiel #3
0
def event_timer_timer_demo():
    """Run a small keyed job over an in-memory collection and print its output.

    The records are ``(long, string, float)`` tuples. Timestamps come from
    ``MyTimestampAssigner`` and the keyed stream is handled by ``Sum``; both are
    defined outside this function.
    """
    env = StreamExecutionEnvironment.get_execution_environment()

    records = [
        (1000, 'Alice', 110.1),
        (4000, 'Bob', 30.2),
        (3000, 'Alice', 20.0),
        (2000, 'Bob', 53.1),
        (5000, 'Alice', 13.1),
        (3000, 'Bob', 3.1),
        (7000, 'Bob', 16.1),
        (10000, 'Alice', 20.1)
    ]
    record_type = Types.TUPLE([Types.LONG(), Types.STRING(), Types.FLOAT()])
    stream = env.from_collection(collection=records, type_info=record_type)

    # Watermarks use a bounded out-of-orderness of two seconds.
    strategy = WatermarkStrategy.for_bounded_out_of_orderness(Duration.of_seconds(2))
    strategy = strategy.with_timestamp_assigner(MyTimestampAssigner())
    stream = stream.assign_timestamps_and_watermarks(strategy)

    # Key by the second tuple field (the name) and apply the process function.
    keyed = stream.key_by(lambda value: value[1])
    keyed.process(Sum()).print()

    # submit for execution
    env.execute()
Beispiel #4
0
def pickled_bytes_to_python_converter(data, field_type):
    """Recursively turn pickled field data into Python objects, guided by ``field_type``.

    :param data: for a ``RowTypeInfo`` a sequence whose first entry is skipped and
        whose remaining entries are the per-field payloads; otherwise a pickled
        payload.
    :param field_type: the type information describing ``data``.
    :return: a tuple for rows, a list for basic/primitive arrays, a
        ``datetime.time`` for SQL_TIME, otherwise whatever
        ``field_type.from_internal_type`` returns.

    NOTE(review): ``pickle.loads`` must only ever see trusted input - confirm
    that callers never pass externally supplied bytes.
    """
    if isinstance(field_type, RowTypeInfo):
        # Entry 0 is not a field; entries 1.. line up with the declared field types.
        return tuple(
            pickled_bytes_to_python_converter(field_bytes, sub_type)
            for field_bytes, sub_type in zip(data[1:], field_type.get_field_types()))

    payload = pickle.loads(data)

    if field_type == Types.SQL_TIME():
        # The payload is split as a microsecond count into h / m / s / us parts.
        total_seconds, micros = divmod(payload, 10**6)
        total_minutes, secs = divmod(total_seconds, 60)
        hrs, mins = divmod(total_minutes, 60)
        return datetime.time(hrs, mins, secs, micros)

    if field_type == Types.SQL_DATE():
        return field_type.from_internal_type(payload)

    if field_type == Types.SQL_TIMESTAMP():
        return field_type.from_internal_type(int(payload.timestamp() * 10**6))

    if field_type == Types.FLOAT():
        return field_type.from_internal_type(ast.literal_eval(payload))

    if is_basic_array_type_info(field_type) or is_primitive_array_type_info(field_type):
        # The element type is recovered from the Java-side component info.
        component_type = typeinfo._from_java_type(
            field_type.get_java_type_info().getComponentInfo())
        return [pickled_bytes_to_python_converter(item, component_type)
                for item in payload]

    return field_type.from_internal_type(payload)
Beispiel #5
0
 def open(self, runtime_context: RuntimeContext):
     """Register the ``"state"`` value state with a TTL config and keep it on ``self.state``."""
     descriptor = ValueStateDescriptor("state", Types.FLOAT())

     # Assemble the TTL configuration one builder step at a time.
     ttl_builder = StateTtlConfig.new_builder(Time.seconds(1))
     ttl_builder = ttl_builder.set_update_type(StateTtlConfig.UpdateType.OnReadAndWrite)
     ttl_builder = ttl_builder.disable_cleanup_in_background()
     ttl_config = ttl_builder.build()

     descriptor.enable_time_to_live(ttl_config)
     self.state = runtime_context.get_state(descriptor)
    def test_from_collection_with_data_types(self):
        """Check ``from_collection`` output when explicit type information is given.

        The first part feeds plain strings. The second part feeds two wide tuples
        typed as a ROW and compares the collected results with their expected
        ``+I[...]`` string form.
        """
        # verify from_collection for the collection with single object.
        words = ['Hi', 'Hello']
        ds = self.env.from_collection(words, type_info=Types.STRING())
        ds.add_sink(self.test_sink)
        self.env.execute("test from collection with single object")
        results = self.test_sink.get_results(False)
        results.sort()
        self.assertEqual(sorted(['Hello', 'Hi']), results)

        # verify from_collection for the collection with multiple objects like tuple.
        first_row = (
            1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
            bytearray(b'flink'), 'pyflink', datetime.date(2014, 9, 13),
            datetime.time(hour=12, minute=0, second=0, microsecond=123000),
            datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [1, 2, 3],
            decimal.Decimal('1000000000000000000.05'),
            decimal.Decimal('1000000000000000000.0599999999999'
                            '9999899999999999'))
        # 43878 and 9147483648 do not fit SHORT / INT; the expected output below
        # lists them as -21658 and 557549056.
        second_row = (
            2, None, 2, True, 43878, 9147483648, 9.87, 2.98936,
            bytearray(b'flink'), 'pyflink', datetime.date(2015, 10, 14),
            datetime.time(hour=11, minute=2, second=2, microsecond=234500),
            datetime.datetime(2020, 4, 15, 8, 2, 6, 235000), [2, 4, 6],
            decimal.Decimal('2000000000000000000.74'),
            decimal.Decimal('2000000000000000000.061111111111111'
                            '11111111111111'))
        row_type = Types.ROW([
            Types.LONG(), Types.LONG(), Types.SHORT(),
            Types.BOOLEAN(), Types.SHORT(), Types.INT(),
            Types.FLOAT(), Types.DOUBLE(),
            Types.PICKLED_BYTE_ARRAY(),
            Types.STRING(), Types.SQL_DATE(), Types.SQL_TIME(),
            Types.SQL_TIMESTAMP(),
            Types.BASIC_ARRAY(Types.LONG()), Types.BIG_DEC(),
            Types.BIG_DEC()])
        ds = self.env.from_collection([first_row, second_row], type_info=row_type)
        ds.add_sink(self.test_sink)
        self.env.execute("test from collection with tuple object")
        results = self.test_sink.get_results(False)
        results.sort()

        # if user specifies data types of input data, the collected result should be in row format.
        wanted = sorted([
            '+I[1, null, 1, true, 32767, -2147483648, 1.23, 1.98932, [102, 108, 105, 110, 107], '
            'pyflink, 2014-09-13, 12:00:00, 2018-03-11 03:00:00.123, [1, 2, 3], '
            '1000000000000000000.05, 1000000000000000000.05999999999999999899999999999]',
            '+I[2, null, 2, true, -21658, 557549056, 9.87, 2.98936, [102, 108, 105, 110, 107], '
            'pyflink, 2015-10-14, 11:02:02, 2020-04-15 08:02:06.235, [2, 4, 6], '
            '2000000000000000000.74, 2000000000000000000.06111111111111111111111111111]'])
        self.assertEqual(wanted, results)
Beispiel #7
0
def pickled_bytes_to_python_converter(data, field_type):
    """Recursively turn pickled field data into Python objects, guided by ``field_type``.

    :param data: for a ``RowTypeInfo`` a sequence whose first entry encodes the
        row kind and whose remaining entries are the per-field payloads;
        otherwise a pickled payload.
    :param field_type: the type information describing ``data``.
    :return: a ``Row`` for rows, a list for array and list types, a dict for
        map types, a ``datetime.time`` for SQL_TIME, otherwise whatever
        ``field_type.from_internal_type`` returns.

    NOTE(review): ``pickle.loads`` must only ever see trusted input - confirm
    that callers never pass externally supplied bytes.
    """
    if isinstance(field_type, RowTypeInfo):
        # Entry 0 is read as a little-endian integer naming the RowKind.
        kind = RowKind(int.from_bytes(data[0], 'little'))
        values = [
            pickled_bytes_to_python_converter(field_bytes, sub_type)
            for field_bytes, sub_type in zip(data[1:], field_type.get_field_types())
        ]
        return Row.of_kind(kind, *values)

    payload = pickle.loads(data)

    if field_type == Types.SQL_TIME():
        # The payload is split as a microsecond count into h / m / s / us parts.
        total_seconds, micros = divmod(payload, 10**6)
        total_minutes, secs = divmod(total_seconds, 60)
        hrs, mins = divmod(total_minutes, 60)
        return datetime.time(hrs, mins, secs, micros)

    if field_type == Types.SQL_DATE():
        return field_type.from_internal_type(payload)

    if field_type == Types.SQL_TIMESTAMP():
        return field_type.from_internal_type(int(payload.timestamp() * 10**6))

    if field_type == Types.FLOAT():
        return field_type.from_internal_type(ast.literal_eval(payload))

    if isinstance(field_type,
                  (BasicArrayTypeInfo, PrimitiveArrayTypeInfo, ObjectArrayTypeInfo)):
        component_type = field_type._element_type
        return [pickled_bytes_to_python_converter(item, component_type)
                for item in payload]

    if isinstance(field_type, MapTypeInfo):
        key_type = field_type._key_type_info
        value_type = field_type._value_type_info
        # payload[0] and payload[1] are parallel sequences of keys and values.
        return {
            pickled_bytes_to_python_converter(key_bytes, key_type):
                pickled_bytes_to_python_converter(value_bytes, value_type)
            for key_bytes, value_bytes in zip(payload[0], payload[1])
        }

    if isinstance(field_type, ListTypeInfo):
        component_type = field_type.elem_type
        return [pickled_bytes_to_python_converter(item, component_type)
                for item in payload]

    return field_type.from_internal_type(payload)
Beispiel #8
0
    def test_from_java_type(self):
        """Check that ``_from_java_type`` inverts ``get_java_type_info``.

        Each type information below is converted to its Java counterpart and
        back, and the result must equal the original.
        """
        def assert_round_trip(type_info):
            self.assertEqual(type_info,
                             _from_java_type(type_info.get_java_type_info()))

        # Basic types.
        assert_round_trip(Types.INT())
        assert_round_trip(Types.SHORT())
        assert_round_trip(Types.LONG())
        assert_round_trip(Types.FLOAT())
        assert_round_trip(Types.DOUBLE())
        assert_round_trip(Types.CHAR())
        assert_round_trip(Types.BYTE())
        assert_round_trip(Types.BIG_INT())
        assert_round_trip(Types.BIG_DEC())

        # SQL date/time types.
        assert_round_trip(Types.SQL_DATE())
        assert_round_trip(Types.SQL_TIME())
        assert_round_trip(Types.SQL_TIMESTAMP())

        # Composite types.
        assert_round_trip(Types.ROW([Types.INT(), Types.STRING()]))
        assert_round_trip(Types.TUPLE([Types.CHAR(), Types.INT()]))

        # Array types.
        assert_round_trip(Types.PRIMITIVE_ARRAY(Types.INT()))
        assert_round_trip(Types.OBJECT_ARRAY(Types.SQL_DATE()))
        assert_round_trip(Types.PICKLED_BYTE_ARRAY())

        # SQL_DATE is checked a second time, as in the original sequence.
        assert_round_trip(Types.SQL_DATE())

        # Map and list types.
        assert_round_trip(Types.MAP(Types.INT(), Types.STRING()))
        assert_round_trip(Types.LIST(Types.INT()))
Beispiel #9
0
def to_java_typeinfo(type_info: TypeInformation):
    """Return the Java type information object that corresponds to ``type_info``.

    Basic, primitive-array, basic-array, object-array and map type information
    are translated explicitly; anything else falls back to the pickled byte
    array type information.

    Several basic types share a Java type here: BYTE, SHORT, INT and LONG all
    map to ``JTypes.LONG``, FLOAT and DOUBLE to ``JTypes.DOUBLE``, and CHAR and
    STRING to ``JTypes.STRING``.

    :param type_info: the Python-side type information to translate.
    :return: the matching Java type information object.
    :raises TypeError: for an unknown basic type, or for an array element type
        that the primitive / basic array branches do not list.
    """
    if isinstance(type_info, BasicTypeInfo):
        kind = type_info._basic_type

        if kind == BasicType.STRING:
            return JTypes.STRING
        if kind == BasicType.BYTE:
            return JTypes.LONG
        if kind == BasicType.BOOLEAN:
            return JTypes.BOOLEAN
        if kind == BasicType.SHORT:
            return JTypes.LONG
        if kind == BasicType.INT:
            return JTypes.LONG
        if kind == BasicType.LONG:
            return JTypes.LONG
        if kind == BasicType.FLOAT:
            return JTypes.DOUBLE
        if kind == BasicType.DOUBLE:
            return JTypes.DOUBLE
        if kind == BasicType.CHAR:
            return JTypes.STRING
        if kind == BasicType.BIG_INT:
            return JTypes.BIG_INT
        if kind == BasicType.BIG_DEC:
            return JTypes.BIG_DEC
        if kind == BasicType.INSTANT:
            return JTypes.INSTANT
        raise TypeError("Invalid BasicType %s." % kind)

    if isinstance(type_info, PrimitiveArrayTypeInfo):
        elem = type_info._element_type

        if elem == Types.BOOLEAN():
            return JPrimitiveArrayTypeInfo.BOOLEAN_PRIMITIVE_ARRAY_TYPE_INFO
        if elem == Types.BYTE():
            return JPrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO
        if elem == Types.SHORT():
            return JPrimitiveArrayTypeInfo.SHORT_PRIMITIVE_ARRAY_TYPE_INFO
        if elem == Types.INT():
            return JPrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO
        if elem == Types.LONG():
            return JPrimitiveArrayTypeInfo.LONG_PRIMITIVE_ARRAY_TYPE_INFO
        if elem == Types.FLOAT():
            return JPrimitiveArrayTypeInfo.FLOAT_PRIMITIVE_ARRAY_TYPE_INFO
        if elem == Types.DOUBLE():
            return JPrimitiveArrayTypeInfo.DOUBLE_PRIMITIVE_ARRAY_TYPE_INFO
        if elem == Types.CHAR():
            return JPrimitiveArrayTypeInfo.CHAR_PRIMITIVE_ARRAY_TYPE_INFO
        raise TypeError("Invalid element type for a primitive array.")

    if isinstance(type_info, BasicArrayTypeInfo):
        elem = type_info._element_type

        if elem == Types.BOOLEAN():
            return JBasicArrayTypeInfo.BOOLEAN_ARRAY_TYPE_INFO
        if elem == Types.BYTE():
            return JBasicArrayTypeInfo.BYTE_ARRAY_TYPE_INFO
        if elem == Types.SHORT():
            return JBasicArrayTypeInfo.SHORT_ARRAY_TYPE_INFO
        if elem == Types.INT():
            return JBasicArrayTypeInfo.INT_ARRAY_TYPE_INFO
        if elem == Types.LONG():
            return JBasicArrayTypeInfo.LONG_ARRAY_TYPE_INFO
        if elem == Types.FLOAT():
            return JBasicArrayTypeInfo.FLOAT_ARRAY_TYPE_INFO
        if elem == Types.DOUBLE():
            return JBasicArrayTypeInfo.DOUBLE_ARRAY_TYPE_INFO
        if elem == Types.CHAR():
            return JBasicArrayTypeInfo.CHAR_ARRAY_TYPE_INFO
        if elem == Types.STRING():
            return JBasicArrayTypeInfo.STRING_ARRAY_TYPE_INFO
        raise TypeError("Invalid element type for a basic array.")

    if isinstance(type_info, ObjectArrayTypeInfo):
        # The element type is translated recursively.
        return JTypes.OBJECT_ARRAY(to_java_typeinfo(type_info._element_type))

    if isinstance(type_info, MapTypeInfo):
        # Key type first, then value type, each translated recursively.
        j_key = to_java_typeinfo(type_info._key_type_info)
        j_value = to_java_typeinfo(type_info._value_type_info)
        return JMapTypeInfo(j_key, j_value)

    # Every other type information falls back to the pickled byte array type.
    return JPickledByteArrayTypeInfo.PICKLED_BYTE_ARRAY_TYPE_INFO