def test_json_row_serialization_deserialization_schema(self):
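        """Round-trips JSON through JsonRowDeserializationSchema and
        JsonRowSerializationSchema; fields absent from the input are
        expected to reappear as explicit nulls."""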
        jsons = [
            "{\"svt\":\"2020-02-24T12:58:09.209+0800\"}",
            "{\"svt\":\"2020-02-24T12:58:09.209+0800\", "
            "\"ops\":{\"id\":\"281708d0-4092-4c21-9233-931950b6eccf\"},\"ids\":[1, 2, 3]}",
            "{\"svt\":\"2020-02-24T12:58:09.209+0800\"}"
        ]
        expected_jsons = [
            "{\"svt\":\"2020-02-24T12:58:09.209+0800\",\"ops\":null,\"ids\":null}",
            "{\"svt\":\"2020-02-24T12:58:09.209+0800\","
            "\"ops\":{\"id\":\"281708d0-4092-4c21-9233-931950b6eccf\"},"
            "\"ids\":[1,2,3]}",
            "{\"svt\":\"2020-02-24T12:58:09.209+0800\",\"ops\":null,\"ids\":null}"
        ]

        row_schema = Types.ROW_NAMED(["svt", "ops", "ids"], [
            Types.STRING(),
            Types.ROW_NAMED(['id'], [Types.STRING()]),
            Types.PRIMITIVE_ARRAY(Types.INT())
        ])

        json_row_serialization_schema = JsonRowSerializationSchema.builder() \
            .with_type_info(row_schema).build()
        json_row_deserialization_schema = JsonRowDeserializationSchema.builder() \
            .type_info(row_schema).build()

        for i in range(len(jsons)):
            j_row = json_row_deserialization_schema._j_deserialization_schema\
                .deserialize(bytes(jsons[i], encoding='utf-8'))
            result = str(json_row_serialization_schema._j_serialization_schema.
                         serialize(j_row),
                         encoding='utf-8')
            self.assertEqual(expected_jsons[i], result)
Example #2
    def test_rabbitmq_connectors(self):
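        """Builds an RMQSource and RMQSink with JSON row schemas and checks
        queue names and the correlation-id flag on the Java functions."""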
        connection_config = RMQConnectionConfig.Builder() \
            .set_host('localhost') \
            .set_port(5672) \
            .set_virtual_host('/') \
            .set_user_name('guest') \
            .set_password('guest') \
            .build()
        type_info = Types.ROW([Types.INT(), Types.STRING()])
        deserialization_schema = JsonRowDeserializationSchema.builder() \
            .type_info(type_info=type_info).build()

        rmq_source = RMQSource(connection_config, 'source_queue', True,
                               deserialization_schema)
        self.assertEqual(
            get_field_value(rmq_source.get_java_function(), 'queueName'),
            'source_queue')
        self.assertTrue(
            get_field_value(rmq_source.get_java_function(),
                            'usesCorrelationId'))

        serialization_schema = JsonRowSerializationSchema.builder().with_type_info(type_info) \
            .build()
        rmq_sink = RMQSink(connection_config, 'sink_queue',
                           serialization_schema)
        self.assertEqual(
            get_field_value(rmq_sink.get_java_function(), 'queueName'),
            'sink_queue')
Example #3
        def _check_record(data, topic, serialized_data):
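            """Serializes a row with a topic-selector-based schema and checks
            topic, key and value. Relies on a _select topic selector and a
            MockDataStream helper defined outside this snippet."""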
            input_type = Types.ROW([Types.STRING()])

            serialization_schema = KafkaRecordSerializationSchema.builder() \
                .set_topic_selector(_select) \
                .set_value_serialization_schema(
                    JsonRowSerializationSchema.builder().with_type_info(input_type).build()) \
                .build()
            jvm = get_gateway().jvm
            serialization_schema._j_serialization_schema.open(
                jvm.org.apache.flink.connector.testutils.formats.
                DummyInitializationContext(),
                jvm.org.apache.flink.connector.kafka.sink.
                DefaultKafkaSinkContext(0, 1, jvm.java.util.Properties()))
            sink = KafkaSink.builder() \
                .set_bootstrap_servers('localhost:9092') \
                .set_record_serializer(serialization_schema) \
                .build()

            ds = MockDataStream(Types.ROW([Types.STRING()]))
            ds.sink_to(sink)
            row = Row(data)
            topic_row = ds.feed(row)  # type: Row
            j_record = serialization_schema._j_serialization_schema.serialize(
                to_java_data_structure(topic_row), None, None)
            self.assertEqual(j_record.topic(), topic)
            self.assertIsNone(j_record.key())
            self.assertEqual(j_record.value(), serialized_data)
Example #4
    def test_set_topic(self):
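        """Checks that a topic fixed via set_topic() ends up on the
        serialized record."""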
        input_type = Types.ROW([Types.STRING()])

        serialization_schema = KafkaRecordSerializationSchema.builder() \
            .set_topic('test-topic') \
            .set_value_serialization_schema(
                JsonRowSerializationSchema.builder().with_type_info(input_type).build()) \
            .build()

        j_record = serialization_schema._j_serialization_schema.serialize(
            to_java_data_structure(Row('test')), None, None)
        self.assertEqual(j_record.topic(), 'test-topic')
        self.assertIsNone(j_record.key())
        self.assertEqual(j_record.value(), b'{"f0":"test"}')
Example #5
    def sink_to_kafka(self,
                      topic: str,
                      table: Table,
                      typeinfo: TypeInformation,
                      pool_size: int = 1) -> None:
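        """Converts a Table to a retract stream, keeps only insert messages,
        and writes them to Kafka as JSON rows."""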
        builder = JsonRowSerializationSchema.builder()
        builder.with_type_info(typeinfo)
        stream = self.table_env.to_retract_stream(table, typeinfo)
        stream = stream.filter(lambda x: x[0])\
                       .map(lambda x: x[1], output_type=typeinfo)
        stream.add_sink(
            FlinkKafkaProducer(
                topic,
                builder.build(),
                producer_config={'bootstrap.servers': self.kafka_addr},
                kafka_producer_pool_size=pool_size))
Example #6
    def kafka_connector_assertion(self, flink_kafka_consumer_clz, flink_kafka_producer_clz):
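        """Shared assertions for Kafka consumer/producer classes: verifies
        properties, startup mode, fixed topics, produced type info and the
        write-timestamp flag via the underlying Java fields."""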
        source_topic = 'test_source_topic'
        sink_topic = 'test_sink_topic'
        props = {'bootstrap.servers': 'localhost:9092', 'group.id': 'test_group'}
        type_info = Types.ROW([Types.INT(), Types.STRING()])

        # Test for kafka consumer
        deserialization_schema = JsonRowDeserializationSchema.builder() \
            .type_info(type_info=type_info).build()

        flink_kafka_consumer = flink_kafka_consumer_clz(source_topic, deserialization_schema, props)
        flink_kafka_consumer.set_start_from_earliest()
        flink_kafka_consumer.set_commit_offsets_on_checkpoints(True)

        j_properties = get_private_field(flink_kafka_consumer.get_java_function(), 'properties')
        self.assertEqual('localhost:9092', j_properties.getProperty('bootstrap.servers'))
        self.assertEqual('test_group', j_properties.getProperty('group.id'))
        self.assertTrue(get_private_field(flink_kafka_consumer.get_java_function(),
                                          'enableCommitOnCheckpoints'))
        j_start_up_mode = get_private_field(flink_kafka_consumer.get_java_function(), 'startupMode')

        j_deserializer = get_private_field(flink_kafka_consumer.get_java_function(), 'deserializer')
        j_deserialize_type_info = invoke_java_object_method(j_deserializer, "getProducedType")
        deserialize_type_info = typeinfo._from_java_type(j_deserialize_type_info)
        self.assertTrue(deserialize_type_info == type_info)
        self.assertTrue(j_start_up_mode.equals(get_gateway().jvm
                                               .org.apache.flink.streaming.connectors
                                               .kafka.config.StartupMode.EARLIEST))
        j_topic_desc = get_private_field(flink_kafka_consumer.get_java_function(),
                                         'topicsDescriptor')
        j_topics = invoke_java_object_method(j_topic_desc, 'getFixedTopics')
        self.assertEqual(['test_source_topic'], list(j_topics))

        # Test for kafka producer
        serialization_schema = JsonRowSerializationSchema.builder().with_type_info(type_info) \
            .build()
        flink_kafka_producer = flink_kafka_producer_clz(sink_topic, serialization_schema, props)
        flink_kafka_producer.set_write_timestamp_to_kafka(False)

        j_producer_config = get_private_field(flink_kafka_producer.get_java_function(),
                                              'producerConfig')
        self.assertEqual('localhost:9092', j_producer_config.getProperty('bootstrap.servers'))
        self.assertEqual('test_group', j_producer_config.getProperty('group.id'))
        self.assertFalse(get_private_field(flink_kafka_producer.get_java_function(),
                                           'writeTimestampToKafka'))
Example #7
    def test_set_topic(self):
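        """Like Example #4, but opens the schema with a
        DummyInitializationContext and a DefaultKafkaSinkContext before
        serializing."""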
        input_type = Types.ROW([Types.STRING()])

        serialization_schema = KafkaRecordSerializationSchema.builder() \
            .set_topic('test-topic') \
            .set_value_serialization_schema(
                JsonRowSerializationSchema.builder().with_type_info(input_type).build()) \
            .build()
        jvm = get_gateway().jvm
        serialization_schema._j_serialization_schema.open(
            jvm.org.apache.flink.connector.testutils.formats.
            DummyInitializationContext(),
            jvm.org.apache.flink.connector.kafka.sink.DefaultKafkaSinkContext(
                0, 1, jvm.java.util.Properties()))

        j_record = serialization_schema._j_serialization_schema.serialize(
            to_java_data_structure(Row('test')), None, None)
        self.assertEqual(j_record.topic(), 'test-topic')
        self.assertIsNone(j_record.key())
        self.assertEqual(j_record.value(), b'{"f0":"test"}')
Example #8
        def _check_record(data, topic, serialized_data):
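            """Variant of Example #3 that serializes without opening the
            schema first; _select and MockDataStream are defined outside
            this snippet."""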
            input_type = Types.ROW([Types.STRING()])

            serialization_schema = KafkaRecordSerializationSchema.builder() \
                .set_topic_selector(_select) \
                .set_value_serialization_schema(
                    JsonRowSerializationSchema.builder().with_type_info(input_type).build()) \
                .build()
            sink = KafkaSink.builder() \
                .set_bootstrap_servers('localhost:9092') \
                .set_record_serializer(serialization_schema) \
                .build()

            ds = MockDataStream(Types.ROW([Types.STRING()]))
            ds.sink_to(sink)
            row = Row(data)
            topic_row = ds.feed(row)  # type: Row
            j_record = serialization_schema._j_serialization_schema.serialize(
                to_java_data_structure(topic_row), None, None)
            self.assertEqual(j_record.topic(), topic)
            self.assertIsNone(j_record.key())
            self.assertEqual(j_record.value(), serialized_data)
Example #9
def python_data_stream_example():
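    """End-to-end pipeline: Kafka JSON source -> map -> key_by -> flat_map
    -> Kafka JSON sink, executed asynchronously."""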
    env = StreamExecutionEnvironment.get_execution_environment()

    source_type_info = Types.ROW([Types.STRING(), Types.INT()])
    json_row_deserialization_schema = JsonRowDeserializationSchema.builder()\
        .type_info(source_type_info).build()
    source_topic = 'test-python-data-stream-source'
    consumer_props = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'pyflink-e2e-source'
    }
    kafka_consumer_1 = FlinkKafkaConsumer(source_topic,
                                          json_row_deserialization_schema,
                                          consumer_props)
    kafka_consumer_1.set_start_from_earliest()
    source_stream_1 = env.add_source(kafka_consumer_1).name('kafka source 1')
    mapped_type_info = Types.ROW([Types.STRING(), Types.INT(), Types.INT()])

    keyed_stream = source_stream_1.map(add_one, output_type=mapped_type_info) \
        .key_by(lambda x: x[2])

    flat_mapped_stream = keyed_stream.flat_map(m_flat_map,
                                               result_type=mapped_type_info)
    flat_mapped_stream.name("flat-map").set_parallelism(3)

    sink_topic = 'test-python-data-stream-sink'
    producer_props = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'pyflink-e2e-1'
    }
    json_row_serialization_schema = JsonRowSerializationSchema.builder()\
        .with_type_info(mapped_type_info).build()
    kafka_producer = FlinkKafkaProducer(
        topic=sink_topic,
        producer_config=producer_props,
        serialization_schema=json_row_serialization_schema)
    flat_mapped_stream.add_sink(kafka_producer)
    env.execute_async("test data stream to kafka")
Example #10
    def _check_serialization_schema_implementations(check_function):
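        """Runs check_function against the JSON, CSV, Avro and
        SimpleStringSchema implementations together with their expected
        Java class names."""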
        input_type = Types.ROW([Types.STRING()])

        check_function(
            JsonRowSerializationSchema.builder().with_type_info(
                input_type).build(),
            'org.apache.flink.formats.json.JsonRowSerializationSchema')
        check_function(
            CsvRowSerializationSchema.Builder(input_type).build(),
            'org.apache.flink.formats.csv.CsvRowSerializationSchema')
        avro_schema_string = """
        {
            "type": "record",
            "name": "test_record",
            "fields": []
        }
        """
        check_function(
            AvroRowSerializationSchema(avro_schema_string=avro_schema_string),
            'org.apache.flink.formats.avro.AvroRowSerializationSchema')
        check_function(
            SimpleStringSchema(),
            'org.apache.flink.api.common.serialization.SimpleStringSchema')
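
All of the snippets above omit their imports. As a minimal, self-contained sketch of the recurring pattern, assuming a recent PyFlink release where the JSON schemas live in pyflink.datastream.formats.json (older releases expose them from pyflink.common.serialization) and the flink-json format JAR is on the classpath:

from pyflink.common.typeinfo import Types
from pyflink.datastream.formats.json import (JsonRowDeserializationSchema,
                                             JsonRowSerializationSchema)

# Row type shared by both directions; field names and types here are
# illustrative only.
type_info = Types.ROW_NAMED(['id', 'name'], [Types.INT(), Types.STRING()])

# Note the asymmetric builder APIs used throughout the examples above:
# the serialization builder takes with_type_info(), while the
# deserialization builder takes type_info().
serialization_schema = JsonRowSerializationSchema.builder() \
    .with_type_info(type_info) \
    .build()
deserialization_schema = JsonRowDeserializationSchema.builder() \
    .type_info(type_info) \
    .build()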