# Module paths follow the PyFlink 1.12-1.15 layout; newer releases move the JSON
# format classes to pyflink.datastream.formats.json.
from pyflink.common.serialization import JsonRowDeserializationSchema
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors import FlinkKafkaConsumer


def read_from_kafka(env):
    # Deserialize each Kafka record from JSON into a Row of (INT, STRING).
    deserialization_schema = JsonRowDeserializationSchema.builder() \
        .type_info(Types.ROW([Types.INT(), Types.STRING()])) \
        .build()

    kafka_consumer = FlinkKafkaConsumer(
        topics='test_csv_topic',
        deserialization_schema=deserialization_schema,
        properties={'bootstrap.servers': 'localhost:9092', 'group.id': 'test_group_1'}
    )
    kafka_consumer.set_start_from_earliest()

    env.add_source(kafka_consumer).print()
    env.execute()
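For context, a minimal sketch of how read_from_kafka might be driven: the legacy Kafka connector classes must be on the classpath, so the driver adds the connector jar before building the pipeline. The jar path and the __main__ block below are illustrative assumptions, not part of the original snippet.

from pyflink.datastream import StreamExecutionEnvironment


if __name__ == '__main__':
    env = StreamExecutionEnvironment.get_execution_environment()
    # Placeholder path: point this at a local copy of the flink-sql-connector-kafka jar.
    env.add_jars("file:///path/to/flink-sql-connector-kafka.jar")
    read_from_kafka(env)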
def test_add_classpaths(self):
    # Find the Kafka connector jars under the Flink source tree.
    flink_source_root = _find_flink_source_root()
    jars_abs_path = flink_source_root + '/flink-connectors/flink-sql-connector-kafka'
    specific_jars = glob.glob(jars_abs_path + '/target/flink*.jar')
    specific_jars = ['file://' + specific_jar for specific_jar in specific_jars]

    self.env.add_classpaths(*specific_jars)
    source_topic = 'test_source_topic'
    props = {'bootstrap.servers': 'localhost:9092', 'group.id': 'test_group'}
    type_info = Types.ROW([Types.INT(), Types.STRING()])

    # Test for kafka consumer
    deserialization_schema = JsonRowDeserializationSchema.builder() \
        .type_info(type_info=type_info).build()

    # It will raise a ClassNotFoundException if the kafka connector is not added into the
    # pipeline classpaths.
    kafka_consumer = FlinkKafkaConsumer(source_topic, deserialization_schema, props)
    self.env.add_source(kafka_consumer).print()
    self.env.get_execution_plan()
from pyflink.datastream.connectors import FlinkKafkaConsumer
# Recent PyFlink releases expose AvroRowDeserializationSchema from
# pyflink.datastream.formats.avro; older releases import it from pyflink.common.serialization.
from pyflink.datastream.formats.avro import AvroRowDeserializationSchema


def read_from_kafka(env):
    # Deserialize Avro records written with the TestRecord schema below.
    deserialization_schema = AvroRowDeserializationSchema(
        avro_schema_string="""
            {
                "type": "record",
                "name": "TestRecord",
                "fields": [
                    {"name": "id", "type": "int"},
                    {"name": "name", "type": "string"}
                ]
            }""")

    kafka_consumer = FlinkKafkaConsumer(
        topics='test_avro_topic',
        deserialization_schema=deserialization_schema,
        properties={
            'bootstrap.servers': 'localhost:9092',
            'group.id': 'test_group_1'
        })
    kafka_consumer.set_start_from_earliest()

    env.add_source(kafka_consumer).print()
    env.execute()
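As a complement, a hedged sketch of the matching write path, pairing AvroRowSerializationSchema with the legacy FlinkKafkaProducer. The sample rows, topic name, and import paths are illustrative assumptions rather than part of the original snippet.

from pyflink.common.typeinfo import Types
from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.connectors import FlinkKafkaProducer
from pyflink.datastream.formats.avro import AvroRowSerializationSchema


def write_to_kafka(env):
    # Serialize each (INT, STRING) row into Avro using the same TestRecord schema.
    serialization_schema = AvroRowSerializationSchema(
        avro_schema_string="""
            {
                "type": "record",
                "name": "TestRecord",
                "fields": [
                    {"name": "id", "type": "int"},
                    {"name": "name", "type": "string"}
                ]
            }""")

    kafka_producer = FlinkKafkaProducer(
        'test_avro_topic',
        serialization_schema,
        {'bootstrap.servers': 'localhost:9092'})

    # A couple of in-memory rows stand in for a real upstream source.
    env.from_collection(
        [(1, 'hello'), (2, 'world')],
        type_info=Types.ROW([Types.INT(), Types.STRING()])
    ).add_sink(kafka_producer)
    env.execute()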
def test_legacy_kafka_connector(self):
    source_topic = 'test_source_topic'
    sink_topic = 'test_sink_topic'
    props = {'bootstrap.servers': 'localhost:9092', 'group.id': 'test_group'}
    type_info = Types.ROW([Types.INT(), Types.STRING()])

    # Test for kafka consumer
    deserialization_schema = JsonRowDeserializationSchema.builder() \
        .type_info(type_info=type_info).build()
    flink_kafka_consumer = FlinkKafkaConsumer(source_topic, deserialization_schema, props)
    flink_kafka_consumer.set_start_from_earliest()
    flink_kafka_consumer.set_commit_offsets_on_checkpoints(True)

    j_properties = get_field_value(flink_kafka_consumer.get_java_function(), 'properties')
    self.assertEqual('localhost:9092', j_properties.getProperty('bootstrap.servers'))
    self.assertEqual('test_group', j_properties.getProperty('group.id'))
    self.assertTrue(get_field_value(flink_kafka_consumer.get_java_function(),
                                    'enableCommitOnCheckpoints'))
    j_start_up_mode = get_field_value(flink_kafka_consumer.get_java_function(), 'startupMode')

    j_deserializer = get_field_value(flink_kafka_consumer.get_java_function(), 'deserializer')
    j_deserialize_type_info = invoke_java_object_method(j_deserializer, "getProducedType")
    deserialize_type_info = typeinfo._from_java_type(j_deserialize_type_info)
    self.assertTrue(deserialize_type_info == type_info)
    self.assertTrue(j_start_up_mode.equals(
        get_gateway().jvm.org.apache.flink.streaming.connectors.kafka
        .config.StartupMode.EARLIEST))
    j_topic_desc = get_field_value(flink_kafka_consumer.get_java_function(),
                                   'topicsDescriptor')
    j_topics = invoke_java_object_method(j_topic_desc, 'getFixedTopics')
    self.assertEqual(['test_source_topic'], list(j_topics))

    # Test for kafka producer
    serialization_schema = JsonRowSerializationSchema.builder().with_type_info(type_info) \
        .build()
    flink_kafka_producer = FlinkKafkaProducer(sink_topic, serialization_schema, props)
    flink_kafka_producer.set_write_timestamp_to_kafka(False)

    j_producer_config = get_field_value(flink_kafka_producer.get_java_function(),
                                        'producerConfig')
    self.assertEqual('localhost:9092', j_producer_config.getProperty('bootstrap.servers'))
    self.assertEqual('test_group', j_producer_config.getProperty('group.id'))
    self.assertFalse(get_field_value(flink_kafka_producer.get_java_function(),
                                     'writeTimestampToKafka'))