def _build_source(initializer: KafkaOffsetsInitializer):
    """Build a Kafka source that passes ``initializer`` to ``set_unbounded``.

    All other builder settings (bootstrap servers, topic, value-only
    deserializer and group id) are fixed test values.

    :param initializer: The offsets initializer handed to ``set_unbounded``.
    :return: The object returned by the builder's ``build()`` call.
    """
    source_builder = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topics('test_topic')
        .set_value_only_deserializer(SimpleStringSchema())
        .set_group_id('test_group')
        .set_unbounded(initializer)
    )
    return source_builder.build()
def test_compiling(self):
    """Check that a Kafka source appears as the first node of the execution plan."""
    kafka_source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topics('test_topic')
        .set_value_only_deserializer(SimpleStringSchema())
        .build()
    )

    stream = self.env.from_source(
        source=kafka_source,
        watermark_strategy=WatermarkStrategy.for_monotonous_timestamps(),
        source_name='kafka source',
    )
    # A sink is attached before the execution plan is requested.
    stream.print()

    execution_plan = json.loads(self.env.get_execution_plan())
    first_node = execution_plan['nodes'][0]
    self.assertEqual('Source: kafka source', first_node['type'])
def test_set_properties(self):
    """Check that builder settings are readable from the source configuration."""
    kafka_source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_group_id('test_group_id')
        .set_client_id_prefix('test_client_id_prefix')
        .set_property('test_property', 'test_value')
        .set_topics('test_topic')
        .set_value_only_deserializer(SimpleStringSchema())
        .build()
    )
    configuration = self._get_kafka_source_configuration(kafka_source)

    # (configuration key, value expected after building), checked in order.
    expected_entries = (
        ('bootstrap.servers', 'localhost:9092'),
        ('group.id', 'test_group_id'),
        ('client.id.prefix', 'test_client_id_prefix'),
        ('test_property', 'test_value'),
    )
    for key, expected_value in expected_entries:
        self.assertEqual(configuration.get_string(key, ''), expected_value)
def _check(schema: DeserializationSchema, class_name: str):
    """Assert which Java deserialization schema class ``schema`` ends up as.

    Builds a source with ``schema`` as its value-only deserializer, checks
    that the source's 'deserializationSchema' field is a
    KafkaValueOnlyDeserializationSchemaWrapper, and checks that the wrapper's
    own 'deserializationSchema' field has canonical class name ``class_name``.

    Relies on ``self`` from the enclosing scope for the assertions.

    :param schema: The deserialization schema given to the builder.
    :param class_name: Expected canonical Java class name of the wrapped schema.
    """
    kafka_source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topics('test_topic')
        .set_value_only_deserializer(schema)
        .build()
    )

    wrapper = get_field_value(
        kafka_source.get_java_function(), 'deserializationSchema')
    wrapper_class_name = wrapper.getClass().getCanonicalName()
    self.assertEqual(
        wrapper_class_name,
        'org.apache.flink.connector.kafka.source.reader.deserializer'
        '.KafkaValueOnlyDeserializationSchemaWrapper')

    wrapped_schema = get_field_value(wrapper, 'deserializationSchema')
    wrapped_class_name = wrapped_schema.getClass().getCanonicalName()
    self.assertEqual(wrapped_class_name, class_name)
def test_set_topic_pattern(self):
    """Check that ``set_topic_pattern`` yields a TopicPatternSubscriber.

    Also checks that the subscriber's 'topicPattern' field is a
    ``java.util.regex.Pattern`` whose string form is the pattern passed in.
    """
    kafka_source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topic_pattern('test_topic*')
        .set_value_only_deserializer(SimpleStringSchema())
        .build()
    )

    subscriber = get_field_value(kafka_source.get_java_function(), 'subscriber')
    subscriber_class_name = subscriber.getClass().getCanonicalName()
    self.assertEqual(
        subscriber_class_name,
        'org.apache.flink.connector.kafka.source.enumerator.subscriber.TopicPatternSubscriber'
    )

    pattern = get_field_value(subscriber, 'topicPattern')
    java_pattern_class = get_gateway().jvm.java.util.regex.Pattern
    self.assertTrue(is_instance_of(pattern, java_pattern_class))
    self.assertEqual(pattern.toString(), 'test_topic*')
def test_set_topics(self):
    """Check that ``set_topics`` yields a TopicListSubscriber.

    Also checks that the subscriber's 'topics' field is a ``java.util.List``
    holding the given topic names in the order they were passed.
    """
    expected_topics = ('test_topic1', 'test_topic2')
    kafka_source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topics(*expected_topics)
        .set_value_only_deserializer(SimpleStringSchema())
        .build()
    )

    subscriber = get_field_value(kafka_source.get_java_function(), 'subscriber')
    subscriber_class_name = subscriber.getClass().getCanonicalName()
    self.assertEqual(
        subscriber_class_name,
        'org.apache.flink.connector.kafka.source.enumerator.subscriber.TopicListSubscriber'
    )

    topics = get_field_value(subscriber, 'topics')
    java_list_class = get_gateway().jvm.java.util.List
    self.assertTrue(is_instance_of(topics, java_list_class))
    self.assertEqual(topics.size(), 2)
    for index, topic_name in enumerate(expected_topics):
        self.assertEqual(topics[index], topic_name)
def test_set_partitions(self):
    """Check that ``set_partitions`` yields a PartitionSetSubscriber.

    Also checks that the subscriber's 'subscribedPartitions' field is a
    ``java.util.Set`` containing the Java form of both partitions passed in.
    """
    first_partition = KafkaTopicPartition('test_topic', 1)
    second_partition = KafkaTopicPartition('test_topic', 2)
    kafka_source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_partitions({first_partition, second_partition})
        .set_value_only_deserializer(SimpleStringSchema())
        .build()
    )

    subscriber = get_field_value(kafka_source.get_java_function(), 'subscriber')
    subscriber_class_name = subscriber.getClass().getCanonicalName()
    self.assertEqual(
        subscriber_class_name,
        'org.apache.flink.connector.kafka.source.enumerator.subscriber.PartitionSetSubscriber'
    )

    subscribed = get_field_value(subscriber, 'subscribedPartitions')
    java_set_class = get_gateway().jvm.java.util.Set
    self.assertTrue(is_instance_of(subscribed, java_set_class))
    for partition in (first_partition, second_partition):
        self.assertTrue(partition._to_j_topic_partition() in subscribed)