def _check_specified_offsets_initializer(
        self,
        source: KafkaSource,
        offsets: Dict[KafkaTopicPartition, int],
        reset_strategy: KafkaOffsetResetStrategy,
        is_start: bool = True):
    """Assert that *source* carries a SpecifiedOffsetsInitializer whose
    per-partition offsets and reset strategy match the given expectations.

    :param source: the KafkaSource under inspection.
    :param offsets: expected mapping of topic partition to offset.
    :param reset_strategy: expected offset reset strategy.
    :param is_start: check the starting initializer if True, else the
        stopping initializer.
    """
    field_name = 'startingOffsetsInitializer' if is_start \
        else 'stoppingOffsetsInitializer'
    initializer = get_field_value(source.get_java_function(), field_name)
    self.assertEqual(
        initializer.getClass().getCanonicalName(),
        'org.apache.flink.connector.kafka.source.enumerator.initializer'
        '.SpecifiedOffsetsInitializer')

    j_offsets = get_field_value(initializer, 'initialOffsets')
    self.assertTrue(
        is_instance_of(j_offsets, get_gateway().jvm.java.util.Map))
    self.assertEqual(j_offsets.size(), len(offsets))
    # Every Java-side entry must appear in the expected dict with the
    # same offset value.
    for j_tp in j_offsets:
        py_tp = KafkaTopicPartition(j_tp.topic(), j_tp.partition())
        self.assertIsNotNone(offsets.get(py_tp))
        self.assertEqual(j_offsets[j_tp], offsets[py_tp])

    j_reset_strategy = get_field_value(initializer, 'offsetResetStrategy')
    self.assertTrue(
        j_reset_strategy.equals(reset_strategy._to_j_offset_reset_strategy()))
def _check_reader_handled_offsets_initializer(
        self,
        source: KafkaSource,
        offset: int,
        reset_strategy: KafkaOffsetResetStrategy,
        is_start: bool = True):
    """Assert that *source* carries a ReaderHandledOffsetsInitializer with
    the given sentinel offset and reset strategy.

    :param source: the KafkaSource under inspection.
    :param offset: expected value of the initializer's startingOffset field.
    :param reset_strategy: expected offset reset strategy.
    :param is_start: check the starting initializer if True, else the
        stopping initializer.
    """
    field_name = 'startingOffsetsInitializer' if is_start \
        else 'stoppingOffsetsInitializer'
    initializer = get_field_value(source.get_java_function(), field_name)
    self.assertEqual(
        initializer.getClass().getCanonicalName(),
        'org.apache.flink.connector.kafka.source.enumerator.initializer'
        '.ReaderHandledOffsetsInitializer')

    self.assertEqual(get_field_value(initializer, 'startingOffset'), offset)

    j_reset_strategy = get_field_value(initializer, 'offsetResetStrategy')
    self.assertTrue(
        j_reset_strategy.equals(reset_strategy._to_j_offset_reset_strategy()))
def _build_source(initializer: KafkaOffsetsInitializer):
    """Build a minimal unbounded KafkaSource whose stopping offsets are
    controlled by *initializer*."""
    builder = KafkaSource.builder()
    builder.set_bootstrap_servers('localhost:9092')
    builder.set_topics('test_topic')
    builder.set_value_only_deserializer(SimpleStringSchema())
    builder.set_group_id('test_group')
    builder.set_unbounded(initializer)
    return builder.build()
def _get_kafka_source_configuration(source: KafkaSource):
    """Extract the internal Configuration of *source* by reflectively
    invoking the Java-side private getConfiguration() method."""
    jvm = get_gateway().jvm
    j_source = source.get_java_function()
    # getConfiguration() takes no arguments, so pass empty Class/Object arrays.
    j_method = j_source.getClass().getDeclaredMethod(
        'getConfiguration', to_jarray(jvm.java.lang.Class, []))
    j_method.setAccessible(True)  # the method is not public
    j_configuration = j_method.invoke(
        j_source, to_jarray(jvm.java.lang.Object, []))
    return Configuration(j_configuration=j_configuration)
def test_compiling(self):
    """A pipeline using the Kafka source should compile into an execution
    plan whose first node is the named source."""
    source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topics('test_topic')
        .set_value_only_deserializer(SimpleStringSchema())
        .build())

    stream = self.env.from_source(
        source=source,
        watermark_strategy=WatermarkStrategy.for_monotonous_timestamps(),
        source_name='kafka source')
    stream.print()

    plan = json.loads(self.env.get_execution_plan())
    self.assertEqual('Source: kafka source', plan['nodes'][0]['type'])
def test_set_properties(self):
    """Builder setters should surface as entries in the source's
    Kafka configuration."""
    source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_group_id('test_group_id')
        .set_client_id_prefix('test_client_id_prefix')
        .set_property('test_property', 'test_value')
        .set_topics('test_topic')
        .set_value_only_deserializer(SimpleStringSchema())
        .build())
    conf = self._get_kafka_source_configuration(source)

    # Each builder call above maps to one configuration key.
    expected = {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'test_group_id',
        'client.id.prefix': 'test_client_id_prefix',
        'test_property': 'test_value',
    }
    for key, value in expected.items():
        self.assertEqual(conf.get_string(key, ''), value)
def _check(schema: DeserializationSchema, class_name: str):
    """Build a source with *schema* as value-only deserializer and assert
    that the Java side wraps a deserializer of the given class name."""
    source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topics('test_topic')
        .set_value_only_deserializer(schema)
        .build())

    wrapper = get_field_value(
        source.get_java_function(), 'deserializationSchema')
    self.assertEqual(
        wrapper.getClass().getCanonicalName(),
        'org.apache.flink.connector.kafka.source.reader.deserializer'
        '.KafkaValueOnlyDeserializationSchemaWrapper')

    # The wrapper holds the actual deserializer; its class must match.
    wrapped = get_field_value(wrapper, 'deserializationSchema')
    self.assertEqual(wrapped.getClass().getCanonicalName(), class_name)
def test_set_topic_pattern(self):
    """set_topic_pattern should install a TopicPatternSubscriber holding
    the compiled pattern."""
    source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topic_pattern('test_topic*')
        .set_value_only_deserializer(SimpleStringSchema())
        .build())

    subscriber = get_field_value(source.get_java_function(), 'subscriber')
    self.assertEqual(
        subscriber.getClass().getCanonicalName(),
        'org.apache.flink.connector.kafka.source.enumerator.subscriber.TopicPatternSubscriber'
    )

    pattern = get_field_value(subscriber, 'topicPattern')
    self.assertTrue(
        is_instance_of(pattern, get_gateway().jvm.java.util.regex.Pattern))
    self.assertEqual(pattern.toString(), 'test_topic*')
def _check_timestamp_offsets_initializer(self, source: KafkaSource,
                                         timestamp: int,
                                         is_start: bool = True):
    """Assert that *source* carries a TimestampOffsetsInitializer with the
    given starting timestamp.

    :param source: the KafkaSource under inspection.
    :param timestamp: expected value of the startingTimestamp field.
    :param is_start: check the starting initializer if True, else the
        stopping initializer.
    """
    field_name = 'startingOffsetsInitializer' if is_start \
        else 'stoppingOffsetsInitializer'
    initializer = get_field_value(source.get_java_function(), field_name)
    self.assertEqual(
        initializer.getClass().getCanonicalName(),
        'org.apache.flink.connector.kafka.source.enumerator.initializer'
        '.TimestampOffsetsInitializer')
    self.assertEqual(
        get_field_value(initializer, 'startingTimestamp'), timestamp)
def test_set_topics(self):
    """set_topics should install a TopicListSubscriber holding exactly the
    given topics, in order."""
    source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_topics('test_topic1', 'test_topic2')
        .set_value_only_deserializer(SimpleStringSchema())
        .build())

    subscriber = get_field_value(source.get_java_function(), 'subscriber')
    self.assertEqual(
        subscriber.getClass().getCanonicalName(),
        'org.apache.flink.connector.kafka.source.enumerator.subscriber.TopicListSubscriber'
    )

    topics = get_field_value(subscriber, 'topics')
    self.assertTrue(
        is_instance_of(topics, get_gateway().jvm.java.util.List))
    self.assertEqual(topics.size(), 2)
    self.assertEqual(topics[0], 'test_topic1')
    self.assertEqual(topics[1], 'test_topic2')
def test_set_partitions(self):
    """set_partitions should install a PartitionSetSubscriber containing
    the Java counterparts of the given partitions."""
    partition_a = KafkaTopicPartition('test_topic', 1)
    partition_b = KafkaTopicPartition('test_topic', 2)
    source = (
        KafkaSource.builder()
        .set_bootstrap_servers('localhost:9092')
        .set_partitions({partition_a, partition_b})
        .set_value_only_deserializer(SimpleStringSchema())
        .build())

    subscriber = get_field_value(source.get_java_function(), 'subscriber')
    self.assertEqual(
        subscriber.getClass().getCanonicalName(),
        'org.apache.flink.connector.kafka.source.enumerator.subscriber.PartitionSetSubscriber'
    )

    j_partitions = get_field_value(subscriber, 'subscribedPartitions')
    self.assertTrue(
        is_instance_of(j_partitions, get_gateway().jvm.java.util.Set))
    # Membership is checked against the Java-side TopicPartition objects.
    self.assertTrue(partition_a._to_j_topic_partition() in j_partitions)
    self.assertTrue(partition_b._to_j_topic_partition() in j_partitions)
def _check_bounded(source: KafkaSource):
    """Assert that *source* reports CONTINUOUS_UNBOUNDED boundedness."""
    boundedness = get_field_value(source.get_java_function(), 'boundedness')
    self.assertEqual(boundedness.toString(), 'CONTINUOUS_UNBOUNDED')
def _check_bounded(source: KafkaSource):
    """Assert that *source* reports BOUNDED boundedness."""
    # FIX: the original called self._get_java_field(...), a helper that is
    # not defined anywhere in this file; every sibling check (including the
    # CONTINUOUS_UNBOUNDED variant) reads Java fields via the module-level
    # get_field_value, so use that here for consistency.
    self.assertEqual(
        get_field_value(source.get_java_function(), 'boundedness').toString(),
        'BOUNDED')