Example 1
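These snippets exercise the PyFlink KafkaSource builder and inspect the wrapped Java source object through reflection helpers such as get_field_value and get_gateway. Example 1 is a test-class method that verifies the starting or stopping offsets initializer is a SpecifiedOffsetsInitializer holding the expected per-partition offsets and offset reset strategy.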
    def _check_specified_offsets_initializer(
            self,
            source: KafkaSource,
            offsets: Dict[KafkaTopicPartition, int],
            reset_strategy: KafkaOffsetResetStrategy,
            is_start: bool = True):
        if is_start:
            field_name = 'startingOffsetsInitializer'
        else:
            field_name = 'stoppingOffsetsInitializer'
        offsets_initializer = get_field_value(source.get_java_function(),
                                              field_name)
        self.assertEqual(
            offsets_initializer.getClass().getCanonicalName(),
            'org.apache.flink.connector.kafka.source.enumerator.initializer'
            '.SpecifiedOffsetsInitializer')

        initial_offsets = get_field_value(offsets_initializer,
                                          'initialOffsets')
        self.assertTrue(
            is_instance_of(initial_offsets,
                           get_gateway().jvm.java.util.Map))
        self.assertEqual(initial_offsets.size(), len(offsets))
        for j_topic_partition in initial_offsets:
            topic_partition = KafkaTopicPartition(
                j_topic_partition.topic(), j_topic_partition.partition())
            self.assertIsNotNone(offsets.get(topic_partition))
            self.assertEqual(initial_offsets[j_topic_partition],
                             offsets[topic_partition])

        offset_reset_strategy = get_field_value(offsets_initializer,
                                                'offsetResetStrategy')
        self.assertTrue(
            offset_reset_strategy.equals(
                reset_strategy._to_j_offset_reset_strategy()))
Example 2
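The same check for the reader-handled case: the initializer must be a ReaderHandledOffsetsInitializer with the expected sentinel starting offset and offset reset strategy.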
    def _check_reader_handled_offsets_initializer(
            self,
            source: KafkaSource,
            offset: int,
            reset_strategy: KafkaOffsetResetStrategy,
            is_start: bool = True):
        if is_start:
            field_name = 'startingOffsetsInitializer'
        else:
            field_name = 'stoppingOffsetsInitializer'
        offsets_initializer = get_field_value(source.get_java_function(),
                                              field_name)
        self.assertEqual(
            offsets_initializer.getClass().getCanonicalName(),
            'org.apache.flink.connector.kafka.source.enumerator.initializer'
            '.ReaderHandledOffsetsInitializer')

        starting_offset = get_field_value(offsets_initializer,
                                          'startingOffset')
        self.assertEqual(starting_offset, offset)

        offset_reset_strategy = get_field_value(offsets_initializer,
                                                'offsetResetStrategy')
        self.assertTrue(
            offset_reset_strategy.equals(
                reset_strategy._to_j_offset_reset_strategy()))
Example 3
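A factory helper that builds a KafkaSource in unbounded mode with the given stopping offsets initializer; it pairs with the boundedness checks in Examples 12 and 13.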
    @staticmethod
    def _build_source(initializer: KafkaOffsetsInitializer):
        return KafkaSource.builder() \
            .set_bootstrap_servers('localhost:9092') \
            .set_topics('test_topic') \
            .set_value_only_deserializer(SimpleStringSchema()) \
            .set_group_id('test_group') \
            .set_unbounded(initializer) \
            .build()
Example 4
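A reflection helper that pulls the effective Kafka Configuration out of the Java source object by invoking its private getConfiguration() method; Example 6 relies on it.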
    @staticmethod
    def _get_kafka_source_configuration(source: KafkaSource):
        jvm = get_gateway().jvm
        j_source = source.get_java_function()
        # getConfiguration() is private on the Java KafkaSource, so look it
        # up reflectively and make it accessible before invoking it.
        j_to_configuration = j_source.getClass().getDeclaredMethod(
            'getConfiguration', to_jarray(jvm.java.lang.Class, []))
        j_to_configuration.setAccessible(True)
        j_configuration = j_to_configuration.invoke(
            j_source, to_jarray(jvm.java.lang.Object, []))
        return Configuration(j_configuration=j_configuration)
Example 5
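A plan-compilation smoke test: the source is wired into the environment and the JSON execution plan should list it as the first node. It assumes the json module is imported and that self.env is the StreamExecutionEnvironment provided by the test base class.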
    def test_compiling(self):
        source = KafkaSource.builder() \
            .set_bootstrap_servers('localhost:9092') \
            .set_topics('test_topic') \
            .set_value_only_deserializer(SimpleStringSchema()) \
            .build()

        ds = self.env.from_source(
            source=source,
            watermark_strategy=WatermarkStrategy.for_monotonous_timestamps(),
            source_name='kafka source')
        ds.print()
        plan = json.loads(self.env.get_execution_plan())
        self.assertEqual('Source: kafka source', plan['nodes'][0]['type'])
Example 6
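Verifies that builder settings and arbitrary set_property entries all land in the source's Kafka configuration, read back via the helper from Example 4.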
    def test_set_properties(self):
        source = KafkaSource.builder() \
            .set_bootstrap_servers('localhost:9092') \
            .set_group_id('test_group_id') \
            .set_client_id_prefix('test_client_id_prefix') \
            .set_property('test_property', 'test_value') \
            .set_topics('test_topic') \
            .set_value_only_deserializer(SimpleStringSchema()) \
            .build()
        conf = self._get_kafka_source_configuration(source)
        self.assertEqual(conf.get_string('bootstrap.servers', ''),
                         'localhost:9092')
        self.assertEqual(conf.get_string('group.id', ''), 'test_group_id')
        self.assertEqual(conf.get_string('client.id.prefix', ''),
                         'test_client_id_prefix')
        self.assertEqual(conf.get_string('test_property', ''), 'test_value')
Example 7
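A parameterized check that a value-only deserializer is wrapped in KafkaValueOnlyDeserializationSchemaWrapper and that the wrapped Java schema has the expected class name.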
    def _check(self, schema: DeserializationSchema, class_name: str):
        source = KafkaSource.builder() \
            .set_bootstrap_servers('localhost:9092') \
            .set_topics('test_topic') \
            .set_value_only_deserializer(schema) \
            .build()
        deserialization_schema_wrapper = get_field_value(
            source.get_java_function(), 'deserializationSchema')
        self.assertEqual(
            deserialization_schema_wrapper.getClass().getCanonicalName(),
            'org.apache.flink.connector.kafka.source.reader.deserializer'
            '.KafkaValueOnlyDeserializationSchemaWrapper')
        deserialization_schema = get_field_value(
            deserialization_schema_wrapper, 'deserializationSchema')
        self.assertEqual(
            deserialization_schema.getClass().getCanonicalName(),
            class_name)
Example 8
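Confirms that set_topic_pattern installs a TopicPatternSubscriber whose compiled java.util.regex.Pattern round-trips to the original pattern string.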
    def test_set_topic_pattern(self):
        source = KafkaSource.builder() \
            .set_bootstrap_servers('localhost:9092') \
            .set_topic_pattern('test_topic*') \
            .set_value_only_deserializer(SimpleStringSchema()) \
            .build()
        kafka_subscriber = get_field_value(source.get_java_function(),
                                           'subscriber')
        self.assertEqual(
            kafka_subscriber.getClass().getCanonicalName(),
            'org.apache.flink.connector.kafka.source.enumerator.subscriber.TopicPatternSubscriber'
        )
        topic_pattern = get_field_value(kafka_subscriber, 'topicPattern')
        self.assertTrue(
            is_instance_of(topic_pattern,
                           get_gateway().jvm.java.util.regex.Pattern))
        self.assertEqual(topic_pattern.toString(), 'test_topic*')
Example 9
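The timestamp variant of the initializer checks: the initializer must be a TimestampOffsetsInitializer whose startingTimestamp equals the expected value.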
    def _check_timestamp_offsets_initializer(self,
                                             source: KafkaSource,
                                             timestamp: int,
                                             is_start: bool = True):
        if is_start:
            field_name = 'startingOffsetsInitializer'
        else:
            field_name = 'stoppingOffsetsInitializer'
        offsets_initializer = get_field_value(source.get_java_function(),
                                              field_name)
        self.assertEqual(
            offsets_initializer.getClass().getCanonicalName(),
            'org.apache.flink.connector.kafka.source.enumerator.initializer'
            '.TimestampOffsetsInitializer')

        starting_timestamp = get_field_value(offsets_initializer,
                                             'startingTimestamp')
        self.assertEqual(starting_timestamp, timestamp)
Example 10
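Confirms that set_topics with several names installs a TopicListSubscriber holding exactly those topics, in order.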
    def test_set_topics(self):
        source = KafkaSource.builder() \
            .set_bootstrap_servers('localhost:9092') \
            .set_topics('test_topic1', 'test_topic2') \
            .set_value_only_deserializer(SimpleStringSchema()) \
            .build()
        kafka_subscriber = get_field_value(source.get_java_function(),
                                           'subscriber')
        self.assertEqual(
            kafka_subscriber.getClass().getCanonicalName(),
            'org.apache.flink.connector.kafka.source.enumerator.subscriber.TopicListSubscriber'
        )
        topics = get_field_value(kafka_subscriber, 'topics')
        self.assertTrue(
            is_instance_of(topics,
                           get_gateway().jvm.java.util.List))
        self.assertEqual(topics.size(), 2)
        self.assertEqual(topics[0], 'test_topic1')
        self.assertEqual(topics[1], 'test_topic2')
Example 11
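Confirms that set_partitions installs a PartitionSetSubscriber whose subscribedPartitions set contains the Java counterparts of both partitions.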
    def test_set_partitions(self):
        topic_partition_1 = KafkaTopicPartition('test_topic', 1)
        topic_partition_2 = KafkaTopicPartition('test_topic', 2)
        source = KafkaSource.builder() \
            .set_bootstrap_servers('localhost:9092') \
            .set_partitions({topic_partition_1, topic_partition_2}) \
            .set_value_only_deserializer(SimpleStringSchema()) \
            .build()
        kafka_subscriber = get_field_value(source.get_java_function(),
                                           'subscriber')
        self.assertEqual(
            kafka_subscriber.getClass().getCanonicalName(),
            'org.apache.flink.connector.kafka.source.enumerator.subscriber.PartitionSetSubscriber'
        )
        partitions = get_field_value(kafka_subscriber, 'subscribedPartitions')
        self.assertTrue(
            is_instance_of(partitions,
                           get_gateway().jvm.java.util.Set))
        self.assertTrue(
            topic_partition_1._to_j_topic_partition() in partitions)
        self.assertTrue(
            topic_partition_2._to_j_topic_partition() in partitions)
Example 12
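Asserts that a source built in unbounded mode (see Example 3) reports CONTINUOUS_UNBOUNDED boundedness.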
    def _check_unbounded(self, source: KafkaSource):
        self.assertEqual(
            get_field_value(source.get_java_function(),
                            'boundedness').toString(),
            'CONTINUOUS_UNBOUNDED')
Example 13
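The bounded counterpart of Example 12: a source configured with a stopping initializer via set_bounded should report BOUNDED boundedness.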
    def _check_bounded(self, source: KafkaSource):
        self.assertEqual(
            get_field_value(source.get_java_function(),
                            'boundedness').toString(), 'BOUNDED')