def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True, security_protocol='PLAINTEXT'):
    """
    Validates basic end-to-end functionality of Connect standalone using the file source and sink converters.
    Includes parameterizations to test different converters (which also test per-connector converter overrides),
    schema/schemaless modes, and security support.
    """
    assert converter is not None, "converter type must be set"
    # Template parameters. Note that we don't set key/value.converter. These default to JsonConverter and we validate
    # converter overrides via the connector configuration.
    if converter != "org.apache.kafka.connect.json.JsonConverter":
        self.override_key_converter = converter
        self.override_value_converter = converter
    self.schemas = schemas

    self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk,
                              security_protocol=security_protocol, interbroker_security_protocol=security_protocol,
                              topics=self.topics)

    self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
    self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
    self.consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC_TEST,
                                              consumer_timeout_ms=10000)

    self.zk.start()
    self.kafka.start()

    self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node),
                            [self.render("connect-file-source.properties")])
    self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node),
                          [self.render("connect-file-sink.properties")])

    self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))
    self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))

    self.source.start()
    self.sink.start()

    # Generating data on the source node should generate new records and create new output on the sink node
    self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) + " >> " + self.INPUT_FILE)
    wait_until(lambda: self.validate_output(self.FIRST_INPUT), timeout_sec=60,
               err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.")

    # Restarting both should result in them picking up where they left off, only processing new data.
    self.source.restart()
    self.sink.restart()

    self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) + " >> " + self.INPUT_FILE)
    wait_until(lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT), timeout_sec=60,
               err_msg="Sink output file never converged to the same state as the input file")

    # Validate the format of the data in the Kafka topic
    self.consumer_validator.run()
    expected = json.dumps([line if not self.schemas else {"schema": self.SCHEMA, "payload": line}
                           for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST])
    decoder = (json.loads if converter.endswith("JsonConverter") else str)
    actual = json.dumps([decoder(x) for x in self.consumer_validator.messages_consumed[1]])
    assert expected == actual, "Expected %s but saw %s in Kafka" % (expected, actual)
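# Both file tests in this class poll a validate_output() helper that is not shown in this
# section. A minimal sketch of what it could look like, assuming the sink node is reachable
# through ducktape's ssh_capture and that the helper simply compares the output file's raw
# contents to the expected value (the real helper may differ, e.g. by comparing hashes):
def validate_output(self, value):
    try:
        # ssh_capture yields the command's output line by line, newlines included
        output = "".join(self.sink.node.account.ssh_capture("cat " + self.OUTPUT_FILE))
        return output == value
    except Exception:
        # The output file may not exist yet; treat that as "not converged"
        return False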
def __init__(self, test_context):
    super(ConnectStandaloneFileTest, self).__init__(test_context, num_zk=1, num_brokers=1, topics={
        'test': {'partitions': 1, 'replication-factor': 1}
    })

    self.source = ConnectStandaloneService(test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
    self.sink = ConnectStandaloneService(test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])
    self.consumer_validator = ConsoleConsumer(test_context, 1, self.kafka, self.TOPIC,
                                              consumer_timeout_ms=1000)
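# __init__ and the tests above reference several class-level constants that are not shown
# in this section (INPUT_FILE, OUTPUT_FILE, OFFSETS_FILE, TOPIC, FIRST_INPUT, SECOND_INPUT,
# SCHEMA, ...). Illustrative definitions only, assuming the file connectors read and write
# under /mnt and the source data is newline-delimited strings; actual paths and values may
# differ:
TOPIC = "test"
INPUT_FILE = "/mnt/connect.input"
OUTPUT_FILE = "/mnt/connect.output"
OFFSETS_FILE = "/mnt/connect.offsets"

FIRST_INPUT_LIST = ["foo", "bar", "baz"]
FIRST_INPUT = "\n".join(FIRST_INPUT_LIST) + "\n"
SECOND_INPUT_LIST = ["razz", "ma", "tazz"]
SECOND_INPUT = "\n".join(SECOND_INPUT_LIST) + "\n"

# With schemas enabled, JsonConverter wraps each record in an envelope of the form
# {"schema": SCHEMA, "payload": <line>}, which is what the expected/actual comparison
# in test_file_source_and_sink reconstructs
SCHEMA = {"type": "string", "optional": False}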
def test_skip_and_log_to_dlq(self, error_tolerance):
    self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, topics=self.topics)

    # Set config props
    self.override_error_tolerance_props = error_tolerance
    self.enable_deadletterqueue = True

    successful_records = []
    faulty_records = []
    records = []
    for i in range(0, 1000):
        if i % 2 == 0:
            records.append('{"some_key":' + str(i) + '}')
            successful_records.append('{some_key=' + str(i) + '}')
        else:
            # Badly formatted json records (missing a quote after the key)
            records.append('{"some_key:' + str(i) + '}')
            faulty_records.append('{"some_key:' + str(i) + '}')

    records = "\n".join(records) + "\n"
    successful_records = "\n".join(successful_records) + "\n"
    if error_tolerance == ErrorTolerance.ALL:
        faulty_records = ",".join(faulty_records)
    else:
        faulty_records = faulty_records[0]

    self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE])
    self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE])

    self.zk.start()
    self.kafka.start()

    self.override_key_converter = "org.apache.kafka.connect.storage.StringConverter"
    self.override_value_converter = "org.apache.kafka.connect.storage.StringConverter"
    self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node),
                            [self.render("connect-file-source.properties")])

    self.override_key_converter = "org.apache.kafka.connect.json.JsonConverter"
    self.override_value_converter = "org.apache.kafka.connect.json.JsonConverter"
    self.override_key_converter_schemas_enable = False
    self.override_value_converter_schemas_enable = False
    self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node),
                          [self.render("connect-file-sink.properties")])

    self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))
    self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node))

    self.source.start()
    self.sink.start()

    # Generating data on the source node should generate new records and create new output on the sink node
    self.source.node.account.ssh("echo -e -n " + repr(records) + " >> " + self.INPUT_FILE)

    if error_tolerance == ErrorTolerance.NONE:
        # With errors.tolerance=none the task should fail on the first bad record, so this
        # wait is expected to time out rather than see any output
        try:
            wait_until(lambda: self.validate_output(successful_records), timeout_sec=15,
                       err_msg="Clean records added to input file were not seen in the output file in a reasonable amount of time.")
            raise Exception("Expected to not find any results in this file.")
        except TimeoutError:
            self.logger.info("Caught expected exception")
    else:
        wait_until(lambda: self.validate_output(successful_records), timeout_sec=15,
                   err_msg="Clean records added to input file were not seen in the output file in a reasonable amount of time.")

    if self.enable_deadletterqueue:
        self.logger.info("Reading records from deadletterqueue")
        consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, "my-connector-errors",
                                             consumer_timeout_ms=10000)
        consumer_validator.run()
        actual = ",".join(consumer_validator.messages_consumed[1])
        assert faulty_records == actual, "Expected %s but saw %s in dead letter queue" % (faulty_records, actual)
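# The ErrorTolerance values used above mirror the allowed settings of Connect's
# errors.tolerance config ("all" skips bad records, "none" fails the task on the first
# one). A plausible definition, assuming the constants are plain strings (the actual
# class may live in a shared module):
class ErrorTolerance(object):
    ALL = "all"
    NONE = "none"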