def test_terasort():
    if utils.hdfs_enabled():
        _delete_hdfs_terasort_files()
        _run_teragen()
        _run_terasort()
        _run_teravalidate()
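# The terasort helpers above (_delete_hdfs_terasort_files, _run_teragen,
# _run_terasort, _run_teravalidate) are defined elsewhere in this module.
# As an illustration only, a minimal teragen step could look like the sketch
# below, assuming a hypothetical TERASORT_JAR constant, a hypothetical HDFS
# input path, and an assumed success marker in the driver output:
#
# def _run_teragen():
#     driver_id = utils.submit_job(
#         app_url=TERASORT_JAR,  # hypothetical: URL of a spark-terasort jar
#         app_args="1g hdfs:///terasort_in",  # hypothetical: data size and HDFS path
#         service_name=utils.SPARK_SERVICE_NAME,
#         args=["--class", "com.github.ehiggs.spark.terasort.TeraGen"])
#     utils.check_job_output(driver_id, "Number of records written")  # assumed marker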
def feed_sample_data(jar_uri, kafka_brokers, topic, common_args, messages):
    producer_args = ["--class", "KerberizedKafkaProducer"] + common_args
    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args="{} {} {} {}".format("kafka", kafka_brokers, topic, ' '.join(messages)),
                                   service_name=utils.SPARK_SERVICE_NAME,
                                   args=producer_args)

    # Validate the producer output.
    utils.check_job_output(producer_id, "{} messages sent to Kafka".format(len(messages)))


@sdk_utils.dcos_ee_only
@pytest.mark.skipif(not utils.hdfs_enabled(), reason='HDFS_ENABLED is false')
@pytest.mark.sanity
def test_structured_streaming_recovery(kerberized_spark, kerberized_kafka):
    kafka_brokers = ','.join(
        sdk_cmd.svc_cli(KAFKA_PACKAGE_NAME, KAFKA_SERVICE_NAME, 'endpoints broker', json=True)['dns'])
    LOGGER.info("Kafka brokers: {}".format(kafka_brokers))

    _uri = upload_jaas()
    uris = "spark.mesos.uris={}".format(_uri)

    jar_uri = utils.upload_dcos_test_jar()

    kafka_kerberos_args = get_kerberized_kafka_spark_conf(
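# A usage sketch for feed_sample_data, assuming a hypothetical topic name and
# message list; the kafka_kerberos_args being built above would be passed
# through as common_args:
#
#     feed_sample_data(jar_uri=jar_uri,
#                      kafka_brokers=kafka_brokers,
#                      topic="structured-streaming-topic",  # hypothetical topic
#                      common_args=kafka_kerberos_args,
#                      messages=["1", "2", "3"])  # hypothetical payload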