Example #1
def test_terasort():
    # Run the full TeraSort pipeline only when HDFS is available in the
    # cluster: clear leftover output, then generate, sort, and validate.
    if utils.hdfs_enabled():
        _delete_hdfs_terasort_files()
        _run_teragen()
        _run_terasort()
        _run_teravalidate()
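
The four helpers above are defined elsewhere in the test module. As a minimal sketch of the pattern, assuming they wrap the same utils.submit_job / utils.check_job_output API shown in Example #2, one of them might look like this; the jar URL, class name, and data size are illustrative assumptions, not values from the original suite:

def _run_teragen():
    # Hypothetical helper sketch: the jar location, TeraGen class, and
    # sizes below are assumptions for illustration only.
    jar_url = "https://example.com/spark-terasort.jar"
    driver_id = utils.submit_job(
        app_url=jar_url,
        app_args="1g hdfs:///terasort_in",  # rows to generate, HDFS output dir
        service_name=utils.SPARK_SERVICE_NAME,
        args=["--class", "com.github.ehiggs.spark.terasort.TeraGen"])
    # TeraGen logs how many records it wrote; treat that as the success signal.
    utils.check_job_output(driver_id, "Number of records written")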
Example #2
def feed_sample_data(jar_uri, kafka_brokers, topic, common_args, messages):
    # Submit a Spark job that runs KerberizedKafkaProducer to publish the
    # sample messages to the given Kafka topic.
    producer_args = ["--class", "KerberizedKafkaProducer"] + common_args
    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args="{} {} {} {}".format(
                                       "kafka", kafka_brokers, topic,
                                       ' '.join(messages)),
                                   service_name=utils.SPARK_SERVICE_NAME,
                                   args=producer_args)

    # Verify the producer logged the expected number of delivered messages.
    utils.check_job_output(producer_id,
                           "{} messages sent to Kafka".format(len(messages)))


@sdk_utils.dcos_ee_only
@pytest.mark.skipif(not utils.hdfs_enabled(), reason='HDFS_ENABLED is false')
@pytest.mark.sanity
def test_structured_streaming_recovery(kerberized_spark, kerberized_kafka):
    # Resolve the Kafka broker addresses from the Kafka service's CLI.
    kafka_brokers = ','.join(
        sdk_cmd.svc_cli(KAFKA_PACKAGE_NAME,
                        KAFKA_SERVICE_NAME,
                        'endpoints broker',
                        json=True)['dns'])
    LOGGER.info("Kafka brokers: {}".format(kafka_brokers))

    # Upload the JAAS config and hand it to executors via spark.mesos.uris.
    _uri = upload_jaas()
    uris = "spark.mesos.uris={}".format(_uri)

    jar_uri = utils.upload_dcos_test_jar()

    kafka_kerberos_args = get_kerberized_kafka_spark_conf(