# Example 1
def test_coarse_mode(options_proxy):
    """Coarse-grained Mesos mode is off by default; ``coarse=N`` turns it
    on and caps the core count at N."""
    default_opts = spark(options_proxy)()
    assert default_opts.get('spark.mesos.coarse') == 'False'

    coarse_opts = spark(options_proxy, coarse=10)()
    assert coarse_opts.get('spark.mesos.coarse') == 'True'
    assert coarse_opts.get('spark.cores.max') == '10'
# Example 2
def test_custom_options(options_proxy):
    """Underscore-separated keyword args map to dotted Spark option keys."""
    extra = dict(driver_maxResultSize='2g', shuffle_compress=False)
    opts = spark(options_proxy, **extra)()
    assert opts['spark.driver.maxResultSize'] == '2g'
    assert opts['spark.shuffle.compress'] == 'False'
# Example 3
def test_executor_envs(options_proxy):
    """Each entry of ``envs`` surfaces as a ``spark.executorEnv.*`` option."""
    env_vars = {'TEST_VARIABLE': 'test value', 'TEST_ENV_VAR': 'test env value'}
    opts = spark(options_proxy, envs=env_vars)()
    for name in env_vars:
        assert opts.get('spark.executorEnv.{}'.format(name)) == env_vars[name]
# Example 4
def test_docker_executor(options_proxy):
    """Executor Docker image defaults to lensa/epos and is overridable via
    the ``docker`` keyword."""
    image_key = 'spark.mesos.executor.docker.image'
    assert spark(options_proxy)().get(image_key) == 'lensa/epos'
    assert spark(options_proxy, docker='testimage')().get(image_key) == 'testimage'
# Example 5
def test_app_name(options_proxy):
    """App name defaults to test_job; the ``name`` keyword overrides it."""
    name_key = 'spark.app.name'
    assert spark(options_proxy)().get(name_key) == 'test_job'

    assert spark(options_proxy, name='test_name')().get(name_key) == 'test_name'
# Example 6
def test_argument_injection():
    """The decorator injects a SparkContext and a SQLContext as the
    wrapped callable's leading arguments."""
    ctx, sql_ctx = spark(lambda sc, sql: (sc, sql))()

    assert isinstance(ctx, SparkContext)
    assert isinstance(sql_ctx, SQLContext)
# Example 7
def curried_sum():
    """Build and return a spark-wrapped job that sums a parallelized list.

    The returned callable still expects ``lst`` to be supplied; the
    contexts are injected by the ``spark`` decorator.
    """
    def job(sc, sql, lst):
        # Distribute the list and reduce it back to a single sum.
        return sc.parallelize(lst).sum()

    return spark(job)