Example #1
import os

from stolos import api  # assumed import path; the validate_* helpers live in the same plugin module


def get_spark_conf(app_name):
    """Query Stolos's dag graph for all information necessary to
    create a pyspark.SparkContext"""
    dg = api.get_tasks_config()
    # Spark configuration options declared for this app, if any.
    _conf = dg[app_name].get('spark_conf', {})
    validate_spark_conf(app_name, _conf)
    conf = dict(**_conf)
    conf['spark.app.name'] = app_name
    # Environment variables: values copied from the OS environment first,
    # then explicit key/value pairs from the config, which take precedence.
    osenv = {k: os.environ[k] for k in dg[app_name].get('env_from_os', [])}
    _env = dg[app_name].get('env', {})
    validate_env(app_name, _env)
    osenv.update(_env)
    # URIs of Python files to ship to the Spark executors.
    pyFiles = dg[app_name].get('uris', [])
    validate_uris(app_name, pyFiles)
    files = []  # for now, we're ignoring files.
    return conf, osenv, files, pyFiles
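
A minimal usage sketch, assuming pyspark is installed and that 'myapp.spark_job' is a task defined in the Stolos config; the app name and the wiring below are illustrative, not part of the plugin:

import pyspark

# Hypothetical usage: build a SparkContext from the app's Stolos config.
conf, osenv, files, pyFiles = get_spark_conf('myapp.spark_job')
spark_conf = pyspark.SparkConf()
spark_conf.setAll(conf.items())  # includes the spark.app.name set above
sc = pyspark.SparkContext(
    conf=spark_conf, environment=osenv, pyFiles=pyFiles)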
Example #2
import six

# `api`, `log`, and `log_and_raise` are module-level helpers from the
# surrounding Stolos plugin, as in the other examples.


def get_bash_cmd(app_name):
    """Look up the bash command-line options for a bash task.
    If they don't exist, return an empty string."""
    dg = api.get_tasks_config()
    meta = dg[app_name]
    job_type = meta.get('job_type', 'bash')
    if job_type != 'bash':
        # Log the misconfiguration but fall through and return whatever
        # value is present, matching the original behavior.
        log.error(
            "App is not a bash job", extra=dict(
                app_name=app_name, job_type=job_type))
    rv = meta.get('bash_cmd', '')
    if not isinstance(rv, six.string_types):
        log_and_raise(
            "App config for bash plugin is misconfigured:"
            " bash_cmd is not a string", dict(app_name=app_name))
    return rv
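
A short, hypothetical usage sketch: run the configured command in a shell with the standard-library subprocess module (the app name is made up for illustration):

import subprocess

# Hypothetical usage: fetch the configured command and run it in a shell.
cmd = get_bash_cmd('myapp.cleanup_task')
if cmd:  # an empty string means no bash_cmd was configured
    subprocess.check_call(cmd, shell=True)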
Example #3
import importlib


def get_pymodule(app_name):
    """Import and return the python module configured for the given app."""
    dg = api.get_tasks_config()
    module_name = dg[app_name]['pymodule']
    return importlib.import_module(module_name)
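
A hypothetical usage sketch, assuming the configured module exposes a main() entry point; that contract is an assumption here, not something the snippet guarantees:

# Hypothetical usage: import the app's module and invoke its entry point.
module = get_pymodule('myapp.python_task')
module.main()  # assumes the module defines main(); adjust to the real contract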