def get_spark_conf(app_name):
    """Query Stolos's dag graph for all information necessary to create a
    pyspark.SparkContext.

    Looks up the app's configuration in the tasks graph and returns a
    4-tuple of:
      conf    -- dict of spark configuration keys, with 'spark.app.name'
                 forced to `app_name`
      osenv   -- dict of environment variables: values copied from
                 os.environ for keys listed in 'env_from_os', overridden
                 by the explicit 'env' mapping from the app config
      files   -- list of files to ship to executors (currently always [])
      pyFiles -- list of uris ('uris' key) to ship as python files

    Raises KeyError if `app_name` is not in the tasks config, or if an
    'env_from_os' key is missing from os.environ.
    """
    dg = api.get_tasks_config()
    _conf = dg[app_name].get('spark_conf', {})
    validate_spark_conf(app_name, _conf)
    # shallow-copy so we never mutate the shared tasks-config mapping
    conf = dict(_conf)
    conf['spark.app.name'] = app_name

    # env vars pulled from the current process environment ...
    osenv = {k: os.environ[k] for k in dg[app_name].get('env_from_os', [])}
    # ... overridden by explicitly configured values
    _env = dg[app_name].get('env', {})
    validate_env(app_name, _env)
    osenv.update(_env)

    pyFiles = dg[app_name].get('uris', [])
    validate_uris(app_name, pyFiles)

    files = []  # for now, we're ignoring files.
    return conf, osenv, files, pyFiles
def get_bash_cmd(app_name):
    """Lookup the bash command-line options for a bash task.

    If they don't exist, return empty string.

    Logs an error (but still proceeds) when the app's job_type is not
    'bash'.  Raises via log_and_raise when 'bash_cmd' is present but is
    not a string.
    """
    dg = api.get_tasks_config()
    meta = dg[app_name]
    job_type = meta.get('job_type', 'bash')
    # NOTE: the original used `assert` for this check, which is stripped
    # under `python -O` and would silently skip the error log.  A plain
    # `if` keeps the same log-and-continue behavior unconditionally.
    if job_type != 'bash':
        log.error(
            "App is not a bash job", extra=dict(
                app_name=app_name, job_type=job_type))
    rv = meta.get('bash_cmd', '')
    if not isinstance(rv, six.string_types):
        log_and_raise(
            "App config for bash plugin is misconfigured:"
            " bash_cmd is not a string", dict(app_name=app_name))
    return rv
def get_pymodule(app_name):
    """Import and return the python module configured for `app_name`.

    Reads the 'pymodule' key from the app's entry in the tasks config.
    """
    tasks_config = api.get_tasks_config()
    return importlib.import_module(tasks_config[app_name]['pymodule'])