def main(ns):
    """
    A generic plugin that uses Spark to: read data, transform data with
    given code, and maybe write transformed data.

    Assume code is written in Python.  For Scala or R code, use another option.

    `ns` is an argparse-style namespace carrying at least `app_name`,
    `job_id`, `spark_conf` (dict of extra Spark conf) and `spark_env`
    (dict of extra OS environment variables).
    """
    job_id = ns.job_id
    module = get_pymodule(ns.app_name)
    pjob_id = api.parse_job_id(ns.app_name, job_id)
    log_details = dict(
        module_name=module.__name__, app_name=ns.app_name, job_id=job_id)
    conf, osenv, files, pyFiles = pyspark_context.get_spark_conf(ns.app_name)
    # Make the Spark application name unique per job run so concurrent
    # runs of the same app are distinguishable in the Spark UI.
    conf['spark.app.name'] = "%s__%s" % (conf['spark.app.name'], job_id)
    conf.update(ns.spark_conf)
    osenv.update(ns.spark_env)
    sc = pyspark_context.get_spark_context(
        conf=conf, osenv=osenv, files=files, pyFiles=pyFiles)
    # FIX: stop the SparkContext even when the transform raises.
    # Previously a failure in apply_data_transform left the context (and
    # its cluster resources) running.
    try:
        apply_data_transform(
            ns=ns, sc=sc, log_details=log_details, pjob_id=pjob_id,
            module=module)
    finally:
        sc.stop()
def main(ns):
    """
    A generic plugin that schedules arbitrary bash jobs using Stolos.

    Builds a shell command from the app's configured default
    (``get_bash_cmd``) plus any user-supplied tokens (``ns.bash_cmd``),
    ``str.format``-s it with the argparse namespace and the parsed
    job_id components, then runs it through a shell with a timeout of
    ``ns.watch`` seconds.

    Raises:
        UserWarning: when no command is configured or supplied.
        (via ``log_and_raise``): when the command times out or exits
            non-zero.
    """
    job_id = ns.job_id
    ld = dict(app_name=ns.app_name, job_id=ns.job_id)
    log.info('Running bash job', extra=ld)
    cmd = get_bash_cmd(ns.app_name)
    if ns.bash_cmd:
        # FIX: separate the configured default from the user-supplied
        # options with a space; previously the first user token was
        # fused directly onto the end of the default command.
        user_opts = ' '.join(ns.bash_cmd)
        cmd = '%s %s' % (cmd, user_opts) if cmd else user_opts
        log.debug("Appending user-supplied bash options to defaults",
                  extra=dict(app_name=ns.app_name, job_id=job_id, cmd=cmd))
    ld.update(cmd=cmd)
    if not cmd:
        raise UserWarning(
            "You need to specify bash options or configure default bash"
            " options")
    # Expose both the CLI namespace and the parsed job_id components as
    # format variables, e.g. "echo {app_name} {date}".
    _cmdargs = dict(**ns.__dict__)
    _cmdargs.update(api.parse_job_id(ns.app_name, job_id))
    cmd = cmd.format(**_cmdargs)
    if ns.redirect_to_stderr:
        _std = sys.stderr
    else:
        _std = PIPE
    log.info('running command', extra=ld)
    returncode, stdout, stderr = run(cmd, shell=True, timeout=ns.watch,
                                     stdout=_std, stderr=_std)
    ld = dict(bash_returncode=returncode, stdout=stdout, stderr=stderr, **ld)
    if returncode == -9:
        # -9 == process killed by SIGKILL, which `run(..., timeout=...)`
        # uses to enforce the watch timeout.
        log_and_raise("Bash job timed out", ld)
    elif returncode != 0:
        # this raises an error and logs output:
        log_and_raise("Bash job failed", ld)
    else:
        log.info("Bash job succeeded", extra=ld)
def main(ns):
    """
    A generic plugin that schedules arbitrary bash jobs using Stolos.

    The command is assembled from the app's configured default bash
    command and any extra options the caller passed in ``ns.bash_cmd``,
    templated via ``str.format`` with the argparse namespace plus the
    parsed job_id components, and executed through a shell.  The run is
    bounded by ``ns.watch`` seconds.

    Raises:
        UserWarning: when no command is configured or supplied.
        (via ``log_and_raise``): on timeout or non-zero exit.
    """
    job_id = ns.job_id
    ld = dict(app_name=ns.app_name, job_id=ns.job_id)
    log.info('Running bash job', extra=ld)
    cmd = get_bash_cmd(ns.app_name)
    if ns.bash_cmd:
        # FIX: join the default command and the user-supplied options
        # with a space; previously the first user token was concatenated
        # directly onto the default command with no separator.
        extra_opts = ' '.join(ns.bash_cmd)
        cmd = ('%s %s' % (cmd, extra_opts)) if cmd else extra_opts
        log.debug(
            "Appending user-supplied bash options to defaults", extra=dict(
                app_name=ns.app_name, job_id=job_id, cmd=cmd))
    ld.update(cmd=cmd)
    if not cmd:
        raise UserWarning(
            "You need to specify bash options or configure default bash"
            " options")
    # Both the CLI namespace and the parsed job_id components are valid
    # template variables in the command string.
    _cmdargs = dict(**ns.__dict__)
    _cmdargs.update(api.parse_job_id(ns.app_name, job_id))
    cmd = cmd.format(**_cmdargs)
    if ns.redirect_to_stderr:
        _std = sys.stderr
    else:
        _std = PIPE
    log.info('running command', extra=ld)
    returncode, stdout, stderr = run(
        cmd, shell=True, timeout=ns.watch, stdout=_std, stderr=_std)
    ld = dict(bash_returncode=returncode, stdout=stdout, stderr=stderr, **ld)
    if returncode == -9:
        # -9 means the process was killed by SIGKILL, the signal `run`
        # uses to enforce the timeout.
        log_and_raise("Bash job timed out", ld)
    elif returncode != 0:
        # this raises an error and logs output:
        log_and_raise("Bash job failed", ld)
    else:
        log.info("Bash job succeeded", extra=ld)