Example #1
def main(ns):
    """
    A generic plugin that uses Spark to:
        read data,
        transform data with given code,
        and maybe write transformed data

    Assume code is written in Python.  For Scala or R code, use another option.
    """
    job_id = ns.job_id
    module = get_pymodule(ns.app_name)

    pjob_id = api.parse_job_id(ns.app_name, job_id)
    log_details = dict(
        module_name=module.__name__,
        app_name=ns.app_name, job_id=job_id)

    conf, osenv, files, pyFiles = pyspark_context.get_spark_conf(ns.app_name)
    # make the Spark application name unique to this job
    conf['spark.app.name'] = "%s__%s" % (conf['spark.app.name'], job_id)
    # user-supplied overrides take precedence over the app's defaults
    conf.update(ns.spark_conf)
    osenv.update(ns.spark_env)
    sc = pyspark_context.get_spark_context(
        conf=conf, osenv=osenv, files=files, pyFiles=pyFiles)
    apply_data_transform(
        ns=ns, sc=sc, log_details=log_details, pjob_id=pjob_id, module=module)
    sc.stop()
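
A minimal invocation sketch (an assumption, not part of the plugin itself):
build an argparse-style namespace carrying the attributes that main() reads,
then call it.  The app name, job_id, and override values below are
hypothetical.

from argparse import Namespace

ns = Namespace(
    app_name='my_spark_app',                     # hypothetical app name
    job_id='20140601_123_profile',               # hypothetical job_id
    spark_conf={'spark.executor.memory': '4g'},  # extra Spark conf overrides
    spark_env={'PYSPARK_PYTHON': 'python'},      # extra env for executors
)
main(ns)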
Example #2
def main(ns):
    """
    A generic plugin that schedules arbitrary bash jobs using Stolos

    Assume code is written in Python.  For Scala or R code, use another option.
    """
    job_id = ns.job_id
    ld = dict(app_name=ns.app_name, job_id=ns.job_id)
    log.info('Running bash job', extra=ld)
    cmd = get_bash_cmd(ns.app_name)
    if ns.bash_cmd:
        # separate the user-supplied options from the default command with a
        # space so the tokens don't run together
        cmd += ' ' + ' '.join(ns.bash_cmd)
        log.debug("Appending user-supplied bash options to defaults",
                  extra=dict(app_name=ns.app_name, job_id=job_id, cmd=cmd))
    ld.update(cmd=cmd)
    if not cmd:
        raise UserWarning(
            "You need to specify bash options or configure default bash"
            " options")

    # fill any str.format placeholders in the command template (e.g.
    # {app_name} or components of the parsed job_id)
    _cmdargs = dict(**ns.__dict__)
    _cmdargs.update(api.parse_job_id(ns.app_name, job_id))
    cmd = cmd.format(**_cmdargs)

    # either stream the child's output straight to our stderr, or capture it
    # through pipes so it can be logged below
    if ns.redirect_to_stderr:
        _std = sys.stderr
    else:
        _std = PIPE

    log.info('Running command', extra=ld)
    returncode, stdout, stderr = run(cmd,
                                     shell=True,
                                     timeout=ns.watch,
                                     stdout=_std,
                                     stderr=_std)
    ld = dict(bash_returncode=returncode, stdout=stdout, stderr=stderr, **ld)
    if returncode == -9:  # -9 == -signal.SIGKILL: the child was killed (timeout)
        log_and_raise("Bash job timed out", ld)
    elif returncode != 0:
        # this raises an error and logs output:
        log_and_raise("Bash job failed", ld)
    else:
        log.info("Bash job succeeded", extra=ld)