Example #1
    @staticmethod
    def from_d(d):

        # fixme
        from pbsmrtpipe.cluster import ClusterTemplateRender, ClusterTemplate

        if d['cluster']:
            templates = [ClusterTemplate(k, v) for k, v in d['cluster'].items()]
            c = ClusterTemplateRender(templates)
        else:
            c = None

        task = Task.from_d(d['task'])
        return RunnableTask(task, c, d['env'])
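A minimal sketch of the dict this factory consumes, based only on the keys read above: a cluster mapping of template name to template string (or a falsy value for local-only runs), a nested task dict handed to Task.from_d, and an env entry. The template bodies and the task payload below are illustrative assumptions, not the project's canonical values.

# hypothetical input dict; template bodies and task_d are placeholders
task_d = {}   # whatever Task.from_d expects (not shown in this example)
d = {
    'cluster': {
        'interactive': 'qsub -sync y -N ${JOB_ID} ${CMD}',   # assumed placeholder names
        'kill': 'qdel ${JOB_ID}',
    },
    'task': task_d,
    'env': {},
}
runnable_task = RunnableTask.from_d(d)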
Example #2
def run_task_on_cluster(runnable_task, task_manifest_path, output_dir,
                        debug_mode):
    """

    :param runnable_task:
    :param output_dir:
    :param debug_mode:
    :return:

    :type runnable_task: RunnableTask
    """
    def _to_p(x_):
        return os.path.join(output_dir, x_)

    stdout_ = _to_p('stdout')
    stderr_ = _to_p('stderr')

    if runnable_task.task.is_distributed is False:
        return run_task(runnable_task, output_dir, stdout_, stderr_,
                        debug_mode)

    if runnable_task.cluster is None:
        log.warn("No cluster provided. Running task locally.")
        return run_task(runnable_task, output_dir, stdout_, stderr_,
                        debug_mode)

    env_json = os.path.join(output_dir, 'env.json')
    IO.write_env_to_json(env_json)

    # sloppy API
    if isinstance(runnable_task.cluster, ClusterTemplateRender):
        render = runnable_task.cluster
    else:
        ctmpls = [
            ClusterTemplate(name, tmpl)
            for name, tmpl in runnable_task.cluster.items()
        ]
        render = ClusterTemplateRender(ctmpls)

    job_id = to_random_job_id(runnable_task.task.task_id)
    log.debug("Using job id {i}".format(i=job_id))

    qstdout = _to_p('cluster.stdout')
    qstderr = _to_p('cluster.stderr')
    qshell = _to_p('cluster.sh')

    rcmd_shell = _to_p('run.sh')

    # Task Manifest Runner output
    stdout = _to_p('stdout')
    stderr = _to_p('stderr')
    mstdout = _to_p('mstdout')
    mstderr = _to_p('mstderr')

    with open(qstdout, 'w+') as f:
        f.write("Creating cluster stdout for Job {i} {r}\n".format(
            i=job_id, r=runnable_task))

    debug_str = " --debug " if debug_mode else " "
    _d = dict(t=task_manifest_path,
              o=stdout,
              e=stderr,
              d=debug_str,
              m=mstdout,
              n=mstderr)
    cmd = "pbtools-runner run {d} --task-stderr=\"{e}\" --task-stdout=\"{o}\" \"{t}\" > \"{m}\" 2> \"{n}\"".format(
        **_d)

    with open(rcmd_shell, 'w+') as x:
        x.write(cmd + "\n")

    # Make +x
    os.chmod(rcmd_shell, os.stat(rcmd_shell).st_mode | stat.S_IEXEC)

    cluster_cmd = render.render('interactive', rcmd_shell, job_id, qstdout,
                                qstderr, runnable_task.task.nproc)
    log.debug(cluster_cmd)

    with open(qshell, 'w') as f:
        f.write("#!/bin/bash\n")
        f.write(cluster_cmd + "\n")

    os.chmod(qshell, os.stat(qshell).st_mode | stat.S_IEXEC)

    # host = socket.getfqdn()
    host = platform.node()

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    rcode, cstdout, cstderr, run_time = backticks("bash {q}".format(q=qshell))

    if rcode == 0:
        err_msg = ""
        warn_msg = ""
    else:
        # not sure how to scrape this from the stderr/stdout
        err_msg = "task {i} failed".format(i=runnable_task.task.task_id)
        warn_msg = ""

    msg_ = "Completed running cluster command in {t:.2f} sec. Exit code {r}".format(
        r=rcode, t=run_time)
    log.info(msg_)

    with open(qstdout, 'a') as qf:
        qf.write("\n".join(cstdout) + "\n")
        qf.write(msg_ + "\n")

    with open(qstderr, 'a') as f:
        if rcode != 0:
            f.write(str(cstderr) + "\n")
            f.write(msg_ + "\n")

    r = to_task_report(host, runnable_task.task.task_id, run_time, rcode,
                       err_msg, warn_msg)
    task_report_path = os.path.join(output_dir, 'task-report.json')
    msg = "Writing task id {i} task report to {r}".format(
        r=task_report_path, i=runnable_task.task.task_id)
    log.info(msg)
    r.write_json(task_report_path)

    return rcode, run_time
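A hedged sketch of a call site for the function above, assuming the task manifest JSON deserializes into the dict consumed by RunnableTask.from_d (Example #1); the paths are placeholders, and the two-value return matches this version of the function.

import json

# hypothetical paths; the manifest layout is assumed to match RunnableTask.from_d
task_manifest_path = '/path/to/job/tasks/task-01/task-manifest.json'
output_dir = '/path/to/job/tasks/task-01'

with open(task_manifest_path) as f:
    runnable_task = RunnableTask.from_d(json.load(f))

rcode, run_time = run_task_on_cluster(runnable_task, task_manifest_path,
                                      output_dir, debug_mode=True)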
Example #3
def run_task_on_cluster(runnable_task, task_manifest_path, output_dir, debug_mode):
    """

    :param runnable_task:
    :param output_dir:
    :param debug_mode:
    :return:

    :type runnable_task: RunnableTask
    """
    def _to_p(x_):
        return os.path.join(output_dir, x_)

    stdout_ = _to_p('stdout')
    stderr_ = _to_p('stderr')

    if runnable_task.task.is_distributed is False:
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    if runnable_task.cluster is None:
        log.warn("No cluster provided. Running task locally.")
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    os.chdir(runnable_task.task.output_dir)
    env_json = os.path.join(output_dir, '.cluster-env.json')
    IO.write_env_to_json(env_json)

    # sloppy API
    if isinstance(runnable_task.cluster, ClusterTemplateRender):
        render = runnable_task.cluster
    else:
        ctmpls = [ClusterTemplate(name, tmpl) for name, tmpl in runnable_task.cluster.items()]
        render = ClusterTemplateRender(ctmpls)

    job_id = to_random_job_id(runnable_task.task.task_id)
    log.debug("Using job id {i}".format(i=job_id))

    qstdout = _to_p('cluster.stdout')
    qstderr = _to_p('cluster.stderr')
    qshell = _to_p('cluster.sh')

    rcmd_shell = _to_p('run.sh')

    # This needs to be flattened due to the new RTC layer
    # Task Manifest Runner output
    stdout = _to_p('stdout')
    stderr = _to_p('stderr')

    with open(qstdout, 'w+') as f:
        f.write("Creating cluster stdout for Job {i} {r}\n".format(i=job_id, r=runnable_task))

    debug_str = " --debug " if debug_mode else " "
    exe = _resolve_exe("pbtools-runner")
    _d = dict(x=exe,
              t=task_manifest_path,
              o=stdout,
              e=stderr,
              d=debug_str,
              m=stdout,
              n=stderr,
              r=output_dir)

    # the quoting here is explicitly to handle spaces in paths
    cmd = "{x} run {d} --output-dir=\"{r}\" --task-stderr=\"{e}\" --task-stdout=\"{o}\" \"{t}\" > \"{m}\" 2> \"{n}\"".format(**_d)

    # write the pbtools-runner exe command
    with open(rcmd_shell, 'w+') as x:
        x.write(cmd + "\n")

    chmod_x(rcmd_shell)

    cluster_cmd = render.render(ClusterConstants.START, rcmd_shell, job_id, qstdout, qstderr, runnable_task.task.nproc)
    log.info("Job submission command: " + cluster_cmd)

    with open(qshell, 'w') as f:
        f.write("#!/bin/bash\n")
        f.write("set -o errexit\n")
        f.write("set -o pipefail\n")
        f.write("set -o nounset\n")
        f.write(cluster_cmd.rstrip("\n") + " ${1+\"$@\"}\n")
        f.write("exit $?")

    chmod_x(qshell)

    host = platform.node()

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    # print the underlying jms command if using runjmscmd
    if re.search(r'/runjmscmd\b', cluster_cmd):
        rcode, cstdout, cstderr, run_time = backticks("bash {q} --printcmd".format(q=qshell))
        if rcode == 0:
            log.info("Underlying JMS job submission command: " + "\n".join(cstdout))

    # Blocking call
    rcode, cstdout, cstderr, run_time = backticks("bash {q}".format(q=qshell))

    log.info("Cluster command return code {r} in {s:.2f} sec".format(r=rcode, s=run_time))

    msg_t = "{n} Completed running cluster command in {t:.2f} sec. Exit code {r} (task-type {i})"
    msg_ = msg_t.format(r=rcode, t=run_time, i=runnable_task.task.task_type_id, n=datetime.datetime.now())
    log.info(msg_)

    # Append the bash cluster.sh stderr and stdout call to
    # the cluster.stderr and cluster.stdout
    with open(qstdout, 'a') as qf:
        if cstdout:
            qf.write("\n".join(cstdout) + "\n")
        qf.write(msg_ + "\n")

    with open(qstderr, 'a') as f:
        if rcode != 0:
            if cstderr:
                f.write(str(cstderr) + "\n")

    # fundamental output error str message of this func
    err_msg = ""
    warn_msg = ""

    if rcode != 0:
        p_err_msg = "task {i} failed (exit-code {x}) after {r:.2f} sec".format(i=runnable_task.task.task_id, r=run_time, x=rcode)
        raw_stderr = _extract_last_nlines(stderr)
        cluster_raw_stderr = _extract_last_nlines(qstderr)
        err_msg = "\n".join([p_err_msg, raw_stderr, cluster_raw_stderr])
        warn_msg = ""

    # write the result status message to stderr if task failure
    # doing this here to avoid having a double message
    with open(qstderr, 'a') as f:
        if rcode != 0:
            if cstderr:
                f.write(msg_ + "\n")

    r = to_task_report(host, runnable_task.task.task_id, run_time, rcode, err_msg, warn_msg)
    task_report_path = os.path.join(output_dir, 'task-report.json')
    msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
    log.info(msg)
    r.write_json(task_report_path)

    return rcode, err_msg, run_time
Example #4
    def test_cluster_template_bad_template_type(self):
        # an unknown template type should be rejected at construction time
        with self.assertRaises(ValueError) as e:
            ClusterTemplate('BAD TYPE', 'qdel ${JOB_ID}')
        log.error(e.exception)
Example #5
    def test_cluster_template_str(self):
        t = ClusterTemplate('kill', 'qdel ${JOB_ID}')
        self.assertIsNotNone(str(t))
        self.assertIsNotNone(repr(t))
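For context, a hedged sketch of how a ClusterTemplateRender like the ones built in the runner functions turns a template into a submission command. The 'interactive' and 'kill' names and the six-argument render call mirror the examples above; the qsub template body and every placeholder except ${JOB_ID} are assumptions.

# illustrative SGE-style templates; placeholder names other than ${JOB_ID} are assumed
templates = [
    ClusterTemplate('interactive',
                    'qsub -sync y -N ${JOB_ID} -o ${STDOUT_FILE} -e ${STDERR_FILE} '
                    '-pe smp ${NPROC} ${CMD}'),
    ClusterTemplate('kill', 'qdel ${JOB_ID}'),
]
render = ClusterTemplateRender(templates)

# mirrors render.render(ClusterConstants.START, rcmd_shell, job_id, qstdout, qstderr, nproc)
cluster_cmd = render.render('interactive', '/path/to/run.sh', 'job-1234',
                            '/path/to/cluster.stdout', '/path/to/cluster.stderr', 8)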
Example #6
def run_task_on_cluster(runnable_task, task_manifest_path, output_dir, debug_mode):
    """

    :param runnable_task:
    :param output_dir:
    :param debug_mode:
    :return:

    :type runnable_task: RunnableTask
    """
    def _to_p(x_):
        return os.path.join(output_dir, x_)

    stdout_ = _to_p('stdout')
    stderr_ = _to_p('stderr')

    if runnable_task.task.is_distributed is False:
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    if runnable_task.cluster is None:
        log.warn("No cluster provided. Running task locally.")
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    os.chdir(runnable_task.task.output_dir)
    env_json = os.path.join(output_dir, '.cluster-env.json')
    IO.write_env_to_json(env_json)

    # sloppy API
    if isinstance(runnable_task.cluster, ClusterTemplateRender):
        render = runnable_task.cluster
    else:
        ctmpls = [ClusterTemplate(name, tmpl) for name, tmpl in runnable_task.cluster.items()]
        render = ClusterTemplateRender(ctmpls)

    job_id = to_random_job_id(runnable_task.task.task_id)
    log.debug("Using job id {i}".format(i=job_id))

    qstdout = _to_p('cluster.stdout')
    qstderr = _to_p('cluster.stderr')
    qshell = _to_p('cluster.sh')

    rcmd_shell = _to_p('run.sh')

    # This needs to be flattened due to the new RTC layer
    # Task Manifest Runner output
    stdout = _to_p('stdout')
    stderr = _to_p('stderr')
    mstdout = _to_p('mstdout')
    mstderr = _to_p('mstderr')

    with open(qstdout, 'w+') as f:
        f.write("Creating cluster stdout for Job {i} {r}\n".format(i=job_id, r=runnable_task))

    debug_str = " --debug " if debug_mode else " "
    exe = _resolve_exe("pbtools-runner")
    _d = dict(x=exe,
              t=task_manifest_path,
              o=stdout,
              e=stderr,
              d=debug_str,
              m=stdout,
              n=stderr,
              r=output_dir)

    cmd = "{x} run {d} --output-dir=\"{r}\" --task-stderr=\"{e}\" --task-stdout=\"{o}\" \"{t}\" > \"{m}\" 2> \"{n}\"".format(**_d)

    with open(rcmd_shell, 'w+') as x:
        x.write(cmd + "\n")

    # Make +x
    os.chmod(rcmd_shell, os.stat(rcmd_shell).st_mode | stat.S_IEXEC)

    cluster_cmd = render.render(ClusterConstants.START, rcmd_shell, job_id, qstdout, qstderr, runnable_task.task.nproc)
    log.debug(cluster_cmd)

    with open(qshell, 'w') as f:
        f.write("#!/bin/bash\n")
        f.write("set -o errexit\n")
        f.write("set -o pipefail\n")
        f.write("set -o nounset\n")
        f.write(cluster_cmd + "\n")
        f.write("exit $?")

    os.chmod(qshell, os.stat(qshell).st_mode | stat.S_IEXEC)

    # host = socket.getfqdn()
    host = platform.node()

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    rcode, cstdout, cstderr, run_time = backticks("bash {q}".format(q=qshell))

    log.info("Cluster command return code {r} in {s:.2f} sec".format(r=rcode, s=run_time))

    if rcode == 0:
        err_msg = ""
        warn_msg = ""
    else:
        p_err_msg = "task {i} failed (exit-code {x}) after {r:.2f} sec".format(i=runnable_task.task.task_id, r=run_time, x=rcode)
        raw_stderr = _extract_last_nlines(stderr)
        cluster_raw_stderr = _extract_last_nlines(qstderr)
        err_msg = "\n".join([p_err_msg, raw_stderr, cluster_raw_stderr])
        warn_msg = ""

    msg_ = "Completed running cluster command in {t:.2f} sec. Exit code {r} (task-type {i})".format(r=rcode, t=run_time, i=runnable_task.task.task_type_id)
    log.info(msg_)

    with open(qstdout, 'a') as qf:
        qf.write("\n".join(cstdout) + "\n")
        qf.write(msg_ + "\n")

    with open(qstderr, 'a') as f:
        if rcode != 0:
            f.write(str(cstderr) + "\n")
            f.write(msg_ + "\n")

    r = to_task_report(host, runnable_task.task.task_id, run_time, rcode, err_msg, warn_msg)
    task_report_path = os.path.join(output_dir, 'task-report.json')
    msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
    log.info(msg)
    r.write_json(task_report_path)

    return rcode, err_msg, run_time
Example #7
    def test_cluster_template_str(self):
        t = ClusterTemplate(ClusterConstants.STOP, 'qdel ${JOB_ID}')
        self.assertIsNotNone(str(t))
        self.assertIsNotNone(repr(t))