Example #1
0
def job_resource_create_and_setup_logs(job_root_dir, bg, task_opts, workflow_level_opts, ep_d):
    """
    Create job resource dirs and setup log handlers.

    Also writes the initial job artifacts: binding-graph images, the
    entry-points JSON, the initial datastore (seeded with the two log
    files), the workflow/task option JSON files and an env snapshot.

    :type job_root_dir: str
    :type bg: BindingsGraph
    :type task_opts: dict
    :type workflow_level_opts: WorkflowLevelOptions
    :type ep_d: dict

    :return: (job resources, initialized datastore)
    """

    job_resources = to_job_resources_and_create_dirs(job_root_dir)

    pb_log_path = os.path.join(job_resources.logs, 'pbsmrtpipe.log')
    master_log_path = os.path.join(job_resources.logs, "master.log")

    # debug mode raises verbosity of both the master log and stdout
    if workflow_level_opts.debug_mode:
        master_log_level = stdout_level = logging.DEBUG
    else:
        master_log_level = stdout_level = logging.INFO

    setup_internal_logs(master_log_path, master_log_level, pb_log_path, stdout_level)

    log.info("Starting pbsmrtpipe v{v}".format(v=pbsmrtpipe.get_version()))
    log.info("\n" + _log_pbsmrptipe_header())

    BU.write_binding_graph_images(bg, job_resources.workflow)

    write_entry_points_json(job_resources.entry_points_json, ep_d)

    # Need to map entry points to a FileType and store in the DataStore? or
    # does DataStore only represent outputs?
    smrtpipe_log_df = DataStoreFile(str(uuid.uuid4()), "pbsmrtpipe::pbsmrtpipe.log", FileTypes.LOG.file_type_id, pb_log_path)
    master_log_df = DataStoreFile(str(uuid.uuid4()), "pbsmrtpipe::master.log", FileTypes.LOG.file_type_id, master_log_path)
    ds = write_and_initialize_data_store_json(job_resources.datastore_json, [smrtpipe_log_df, master_log_df])
    slog.info("successfully initialized datastore.")

    write_workflow_settings(workflow_level_opts, os.path.join(job_resources.workflow, 'options-workflow.json'))
    log.info("Workflow Options:")
    log.info(pprint.pformat(workflow_level_opts.to_dict(), indent=4))

    task_opts_path = os.path.join(job_resources.workflow, 'options-task.json')
    with open(task_opts_path, 'w') as f:
        f.write(json.dumps(task_opts, sort_keys=True, indent=4))

    env_path = os.path.join(job_resources.workflow, '.env.json')
    IO.write_env_to_json(env_path)

    try:
        sa_system, sa_components = IO.get_smrtanalysis_system_and_components_from_env()
        log.info(sa_system)
        for c in sa_components:
            log.info(c)
    except Exception:
        # best-effort only; version detection must never fail job setup
        # (was: deprecated log.warn + redundant trailing `pass`)
        log.warning("unable to determine SMRT Analysis version.")

    slog.info("completed setting up job directory resources and logs in {r}".format(r=job_root_dir))
    return job_resources, ds
Example #2
0
def job_resource_create_and_setup_logs(job_root_dir, bg, task_opts, workflow_level_opts, ep_d):
    """
    Create job resource dirs and setup log handlers.

    Also writes the initial job artifacts: binding-graph images, the
    entry-points JSON, an (empty) initial datastore, the workflow/task
    option JSON files and an env snapshot.

    :type job_root_dir: str
    :type bg: BindingsGraph
    :type task_opts: dict
    :type workflow_level_opts: WorkflowLevelOptions
    :type ep_d: dict

    :return: (job resources, initialized datastore)
    """

    job_resources = to_job_resources_and_create_dirs(job_root_dir)

    pb_log_path = os.path.join(job_resources.logs, 'pbsmrtpipe.log')
    master_log_path = os.path.join(job_resources.logs, "master.log")

    # debug mode raises verbosity of both the master log and stdout
    if workflow_level_opts.debug_mode:
        master_log_level = stdout_level = logging.DEBUG
    else:
        master_log_level = stdout_level = logging.INFO

    setup_internal_logs(master_log_path, master_log_level, pb_log_path, stdout_level)

    log.info("Starting pbsmrtpipe v{v}".format(v=pbsmrtpipe.get_version()))
    log.info("\n" + _log_pbsmrptipe_header())

    BU.write_binding_graph_images(bg, job_resources.workflow)

    write_entry_points_json(job_resources.entry_points_json, ep_d)

    # Need to map entry points to a FileType
    ds = write_and_initialize_data_store_json(job_resources.datastore_json, [])
    slog.info("successfully initialized datastore.")

    write_workflow_settings(workflow_level_opts, os.path.join(job_resources.workflow, 'options-workflow.json'))
    log.info("Workflow Options:")
    log.info(pprint.pformat(workflow_level_opts.to_dict(), indent=4))

    task_opts_path = os.path.join(job_resources.workflow, 'options-task.json')
    with open(task_opts_path, 'w') as f:
        f.write(json.dumps(task_opts, sort_keys=True, indent=4))

    env_path = os.path.join(job_resources.workflow, '.env.json')
    IO.write_env_to_json(env_path)

    try:
        sa_system, sa_components = IO.get_smrtanalysis_system_and_components_from_env()
        log.info(sa_system)
        for c in sa_components:
            log.info(c)
    except Exception:
        # best-effort only; version detection must never fail job setup
        # (was: deprecated log.warn + redundant trailing `pass`)
        log.warning("unable to determine SMRT Analysis version.")

    slog.info("completed setting up job directory resources and logs in {r}".format(r=job_root_dir))
    return job_resources, ds
Example #3
0
def run_task_on_cluster(runnable_task, task_manifest_path, output_dir, debug_mode):
    """
    Submit a task to the cluster via the resolved cluster templates.

    Writes a run.sh wrapper around pbtools-runner, wraps it in a
    cluster.sh submission script rendered from the cluster templates,
    blocks on the submission, then writes a task-report.json summary.
    Falls back to running locally when the task is not distributed or no
    cluster templates are configured.

    :param runnable_task:
    :param task_manifest_path: path to the task manifest consumed by pbtools-runner
    :param output_dir: directory receiving all shell/stdout/stderr artifacts
    :param debug_mode: unused here beyond pass-through to local run_task
    :return: (exit code, run time in sec)

    :type runnable_task: RunnableTask
    """
    def _to_p(x_):
        # resolve a file name inside the task output dir
        return os.path.join(output_dir, x_)

    stdout_ = _to_p('stdout')
    stderr_ = _to_p('stderr')

    # non-distributed tasks always run locally
    if runnable_task.task.is_distributed is False:
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    if runnable_task.cluster is None:
        log.warning("No cluster provided. Running task locally.")
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    os.chdir(runnable_task.task.output_dir)
    env_json = os.path.join(output_dir, '.cluster-env.json')
    IO.write_env_to_json(env_json)

    # sloppy API: accept either a pre-built render or a raw {name: template} dict
    if isinstance(runnable_task.cluster, ClusterTemplateRender):
        render = runnable_task.cluster
    else:
        ctmpls = [ClusterTemplate(name, tmpl) for name, tmpl in runnable_task.cluster.iteritems()]
        render = ClusterTemplateRender(ctmpls)

    job_id = to_random_job_id(runnable_task.task.task_id)
    log.debug("Using job id {i}".format(i=job_id))

    # cluster-side (submission) output files; run.sh is the payload script
    qstdout = _to_p('cluster.stdout')
    qstderr = _to_p('cluster.stderr')
    qshell = _to_p('cluster.sh')

    rcmd_shell = _to_p('run.sh')

    # This needs to be flattened due to the new RTC layer
    # Task Manifest Runner output (runner output is redirected to the same
    # files as the task stdout/stderr; the former mstdout/mstderr locals
    # were unused and have been removed)
    stdout = _to_p('stdout')
    stderr = _to_p('stderr')

    with open(qstdout, 'w+') as f:
        f.write("Creating cluster stdout for Job {i} {r}\n".format(i=job_id, r=runnable_task))

    debug_str = " --debug "
    exe = _resolve_exe("pbtools-runner")
    _d = dict(x=exe,
              t=task_manifest_path,
              o=stdout,
              e=stderr,
              d=debug_str,
              m=stdout,
              n=stderr,
              r=output_dir)

    # quoting handles spaces in paths
    cmd = "{x} run {d} --output-dir=\"{r}\" --task-stderr=\"{e}\" --task-stdout=\"{o}\" \"{t}\" > \"{m}\" 2> \"{n}\"".format(**_d)

    with open(rcmd_shell, 'w+') as x:
        x.write(cmd + "\n")

    # Make +x
    os.chmod(rcmd_shell, os.stat(rcmd_shell).st_mode | stat.S_IEXEC)

    cluster_cmd = render.render(ClusterConstants.START, rcmd_shell, job_id, qstdout, qstderr, runnable_task.task.nproc)
    log.debug(cluster_cmd)

    with open(qshell, 'w') as f:
        f.write("#!/bin/bash\n")
        f.write(cluster_cmd + "\n")

    os.chmod(qshell, os.stat(qshell).st_mode | stat.S_IEXEC)

    # host = socket.getfqdn()
    host = platform.node()

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    # blocking call until the cluster job completes
    rcode, cstdout, cstderr, run_time = backticks("bash {q}".format(q=qshell))

    if rcode == 0:
        err_msg = ""
        warn_msg = ""
    else:
        p_err_msg = "task {i} failed (exit-code {x}) after {r:.2f} sec".format(i=runnable_task.task.task_id, r=run_time, x=rcode)
        raw_stderr = _extract_last_nlines(stderr)
        # NOTE(review): cstderr is the captured output of backticks(), not a
        # file path like `stderr` above -- confirm _extract_last_nlines
        # accepts non-path input (a later revision extracts from qstderr)
        cluster_raw_stderr = _extract_last_nlines(cstderr)
        err_msg = "\n".join([p_err_msg, raw_stderr, cluster_raw_stderr])
        warn_msg = ""

    msg_ = "Completed running cluster command in {t:.2f} sec. Exit code {r}".format(r=rcode, t=run_time)
    log.info(msg_)

    # append captured submission output to the cluster.* files
    with open(qstdout, 'a') as qf:
        qf.write(str(cstdout) + "\n")
        qf.write(msg_ + "\n")

    with open(qstderr, 'a') as f:
        if rcode != 0:
            f.write(str(cstderr) + "\n")
            f.write(msg_ + "\n")

    r = to_task_report(host, runnable_task.task.task_id, run_time, rcode, err_msg, warn_msg)
    task_report_path = os.path.join(output_dir, 'task-report.json')
    msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
    log.info(msg)
    r.write_json(task_report_path)

    return rcode, run_time
Example #4
0
def run_task(runnable_task, output_dir, task_stdout, task_stderr, debug_mode):
    """
    Run a runnable task locally.

    Runs each of the task's shell commands in sequence (stopping at the
    first failure), validates input files before and output files after,
    and writes a task-report.json summary into *output_dir*.

    :param runnable_task:
    :type runnable_task: RunnableTask

    :param output_dir: directory the process chdirs into; receives
        .env.json and task-report.json
    :param task_stdout: path of the file capturing task stdout
    :param task_stderr: path of the file capturing task stderr
    :param debug_mode: when truthy, temp resources are NOT cleaned up

    :return: exit code, run_time
    :rtype: (int, int)
    """
    started_at = time.time()

    # -1 is the catch-all failure code until a command actually runs
    rcode = -1
    err_msg = ""
    # host = socket.getfqdn()
    host = platform.node()

    ncmds = len(runnable_task.task.cmds)

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    env_json = os.path.join(output_dir, '.env.json')

    IO.write_env_to_json(env_json)

    with open(task_stdout, 'w') as stdout_fh:
        with open(task_stderr, 'w') as stderr_fh:
            stdout_fh.write(repr(runnable_task) + "\n")
            stdout_fh.write("Created at {x} on {h}\n".format(x=datetime.datetime.now(), h=host))
            stderr_fh.write("Running task in {o}\n".format(o=output_dir))

            # Validate Inputs
            for input_file in runnable_task.task.input_files:
                if os.path.exists(input_file):
                    stdout_fh.write("Validated INPUT file '{i}\n".format(i=input_file))
                else:
                    err_msg = "Unable to find INPUT file '{i}".format(i=input_file)
                    stderr_fh.write(err_msg + "\n")
                    sys.stderr.write(err_msg + "\n")
                    # NOTE(review): only this validation loop stops here; the
                    # commands below still run -- confirm this is intended
                    break

            # Create resources if necessary
            #if runnable_task.task.resources:
            #    create_tmp_resources_ignore_error(runnable_task.task.resources)

            # Run commands sequentially; stop at the first non-zero exit code
            for i, cmd in enumerate(runnable_task.task.cmds):
                log.debug("Running command \n" + cmd)

                rcode, out, error, run_time = run_command(cmd, stdout_fh, stderr_fh, time_out=None)

                if rcode != 0:
                    # combine the summary line with the tail of the stderr file
                    err_msg_ = "Failed task {i} exit code {r} in {s:.2f} sec".format(i=runnable_task.task.task_id, r=rcode, s=run_time)
                    t_error_msg = _extract_last_nlines(task_stderr)
                    err_msg = "\n".join([err_msg_, t_error_msg])
                    log.error(err_msg)
                    log.error(error)

                    stderr_fh.write(str(error) + "\n")
                    sys.stderr.write(str(error) + "\n")

                    stderr_fh.write(err_msg + "\n")
                    sys.stderr.write(err_msg + "\n")
                    break
                else:
                    stdout_fh.write("completed running cmd {i} of {n}. exit code {x} in {s:.2f} sec on host {h}\n".format(x=rcode, s=run_time, h=host, i=i + 1, n=ncmds))

            smsg_ = "completed running commands. Exit code {i}".format(i=rcode)
            log.debug(smsg_)

            if rcode == 0:
                # Validate output files of a successful task.
                for ix, output_file in enumerate(runnable_task.task.output_files):
                    if os.path.exists(output_file):
                        stdout_fh.write("Successfully validated {i} output file '{o}' on {h} \n".format(o=output_file, i=ix, h=host))
                    else:
                        err_msg = "Unable to find {i} output file '{x}'. Marking task as failed.".format(x=output_file, i=ix)
                        stderr_fh.write(err_msg + "\n")
                        stdout_fh.write(err_msg + "\n")
                        sys.stderr.write(err_msg + "\n")
                        # a missing output demotes an otherwise-successful run
                        rcode = -1

            total_run_time = time.time() - started_at
            # FIXME. There should be a better way to communicate warnings
            warn_msg = ""
            r = to_task_report(host, runnable_task.task.task_id, total_run_time, rcode, err_msg, warn_msg)
            task_report_path = os.path.join(output_dir, 'task-report.json')
            msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
            log.info(msg)
            stdout_fh.write(msg + "\n")
            r.write_json(task_report_path)
            stderr_fh.flush()
            stdout_fh.flush()

    # Cleanup resource files (skipped in debug mode to aid post-mortems)
    if not debug_mode and runnable_task.task.resources:
        try:
            cleanup_resources(runnable_task)
            log.debug("successfully cleaned up {n} resources.".format(n=len(runnable_task.task.resources)))
        except Exception as e:
            log.error(str(e))
            log.error("failed to successfully cleanup resources. {f}".format(f=runnable_task.task.resources))

    run_time = time.time() - started_at
    return rcode, run_time
Example #5
0
def run_task_on_cluster(runnable_task, task_manifest_path, output_dir,
                        debug_mode):
    """
    Submit a task to the cluster, falling back to local execution.

    Writes a run.sh wrapper around pbtools-runner, wraps that in a
    cluster.sh submission script rendered from the cluster templates,
    blocks on the submission, then writes a task-report.json summary.

    :param runnable_task:
    :param output_dir:
    :param debug_mode:
    :return: (exit code, run time in sec)

    :type runnable_task: RunnableTask
    """
    def _to_p(x_):
        # resolve a file name inside the task output dir
        return os.path.join(output_dir, x_)

    stdout_ = _to_p('stdout')
    stderr_ = _to_p('stderr')

    # non-distributed tasks always run locally
    if runnable_task.task.is_distributed is False:
        return run_task(runnable_task, output_dir, stdout_, stderr_,
                        debug_mode)

    if runnable_task.cluster is None:
        log.warn("No cluster provided. Running task locally.")
        return run_task(runnable_task, output_dir, stdout_, stderr_,
                        debug_mode)

    env_json = os.path.join(output_dir, 'env.json')
    IO.write_env_to_json(env_json)

    # sloppy API: accept a pre-built render or a raw {name: template} dict
    if isinstance(runnable_task.cluster, ClusterTemplateRender):
        render = runnable_task.cluster
    else:
        ctmpls = [
            ClusterTemplate(name, tmpl)
            for name, tmpl in runnable_task.cluster.iteritems()
        ]
        render = ClusterTemplateRender(ctmpls)

    job_id = to_random_job_id(runnable_task.task.task_id)
    log.debug("Using job id {i}".format(i=job_id))

    # cluster-side (submission) output files
    qstdout = _to_p('cluster.stdout')
    qstderr = _to_p('cluster.stderr')
    qshell = _to_p('cluster.sh')

    rcmd_shell = _to_p('run.sh')

    # Task Manifest Runner output
    stdout = _to_p('stdout')
    stderr = _to_p('stderr')
    mstdout = _to_p('mstdout')
    mstderr = _to_p('mstderr')

    with open(qstdout, 'w+') as f:
        f.write("Creating cluster stdout for Job {i} {r}\n".format(
            i=job_id, r=runnable_task))

    debug_str = " --debug "
    _d = dict(t=task_manifest_path,
              o=stdout,
              e=stderr,
              d=debug_str,
              m=mstdout,
              n=mstderr)
    # quoting handles spaces in paths; runner output goes to mstdout/mstderr
    cmd = "pbtools-runner run {d} --task-stderr=\"{e}\" --task-stdout=\"{o}\" \"{t}\" > \"{m}\" 2> \"{n}\"".format(
        **_d)

    with open(rcmd_shell, 'w+') as x:
        x.write(cmd + "\n")

    # Make +x
    os.chmod(rcmd_shell, os.stat(rcmd_shell).st_mode | stat.S_IEXEC)

    cluster_cmd = render.render('interactive', rcmd_shell, job_id, qstdout,
                                qstderr, runnable_task.task.nproc)
    log.debug(cluster_cmd)

    with open(qshell, 'w') as f:
        f.write("#!/bin/bash\n")
        f.write(cluster_cmd + "\n")

    os.chmod(qshell, os.stat(qshell).st_mode | stat.S_IEXEC)

    # host = socket.getfqdn()
    host = platform.node()

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    # blocking call until the cluster job completes
    rcode, cstdout, cstderr, run_time = backticks("bash {q}".format(q=qshell))

    if rcode == 0:
        err_msg = ""
        warn_msg = ""
    else:
        # not sure how to scrape this from the stderr/stdout
        err_msg = "task {i} failed".format(i=runnable_task.task.task_id)
        warn_msg = ""

    msg_ = "Completed running cluster command in {t:.2f} sec. Exit code {r}".format(
        r=rcode, t=run_time)
    log.info(msg_)

    # append captured submission output to the cluster.* files
    with open(qstdout, 'a') as qf:
        qf.write(str(cstdout) + "\n")
        qf.write(msg_ + "\n")

    with open(qstderr, 'a') as f:
        if rcode != 0:
            f.write(str(cstderr) + "\n")
            f.write(msg_ + "\n")

    r = to_task_report(host, runnable_task.task.task_id, run_time, rcode,
                       err_msg, warn_msg)
    task_report_path = os.path.join(output_dir, 'task-report.json')
    msg = "Writing task id {i} task report to {r}".format(
        r=task_report_path, i=runnable_task.task.task_id)
    log.info(msg)
    r.write_json(task_report_path)

    return rcode, run_time
Example #6
0
def run_task(runnable_task, output_dir, task_stdout, task_stderr, debug_mode):
    """
    Run a runnable task locally.

    Runs each of the task's shell commands in sequence (stopping at the
    first failure), validates input files before and output files after,
    and writes a task-report.json summary into *output_dir*.

    :param runnable_task:
    :type runnable_task: RunnableTask

    :param output_dir: directory the process chdirs into; receives
        env.json and task-report.json
    :param task_stdout: path of the file capturing task stdout
    :param task_stderr: path of the file capturing task stderr
    :param debug_mode: when truthy, temp resources are NOT cleaned up

    :return: exit code, run_time
    :rtype: (int, int)
    """
    started_at = time.time()

    # -1 is the catch-all failure code until a command actually runs
    rcode = -1
    err_msg = ""
    # host = socket.getfqdn()
    host = platform.node()

    ncmds = len(runnable_task.task.cmds)

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    env_json = os.path.join(output_dir, 'env.json')

    IO.write_env_to_json(env_json)

    with open(task_stdout, 'w') as stdout_fh:
        with open(task_stderr, 'w') as stderr_fh:
            stdout_fh.write(repr(runnable_task) + "\n")
            stdout_fh.write("Created at {x} on {h}\n".format(
                x=datetime.datetime.now(), h=host))

            # Validate Inputs
            for input_file in runnable_task.task.input_files:
                if os.path.exists(input_file):
                    stdout_fh.write(
                        "Validated INPUT file '{i}\n".format(i=input_file))
                else:
                    err_msg = "Unable to find INPUT file '{i}".format(
                        i=input_file)
                    stderr_fh.write(err_msg + "\n")
                    sys.stderr.write(err_msg + "\n")
                    # NOTE(review): only this validation loop stops here; the
                    # commands below still run -- confirm this is intended
                    break

            # Create resources if necessary
            #if runnable_task.task.resources:
            #    create_tmp_resources_ignore_error(runnable_task.task.resources)

            # Run commands sequentially; stop at the first non-zero exit code
            for i, cmd in enumerate(runnable_task.task.cmds):
                log.debug("Running command \n" + cmd)

                rcode, out, error, run_time = run_command(cmd,
                                                          stdout_fh,
                                                          stderr_fh,
                                                          time_out=None)

                if rcode != 0:
                    err_msg = "Failed task {i} exit code {r} in {s:.2f} sec".format(
                        i=runnable_task.task.task_id, r=rcode, s=run_time)
                    log.error(err_msg)
                    log.error(error)

                    stderr_fh.write(str(error) + "\n")
                    sys.stderr.write(str(error) + "\n")

                    stderr_fh.write(err_msg + "\n")
                    sys.stderr.write(err_msg + "\n")
                    break
                else:
                    stdout_fh.write(
                        "completed running cmd {i} of {n}. exit code {x} in {s:.2f} sec on host {h}\n"
                        .format(x=rcode, s=run_time, h=host, i=i + 1, n=ncmds))

            smsg_ = "completed running commands. Exit code {i}".format(i=rcode)
            log.debug(smsg_)

            if rcode == 0:
                # Validate output files of a successful task.
                for output_file in runnable_task.task.output_files:
                    if os.path.exists(output_file):
                        stdout_fh.write(
                            "Successfully validated file '{o}'\n".format(
                                o=output_file))
                    else:
                        err_msg = "Unable to find file '{x}'".format(
                            x=output_file)
                        stderr_fh.write(err_msg + "\n")
                        stdout_fh.write(err_msg + "\n")
                        sys.stderr.write(err_msg + "\n")
                        # a missing output demotes an otherwise-successful run
                        rcode = -1

            total_run_time = time.time() - started_at
            warn_msg = ""
            r = to_task_report(host, runnable_task.task.task_id,
                               total_run_time, rcode, err_msg, warn_msg)
            task_report_path = os.path.join(output_dir, 'task-report.json')
            msg = "Writing task id {i} task report to {r}".format(
                r=task_report_path, i=runnable_task.task.task_id)
            log.info(msg)
            stdout_fh.write(msg + "\n")
            r.write_json(task_report_path)
            stderr_fh.flush()
            stdout_fh.flush()

    # Cleanup resource files (skipped in debug mode to aid post-mortems)
    if not debug_mode and runnable_task.task.resources:
        try:
            cleanup_resources(runnable_task)
            log.debug("successfully cleaned up {n} resources.".format(
                n=len(runnable_task.task.resources)))
        except Exception as e:
            log.error(str(e))
            log.error("failed to successfully cleanup resources. {f}".format(
                f=runnable_task.task.resources))

    run_time = time.time() - started_at
    return rcode, run_time
Example #7
0
def run_task_on_cluster(runnable_task, task_manifest_path, output_dir, debug_mode):
    """
    Submit a task to the cluster via the resolved cluster templates.

    Writes a run.sh wrapper around pbtools-runner, wraps it in a strict
    cluster.sh submission script rendered from the cluster templates,
    blocks on the submission, then writes a task-report.json summary.
    Falls back to running locally when the task is not distributed or no
    cluster templates are configured.

    :param runnable_task:
    :param output_dir:
    :param debug_mode:
    :return: (exit code, error message, run time in sec)

    :type runnable_task: RunnableTask
    """
    def _to_p(x_):
        # resolve a file name inside the task output dir
        return os.path.join(output_dir, x_)

    stdout_ = _to_p('stdout')
    stderr_ = _to_p('stderr')

    # non-distributed tasks always run locally
    if runnable_task.task.is_distributed is False:
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    if runnable_task.cluster is None:
        log.warn("No cluster provided. Running task locally.")
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    os.chdir(runnable_task.task.output_dir)
    env_json = os.path.join(output_dir, '.cluster-env.json')
    IO.write_env_to_json(env_json)

    # sloppy API: accept a pre-built render or a raw {name: template} dict
    if isinstance(runnable_task.cluster, ClusterTemplateRender):
        render = runnable_task.cluster
    else:
        ctmpls = [ClusterTemplate(name, tmpl) for name, tmpl in runnable_task.cluster.iteritems()]
        render = ClusterTemplateRender(ctmpls)

    job_id = to_random_job_id(runnable_task.task.task_id)
    log.debug("Using job id {i}".format(i=job_id))

    # cluster-side (submission) output files; run.sh is the payload script
    qstdout = _to_p('cluster.stdout')
    qstderr = _to_p('cluster.stderr')
    qshell = _to_p('cluster.sh')

    rcmd_shell = _to_p('run.sh')

    # This needs to be flattened due to the new RTC layer
    # Task Manifest Runner output
    stdout = _to_p('stdout')
    stderr = _to_p('stderr')

    with open(qstdout, 'w+') as f:
        f.write("Creating cluster stdout for Job {i} {r}\n".format(i=job_id, r=runnable_task))

    debug_str = " --debug "
    exe = _resolve_exe("pbtools-runner")
    _d = dict(x=exe,
              t=task_manifest_path,
              o=stdout,
              e=stderr,
              d=debug_str,
              m=stdout,
              n=stderr,
              r=output_dir)

    # the quoting here is explicitly to handle spaces in paths
    cmd = "{x} run {d} --output-dir=\"{r}\" --task-stderr=\"{e}\" --task-stdout=\"{o}\" \"{t}\" > \"{m}\" 2> \"{n}\"".format(**_d)

    # write the pbtools-runner exe command
    with open(rcmd_shell, 'w+') as x:
        x.write(cmd + "\n")

    chmod_x(rcmd_shell)

    cluster_cmd = render.render(ClusterConstants.START, rcmd_shell, job_id, qstdout, qstderr, runnable_task.task.nproc)
    log.info("Job submission command: " + cluster_cmd)

    # strict-mode wrapper; "${1+\"$@\"}" forwards any script args (e.g.
    # --printcmd below) to the rendered submission command
    with open(qshell, 'w') as f:
        f.write("#!/bin/bash\n")
        f.write("set -o errexit\n")
        f.write("set -o pipefail\n")
        f.write("set -o nounset\n")
        f.write(cluster_cmd.rstrip("\n") + " ${1+\"$@\"}\n")
        f.write("exit $?")

    chmod_x(qshell)

    host = platform.node()

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    # print the underlying jms command if using runjmscmd
    if re.search(r'/runjmscmd\b', cluster_cmd):
        rcode, cstdout, cstderr, run_time = backticks("bash {q} --printcmd".format(q=qshell))
        if rcode == 0:
            log.info("Underlying JMS job submission command: " + "\n".join(cstdout))

    # Blocking call
    rcode, cstdout, cstderr, run_time = backticks("bash {q}".format(q=qshell))

    log.info("Cluster command return code {r} in {s:.2f} sec".format(r=rcode, s=run_time))

    msg_t = "{n} Completed running cluster command in {t:.2f} sec. Exit code {r} (task-type {i})"
    msg_ = msg_t.format(r=rcode, t=run_time, i=runnable_task.task.task_type_id, n=datetime.datetime.now())
    log.info(msg_)

    # Append the bash cluster.sh stderr and stdout call to
    # the cluster.stderr and cluster.stdout
    with open(qstdout, 'a') as qf:
        if cstdout:
            qf.write("\n".join(cstdout) + "\n")
        qf.write(msg_ + "\n")

    with open(qstderr, 'a') as f:
        if rcode != 0:
            if cstderr:
                f.write(str(cstderr) + "\n")

    # fundamental output error str message of this func
    err_msg = ""
    warn_msg = ""

    if rcode != 0:
        # qstderr now contains the captured submission stderr (appended above),
        # so the extraction below sees it
        p_err_msg = "task {i} failed (exit-code {x}) after {r:.2f} sec".format(i=runnable_task.task.task_id, r=run_time, x=rcode)
        raw_stderr = _extract_last_nlines(stderr)
        cluster_raw_stderr = _extract_last_nlines(qstderr)
        err_msg = "\n".join([p_err_msg, raw_stderr, cluster_raw_stderr])
        warn_msg = ""

    # write the result status message to stderr if task failure
    # doing this here to avoid having a double message
    with open(qstderr, 'a') as f:
        if rcode != 0:
            if cstderr:
                f.write(msg_ + "\n")

    r = to_task_report(host, runnable_task.task.task_id, run_time, rcode, err_msg, warn_msg)
    task_report_path = os.path.join(output_dir, 'task-report.json')
    msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
    log.info(msg)
    r.write_json(task_report_path)

    return rcode, err_msg, run_time
Example #8
0
def run_task(runnable_task, output_dir, task_stdout, task_stderr, debug_mode):
    """
    Run a runnable task locally.

    Runs each of the task's shell commands in sequence (stopping at the
    first failure), validates input files before and output files after,
    and writes a task-report.json summary into *output_dir*.

    :param debug_mode: Enabling debug mode will not cleanup temp resources upon failure
    :type debug_mode: bool

    :param runnable_task: Runnable task instance
    :type runnable_task: RunnableTask

    :param output_dir: Path to output dir
    :type output_dir: str

    :param task_stderr: Absolute path to task stderr file
    :type task_stderr: str

    :param task_stdout: Absolute path to task stdout file
    :type task_stdout: str

    :return: (exit code, error message, run_time)
    :rtype: (int, str, int)
    """

    started_at = time.time()

    def get_run_time():
        # elapsed wall-clock seconds since this call started
        return time.time() - started_at

    # Default general catch all
    rcode = 1
    err_msg = ""
    # host = socket.getfqdn()
    host = platform.node()

    ncmds = len(runnable_task.task.cmds)

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    env_json = os.path.join(output_dir, '.env.json')

    IO.write_env_to_json(env_json)

    with open(task_stdout, 'w') as stdout_fh:
        with open(task_stderr, 'w') as stderr_fh:
            stdout_fh.write(repr(runnable_task) + "\n")
            stdout_fh.write("Created at {x} on {h}\n".format(x=datetime.datetime.now(), h=host))
            stdout_fh.write("Running task in {o}\n".format(o=output_dir))

            # Validate Inputs
            for input_file in runnable_task.task.input_files:
                if os.path.exists(input_file):
                    stdout_fh.write("Validated INPUT file '{i}\n".format(i=input_file))
                else:
                    err_msg = "Unable to find INPUT file '{i}".format(i=input_file)
                    stderr_fh.write(err_msg + "\n")
                    log.error(err_msg)
                    # NOTE(review): only this validation loop stops here; the
                    # commands below still run -- confirm this is intended
                    break

            # Create resources if necessary
            #if runnable_task.task.resources:
            #    create_tmp_resources_ignore_error(runnable_task.task.resources)

            # fixed missing trailing newline in the line-oriented stdout file
            stdout_fh.write("Starting to run {n} cmds.\n".format(n=len(runnable_task.task.cmds)))
            stdout_fh.flush()
            stderr_fh.flush()

            # Run commands sequentially; stop at the first non-zero exit code
            for i, cmd in enumerate(runnable_task.task.cmds):
                log.info("Running command \n" + cmd)

                # see run_command API for future fixes
                rcode, _, _, run_time = run_command(cmd, stdout_fh, stderr_fh, time_out=None)

                if rcode != 0:
                    err_msg_ = "Failed task {i} exit code {r} in {s:.2f} sec (See file '{f}'.)".format(i=runnable_task.task.task_id, r=rcode, s=run_time, f=task_stderr)
                    # BUG FIX: write the freshly-built message (err_msg_); the
                    # previous code wrote err_msg, which is empty/stale here
                    stderr_fh.write(err_msg_ + "\n")
                    stderr_fh.flush()

                    # combine the summary line with the tail of the stderr file
                    t_error_msg = _extract_last_nlines(task_stderr)
                    err_msg = "\n".join([err_msg_, "Extracted from stderr", t_error_msg])

                    log.error(err_msg)
                    # fixed missing trailing newline
                    stdout_fh.write("breaking out. Unable to run remaining task commands.\n")
                    break
                else:
                    stdout_fh.write("completed running cmd {i} of {n}. exit code {x} in {s:.2f} sec on host {h}\n".format(x=rcode, s=run_time, h=host, i=i + 1, n=ncmds))

            smsg_ = "completed running commands. Exit code {i}".format(i=rcode)
            log.debug(smsg_)

            if rcode == 0:
                log.info("Core RTC runner was successful. Validating output files.")
                # Validate output files of a successful task.
                for ix, output_file in enumerate(runnable_task.task.output_files):
                    if os.path.exists(output_file):
                        stdout_fh.write("Successfully validated {i} output file '{o}' on {h} \n".format(o=output_file, i=ix, h=host))
                    else:
                        # a missing output demotes an otherwise-successful run
                        rcode = 127
                        err_msg = "Unable to find {i} output file '{x}'. Marking task as failed. Setting exit code to {r}".format(x=output_file, i=ix, r=rcode)
                        stderr_fh.write(err_msg + "\n")
                        stdout_fh.write(err_msg + "\n")
                        log.error(err_msg)

            # FIXME. There should be a better way to communicate warnings
            warn_msg = ""

            # Write the task summary to a pbcommand Report object
            r = to_task_report(host, runnable_task.task.task_id, get_run_time(), rcode, err_msg, warn_msg)
            task_report_path = os.path.join(output_dir, 'task-report.json')

            msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
            log.info(msg)
            stdout_fh.write(msg + "\n")
            r.write_json(task_report_path)

            stderr_fh.flush()
            stdout_fh.flush()

    # Cleanup resource files (skipped in debug mode to aid post-mortems)
    if not debug_mode and runnable_task.task.resources:
        try:
            cleanup_resources(runnable_task)
            log.debug("successfully cleaned up {n} resources.".format(n=len(runnable_task.task.resources)))
        except Exception as e:
            log.error(str(e))
            log.error("failed to successfully cleanup resources. {f}".format(f=runnable_task.task.resources))

    return rcode, err_msg, get_run_time()
Example #9
0
def job_resource_create_and_setup_logs(job_root_dir, bg, task_opts,
                                       workflow_level_opts, ep_d):
    """
    Create the job directory tree and wire up all log handlers.

    :type job_root_dir: str
    :type bg: BindingsGraph
    :type task_opts: dict
    :type workflow_level_opts: WorkflowLevelOptions
    :type ep_d: dict

    :return: (job resources, datastore, master-log DataStoreFile)
    """
    resources = to_job_resources_and_create_dirs(job_root_dir)

    pb_log_path = os.path.join(resources.logs, 'pbsmrtpipe.log')
    master_log_path = os.path.join(resources.logs, "master.log")

    # Debug mode promotes both the master log and stdout to DEBUG;
    # otherwise both run at INFO.
    level = logging.DEBUG if workflow_level_opts.debug_mode else logging.INFO
    setup_internal_logs(master_log_path, level, pb_log_path, level)

    log.info("Starting pbsmrtpipe v{v}".format(v=pbsmrtpipe.get_version()))
    log.info("\n" + _log_pbsmrptipe_header())

    BU.write_binding_graph_images(bg, resources.workflow)

    write_entry_points_json(resources.entry_points_json, ep_d)

    # Need to map entry points to a FileType and store in the DataStore? or
    # does DataStore only represent outputs?
    def _to_log_ds_file(source_id, path, name, description):
        # Each log file is registered in the datastore under a fresh UUID.
        return DataStoreFile(str(uuid.uuid4()), source_id,
                             FileTypes.LOG.file_type_id, path,
                             name=name, description=description)

    smrtpipe_log_df = _to_log_ds_file("pbsmrtpipe::pbsmrtpipe.log",
                                      pb_log_path,
                                      "Analysis Log",
                                      "pbsmrtpipe log")
    master_log_df = _to_log_ds_file("pbsmrtpipe::master.log",
                                    master_log_path,
                                    "Master Log",
                                    "Master log")
    ds = write_and_initialize_data_store_json(resources.datastore_json,
                                              [smrtpipe_log_df, master_log_df])
    slog.info("successfully initialized datastore.")

    workflow_opts_path = os.path.join(resources.workflow,
                                      'options-workflow.json')
    write_workflow_settings(workflow_level_opts, workflow_opts_path)
    if workflow_level_opts.system_message is not None:
        slog.info("Command: {m}".format(m=workflow_level_opts.system_message))

    # Echo the resolved configuration to the status log for traceability.
    for header, payload in (("Entry Points:", ep_d),
                            ("Workflow Options:",
                             workflow_level_opts.to_dict()),
                            ("Task Options:", task_opts)):
        slog.info(header)
        slog.info("\n" + pprint.pformat(payload, indent=4))

    task_opts_path = os.path.join(resources.workflow, 'options-task.json')
    with open(task_opts_path, 'w') as f:
        f.write(json.dumps(task_opts, sort_keys=True, indent=4))

    env_path = os.path.join(resources.workflow, '.env.json')
    IO.write_env_to_json(env_path)
    log.info("wrote current env to {e}".format(e=env_path))

    try:
        sa_system, sa_components = IO.get_smrtanalysis_system_and_components_from_env()
        log.info(sa_system)
        for c in sa_components:
            log.info(c)
    except Exception:
        # black hole exception
        log.warn("unable to determine SMRT Analysis version.")

    slog.info(
        "completed setting up job directory resources and logs in {r}".format(
            r=job_root_dir))
    return resources, ds, master_log_df
Example #10
0
def run_task_on_cluster(runnable_task, task_manifest_path, output_dir, debug_mode):
    """
    Run a task by submitting it through the configured cluster templates.

    Falls back to running locally (via run_task) when the task is marked
    non-distributed or when no cluster configuration is present.

    :param runnable_task: task to execute
    :type runnable_task: RunnableTask
    :param task_manifest_path: path to the manifest consumed by pbtools-runner
    :param output_dir: directory for generated shell scripts and log files
    :param debug_mode: forwarded to run_task for the local-execution fallback
    :return: (exit code, error message, run time in sec)
    """
    def _to_p(x_):
        return os.path.join(output_dir, x_)

    # Task Manifest Runner stdout/stderr (shared by local and cluster paths)
    stdout = _to_p('stdout')
    stderr = _to_p('stderr')

    if runnable_task.task.is_distributed is False:
        return run_task(runnable_task, output_dir, stdout, stderr, debug_mode)

    if runnable_task.cluster is None:
        log.warn("No cluster provided. Running task locally.")
        return run_task(runnable_task, output_dir, stdout, stderr, debug_mode)

    os.chdir(runnable_task.task.output_dir)
    env_json = os.path.join(output_dir, '.cluster-env.json')
    IO.write_env_to_json(env_json)

    # sloppy API: accept either a pre-built renderer or a raw
    # {name: template} mapping
    if isinstance(runnable_task.cluster, ClusterTemplateRender):
        render = runnable_task.cluster
    else:
        ctmpls = [ClusterTemplate(name, tmpl) for name, tmpl in runnable_task.cluster.iteritems()]
        render = ClusterTemplateRender(ctmpls)

    job_id = to_random_job_id(runnable_task.task.task_id)
    log.debug("Using job id {i}".format(i=job_id))

    # Cluster-submission stdout/stderr and the two generated shell scripts
    qstdout = _to_p('cluster.stdout')
    qstderr = _to_p('cluster.stderr')
    qshell = _to_p('cluster.sh')
    rcmd_shell = _to_p('run.sh')

    with open(qstdout, 'w+') as f:
        f.write("Creating cluster stdout for Job {i} {r}\n".format(i=job_id, r=runnable_task))

    # NOTE(review): --debug is passed unconditionally, regardless of
    # debug_mode — confirm this is intended.
    debug_str = " --debug "
    exe = _resolve_exe("pbtools-runner")
    _d = dict(x=exe,
              t=task_manifest_path,
              o=stdout,
              e=stderr,
              d=debug_str,
              m=stdout,
              n=stderr,
              r=output_dir)

    # the quoting here is explicitly to handle spaces in paths
    cmd = "{x} run {d} --output-dir=\"{r}\" --task-stderr=\"{e}\" --task-stdout=\"{o}\" \"{t}\" > \"{m}\" 2> \"{n}\"".format(**_d)

    # write the pbtools-runner exe command
    with open(rcmd_shell, 'w+') as x:
        x.write(cmd + "\n")

    # Make +x
    os.chmod(rcmd_shell, os.stat(rcmd_shell).st_mode | stat.S_IEXEC)

    cluster_cmd = render.render(ClusterConstants.START, rcmd_shell, job_id, qstdout, qstderr, runnable_task.task.nproc)
    log.debug(cluster_cmd)

    with open(qshell, 'w') as f:
        f.write("#!/bin/bash\n")
        f.write("set -o errexit\n")
        f.write("set -o pipefail\n")
        f.write("set -o nounset\n")
        f.write(cluster_cmd + "\n")
        f.write("exit $?")

    os.chmod(qshell, os.stat(qshell).st_mode | stat.S_IEXEC)

    host = platform.node()

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    # Blocking call until the submitted job completes
    rcode, cstdout, cstderr, run_time = backticks("bash {q}".format(q=qshell))

    log.info("Cluster command return code {r} in {s:.2f} sec".format(r=rcode, s=run_time))

    if rcode == 0:
        err_msg = ""
        warn_msg = ""
    else:
        # Summarize the failure with the tails of both the task stderr and
        # the cluster-submission stderr.
        p_err_msg = "task {i} failed (exit-code {x}) after {r:.2f} sec".format(i=runnable_task.task.task_id, r=run_time, x=rcode)
        raw_stderr = _extract_last_nlines(stderr)
        cluster_raw_stderr = _extract_last_nlines(qstderr)
        err_msg = "\n".join([p_err_msg, raw_stderr, cluster_raw_stderr])
        warn_msg = ""

    msg_ = "Completed running cluster command in {t:.2f} sec. Exit code {r} (task-type {i})".format(r=rcode, t=run_time, i=runnable_task.task.task_type_id)
    log.info(msg_)

    # Append the captured submission output to the cluster log files
    with open(qstdout, 'a') as qf:
        qf.write(str(cstdout) + "\n")
        qf.write(msg_ + "\n")

    with open(qstderr, 'a') as f:
        if rcode != 0:
            f.write(str(cstderr) + "\n")
            f.write(msg_ + "\n")

    # Write the task summary to a pbcommand Report object
    r = to_task_report(host, runnable_task.task.task_id, run_time, rcode, err_msg, warn_msg)
    task_report_path = os.path.join(output_dir, 'task-report.json')
    msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
    log.info(msg)
    r.write_json(task_report_path)

    return rcode, err_msg, run_time
Example #11
0
def job_resource_create_and_setup_logs(job_root_dir, bg, task_opts,
                                       workflow_level_opts, ep_d):
    """
    Create job resource dirs and setup log handlers

    :type job_root_dir: str
    :type bg: BindingsGraph
    :type task_opts: dict
    :type workflow_level_opts: WorkflowLevelOptions
    :type ep_d: dict
    """
    job_resources = to_job_resources_and_create_dirs(job_root_dir)

    pb_log_path = os.path.join(job_resources.logs, 'pbsmrtpipe.log')
    master_log_path = os.path.join(job_resources.logs, "master.log")

    # A single level drives both the master-log and stdout handlers:
    # DEBUG in debug mode, INFO otherwise.
    log_level = logging.DEBUG if workflow_level_opts.debug_mode else logging.INFO
    setup_internal_logs(master_log_path, log_level, pb_log_path, log_level)

    log.info("Starting pbsmrtpipe {v}".format(v=pbsmrtpipe.get_version()))
    log.info("\n" + _log_pbsmrptipe_header())

    BU.write_binding_graph_images(bg, job_resources.workflow)

    write_entry_points_json(job_resources.entry_points_json, ep_d)

    # Need to map entry points to a FileType and store in the DataStore? or
    # does DataStore only represent outputs?

    # For historical reasons, this is a bit non-obvious. The "master" log is now at the
    # the SMRT Link level, so we've promoted the pbsmrtpipe "master" log (i.e., master.log) to the
    # be Analysis Details Log using the pbsmrtpipe::pbsmrtpipe.log source Id. There's also this friction point
    # of marketing using "Analysis" vs "pbsmrtpipe which has generated some inconsistency.
    def _log_ds_file(source_id, path, name, desc):
        # Register each log in the datastore under a fresh UUID.
        return DataStoreFile(str(uuid.uuid4()), source_id,
                             FileTypes.LOG.file_type_id, path,
                             name=name, description=desc)

    smrtpipe_log_df = _log_ds_file(GlobalConstants.SOURCE_ID_INFO_LOG,
                                   pb_log_path,
                                   "Analysis Log",
                                   "pbsmrtpipe INFO log")
    master_log_df = _log_ds_file(GlobalConstants.SOURCE_ID_MASTER_LOG,
                                 master_log_path,
                                 "Analysis Details Log",
                                 "Analysis Details log")

    ds = write_and_initialize_data_store_json(job_resources.datastore_json,
                                              [smrtpipe_log_df, master_log_df])
    slog.info("successfully initialized datastore.")

    workflow_opts_path = os.path.join(job_resources.workflow,
                                      'options-workflow.json')
    write_workflow_settings(workflow_level_opts, workflow_opts_path)
    if workflow_level_opts.system_message is not None:
        slog.info("Command: {m}".format(m=workflow_level_opts.system_message))

    # Echo the resolved configuration to the status log.
    for label, data in (("Entry Points:", ep_d),
                        ("Workflow Options:", workflow_level_opts.to_dict()),
                        ("Task Options:", task_opts)):
        slog.info(label)
        slog.info("\n" + pprint.pformat(data, indent=4))

    task_opts_path = os.path.join(job_resources.workflow, 'options-task.json')
    with open(task_opts_path, 'w') as f:
        f.write(json.dumps(task_opts, sort_keys=True, indent=4))

    env_path = os.path.join(job_resources.workflow, '.env.json')
    IO.write_env_to_json(env_path)
    log.info("wrote current env to {e}".format(e=env_path))

    slog.info(
        "completed setting up job directory resources and logs in {r}".format(
            r=job_root_dir))
    return job_resources, ds, master_log_df
Example #12
0
def run_task_on_cluster(runnable_task, task_manifest_path, output_dir, debug_mode):
    """
    Run a task by rendering the cluster start template around a generated
    run.sh wrapper and executing the submission via a blocking bash call.

    Falls back to run_task (local execution) when the task is marked
    non-distributed or when no cluster configuration is available.

    :param runnable_task: task to execute
    :type runnable_task: RunnableTask
    :param task_manifest_path: path to the manifest consumed by pbtools-runner
    :param output_dir: directory for generated shell scripts and log files
    :param debug_mode: forwarded to run_task for the local fallback paths
    :return: (exit code, error message, run time in sec)
    """
    def _to_p(x_):
        # Resolve a file name relative to the task output dir
        return os.path.join(output_dir, x_)

    stdout_ = _to_p('stdout')
    stderr_ = _to_p('stderr')

    # Non-distributed task: run locally instead of submitting
    if runnable_task.task.is_distributed is False:
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    # No cluster templates configured: also run locally
    if runnable_task.cluster is None:
        log.warn("No cluster provided. Running task locally.")
        return run_task(runnable_task, output_dir, stdout_, stderr_, debug_mode)

    os.chdir(runnable_task.task.output_dir)
    env_json = os.path.join(output_dir, '.cluster-env.json')
    IO.write_env_to_json(env_json)

    # sloppy API: accept either a ready ClusterTemplateRender or a raw
    # {name: template} mapping that must be wrapped first
    if isinstance(runnable_task.cluster, ClusterTemplateRender):
        render = runnable_task.cluster
    else:
        ctmpls = [ClusterTemplate(name, tmpl) for name, tmpl in runnable_task.cluster.iteritems()]
        render = ClusterTemplateRender(ctmpls)

    job_id = to_random_job_id(runnable_task.task.task_id)
    log.debug("Using job id {i}".format(i=job_id))

    # Cluster-submission stdout/stderr and the generated submit script
    qstdout = _to_p('cluster.stdout')
    qstderr = _to_p('cluster.stderr')
    qshell = _to_p('cluster.sh')

    # Wrapper script that invokes pbtools-runner
    rcmd_shell = _to_p('run.sh')

    # This needs to be flattened due to the new RTC layer
    # Task Manifest Runner output
    stdout = _to_p('stdout')
    stderr = _to_p('stderr')

    with open(qstdout, 'w+') as f:
        f.write("Creating cluster stdout for Job {i} {r}\n".format(i=job_id, r=runnable_task))

    # NOTE(review): --debug is passed unconditionally, regardless of
    # debug_mode — confirm this is intended.
    debug_str = " --debug "
    exe = _resolve_exe("pbtools-runner")
    _d = dict(x=exe,
              t=task_manifest_path,
              o=stdout,
              e=stderr,
              d=debug_str,
              m=stdout,
              n=stderr,
              r=output_dir)

    # the quoting here is explicitly to handle spaces in paths
    cmd = "{x} run {d} --output-dir=\"{r}\" --task-stderr=\"{e}\" --task-stdout=\"{o}\" \"{t}\" > \"{m}\" 2> \"{n}\"".format(**_d)

    # write the pbtools-runner exe command
    with open(rcmd_shell, 'w+') as x:
        x.write(cmd + "\n")

    chmod_x(rcmd_shell)

    # Render the cluster START template around the wrapper script
    cluster_cmd = render.render(ClusterConstants.START, rcmd_shell, job_id, qstdout, qstderr, runnable_task.task.nproc)
    log.debug(cluster_cmd)

    # Strict-mode submit script so template failures surface as nonzero exits
    with open(qshell, 'w') as f:
        f.write("#!/bin/bash\n")
        f.write("set -o errexit\n")
        f.write("set -o pipefail\n")
        f.write("set -o nounset\n")
        f.write(cluster_cmd + "\n")
        f.write("exit $?")

    chmod_x(qshell)

    host = platform.node()

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    # Blocking call
    rcode, cstdout, cstderr, run_time = backticks("bash {q}".format(q=qshell))

    log.info("Cluster command return code {r} in {s:.2f} sec".format(r=rcode, s=run_time))

    msg_t = "{n} Completed running cluster command in {t:.2f} sec. Exit code {r} (task-type {i})"
    msg_ = msg_t.format(r=rcode, t=run_time, i=runnable_task.task.task_type_id, n=datetime.datetime.now())
    log.info(msg_)

    # Append the bash cluster.sh stderr and stdout call to
    # the cluster.stderr and cluster.stdout
    with open(qstdout, 'a') as qf:
        if cstdout:
            # NOTE(review): assumes cstdout is a sequence of lines; the
            # sibling implementation writes str(cstdout) — confirm the
            # backticks return type.
            qf.write("\n".join(cstdout) + "\n")
        qf.write(msg_ + "\n")

    with open(qstderr, 'a') as f:
        if rcode != 0:
            if cstderr:
                f.write(str(cstderr) + "\n")

    # fundamental output error str message of this func
    err_msg = ""
    warn_msg = ""

    if rcode != 0:
        # Summarize failure using the tails of task stderr and cluster stderr
        p_err_msg = "task {i} failed (exit-code {x}) after {r:.2f} sec".format(i=runnable_task.task.task_id, r=run_time, x=rcode)
        raw_stderr = _extract_last_nlines(stderr)
        cluster_raw_stderr = _extract_last_nlines(qstderr)
        err_msg = "\n".join([p_err_msg, raw_stderr, cluster_raw_stderr])
        warn_msg = ""

    # write the result status message to stderr if task failure
    # doing this here to avoid having a double message
    with open(qstderr, 'a') as f:
        if rcode != 0:
            if cstderr:
                f.write(msg_ + "\n")

    # Persist the task summary as a pbcommand Report
    r = to_task_report(host, runnable_task.task.task_id, run_time, rcode, err_msg, warn_msg)
    task_report_path = os.path.join(output_dir, 'task-report.json')
    msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
    log.info(msg)
    r.write_json(task_report_path)

    return rcode, err_msg, run_time
Example #13
0
def run_task(runnable_task, output_dir, task_stdout, task_stderr, debug_mode):
    """
    Run a runnable task locally.

    :param debug_mode: Enabling debug mode will not cleanup temp resources upon failure
    :type debug_mode: bool

    :param runnable_task: Runnable task instance
    :type runnable_task: RunnableTask

    :param output_dir: Path to output dir
    :type output_dir: str

    :param task_stderr: Absolute path to task stderr file
    :type task_stderr: str

    :param task_stdout: Absolute path to task stdout file
    :type task_stdout: str

    :return: (exit code, error message, run_time)
    :rtype: (int, str, int)
    """

    started_at = time.time()

    def get_run_time():
        # Wall-clock seconds since this function started
        return time.time() - started_at

    # Default general catch all
    rcode = 1
    err_msg = ""
    # host = socket.getfqdn()
    host = platform.node()

    ncmds = len(runnable_task.task.cmds)

    # so core dumps are written to the job dir
    os.chdir(output_dir)

    env_json = os.path.join(output_dir, '.env.json')

    IO.write_env_to_json(env_json)

    with open(task_stdout, 'w') as stdout_fh:
        with open(task_stderr, 'w') as stderr_fh:
            stdout_fh.write(repr(runnable_task) + "\n")
            stdout_fh.write("Created at {x} on {h}\n".format(x=datetime.datetime.now(), h=host))
            stdout_fh.write("Running task in {o}\n".format(o=output_dir))

            # Validate Inputs
            # NOTE(review): a missing input only breaks this loop; the
            # commands below still run — confirm whether execution should
            # be skipped entirely on validation failure.
            for input_file in runnable_task.task.input_files:
                if os.path.exists(input_file):
                    # Fixed unbalanced quote in the message
                    stdout_fh.write("Validated INPUT file '{i}'\n".format(i=input_file))
                else:
                    # Fixed unbalanced quote in the message
                    err_msg = "Unable to find INPUT file '{i}'".format(i=input_file)
                    stderr_fh.write(err_msg + "\n")
                    log.error(err_msg)
                    break

            stdout_fh.write("Starting to run {n} cmds.".format(n=ncmds))
            stdout_fh.flush()
            stderr_fh.flush()

            for i, cmd in enumerate(runnable_task.task.cmds):
                log.info("Running command \n" + cmd)

                # see run_command API for future fixes
                rcode, _, _, run_time = run_command(cmd, stdout_fh, stderr_fh, time_out=None)

                if rcode != 0:
                    err_msg_ = "Failed task {i} exit code {r} in {s:.2f} sec (See file '{f}'.)".format(i=runnable_task.task.task_id, r=rcode, s=run_time, f=task_stderr)
                    # BUG FIX: the original wrote the stale (usually empty)
                    # err_msg here instead of the freshly built err_msg_.
                    stderr_fh.write(err_msg_ + "\n")
                    stderr_fh.flush()

                    t_error_msg = _extract_last_nlines(task_stderr)
                    err_msg = "\n".join([err_msg_, "Extracted from stderr", t_error_msg])

                    log.error(err_msg)
                    stdout_fh.write("breaking out. Unable to run remaining task commands.")
                    break
                else:
                    stdout_fh.write("completed running cmd {i} of {n}. exit code {x} in {s:.2f} sec on host {h}\n".format(x=rcode, s=run_time, h=host, i=i + 1, n=ncmds))

            smsg_ = "completed running commands. Exit code {i}".format(i=rcode)
            log.debug(smsg_)

            if rcode == 0:
                log.info("Core RTC runner was successful. Validating output files.")
                # Validate output files of a successful task.
                for ix, output_file in enumerate(runnable_task.task.output_files):
                    if os.path.exists(output_file):
                        stdout_fh.write("Successfully validated {i} output file '{o}' on {h} \n".format(o=output_file, i=ix, h=host))
                    else:
                        # Missing output marks the task failed
                        rcode = 127
                        err_msg = "Unable to find {i} output file '{x}'. Marking task as failed. Setting exit code to {r}".format(x=output_file, i=ix, r=rcode)
                        stderr_fh.write(err_msg + "\n")
                        stdout_fh.write(err_msg + "\n")
                        log.error(err_msg)

            # FIXME. There should be a better way to communicate warnings
            warn_msg = ""

            # Write the task summary to a pbcommand Report object
            r = to_task_report(host, runnable_task.task.task_id, get_run_time(), rcode, err_msg, warn_msg)
            task_report_path = os.path.join(output_dir, 'task-report.json')

            msg = "Writing task id {i} task report to {r}".format(r=task_report_path, i=runnable_task.task.task_id)
            log.info(msg)
            stdout_fh.write(msg + "\n")
            r.write_json(task_report_path)

            stderr_fh.flush()
            stdout_fh.flush()

    # Cleanup resource files (skipped in debug mode so they can be inspected)
    if not debug_mode and runnable_task.task.resources:
        try:
            cleanup_resources(runnable_task)
            log.debug("successfully cleaned up {n} resources.".format(n=len(runnable_task.task.resources)))
        except Exception as e:
            # Best-effort cleanup; log and continue rather than fail the task
            log.error(str(e))
            log.error("failed to successfully cleanup resources. {f}".format(f=runnable_task.task.resources))

    return rcode, err_msg, get_run_time()