Example #1
0
def WorkQueueSubmitThread(task_queue=multiprocessing.Queue(),
                          queue_lock=threading.Lock(),
                          launch_cmd=None,
                          env=None,
                          collector_queue=multiprocessing.Queue(),
                          see_worker_output=False,
                          data_dir=".",
                          full=False,
                          cancel_value=multiprocessing.Value('i', 1),
                          port=WORK_QUEUE_DEFAULT_PORT,
                          wq_log_dir=None,
                          project_password=None,
                          project_password_file=None,
                          project_name=None):
    """Thread to handle Parsl app submissions to the Work Queue objects.

    Takes in Parsl functions submitted using submit(), and creates a
    Work Queue task with the appropriate specifications, which is then
    submitted to Work Queue. After tasks are completed, processes the
    exit status and exit code of the task, and sends results to the
    Work Queue collector thread.

    Parameters
    ----------
    task_queue : queue of dict
        Each item must provide the keys "task_id", "data_loc",
        "result_loc", "input_files", "output_files" and "std_files".
        File entries are indexable as (local name, remote name, cache);
        input entries additionally carry a type at index 3 ("std" marks
        a file that is moved into place before the command runs).
    queue_lock : lock
        Not used by this function; kept for interface compatibility.
    launch_cmd : str
        Format string with the placeholders ``input_file``,
        ``output_file`` and ``remapping_string``.
    env : dict or None
        Environment variables set on every task.
    collector_queue : queue
        Receives exactly one dict per finished (or unsubmittable) task:
        ``{"tid", "result_received": True, "result"}`` on success, or
        ``{"tid", "result_received": False, "reason", "status"}`` on
        failure.
    see_worker_output : bool
        When true, print the output of every successful task.
    data_dir : str
        Directory searched for ``task_<id>_function_result`` files.
    full : bool
        Enable full resource monitoring (only when ``wq_log_dir`` is set).
    cancel_value : shared integer
        The loop stops as soon as ``cancel_value.value == 0``.
    port : int
        Port the WorkQueue object listens on.
    wq_log_dir : str or None
        Directory for the debug, master, transaction and resource logs.
    project_password, project_password_file, project_name
        Optional WorkQueue project settings; the password takes
        precedence over the password file.

    Returns
    -------
    int
        Always 0.

    Raises
    ------
    Exception
        Whatever ``WorkQueue(port)`` raises is logged and re-raised.
    """
    logger.debug("Starting WorkQueue Submit/Wait Process")

    # Enable debugging flags and create logging file
    if wq_log_dir is not None:
        logger.debug("Setting debugging flags and creating logging file")
        wq_debug_log = os.path.join(wq_log_dir, "debug_log")
        cctools_debug_flags_set("all")
        cctools_debug_config_file(wq_debug_log)

    # Create WorkQueue queue object
    logger.debug("Creating WorkQueue Object")
    try:
        logger.debug("Listening on port {}".format(port))
        q = WorkQueue(port)
    except Exception as e:
        logger.error("Unable to create WorkQueue object: {}".format(e))
        raise

    # Specify WorkQueue queue attributes
    if project_name:
        q.specify_name(project_name)
    if project_password:
        q.specify_password(project_password)
    elif project_password_file:
        q.specify_password_file(project_password_file)

    # Only write logs when the wq_log_dir is specified, which it most likely will be
    if wq_log_dir is not None:
        wq_master_log = os.path.join(wq_log_dir, "master_log")
        wq_trans_log = os.path.join(wq_log_dir, "transaction_log")
        if full:
            wq_resource_log = os.path.join(wq_log_dir, "resource_logs")
            q.enable_monitoring_full(dirname=wq_resource_log)
        q.specify_log(wq_master_log)
        q.specify_transactions_log(wq_trans_log)

    # Explanations for the exit status of the wrapper script.
    wrapper_failure_reasons = {
        1: "problem parsing command line options",
        2: "problem loading function data",
        3: "problem remapping file names",
        4: "problem writing out function result",
    }
    # Explanations for the Work Queue result code of a task.
    system_failure_reasons = {
        1: "missing input file",
        2: "unable to generate output file",
        4: "stdout has been truncated",
        1 << 3: "task terminated with a signal",
        2 << 3: "task used more resources than requested",
        3 << 3: "task ran past the specified end time",
        4 << 3: "result could not be classified",
        5 << 3: "task failed, but not a task error",
        6 << 3: "unable to complete after specified number of retries",
        7 << 3: "task ran for more than the specified time",
        8 << 3: "task needed more space to complete task",
    }

    # Work Queue ids of submitted tasks that have not been retrieved yet.
    wq_tasks = set()
    orig_ppid = os.getppid()
    continue_running = True
    while continue_running:
        # A changed parent pid means the original parent is gone: stop.
        if os.getppid() != orig_ppid:
            logger.debug("new Process")
            break

        # Submit tasks
        while task_queue.qsize() > 0:
            if cancel_value.value == 0:
                logger.debug("cancel value set to cancel")
                continue_running = False
                break

            # Obtain task from task_queue
            try:
                item = task_queue.get(timeout=1)
                logger.debug("Removing task from queue")
            except queue.Empty:
                continue
            parsl_id = item["task_id"]

            # Extract information about the task
            function_data_loc = item["data_loc"]
            function_data_loc_remote = os.path.basename(function_data_loc)
            function_result_loc = item["result_loc"]
            function_result_loc_remote = os.path.basename(function_result_loc)
            input_files = item["input_files"]
            output_files = item["output_files"]
            std_files = item["std_files"]

            full_script_name = workqueue_worker.__file__
            script_name = os.path.basename(full_script_name)

            remap_entries = []
            std_moves = []

            # Parse input file information
            logger.debug("Looking at input")
            for spec in input_files:
                if spec[3] == "std":
                    std_moves.append("mv " + spec[1] + " " + spec[0] + "; ")
                else:
                    remap_entries.append(spec[0] + ":" + spec[1])
            logger.debug(",".join(remap_entries))

            # Parse output file information
            logger.debug("Looking at output")
            for spec in output_files:
                remap_entries.append(spec[0] + ":" + spec[1])
            logger.debug(",".join(remap_entries))

            # Only emit the -r option when there is something to remap;
            # a bare "-r" without an argument would break the wrapper's
            # command line.
            remapping_string = ""
            if remap_entries:
                remapping_string = "-r " + ",".join(remap_entries)
            std_string = "".join(std_moves)

            # Create command string
            logger.debug(launch_cmd)
            command_str = launch_cmd.format(
                input_file=function_data_loc_remote,
                output_file=function_result_loc_remote,
                remapping_string=remapping_string)
            command_str = std_string + command_str
            logger.debug(command_str)

            # Create WorkQueue task for the command
            logger.debug("Sending task {} with command: {}".format(
                parsl_id, command_str))
            try:
                t = Task(command_str)
            except Exception as e:
                logger.error("Unable to create task: {}".format(e))
                # Report the failure so the task does not vanish silently.
                collector_queue.put_nowait({
                    "tid": parsl_id,
                    "result_received": False,
                    "reason": "Workqueue Task Start Failure",
                    "status": 1
                })
                continue

            # Specify environment variables for the task
            if env is not None:
                for var in env:
                    t.specify_environment_variable(var, env[var])

            # Specify script, and data/result files for task
            t.specify_file(full_script_name,
                           script_name,
                           WORK_QUEUE_INPUT,
                           cache=True)
            t.specify_file(function_data_loc,
                           function_data_loc_remote,
                           WORK_QUEUE_INPUT,
                           cache=False)
            t.specify_file(function_result_loc,
                           function_result_loc_remote,
                           WORK_QUEUE_OUTPUT,
                           cache=False)
            t.specify_tag(str(parsl_id))
            logger.debug("Parsl ID: {}".format(t.id))

            # Specify all input/output files for task
            for spec in input_files:
                t.specify_file(spec[0],
                               spec[1],
                               WORK_QUEUE_INPUT,
                               cache=spec[2])
            for spec in output_files:
                t.specify_file(spec[0],
                               spec[1],
                               WORK_QUEUE_OUTPUT,
                               cache=spec[2])
            for spec in std_files:
                t.specify_file(spec[0],
                               spec[1],
                               WORK_QUEUE_OUTPUT,
                               cache=spec[2])

            # Submit the task to the WorkQueue object
            logger.debug("Submitting task {} to WorkQueue".format(parsl_id))
            try:
                wq_id = q.submit(t)
                wq_tasks.add(wq_id)
            except Exception as e:
                logger.error(
                    "Unable to submit task to WorkQueue: {}".format(e))

                msg = {
                    "tid": parsl_id,
                    "result_received": False,
                    "reason": "Workqueue Task Start Failure",
                    "status": 1
                }

                collector_queue.put_nowait(msg)
                continue

            logger.debug("Task {} submitted to WorkQueue with id {}".format(
                parsl_id, wq_id))

        if cancel_value.value == 0:
            continue_running = False

        # If the queue is not empty wait on the WorkQueue queue for a task
        if not q.empty() and continue_running:
            while True:
                if cancel_value.value == 0:
                    continue_running = False
                    break

                # Obtain the task from the queue
                t = q.wait(1)
                if t is None:
                    break

                parsl_tid = t.tag
                logger.debug(
                    "Completed WorkQueue task {}, parsl task {}".format(
                        t.id, parsl_tid))
                # The task has left the queue whether it succeeded or not,
                # so it no longer needs to be cancelled on shutdown.
                wq_tasks.discard(t.id)
                status = t.return_status
                task_result = t.result

                # Task failure
                if status != 0 or task_result not in (
                        WORK_QUEUE_RESULT_SUCCESS,
                        WORK_QUEUE_RESULT_OUTPUT_MISSING):
                    logger.debug(
                        "Wrapper Script status: {}\nWorkQueue Status: {}".
                        format(status, task_result))
                    # Wrapper script failure
                    if status != 0:
                        logger.debug(
                            "WorkQueue task {} failed with status {}".
                            format(t.id, status))
                        reason = "Wrapper Script Failure: "
                        reason += wrapper_failure_reasons.get(
                            status,
                            "unable to process wrapper script failure with status = {}".format(
                                status))
                        reason += "\nTrace:\n" + str(t.output)
                        logger.debug(
                            "WorkQueue runner script failed for task {} because {}\n"
                            .format(parsl_tid, reason))
                    # WorkQueue system failure
                    else:
                        reason = "WorkQueue System Failure: "
                        reason += system_failure_reasons.get(
                            task_result,
                            "unable to process Work Queue system failure")

                    msg = {
                        "tid": parsl_tid,
                        "result_received": False,
                        "reason": reason,
                        "status": status
                    }

                # Task Success
                else:
                    # Print the output from the task
                    if see_worker_output:
                        print(t.output)

                    # Load result into result file
                    result_loc = os.path.join(
                        data_dir,
                        "task_" + str(parsl_tid) + "_function_result")
                    logger.debug(
                        "Looking for result in {}".format(result_loc))
                    # NOTE(review): pickle.load trusts the file written by
                    # the worker; only run against trusted workers.
                    with open(result_loc, "rb") as f:
                        result = pickle.load(f)

                    msg = {
                        "tid": parsl_tid,
                        "result_received": True,
                        "result": result
                    }

                # Exactly one message per completed task.
                collector_queue.put_nowait(msg)

        if not continue_running:
            logger.debug("Exiting WorkQueue Master Thread event loop")
            break

    # Remove all WorkQueue tasks that remain in the queue object
    for wq_task in wq_tasks:
        logger.debug("Cancelling WorkQueue Task {}".format(wq_task))
        q.cancel_by_taskid(wq_task)

    logger.debug("Exiting WorkQueue Monitoring Process")
    return 0
Example #2
0
def _work_queue_submit_wait(task_queue=multiprocessing.Queue(),
                            launch_cmd=None,
                            env=None,
                            collector_queue=multiprocessing.Queue(),
                            data_dir=".",
                            full=False,
                            shared_fs=False,
                            autolabel=False,
                            autolabel_window=None,
                            autocategory=False,
                            should_stop=None,
                            port=WORK_QUEUE_DEFAULT_PORT,
                            wq_log_dir=None,
                            project_password_file=None,
                            project_name=None):
    """Thread to handle Parsl app submissions to the Work Queue objects.

    Takes in Parsl functions submitted using submit(), and creates a
    Work Queue task with the appropriate specifications, which is then
    submitted to Work Queue. After tasks are completed, processes the
    exit status and exit code of the task, and sends results to the
    Work Queue collector thread.
    To avoid python's global interpreter lock with work queue's wait, this
    function should be launched as a process, not as a lightweight thread. This
    means that any communication should be done using the multiprocessing
    module capabilities, rather than shared memory.

    Parameters
    ----------
    task_queue : queue
        Source of task objects. Each task is read for the attributes
        ``id``, ``category``, ``env_pkg``, ``map_file``,
        ``function_file``, ``result_file``, ``input_files`` and
        ``output_files``.
    launch_cmd : str
        Format string with the placeholders ``package_prefix``,
        ``mapping``, ``function`` and ``result``.
    env : dict or None
        Environment variables set on every task.
    collector_queue : queue
        Receives one ``WqTaskToParsl`` per finished or unsubmittable task.
    data_dir : str
        Not used by this function.
    full : bool
        Enable full resource monitoring (only when ``wq_log_dir`` is set).
    shared_fs : bool
        When true, the task's input/output files are not staged.
    autolabel : bool
        Enable monitoring and max-throughput allocation per category.
    autolabel_window : int or None
        Value for the ``category-steady-n-tasks`` tuning parameter.
    autocategory : bool
        Not used by this function.
    should_stop : object with a ``value`` attribute
        The loops run while ``should_stop.value`` is falsy. The default
        of None is not usable; a shared value must be supplied.
    port : int
        Port the WorkQueue object listens on.
    wq_log_dir : str or None
        Directory for the debug, master, transaction and resource logs.
    project_password_file, project_name
        Optional WorkQueue project settings.

    Returns
    -------
    int
        Always 0.

    Raises
    ------
    Exception
        Whatever ``WorkQueue(...)`` raises is logged and re-raised.
    """
    logger.debug("Starting WorkQueue Submit/Wait Process")

    # Enable debugging flags and create logging file
    wq_debug_log = None
    if wq_log_dir is not None:
        logger.debug("Setting debugging flags and creating logging file")
        wq_debug_log = os.path.join(wq_log_dir, "debug_log")

    # Create WorkQueue queue object
    logger.debug("Creating WorkQueue Object")
    try:
        logger.debug("Listening on port {}".format(port))
        q = WorkQueue(port, debug_log=wq_debug_log)
    except Exception as e:
        logger.error("Unable to create WorkQueue object: {}".format(e))
        raise

    # Specify WorkQueue queue attributes
    if project_name:
        q.specify_name(project_name)

    if project_password_file:
        q.specify_password_file(project_password_file)

    if autolabel:
        q.enable_monitoring()
        if autolabel_window is not None:
            q.tune('category-steady-n-tasks', autolabel_window)

    # Only write logs when the wq_log_dir is specified, which it most likely will be
    if wq_log_dir is not None:
        wq_master_log = os.path.join(wq_log_dir, "master_log")
        wq_trans_log = os.path.join(wq_log_dir, "transaction_log")
        if full:
            wq_resource_log = os.path.join(wq_log_dir, "resource_logs")
            q.enable_monitoring_full(dirname=wq_resource_log)
        q.specify_log(wq_master_log)
        q.specify_transactions_log(wq_trans_log)

    orig_ppid = os.getppid()

    # Mapping task tag -> result file for tasks currently in Work Queue.
    result_file_of_task_id = {}

    while not should_stop.value:
        # A changed parent pid means the original parent is gone: stop.
        if os.getppid() != orig_ppid:
            logger.debug("new Process")
            break

        # Submit tasks
        while task_queue.qsize() > 0 and not should_stop.value:
            # Obtain task from task_queue
            try:
                task = task_queue.get(timeout=1)
                logger.debug("Removing task from queue")
            except queue.Empty:
                continue

            pkg_pfx = ""
            if task.env_pkg is not None:
                pkg_pfx = "./{} -e {} ".format(
                    os.path.basename(package_run_script),
                    os.path.basename(task.env_pkg))

            # Create command string
            logger.debug(launch_cmd)
            command_str = launch_cmd.format(
                package_prefix=pkg_pfx,
                mapping=os.path.basename(task.map_file),
                function=os.path.basename(task.function_file),
                result=os.path.basename(task.result_file))
            logger.debug(command_str)

            # Create WorkQueue task for the command
            logger.debug("Sending task {} with command: {}".format(
                task.id, command_str))
            try:
                t = Task(command_str)
            except Exception as e:
                logger.error("Unable to create task: {}".format(e))
                collector_queue.put_nowait(
                    WqTaskToParsl(
                        id=task.id,
                        result_received=False,
                        result=None,
                        reason="task could not be created by work queue",
                        status=-1))
                continue

            t.specify_category(task.category)
            if autolabel:
                q.specify_category_mode(
                    task.category, WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT)

            # Specify environment variables for the task
            if env is not None:
                for var in env:
                    t.specify_environment_variable(var, env[var])

            if task.env_pkg is not None:
                t.specify_input_file(package_run_script, cache=True)
                t.specify_input_file(task.env_pkg, cache=True)

            # Specify script, and data/result files for task
            t.specify_input_file(exec_parsl_function.__file__, cache=True)
            t.specify_input_file(task.function_file, cache=False)
            t.specify_input_file(task.map_file, cache=False)
            t.specify_output_file(task.result_file, cache=False)
            t.specify_tag(str(task.id))

            logger.debug("Parsl ID: {}".format(task.id))

            # Specify input/output files that need to be staged.
            # Absolute paths are assumed to be in shared filesystem, and thus
            # not staged by work queue.
            if not shared_fs:
                for spec in task.input_files:
                    if spec.stage:
                        t.specify_input_file(spec.parsl_name,
                                             spec.parsl_name,
                                             cache=spec.cache)
                for spec in task.output_files:
                    if spec.stage:
                        t.specify_output_file(spec.parsl_name,
                                              spec.parsl_name,
                                              cache=spec.cache)

            # Submit the task to the WorkQueue object
            logger.debug("Submitting task {} to WorkQueue".format(task.id))
            try:
                wq_id = q.submit(t)
            except Exception as e:
                logger.error(
                    "Unable to submit task to work queue: {}".format(e))
                collector_queue.put_nowait(
                    WqTaskToParsl(
                        id=task.id,
                        result_received=False,
                        result=None,
                        reason="task could not be submited to work queue",
                        status=-1))
                continue

            # Register the result file only once the task is really in the
            # queue, so a failed submission leaves no stale entry behind.
            result_file_of_task_id[str(task.id)] = task.result_file
            logger.debug("Task {} submitted to WorkQueue with id {}".format(
                task.id, wq_id))

        # If the queue is not empty wait on the WorkQueue queue for a task
        if not q.empty():
            while not should_stop.value:
                # Obtain the task from the queue
                t = q.wait(1)
                if t is None:
                    break

                # When a task is found:
                parsl_id = t.tag
                logger.debug(
                    "Completed WorkQueue task {}, parsl task {}".format(
                        t.id, t.tag))
                result_file = result_file_of_task_id.pop(t.tag)

                # A task completes 'successfully' if it has a result file,
                # and it can be loaded. This may mean that the 'success' is
                # an exception.
                logger.debug("Looking for result in {}".format(result_file))
                try:
                    # NOTE(review): pickle.load trusts the file written by
                    # the worker; only run against trusted workers.
                    with open(result_file, "rb") as f_in:
                        result = pickle.load(f_in)
                # If a result file could not be generated, explain the
                # failure according to work queue error codes. We generate
                # an exception and wrap it with RemoteExceptionWrapper, to
                # match the positive case.
                except Exception as e:
                    reason = _explain_work_queue_result(t)
                    logger.debug(
                        "Did not find result in {}".format(result_file))
                    logger.debug(
                        "Wrapper Script status: {}\nWorkQueue Status: {}".
                        format(t.return_status, t.result))
                    logger.debug(
                        "Task with id parsl {} / wq {} failed because:\n{}".
                        format(parsl_id, t.id, reason))
                    collector_queue.put_nowait(
                        WqTaskToParsl(id=parsl_id,
                                      result_received=False,
                                      result=e,
                                      reason=reason,
                                      status=t.return_status))
                else:
                    # Kept outside the try so that an error while reporting
                    # success is not mistaken for a missing result file.
                    logger.debug("Found result in {}".format(result_file))
                    collector_queue.put_nowait(
                        WqTaskToParsl(id=parsl_id,
                                      result_received=True,
                                      result=result,
                                      reason=None,
                                      status=t.return_status))
    logger.debug("Exiting WorkQueue Monitoring Process")
    return 0
# Configure the queue. `wq` is expected to have been created earlier in
# this script (its construction is not part of this fragment).
wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS)
#wq.specify_name('workqueue_example')
#wq.specify_master_mode(WORK_QUEUE_MASTER_MODE_STANDALONE)
#wq.specify_worker_mode(WORK_QUEUE_WORKER_MODE_SHARED)
wq.specify_task_order(WORK_QUEUE_TASK_ORDER_LIFO)

# Single-argument, parenthesized print calls produce the same text under
# both Python 2 and Python 3.
if wq.empty():
    print('work queue is empty')

# Names of the output files requested from the submitted tasks.
outputs = []

for i in range(5):
    ifile = 'msg.%d' % i
    ofile = 'out.%d' % i
    # Each task copies its input buffer to its output file.
    task = Task('cat < %s > %s' % (ifile, ofile))

    task.specify_tag(str(time.time()))
    print('%s %s' % (task.command, task.tag))

    task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES)
    print('%s %s' % (task.command, task.algorithm))

    # The input file content is supplied from an in-memory buffer.
    task.specify_buffer('hello from %d' % i, ifile, cache=False)
    # Alternate between the two ways of declaring an output file.
    if i % 2:
        task.specify_output_file(ofile, cache=False)
    else:
        task.specify_file(ofile, type=WORK_QUEUE_OUTPUT, cache=False)

    outputs.append(ofile)
    wq.submit(task)
Example #4
0
    def submit(self, traj):
        """Submit a job to the work-queue for further sampling.

        The trajectory is added to the session, its initial PDB is written
        to ``traj.init_pdb_fn``, and a Task that runs the forcefield's
        driver script is built and handed to ``self.wq``.

        Parameters
        ----------
        traj : trajectory object
            Must not have been submitted before (``submit_time`` is None),
            must carry an ``init_pdb`` attribute, and must have a non-None
            ``mode`` and ``forcefield``. ``traj.submit_time`` is set to
            the current time as a side effect.

        Raises
        ------
        ValueError
            If the trajectory was already submitted, has no ``init_pdb``,
            or has no ``mode`` / ``forcefield``.
        """
        if traj.submit_time is not None:
            raise ValueError("This traj has already been submitted")
        Session.add(traj)
        Session.flush()
        traj.populate_default_filenames()

        if not hasattr(traj, 'init_pdb'):
            raise ValueError('Traj is supposed to have a pdb object tacked on')
        # Validate before the forcefield is dereferenced below; otherwise a
        # missing forcefield surfaces as an AttributeError instead.
        if traj.mode is None or traj.forcefield is None:
            raise ValueError('malformed traj')
        save_file(traj.init_pdb_fn, traj.init_pdb)

        forcefield = traj.forcefield
        # why does traj.forcefield.driver come out as unicode?
        driver_fn = str(forcefield.driver)

        # File names as seen by the remote worker.
        remote_driver_fn = os.path.split(driver_fn)[1]
        remote_pdb_fn = 'input.pdb'
        remote_output_fn = 'production_dry{}'.format(forcefield.output_extension)

        task = Task('chmod +x ./{driver}; ./{driver} {pdb_fn} {ff} {water} {mode} {threads}'.format(
            pdb_fn=remote_pdb_fn,
            mode=traj.mode,
            driver=remote_driver_fn,
            ff=forcefield.name,
            water=forcefield.water,
            threads=forcefield.threads))

        task.specify_input_file(driver_fn, remote_driver_fn)
        task.specify_output_file(traj.wqlog_fn, 'logs/driver.log')
        task.specify_input_file(traj.init_pdb_fn, remote_pdb_fn)
        task.specify_output_file(traj.dry_xtc_fn, remote_output_fn)

        if self.return_wet_xtc:
            # this is the XTC file with waters, generated by the driver
            # when you're doing implicit solvent only, this stuff is not used.
            remote_wet_output_fn = 'production_wet{}'.format(forcefield.output_extension)
            task.specify_output_file(traj.wet_xtc_fn, remote_wet_output_fn)
            task.specify_output_file(traj.last_wet_snapshot_fn, 'last_wet_snapshot.pdb')
        else:
            logger.debug('Not requesting production_wet%s from driver (implicit)', forcefield.output_extension)

        # The tag carries the traj id along with the task.
        task.specify_tag(str(traj.id))
        task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES) # what does this do?

        traj.submit_time = datetime.now()

        self.wq.submit(task)
        logger.info('Submitted to queue: %s', traj)
Example #5
0
def WorkQueueSubmitThread(task_queue=multiprocessing.Queue(),
                          queue_lock=threading.Lock(),
                          launch_cmd=None,
                          env=None,
                          collector_queue=multiprocessing.Queue(),
                          see_worker_output=False,
                          data_dir=".",
                          full=False,
                          cancel_value=multiprocessing.Value('i', 1),
                          port=WORK_QUEUE_DEFAULT_PORT,
                          wq_log_dir=None,
                          project_password=None,
                          project_password_file=None,
                          project_name=None):
    """Submit queued tasks to Work Queue and forward their outcomes.

    Creates a ``WorkQueue`` on ``port`` and then loops: every item pulled
    from ``task_queue`` is turned into a Work Queue ``Task`` and submitted;
    finished tasks are collected with ``q.wait(1)`` and exactly one message
    per finished task is put on ``collector_queue``. The loop ends when the
    parent process id changes or when ``cancel_value.value`` becomes 0, after
    which every task id still tracked is cancelled.

    Parameters
    ----------
    task_queue : queue-like
        Source of task descriptions. Each item is indexed with the keys
        ``"task_id"``, ``"data_loc"``, ``"result_loc"``, ``"input_files"``,
        ``"output_files"`` and ``"std_files"``. The file entries are indexed
        positionally: ``[0]`` local name, ``[1]`` remote name, ``[2]`` cache
        flag and, for input files, ``[3]`` a kind marker (``"std"`` entries
        are moved into place with ``mv`` instead of being remapped).
    queue_lock : lock
        Accepted for interface compatibility; not used by this function.
    launch_cmd : str
        Format string with the fields ``input_file``, ``output_file`` and
        ``remapping_string``; it becomes the command run by the worker.
    env : mapping or None
        Environment variables set on every task.
    collector_queue : queue-like
        Destination for result/failure message dicts.
    see_worker_output : bool
        When true, print ``t.output`` of successfully completed tasks.
    data_dir : str
        Directory in which ``task_<tid>_function_result`` files are read.
    full : bool
        When true (and ``wq_log_dir`` is set), enable full monitoring into
        ``<wq_log_dir>/resource_logs``.
    cancel_value : shared value
        The loop stops once ``cancel_value.value == 0``.
    port : int
        Port passed to ``WorkQueue``.
    wq_log_dir : str or None
        Directory for the debug, master and transaction logs; no logs are
        configured when ``None``.
    project_password, project_password_file : str or None
        Password (or password file, used only if no password is given)
        applied to the queue.
    project_name : str or None
        Name applied to the queue.

    Returns
    -------
    int
        Always ``0``.

    Raises
    ------
    Exception
        Whatever ``WorkQueue(port)`` raises is logged and re-raised.
    """
    logger.debug("Starting WorkQueue Submit/Wait Process")

    # Remember the parent so the loop can stop if this process is orphaned.
    orig_ppid = os.getppid()

    # Work Queue ids of submitted tasks that have not completed successfully.
    wq_tasks = set()

    continue_running = True

    if wq_log_dir is not None:
        wq_debug_log = os.path.join(wq_log_dir, "debug")
        cctools_debug_flags_set("all")
        cctools_debug_config_file(wq_debug_log)

    logger.debug("Creating Workqueue Object")
    try:
        q = WorkQueue(port)
    except Exception as e:
        # Fixed: the original passed ``format(e)`` as a separate logging
        # argument, so the message was never formatted.
        logger.error("Unable to create Workqueue object: {}".format(e))
        raise

    if project_name:
        q.specify_name(project_name)

    if project_password:
        q.specify_password(project_password)
    elif project_password_file:
        q.specify_password_file(project_password_file)

    # Only write logs when a log directory was specified.
    if wq_log_dir is not None:
        wq_master_log = os.path.join(wq_log_dir, "master_log")
        wq_trans_log = os.path.join(wq_log_dir, "transaction_log")
        if full:
            wq_resource_log = os.path.join(wq_log_dir, "resource_logs")
            q.enable_monitoring_full(dirname=wq_resource_log)

        q.specify_log(wq_master_log)
        q.specify_transactions_log(wq_trans_log)

    while continue_running:
        # Stop if the parent process changed (i.e. the original parent died).
        ppid = os.getppid()
        if ppid != orig_ppid:
            continue_running = False
            continue

        # ---- Submit tasks ------------------------------------------------
        while task_queue.qsize() > 0:
            if cancel_value.value == 0:
                continue_running = False
                break

            try:
                item = task_queue.get(timeout=1)
                logger.debug("Removing task from queue")
            except queue.Empty:
                continue
            parsl_id = item["task_id"]

            function_data_loc = item["data_loc"]
            function_result_loc = item["result_loc"]
            function_result_loc_remote = function_result_loc.split("/")[-1]
            function_data_loc_remote = function_data_loc.split("/")[-1]

            input_files = item["input_files"]
            output_files = item["output_files"]
            std_files = item["std_files"]

            full_script_name = workqueue_worker.__file__
            script_name = full_script_name.split("/")[-1]

            # "local:remote," pairs handed to the wrapper script via -r.
            remapping_string = ""

            # Shell prefix that moves "std" inputs into place before the run.
            std_string = ""
            logger.debug("looking at input")
            for spec in input_files:
                if spec[3] == "std":
                    std_string += "mv " + spec[1] + " " + spec[0] + "; "
                else:
                    remapping_string += spec[0] + ":" + spec[1] + ","
            logger.debug(remapping_string)

            logger.debug("looking at output")
            for spec in output_files:
                remapping_string += spec[0] + ":" + spec[1] + ","
            logger.debug(remapping_string)

            if len(input_files) + len(output_files) > 0:
                remapping_string = "-r " + remapping_string
                # Drop the trailing separator.
                remapping_string = remapping_string[:-1]

            logger.debug(launch_cmd)
            command_str = launch_cmd.format(
                input_file=function_data_loc_remote,
                output_file=function_result_loc_remote,
                remapping_string=remapping_string)

            logger.debug(command_str)
            command_str = std_string + command_str
            logger.debug(command_str)

            logger.debug("Sending task {} with command: {}".format(
                parsl_id, command_str))
            try:
                t = Task(command_str)
            except Exception as e:
                logger.error("Unable to create task: {}".format(e))
                continue
            if env is not None:
                for var in env:
                    t.specify_environment_variable(var, env[var])

            # The wrapper script is the same for every task, so cache it;
            # the function data and result files are per-task.
            t.specify_file(full_script_name,
                           script_name,
                           WORK_QUEUE_INPUT,
                           cache=True)
            t.specify_file(function_result_loc,
                           function_result_loc_remote,
                           WORK_QUEUE_OUTPUT,
                           cache=False)
            t.specify_file(function_data_loc,
                           function_data_loc_remote,
                           WORK_QUEUE_INPUT,
                           cache=False)
            t.specify_tag(str(parsl_id))

            for spec in input_files:
                t.specify_file(spec[0],
                               spec[1],
                               WORK_QUEUE_INPUT,
                               cache=spec[2])

            for spec in output_files:
                t.specify_file(spec[0],
                               spec[1],
                               WORK_QUEUE_OUTPUT,
                               cache=spec[2])

            for spec in std_files:
                t.specify_file(spec[0],
                               spec[1],
                               WORK_QUEUE_OUTPUT,
                               cache=spec[2])

            logger.debug("Submitting task {} to workqueue".format(parsl_id))
            try:
                wq_id = q.submit(t)
                wq_tasks.add(wq_id)
            except Exception as e:
                # Fixed message: this is a submission failure, not a
                # task-creation failure.
                logger.error("Unable to submit task: {}".format(e))

                msg = {
                    "tid": parsl_id,
                    "result_received": False,
                    "reason": "Workqueue Task Start Failure",
                    "status": 1
                }

                collector_queue.put_nowait(msg)
                continue

            logger.debug("Task {} submitted workqueue with id {}".format(
                parsl_id, wq_id))

        if cancel_value.value == 0:
            continue_running = False

        # ---- Wait for tasks ----------------------------------------------
        task_found = True
        # If the queue is not empty, wait on the workqueue for finished tasks.
        if not q.empty() and continue_running:
            while task_found is True:
                if cancel_value.value == 0:
                    continue_running = False
                    task_found = False
                    continue
                t = q.wait(1)
                if t is None:
                    task_found = False
                    continue

                parsl_tid = t.tag
                logger.debug(
                    "Completed workqueue task {}, parsl task {}".format(
                        t.id, parsl_tid))
                status = t.return_status
                task_result = t.result

                if status != 0 or (task_result != WORK_QUEUE_RESULT_SUCCESS
                                   and task_result !=
                                   WORK_QUEUE_RESULT_OUTPUT_MISSING):
                    if task_result == WORK_QUEUE_RESULT_SUCCESS:
                        # Work Queue ran the command, but the wrapper script
                        # exited with a non-zero status.
                        logger.debug(
                            "Workqueue task {} failed with status {}".
                            format(t.id, status))

                        reason = "Wrapper Script Failure: "
                        if status == 1:
                            reason += "command line parsing"
                        if status == 2:
                            reason += "problem loading function data"
                        if status == 3:
                            reason += "problem remapping file names"
                        if status == 4:
                            reason += "problem writing out function result"

                        reason += "\nTrace:\n" + t.output

                        logger.debug(
                            "Workqueue runner script failed for task {} because {}\n"
                            .format(parsl_tid, reason))

                    else:
                        reason = "Workqueue system failure\n"

                    msg = {
                        "tid": parsl_tid,
                        "result_received": False,
                        "reason": reason,
                        "status": status
                    }
                    # Fixed: the message used to be put on the collector
                    # queue here AND again below, delivering every failure
                    # twice. It is now sent once, at the end of the branch.

                else:

                    if see_worker_output:
                        print(t.output)

                    result_loc = os.path.join(
                        data_dir,
                        "task_" + str(parsl_tid) + "_function_result")
                    logger.debug(
                        "Looking for result in {}".format(result_loc))
                    # NOTE(review): unpickling executes arbitrary code; this
                    # assumes result files come from trusted workers.
                    with open(result_loc, "rb") as f:
                        result = pickle.load(f)

                    msg = {
                        "tid": parsl_tid,
                        "result_received": True,
                        "result": result
                    }
                    wq_tasks.remove(t.id)

                collector_queue.put_nowait(msg)

        if continue_running is False:
            logger.debug("Exiting WorkQueue Master Thread event loop")
            break

    # Cancel whatever is still tracked before exiting.
    for wq_task in wq_tasks:
        logger.debug("Cancelling Workqueue Task {}".format(wq_task))
        q.cancel_by_taskid(wq_task)

    logger.debug("Exiting WorkQueue Monitoring Process")
    return 0
# NOTE(review): ``wq``, ``Task``, ``time`` and the WORK_QUEUE_* constants are
# expected to be defined earlier in this script; that part is not visible here.
# The print calls below use a single pre-formatted argument so the script
# produces the same output under Python 2 and Python 3.
print(wq.name)

wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS)
#wq.specify_name('workqueue_example')
#wq.specify_master_mode(WORK_QUEUE_MASTER_MODE_STANDALONE)
#wq.specify_worker_mode(WORK_QUEUE_WORKER_MODE_SHARED)

if wq.empty():
    print('work queue is empty')

# Local names of the output files requested from the submitted tasks.
outputs = []

for i in range(5):
    ifile = 'msg.%d' % i
    ofile = 'out.%d' % i
    # Each task copies its input buffer to its output file on the worker.
    task = Task('cat < %s > %s' % (ifile, ofile))

    task.specify_tag(str(time.time()))
    print('%s %s' % (task.command, task.tag))

    task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES)
    print('%s %s' % (task.command, task.algorithm))

    # The input is provided as an in-memory buffer rather than a local file.
    task.specify_buffer('hello from %d' % i, ifile, cache=False)
    # Alternate between the two ways of declaring an output file.
    if i % 2:
        task.specify_output_file(ofile, cache=False)
    else:
        task.specify_file(ofile, type=WORK_QUEUE_OUTPUT, cache=False)

    outputs.append(ofile)
    wq.submit(task)
Example #7
0
def generate_tasks(command, task_inputs, infiles, outfile, tmpdir,
                   max_retries=0):
    """Generate a set of WorkQueue tasks.

    One task is created per entry of ``task_inputs``. Each task receives its
    entry serialized as JSON under the remote name ``input.json`` and is
    tagged ``<uuid4>_<zero-padded index>``.

    Parameters
    ----------
    command : str
        The shell command to execute on the remote worker.
    task_inputs : iterable
        Input data to be sent to the tasks. Each entry must be JSON
        serializable and is sent to exactly one task.
    infiles : iterable of str
        Task-independent input files. These will be sent along with every
        task (under their base name) and cached on the worker between tasks.
    outfile : str
        Name of the output file produced by the task on the worker.
    tmpdir : str
        Path to the output file store; each task's output is written locally
        to ``<tmpdir>/<tag>_<outfile>``.
    max_retries : int, optional
        Value passed to ``Task.specify_max_retries`` for every task.
        Defaults to 0.

    Returns
    -------
    taskmap : dict of str -> work_queue.Task
        The tasks to run mapped to their tag. Empty when ``task_inputs`` is
        empty.
    """
    taskmap = {}
    for index, task_input in enumerate(task_inputs):
        task = Task(command)
        # The random prefix keeps tags unique across calls; the index keeps
        # them sortable in input order.
        task.specify_tag(f'{uuid.uuid4()}_{index:08d}')
        task.specify_max_retries(max_retries)
        task.specify_buffer(json.dumps(task_input), remote_name='input.json',
                            flags=WORK_QUEUE_NOCACHE)

        for path in infiles:
            task.specify_input_file(path, remote_name=os.path.basename(path),
                                    flags=WORK_QUEUE_CACHE)

        local_output = os.path.join(tmpdir, '_'.join([task.tag, outfile]))
        task.specify_output_file(local_output,
                                 remote_name=outfile,
                                 flags=WORK_QUEUE_NOCACHE)

        taskmap[task.tag] = task
    return taskmap