Example #1
def queue_up(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True):
    """ 
    Submit a job to the Work Queue.

    @param[in] wq (Work Queue Object)
    @param[in] command (string) The command to run on the remote worker.
    @param[in] input_files (list of files) A list of locations of the input files.
    @param[in] output_files (list of files) A list of locations of the output files.
    """
    global WQIDS
    task = work_queue.Task(command)
    cwd = os.getcwd()
    for f in input_files:
        lf = os.path.join(cwd,f)
        task.specify_input_file(lf,f,cache=False)
    for f in output_files:
        lf = os.path.join(cwd,f)
        task.specify_output_file(lf,f,cache=False)
    task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_FCFS)
    if tag is None: tag = command
    task.specify_tag(tag)
    taskid = wq.submit(task)
    if verbose:
        logger.info("Submitting command '%s' to the Work Queue, %staskid %i\n" % (command, "tag %s, " % tag if tag != command else "", taskid))
    if tgt is not None:
        WQIDS[tgt.name].append(taskid)
    else:
        WQIDS["None"].append(taskid)
Example #2
def queue_up_src_dest(wq, command, input_files, output_files, tag=None, tgt=None, verbose=True):
    """ 
    Submit a job to the Work Queue.  This function is a bit fancier in that we can explicitly
    specify where the input files come from, and where the output files go to.

    @param[in] wq (Work Queue Object)
    @param[in] command (string) The command to run on the remote worker.
    @param[in] input_files (list of 2-tuples) A list of local and
    remote locations of the input files.
    @param[in] output_files (list of 2-tuples) A list of local and
    remote locations of the output files.
    """
    global WQIDS
    task = work_queue.Task(command)
    for f in input_files:
        # print f[0], f[1]
        task.specify_input_file(f[0],f[1],cache=False)
    for f in output_files:
        # print f[0], f[1]
        task.specify_output_file(f[0],f[1],cache=False)
    task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_FCFS)
    if tag is None: tag = command
    task.specify_tag(tag)
    taskid = wq.submit(task)
    if verbose:
        logger.info("Submitting command '%s' to the Work Queue, taskid %i\n" % (command, taskid))
    if tgt is not None:
        WQIDS[tgt.name].append(taskid)
    else:
        WQIDS["None"].append(taskid)
Example #3
    def add_task(self, cmd, tag, params, files=None, resource=None, value=None):
        """Create a task for this manager.

        Parameters
        ----------
        cmd : str
            The command to run on the remote worker, e.g. ``echo hello`` or
            ``python script.py``.
        tag : str
            The tag to give this task.
        params : dict
            The parameter values for this task; they are serialized to JSON
            and sent to the worker as the manager's ``param_file`` buffer.
        files : list of `WQFile` or `WQBuffer`, optional
            The input and output files and data buffers that this task will
            send/receive.
        resource : str, optional
            Resource to request for this task: ``'cores'``, ``'feature'``, or
            any other Work Queue resource name.
        value : optional
            The amount (or value) of the requested resource.

        Notes
        -----
        Any command may be passed to the task to run, and the task makes stdout
        and stderr of the command available upon return, regardless of failure.

        See Also
        --------
        `shadho.managers.workqueue.WQFile`
        `shadho.managers.workqueue.WQBuffer`
        `work_queue.Task`
        """
        task = work_queue.Task(' '.join([cmd, tag]))
        task.specify_tag(tag)

        if files is None:
            files = []

        for f in files:
            # A plain tuple is converted to a WQFile: (localpath, remotepath, ftype, cache).
            if isinstance(f, tuple):
                f = WQFile(f[0], remotepath=f[1], ftype=f[2], cache=f[3])
            f.add_to_task(task)

        out = WQFile(os.path.join(self.tmpdir,
                                  '.'.join([tag, self.out_file])),
                     remotepath=self.out_file,
                     ftype='output',
                     cache=False)

        buff = WQBuffer(str(json.dumps(params)),
                        self.param_file,
                        cache=False)

        out.add_to_task(task)
        buff.add_to_task(task)

        if resource is not None:
            if resource == 'cores':
                task.specify_cores(value)
            elif resource == 'feature':
                task.specify_requirement(value)
            else:
                task.specify_resource(resource, value)

        self.submit(task)
        self.tasks_submitted = self.stats.tasks_submitted
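
A hedged sketch of how add_task might be invoked on a manager instance; the manager object, script name, and hyperparameters are hypothetical, and the (localpath, remotepath, ftype, cache) tuple form follows the conversion done above.

# 'manager' is assumed to be an instance of the class defining add_task,
# with tmpdir, out_file, and param_file already configured.
manager.add_task(
    cmd='python train.py',
    tag='trial-0001',
    params={'learning_rate': 0.01, 'batch_size': 32},
    files=[('train.py', 'train.py', 'input', True)],
    resource='cores',
    value=4)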
Example #4
def create_work_queue_task(task_counter,
                           tmpdir,
                           function,
                           input_args,
                           fn_wrapper='mdsim.py'):
    """ Returns a Work Queue task to execute the python code output = function(*input_args)
    The python function and input arguments are written to files in the
    directory tmpdir/ using dill to serialize them. These files are used as
    inputs to the Work Queue task.

    The Work Queue task executes the python function as a shell command using
    the fn_wrapper (mdsim.py by default), which reads the input files, converts
    them to valid python values, and writes to a file the python value obtained
    from the function evaluation. This output file is sent back to the Work
    Queue manager, where it can be read and decoded to obtained a valid python
    value. Should an exception occur, it is returned as the value of the function.
    """
    logger.debug("creating task {}: {}({})".format(
        task_counter, function.__name__,
        ','.join(str(arg) for arg in input_args)))

    args_file = os.path.join(tmpdir, "input_args_{}.p".format(task_counter))
    fn_file = os.path.join(tmpdir, "function_{}.p".format(task_counter))
    out_file = os.path.join(tmpdir, "out_{}.p".format(task_counter))

    # Save args to a dilled file.
    with open(args_file, "wb") as wf:
        dill.dump(input_args, wf)

    # Save the function to a dilled file.
    with open(fn_file, "wb") as wf:
        dill.dump(function, wf)

    # Base command just invokes python on the function and data.
    command = "./{wrapper} {fn} {args} {out}".format(
        wrapper=os.path.basename(fn_wrapper),
        fn=os.path.basename(fn_file),
        args=os.path.basename(args_file),
        out=os.path.basename(out_file))

    task = wq.Task(command)
    task.specify_tag(str(task_counter))

    task.specify_input_file(fn_wrapper, cache=True)
    task.specify_input_file(fn_file, cache=False)
    task.specify_input_file(args_file, cache=False)
    task.specify_output_file(out_file, cache=False)

    task.specify_cores(1)
    task.specify_memory(250)  #MB
    task.specify_disk(250)  #MB

    return task
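
The docstring above describes the wrapper's contract but not its body; below is a minimal sketch (not the actual mdsim.py) of what such a wrapper could look like, assuming the dill-serialized function, arguments, and output files used by create_work_queue_task.

#!/usr/bin/env python
# Hypothetical wrapper: read the dilled function and arguments, run the
# function, and write the dilled result (or the exception) to the output file.
import sys
import dill

fn_file, args_file, out_file = sys.argv[1], sys.argv[2], sys.argv[3]

with open(fn_file, 'rb') as rf:
    function = dill.load(rf)
with open(args_file, 'rb') as rf:
    input_args = dill.load(rf)

try:
    result = function(*input_args)
except Exception as e:
    # Per the docstring, an exception becomes the value of the function.
    result = e

with open(out_file, 'wb') as wf:
    dill.dump(result, wf)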
Example #5
    def new_task(self):
        cmd = self.cfg.executable.remotepath
        task = WQ.Task('./' + cmd)

        ### executable
        self.cfg.executable.add_to_task(task)

        ### cached files
        for wqf in self.cfg.getcache:
            wqf.add_to_task(task)

        return task
Example #6
    def submit(self, command, inputfiles, outputfiles):
        command += ' 2>&1'
        task = work_queue.Task(command)
        cwd = os.getcwd()
        for f in inputfiles:
            lf = os.path.join(cwd, f)
            task.specify_input_file(lf, f, cache=False)
        for f in outputfiles:
            lf = os.path.join(cwd, f)
            task.specify_output_file(lf, f, cache=False)
        task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_RAND)
        task.specify_tag(cwd)
        task.print_time = 60
        taskid = self.wq.submit(task)
        return taskid
Example #7
def create_task_sra(s):
    # Define constant values.
    sample = s['sample_name']
    refid = s['refid']
    sra = s['sra']

    get_log = sample + '_get.txt'
    run_log = sample + '_log.txt'
    p1_path = sra + '_1.fastq'
    p2_path = sra + '_2.fastq'

    # Define tasks.
    get = 'time fastq-dump --split-files %s > %s 2>&1' % (sra, get_log)

    es_cmd = 'time ericscript -p %d -db %s --refid %s -name %s -o ./%s %s %s > %s 2>&1' \
             % (avail_cores, references_remote, refid, sample, sample, p1_path, p2_path, run_log)
    cleanup = 'rm -r ${HOME}/ncbi'
    
    # Create & tag task.
    t = wq.Task('bash -c "' + ' && '.join([get, es_cmd]) + '"')
    t.specify_tag(sample)

    # Specify references folder as a cached input.
    t.specify_directory(references_local, references_remote, wq.WORK_QUEUE_INPUT, recursive=True, cache=True)

    # Specify anticipated outputs.
    # ... logs.
    t.specify_file(os.path.join(resultsdir, get_log), get_log, wq.WORK_QUEUE_OUTPUT, cache=False)  # get log
    t.specify_file(os.path.join(resultsdir, run_log), run_log, wq.WORK_QUEUE_OUTPUT, cache=False)  # ericscript log
    # ... results.
    t.specify_file(os.path.join(resultsdir, sample + '.results.filtered.tsv'),
                   os.path.join(sample, sample + '.results.filtered.tsv'),
                   wq.WORK_QUEUE_OUTPUT, cache=False)  # filtered results
    t.specify_file(os.path.join(resultsdir, sample + '.results.total.tsv'),
                   os.path.join(sample, sample + '.results.total.tsv'),
                   wq.WORK_QUEUE_OUTPUT, cache=False)  # total results
    t.specify_file(os.path.join(resultsdir, sample + '.Summary.RData'),
                   os.path.join(sample, sample + '.Summary.RData'),
                   wq.WORK_QUEUE_OUTPUT, cache=False)  # RData summary

    return t
Example #8
def create_task_irods(s):
    # Define constant values.
    sample = s['sample_name']
    refid = s['refid']
    p1_irods = s['p1']
    p2_irods = s['p2']

    get_log = sample + '_get.txt'
    run_log = sample + '_log.txt'
    p1_path = os.path.basename(p1_irods)
    p2_path = os.path.basename(p2_irods)

    # Define tasks.
    p1_get = 'time iget -TV %s . > %s 2>&1' % (p1_irods, get_log)
    p2_get = 'time iget -TV %s . >> %s 2>&1' % (p2_irods, get_log)
    es_cmd = 'time ericscript -p %d -db %s --refid %s -name %s -o ./%s %s %s > %s 2>&1' \
             % (avail_cores, references_remote, refid, sample, sample, p1_path, p2_path, run_log)

    # Create & tag task.
    t = wq.Task('bash -c "' + ' && '.join([p1_get, p2_get, es_cmd]) + '"')
    t.specify_tag(sample)

    # Specify references folder as a cached input.
    t.specify_directory(references_local, references_remote, wq.WORK_QUEUE_INPUT, recursive=True, cache=True)

    # Specify anticipated outputs.
    # ... logs.
    t.specify_file(os.path.join(resultsdir, get_log), get_log, wq.WORK_QUEUE_OUTPUT, cache=False)  # get log
    t.specify_file(os.path.join(resultsdir, run_log), run_log, wq.WORK_QUEUE_OUTPUT, cache=False)  # ericscript log
    # ... results.
    t.specify_file(os.path.join(resultsdir, sample + '.results.filtered.tsv'),
                   os.path.join(sample, sample + '.results.filtered.tsv'),
                   wq.WORK_QUEUE_OUTPUT, cache=False)  # filtered results
    t.specify_file(os.path.join(resultsdir, sample + '.results.total.tsv'),
                   os.path.join(sample, sample + '.results.total.tsv'),
                   wq.WORK_QUEUE_OUTPUT, cache=False)  # total results
    t.specify_file(os.path.join(resultsdir, sample + '.Summary.RData'),
                   os.path.join(sample, sample + '.Summary.RData'),
                   wq.WORK_QUEUE_OUTPUT, cache=False)  # RData summary

    return t
Example #9
    def new_task(self):
        """
        Generate a new task object and assign it a program to run. Ensures each
        task has the correct set of supporting files. See WorkQueue.Config for
        information on task files.

        Parameters:
            None

        Returns:
            A new cctools WorkQueue.Task instance
        """

        cmd = self.cfg.executable.remotepath
        task = WQ.Task('./' + cmd)

        ### executable
        self.cfg.executable.add_to_task(task)

        ### cached files
        for wqf in self.cfg.getcache:
            wqf.add_to_task(task)

        return task
Example #10
import work_queue as WQ

# in case we want ${USER} for master name
from os import environ

#### SET MASTER NAME HERE
#### for example: master_name = environ['USER'] + '-my-first-master'
master_name = environ['USER'] + '-my-first-master'

# 1. run at some port at random
q = WQ.WorkQueue(name=master_name, port=0)

# 2. create a task that runs a command remotely, and ...
t = WQ.Task('./sim.exe A B')

# ...specify the name of input and output files
t.specify_input_file('sim.exe', cache=True)
t.specify_input_file('A')
t.specify_output_file('B')

# 3. submit the task to the queue
q.submit(t)

# 4. wait for all tasks to finish, 5 second timeout:
while not q.empty():
    t = q.wait(5)
    if t:
        print('task {} finished'.format(t.id))

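For the task above to run, at least one worker has to connect to the manager; here is a hedged sketch of launching a local worker from Python, assuming the work_queue_worker binary from cctools is on PATH.

import subprocess

# Connect one worker to the manager created above; '-t 60' makes the worker
# exit after 60 seconds of idleness.
worker = subprocess.Popen(
    ['work_queue_worker', '-t', '60', 'localhost', str(q.port)])
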
Example #11
def work_queue_executor(items, function, accumulator, **kwargs):
    """Execute using Work Queue

    Parameters
    ----------
        items : list
            List of input arguments
        function : callable
            A function to be called on each input, which returns an accumulator instance
        accumulator : AccumulatorABC
            An accumulator to collect the output of the function
        status : bool
            If true (default), enable progress bar
        unit : str
            Label of progress bar unit
        desc : str
            Label of progress bar description
        compression : int, optional
            Compress accumulator outputs in flight with LZ4, at level specified (default 1)
            Set to ``None`` for no compression.

        # work queue specific options:
        environment-file : str
            Python environment to use. Required.
        cores : int
            Number of cores for work queue task. If unset, use a whole worker.
        memory : int
            Amount of memory (in MB) for work queue task. If unset, use a whole worker.
        disk : int
            Amount of disk space (in MB) for work queue task. If unset, use a whole worker.
        resources-mode : one of 'fixed', or 'auto'. Default is 'fixed'.
            'fixed' - allocate cores, memory, and disk specified for each task.
            'auto'  - use cores, memory, and disk as maximum values to allocate.
                      Useful when the resources used by a task are not known, as
                      it lets work queue find an efficient value for maximum
                      throughput.
        debug-log : str
            Filename for debug output
        stats-log : str
            Filename for tasks statistics output
        transactions-log : str
            Filename for tasks lifetime reports output
        master-name : str
            Name to refer to this work queue master.
            Sets port to 0 (any available port) if port not given.
        port : int
            Port number for work queue master program. Defaults to 9123 if
            master-name not given.
        wrapper : str
            Wrapper script to run/open python environment tarball. Defaults to python_package_run found in PATH.
        print-stdout : bool
            If true (default), print the standard output of work queue task on completion.
        queue-mode : one of 'persistent' or 'one-per-stage'. Default is 'persistent'.
            'persistent' - One queue is used for all stages of processing.
            'one-per-stage' - A new queue is used for each of the stages of processing.
        resource-monitor : bool
            If true, (false is the default) turns on resource monitoring for Work Queue.
    """
    try:
        import work_queue as wq
        import tempfile
        import dill
        import os
        from os.path import basename
    except ImportError as e:
        print('You must have Work Queue and dill installed to use work_queue_executor!')
        raise e

    global _wq_queue

    debug_log = kwargs.pop('debug-log', None)
    stats_log = kwargs.pop('stats-log', None)
    trans_log = kwargs.pop('transactions-log', None)

    master_name = kwargs.pop('master-name', None)
    port = kwargs.pop('port', None)
    if port is None:
        if master_name:
            port = 0
        else:
            port = 9123

    queue_mode = kwargs.pop('queue-mode', 'persistent')

    if _wq_queue is None or queue_mode == 'one-per-stage':
        _wq_queue = wq.WorkQueue(port, name=master_name, debug_log=debug_log, stats_log=stats_log, transactions_log=trans_log)

    print('Listening for work queue workers on port {}...'.format(_wq_queue.port))

    unit = kwargs.pop('unit', 'items')
    status = kwargs.pop('status', True)
    desc = kwargs.pop('desc', 'Processing')
    clevel = kwargs.pop('compression', 1)
    filepath = kwargs.pop('filepath', '.')
    output = kwargs.pop('print-stdout', False)

    if clevel is not None:
        function = _compression_wrapper(clevel, function)

    # work queue specific options:
    env_file = kwargs.pop('environment-file', None)
    wrapper = kwargs.pop('wrapper', shutil.which('python_package_run'))

    if not env_file:
        raise TypeError("environment-file argument missing. It should name a conda environment as a tar file.")
    elif not os.path.exists(env_file):
        raise ValueError("environment-file does not name an existing conda environment as a tar file.")

    if not wrapper:
        raise ValueError("Location of python_package_run could not be determined automatically.\nUse 'wrapper' argument to the work_queue_executor.")

    # fixed, or auto
    resources_mode = kwargs.pop('resources-mode', 'fixed')
    cores = kwargs.pop('cores', None)
    memory = kwargs.pop('memory', None)
    disk = kwargs.pop('disk', None)
    resource_monitor = kwargs.pop('resource-monitor', False)

    default_resources = {}
    if cores:
        default_resources['cores'] = cores
    if memory:
        default_resources['memory'] = memory
    if disk:
        default_resources['disk'] = disk

    with tempfile.TemporaryDirectory(prefix="wq-executor-tmp-", dir=filepath) as tmpdir:
        # Pickle function
        with open(os.path.join(tmpdir, 'function.p'), 'wb') as wf:
            dill.dump(function, wf)

        # Set up Work Queue
        command_path = _coffea_fn_as_file_wrapper(tmpdir)

        if resource_monitor:
            _wq_queue.enable_monitoring()

        _wq_queue.specify_category_max_resources('default', default_resources)
        if resources_mode == 'auto':
            _wq_queue.tune('category-steady-n-tasks', 3)
            _wq_queue.specify_category_max_resources('default', {})
            _wq_queue.specify_category_mode('default', wq.WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT)

        # Define function input here
        infile_function = os.path.join(tmpdir, 'function.p')

        # Dictionary to keep track of output file corresponding to task id
        id_output = {}

        # Iterative Executor Specifications
        if len(items) == 0:
            return accumulator

        add_fn = _iadd

        for i, item in tqdm(enumerate(items), disable=not status, unit=unit, total=len(items), desc=desc):
            with open(os.path.join(tmpdir, 'item_{}.p'.format(i)), 'wb') as wf:
                dill.dump(item, wf)

            infile_item = os.path.join(tmpdir, 'item_{}.p'.format(i))
            outfile = os.path.join(tmpdir, 'output_{}.p'.format(i))

            coffea_command = 'python {} {} {} {}'.format(basename(command_path), basename(infile_function), basename(infile_item), basename(outfile))
            wrapped_command = './{}'.format(basename(wrapper))
            wrapped_command += ' --environment {}'.format(basename(env_file))
            wrapped_command += ' --unpack-to "$WORK_QUEUE_SANDBOX"/{}-env {}'.format(env_file, coffea_command)

            t = wq.Task(wrapped_command)
            t.specify_category('default')

            t.specify_input_file(command_path, cache=True)
            t.specify_input_file(infile_function, cache=False)
            t.specify_input_file(infile_item, cache=False)

            # conda environment files
            t.specify_input_file(env_file, cache=True)
            t.specify_input_file(wrapper, cache=True)

            if re.search('://', item.filename):
                # This looks like a URL. Not transferring the file.
                pass
            else:
                t.specify_input_file(item.filename, remote_name=item.filename, cache=True)

            t.specify_output_file(outfile, cache=False)

            task_id = _wq_queue.submit(t)
            # Add pair to dict
            id_output['{}'.format(task_id)] = outfile

            print('Submitted task (id #{}): {}'.format(task_id, wrapped_command))

        print('Waiting for tasks to complete...')

        while not _wq_queue.empty():
            t = _wq_queue.wait(5)
            if t:
                print('Task (id #{}) complete: {} (return code {})'.format(t.id, t.command, t.return_status))

                if output:
                    print('Output:\n{}'.format(t.output))
                    print('allocated cores: {}, memory: {} MB, disk: {} MB'.format(
                        t.resources_allocated.cores,
                        t.resources_allocated.memory,
                        t.resources_allocated.disk))
                    if resource_monitor:
                        print('measured cores: {}, memory: {} MB, disk {} MB, runtime {}'.format(
                            t.resources_measured.cores,
                            t.resources_measured.memory,
                            t.resources_measured.disk,
                            t.resources_measured.wall_time / 1000000))

                if t.result != 0:
                    print('Task id #{} failed with code: {}'.format(t.id, t.result))
                    print('Stopping execution')
                    break

                # Unpickle output, add to accumulator
                with open(id_output['{}'.format(t.id)], 'rb') as rf:
                    unpickle_output = dill.load(rf)

                add_fn(accumulator, unpickle_output)

        if os.path.exists(command_path):
            os.remove(command_path)

        return accumulator
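
A hedged sketch of invoking work_queue_executor with the keyword options documented in its docstring; my_items, my_function, and my_accumulator are hypothetical placeholders (the accumulator only needs to support in-place addition), and the environment tarball name is an assumption.

# Keys containing dashes have to be passed via an unpacked dict.
result = work_queue_executor(
    my_items,          # hypothetical list of inputs, each with a .filename attribute
    my_function,       # hypothetical callable returning an accumulator-like object
    my_accumulator,    # hypothetical accumulator supporting '+='
    **{
        'environment-file': 'conda-env.tar.gz',  # required conda environment tarball
        'master-name': 'coffea-wq-example',
        'cores': 2,
        'memory': 2000,        # MB
        'disk': 2000,          # MB
        'resources-mode': 'auto',
        'print-stdout': True,
    })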
Example #12
#!/usr/bin/env python

import work_queue
import os

work_queue.set_debug_flag('all')

wq = work_queue.WorkQueue(port=work_queue.WORK_QUEUE_RANDOM_PORT, exclusive=False, shutdown=True)
wq.specify_name('test')

for i in range(5):
    task = work_queue.Task('date')
    task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_FCFS)
    task.specify_tag('current date/time [%d]' % i)
    task.specify_input_file('/bin/date')

    print(task.id)
    print(task.algorithm)
    print(task.command)
    print(task.tag)

    wq.submit(task)

os.environ['PATH'] = '../../../dttools/src:' + os.environ['PATH']
os.system('work_queue_worker -d all -t 5 localhost %d &' % wq.port)

while not wq.empty():
    print('** wait for task')
    task = wq.wait(1)
    if task:
        print('task')
Example #13
shutil.copyfile('/bin/cat', path.join(test_dir, exec_file))
os.chmod(path.join(test_dir, exec_file), stat.S_IRWXU)

q = wq.WorkQueue(0)

with open(port_file, 'w') as f:
    print('Writing port {port} to file {file}'.format(port=q.port,
                                                      file=port_file))
    f.write(str(q.port))

# simple task
# define a task, sending stderr to console, and stdout to output
output = output_file()
t = wq.Task("./{exe} {input} 2>&1 > {output}".format(exe=exec_file,
                                                     input=input_file,
                                                     output=output))
t.specify_input_file(path.join(test_dir, exec_file), exec_file)
t.specify_input_file(path.join(test_dir, input_file), input_file)
t.specify_output_file(path.join(test_dir, output), output)

q.submit(t)
t = q.wait(5)
report_task(t, wq.WORK_QUEUE_RESULT_SUCCESS, 0, [path.join(test_dir, output)])

# same simple task, but now we send the directory as an input
output = output_file()
t = wq.Task("cd my_dir && ./{exe} {input} 2>&1 > {output}".format(
    exe=exec_file, input=input_file, output=output))
t.specify_directory(test_dir, 'my_dir', recursive=True)
t.specify_output_file(path.join(test_dir, output), path.join('my_dir', output))
Example #14
PORT = 9199
TASKS = 100
ALPHABET = string.ascii_lowercase + string.digits

if __name__ == '__main__':
    JOURNAL = json.load(open('journal.json'))

    queue = work_queue.WorkQueue(PORT, name='hulk-amunch', catalog=True)
    queue.specify_log('fury.log')

    for num in range(1, 6):
        command = './hulk.py -l {}'.format(num)
        if command in JOURNAL:
            print('Already did', command, file=sys.stderr)
        else:
            task = work_queue.Task(command)
            for source in ('hulk.py', HASHES):
                task.specify_file(source, source, work_queue.WORK_QUEUE_INPUT)

            queue.submit(task)
    for num in range(1, 4):
        for prefix in itertools.product(ALPHABET, repeat=int(num)):
            prefix = ''.join(prefix)
            command = './hulk.py -l 5 -p {}'.format(prefix)
            if command in JOURNAL:
                print('Already did', command, file=sys.stderr)
            else:
                task = work_queue.Task(command)
                for source in ('hulk.py', HASHES):
                    task.specify_file(source, source,
                                      work_queue.WORK_QUEUE_INPUT)
Example #15
    def sprint(self):
        with util.PartiallyMutable.unlock():
            self.source = TaskProvider(self.config)
        action = actions.Actions(self.config, self.source)

        logger.info("using wq from {0}".format(wq.__file__))
        logger.info("running Lobster version {0}".format(util.get_version()))
        logger.info("current PID is {0}".format(os.getpid()))

        wq.cctools_debug_flags_set("all")
        wq.cctools_debug_config_file(
            os.path.join(self.config.workdir, "work_queue_debug.log"))
        wq.cctools_debug_config_file_size(1 << 29)

        self.queue = wq.WorkQueue(self.config.advanced.wq_port)
        self.queue.specify_min_taskid(self.source.max_taskid() + 1)
        self.queue.specify_log(
            os.path.join(self.config.workdir, "work_queue.log"))
        self.queue.specify_transactions_log(
            os.path.join(self.config.workdir, "transactions.log"))
        self.queue.specify_name("lobster_" + self.config.label)
        self.queue.specify_keepalive_timeout(300)
        # self.queue.tune("short-timeout", 600)
        self.queue.tune("transfer-outlier-factor", 4)
        self.queue.specify_algorithm(wq.WORK_QUEUE_SCHEDULE_RAND)
        if self.config.advanced.full_monitoring:
            self.queue.enable_monitoring_full(None)
        else:
            self.queue.enable_monitoring(None)

        logger.info("starting queue as {0}".format(self.queue.name))

        abort_active = False
        abort_threshold = self.config.advanced.abort_threshold
        abort_multiplier = self.config.advanced.abort_multiplier

        wq_max_retries = self.config.advanced.wq_max_retries

        if util.checkpoint(self.config.workdir, 'KILLED') == 'PENDING':
            util.register_checkpoint(self.config.workdir, 'KILLED', 'RESTART')

        # time in seconds to wait for WQ to return tasks, with minimum wait
        # time in case no more tasks are waiting
        interval = 120
        interval_minimum = 30

        tasks_left = 0
        units_left = 0
        successful_tasks = 0

        categories = []

        self.setup_logging('all')
        # Workflows can be assigned categories, with each category having
        # different cpu/memory/walltime requirements that WQ will automatically
        # fine-tune
        for category in self.config.categories:
            constraints = category.wq()
            if category.name != 'merge':
                categories.append(category.name)
                self.setup_logging(category.name)
            self.queue.specify_category_mode(category.name, category.mode)
            if category.mode == wq.WORK_QUEUE_ALLOCATION_MODE_FIXED:
                self.queue.specify_category_max_resources(
                    category.name, constraints)
            else:
                self.queue.specify_category_first_allocation_guess(
                    category.name, constraints)
            logger.debug('Category {0}: {1}'.format(category.name,
                                                    constraints))
            if 'wall_time' not in constraints:
                self.queue.activate_fast_abort_category(
                    category.name, abort_multiplier)

        proxy_email_sent = False
        while not self.source.done():
            with self.measure('status'):
                tasks_left = self.source.tasks_left()
                units_left = self.source.work_left()

                logger.debug("expecting {0} tasks, still".format(tasks_left))
                self.queue.specify_num_tasks_left(tasks_left)

                for c in categories + ['all']:
                    self.log(c, units_left)

                if util.checkpoint(self.config.workdir, 'KILLED') == 'PENDING':
                    util.register_checkpoint(self.config.workdir, 'KILLED',
                                             str(datetime.datetime.utcnow()))

                    # let the task source shut down gracefully
                    logger.info("terminating task source")
                    self.source.terminate()
                    logger.info("terminating gracefully")
                    break

            with self.measure('create'):
                have = {}
                for c in categories:
                    cstats = self.queue.stats_category(c)
                    have[c] = {
                        'running': cstats.tasks_running,
                        'queued': cstats.tasks_waiting
                    }

                stats = self.queue.stats_hierarchy
                tasks = self.source.obtain(stats.total_cores, have)

                expiry = None
                if self.config.advanced.proxy:
                    expiry = self.config.advanced.proxy.expires()
                    proxy_time_left = self.config.advanced.proxy.time_left()
                    if proxy_time_left >= 24 * 3600:
                        proxy_email_sent = False
                    if proxy_time_left < 24 * 3600 and not proxy_email_sent:
                        util.sendemail(
                            "Your proxy is about to expire.\n" + "Timeleft: " +
                            str(datetime.timedelta(seconds=proxy_time_left)),
                            self.config)
                        proxy_email_sent = True

                for category, cmd, id, inputs, outputs, env, dir in tasks:
                    task = wq.Task(cmd)
                    task.specify_category(category)
                    task.specify_tag(id)
                    task.specify_max_retries(wq_max_retries)
                    task.specify_monitor_output(
                        os.path.join(dir, 'resource_monitor'))

                    for k, v in env.items():
                        task.specify_environment_variable(k, v)

                    for (local, remote, cache) in inputs:
                        cache_opt = wq.WORK_QUEUE_CACHE if cache else wq.WORK_QUEUE_NOCACHE
                        if os.path.isfile(local) or os.path.isdir(local):
                            task.specify_input_file(str(local), str(remote),
                                                    cache_opt)
                        else:
                            logger.critical(
                                "cannot send file to worker: {0}".format(
                                    local))
                            raise NotImplementedError

                    for (local, remote) in outputs:
                        task.specify_output_file(str(local), str(remote))

                    if expiry:
                        task.specify_end_time(expiry * 10**6)
                    self.queue.submit(task)

            with self.measure('status'):
                stats = self.queue.stats_hierarchy
                logger.info(
                    "{0} out of {1} workers busy; {2} tasks running, {3} waiting; {4} units left"
                    .format(stats.workers_busy,
                            stats.workers_busy + stats.workers_ready,
                            stats.tasks_running, stats.tasks_waiting,
                            units_left))

            with self.measure('update'):
                self.source.update(self.queue)

            # recurring actions are triggered here; plotting etc should run
            # while we have WQ hand us back tasks w/o any database
            # interaction
            with self.measure('action'):
                if action:
                    action.take()

            with self.measure('fetch'):
                starttime = time.time()
                task = self.queue.wait(interval)
                tasks = []
                while task:
                    if task.return_status == 0:
                        successful_tasks += 1
                    elif task.return_status in self.config.advanced.bad_exit_codes:
                        logger.warning(
                            "blacklisting host {0} due to bad exit code from task {1}"
                            .format(task.hostname, task.tag))
                        self.queue.blacklist(task.hostname)
                    tasks.append(task)

                    remaining = int(starttime + interval - time.time())
                    if (interval - remaining < interval_minimum
                            or self.queue.stats.tasks_waiting > 0
                        ) and remaining > 0:
                        task = self.queue.wait(remaining)
                    else:
                        task = None
                # TODO do we really need this?  We have everything based on
                # categories by now, so this should not be needed.
                if abort_threshold > 0 and successful_tasks >= abort_threshold and not abort_active:
                    logger.info(
                        "activating fast abort with multiplier: {0}".format(
                            abort_multiplier))
                    abort_active = True
                    self.queue.activate_fast_abort(abort_multiplier)
            if len(tasks) > 0:
                try:
                    with self.measure('return'):
                        self.source.release(tasks)
                except Exception:
                    tb = traceback.format_exc()
                    logger.critical(
                        "cannot recover from the following exception:\n" + tb)
                    util.sendemail(
                        "Your Lobster project has crashed from the following exception:\n"
                        + tb, self.config)
                    for task in tasks:
                        logger.critical(
                            "tried to return task {0} from {1}".format(
                                task.tag, task.hostname))
                    raise
        if units_left == 0:
            logger.info("no more work left to do")
            util.sendemail("Your Lobster project is done!", self.config)
            if self.config.elk:
                self.config.elk.end()
            if action:
                action.take(True)
Example #16
    # ts = q.wait(wq.WORK_QUEUE_WAITFORTASK)

    time.sleep(2)

    seq = os.path.isdir('/proc/' + str(pid))
    if not seq:
        sys.exit()

    nfiles = glob.glob(wdir + '/*.cmd')

    for file in nfiles:
        command = readCMD(file)
        removeCMD(file)
        cmDict = parseCMD(command)

        t = wq.Task(cmDict['CMD'])

        t.specify_cores(int(cmDict['wqCMD']['-num_threads']))
        t.specify_algorithm(wq.WORK_QUEUE_SCHEDULE_FILES)
        # t.specify_memory(mem)

        t.specify_file('/usr/local/bin/'+cmDict['BLAST'], cmDict['BLAST'], \
                       wq.WORK_QUEUE_INPUT, cache=True)
        t.specify_file(cmDict['sqCMD']['SEQ'], cmDict['wqCMD']['-query'].strip("'"), \
                       wq.WORK_QUEUE_INPUT, cache=True)

        t.specify_file(cmDict['sqCMD']['REP'], cmDict['wqIO']['>'], \
                       wq.WORK_QUEUE_OUTPUT, cache=True)
        # t.specify_file(cmDict['sqCMD']['LOG'], cmDict['wqIO']['2>'], \
        #                wq.WORK_QUEUE_OUTPUT, cache=False)
Example #17
q.specify_transactions_log('my_transactions.log')

print('WorkQueue on port: {}'.format(q.port))

# enable the measuring of resources
q.enable_monitoring()

# create a category for all tasks
q.specify_category_max_resources('my-tasks', {'cores': 1, 'disk': 500})
q.specify_category_mode('my-tasks',
                        WQ.WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT)

# create 30 tasks. Each task simply creates a 200MB file, using 10MB of memory
# buffer.
for i in range(0, 30):
    t = WQ.Task('python task.py')
    t.specify_input_file('task.py', cache=True)
    t.specify_category('my-tasks')
    t.specify_max_retries(2)
    q.submit(t)

# create a task that will break the limits set
t = WQ.Task('python task.py 1000')
t.specify_input_file('task.py', cache=True)
t.specify_category('my-tasks')
t.specify_max_retries(2)
q.submit(t)

# wait for task to finish
while not q.empty():
    t = q.wait(60)
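    # Hedged sketch (the source example is cut off here): inspect each
    # completed task, using result constants from the work_queue Python API
    # and the WQ module alias used above.
    if t:
        if t.result == WQ.WORK_QUEUE_RESULT_SUCCESS:
            print('task {} finished with exit code {}'.format(t.id, t.return_status))
        elif t.result == WQ.WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION:
            # This task broke the 'my-tasks' limits; with specify_max_retries(2)
            # it may be resubmitted with a larger allocation.
            print('task {} exceeded its resource allocation'.format(t.id))
        else:
            print('task {} failed with result {}'.format(t.id, t.result))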