Example #1
0
def convert(type, input, output, fetch=True, status=None, **kwargs):
    """
    Convert data from one format to another.

    :param type: The type specifier string of the input data.
    :param input: A binding dict of the form
        ``{'format': format, 'data': data}``, where ``format`` is the format
        specifier string, and ``data`` is the raw data to convert.
        The dict may also be of the form
        ``{'format': format, 'uri': uri}``, where ``uri`` is the location of
        the data (see :py:mod:`girder_worker.uri` for URI formats).
    :param output: A binding of the form
        ``{'format': format}``, where ``format`` is the format
        specifier string to convert the data to.
        The binding may also be in the form
        ``{'format': format, 'uri': uri}``, where ``uri`` specifies
        where to place the converted data.
    :param fetch: Whether to do an initial data fetch before conversion
        (default ``True``).
    :param status: Job status value driving this conversion; when it is
        ``CONVERTING_OUTPUT`` the job is flipped to ``PUSHING_OUTPUT``
        just before the final push.
    :returns: The output binding dict with an additional field ``'data'``
        containing the converted data. If ``'uri'`` is present in the
        output binding, instead saves the data to the specified URI and
        returns the output binding unchanged.
    """
    if fetch:
        input['data'] = io.fetch(input, **kwargs)

    if input['format'] == output['format']:
        # Formats already agree; nothing to convert.
        data = input['data']
    else:
        try:
            path = converter_path(Validator(type, input['format']),
                                  Validator(type, output['format']))
        except NetworkXNoPath:
            raise Exception('No conversion path from %s/%s to %s/%s' %
                            (type, input['format'], type, output['format']))

        # Thread the binding through every conversion step on the path;
        # each step's output becomes the next step's input.
        descriptor = input
        for step in path:
            step_result = run(step, {'input': descriptor},
                              auto_convert=False, status=status,
                              **kwargs)
            descriptor = step_result['output']
        data = descriptor['data']

    if status == JobStatus.CONVERTING_OUTPUT:
        _job_status(kwargs.get('_job_manager'), JobStatus.PUSHING_OUTPUT)
    io.push(data, output, **kwargs)
    return output
Example #2
0
def run(task, inputs=None, outputs=None, fetch=True, status=None, **kwargs):
    """
    Run a task with the specified I/O bindings.

    :param task: Specification of the task to run.
    :type task: dict
    :param inputs: Specification of how input objects should be fetched
        into the runtime environment of this task.
    :type inputs: dict
    :param outputs: Specification of what should be done with outputs
        of this task.
    :type outputs: dict
    :param write_script: If ``True`` task scripts will be written to file before
        being passed to ``exec``. This improves interactive debugging with
        tools such as ``pdb`` at the cost of additional file I/O. Note that
        when passed to run *all* tasks will be written to file including
        validation and conversion tasks.
    :param fetch: If ``True`` will perform a fetch on the input before
        running the task (default ``True``).
    :param status: Job status to update to during execution of this task.
    :type status: girder_worker.utils.JobStatus
    :returns: A dictionary of the form ``name: binding`` where ``name`` is
        the name of the output and ``binding`` is an output binding of the form
        ``{'data': data}``. The ``'data'`` field may be absent if an output URI
        was provided. Instead, those outputs will be saved to that URI and the
        output binding will contain the location in the ``'uri'`` field.
    """
    inputs = inputs or {}
    outputs = outputs or {}

    task_inputs = {_extractId(d): d for d in task.get('inputs', ())}
    task_outputs = {_extractId(d): d for d in task.get('outputs', ())}
    mode = task.get('mode', 'python')

    if mode not in _task_map:
        raise Exception('Invalid mode: %s' % mode)

    job_mgr = kwargs.get('_job_manager')

    # Shared payload handed to every run.* event; handlers may mutate the
    # bindings it references.
    info = {
        'task': task,
        'task_inputs': task_inputs,
        'task_outputs': task_outputs,
        'mode': mode,
        'inputs': inputs,
        'outputs': outputs,
        'status': status,
        'job_mgr': job_mgr,
        'kwargs': kwargs
    }
    events.trigger('run.before', info)

    try:
        # If some inputs are not there, fill in with defaults
        _validateInputs(task_inputs, inputs)

        # NOTE: .items() (not the Python-2-only .iteritems()) keeps this
        # code runnable on both Python 2 and 3 with identical behavior.
        for name, d in inputs.items():
            task_input = task_inputs[name]
            if task_input.get('stream'):
                continue  # this input will be fetched as a stream

            if fetch:
                if status == JobStatus.RUNNING and 'data' not in d:
                    set_job_status(job_mgr, JobStatus.FETCHING_INPUT)
                d['data'] = io.fetch(d, **dict({'task_input': task_input}, **kwargs))

            events.trigger('run.handle_input', {
                'info': info,
                'task_input': task_input,
                'input': d,
                'name': name
            })

            if 'script_data' not in d:
                d['script_data'] = d['data']

        # Ensure every declared task output has a binding dict, even when
        # the caller supplied none. (Only the keys are needed here.)
        for name in task_outputs:
            if name not in outputs:
                outputs[name] = {}

        # Set the appropriate job status flag
        set_job_status(job_mgr, status)

        # Actually run the task for the given mode
        _task_map[mode](
            task=task, inputs=inputs, outputs=outputs, task_inputs=task_inputs,
            task_outputs=task_outputs, **kwargs)

        for name, task_output in task_outputs.items():
            if task_output.get('stream'):
                continue  # this output has already been sent as a stream

            output = outputs[name]
            e = events.trigger('run.handle_output', {
                'info': info,
                'task_output': task_output,
                'output': output,
                'outputs': outputs,
                'name': name
            })

            # An event handler may claim the output by preventing the
            # default push; otherwise push the script's result ourselves.
            if not e.default_prevented:
                data = outputs[name]['script_data']

                if status == JobStatus.RUNNING:
                    set_job_status(job_mgr, JobStatus.PUSHING_OUTPUT)
                io.push(data, outputs[name], **dict({'task_output': task_output}, **kwargs))

            # Drop the intermediate payload so it is not returned to callers.
            output.pop('script_data', None)

        events.trigger('run.after', info)

        return outputs
    except StateTransitionException:
        if job_mgr:
            status = job_mgr.refreshStatus()
            # If we are canceling we want to stay in that state, otherwise raise
            # the exception
            if status != JobStatus.CANCELING:
                raise
        else:
            raise
    finally:
        events.trigger('run.finally', info)
Example #3
0
                        d['format'])
                    )

            # We should consider refactoring the logic below, reasoning about
            # the paths through this code is difficult, since this logic is
            # entered by 'run', 'isvalid', and 'convert'.
            if auto_convert:
                outputs[name] = convert(
                    task_output['type'], script_output, d,
                    status=JobStatus.CONVERTING_OUTPUT,
                    **dict({'task_output': task_output}, **kwargs))
            elif not validate or d['format'] == task_output['format']:
                data = d['script_data']

                if status == JobStatus.RUNNING:
                    _job_status(job_mgr, JobStatus.PUSHING_OUTPUT)
                io.push(
                    data, d, **dict({'task_output': task_output}, **kwargs))
            else:
                raise Exception('Expected exact format match but %s != %s.' % (
                    d['format'], task_output['format']))

            if 'script_data' in outputs[name]:
                del outputs[name]['script_data']

        events.trigger('run.after', info)

        return outputs
    finally:
        events.trigger('run.finally', info)