Exemple #1
0
def _resolve_scripts(task):
    if task.get('mode') != 'workflow':
        if 'script_uri' in task and 'script' not in task:
            task['script'] = io.fetch({
                'url': task['script_uri']
            })
    elif 'steps' in task:
        for step in task['steps']:
            _resolve_scripts(step['task'])
def _resolve_scripts(task):
    if task.get('mode') != 'workflow':
        if 'script_uri' in task and 'script' not in task:
            task['script'] = io.fetch({
                'url': task['script_uri']
            })
    elif 'steps' in task:
        for step in task['steps']:
            _resolve_scripts(step['task'])
def convert(type, input, output, fetch=True, status=None, **kwargs):
    """
    Convert data from one format to another and push the result.

    :param type: The type specifier string of the input data.
    :param input: A binding dict of the form
        ``{'format': format, 'data': data}``, where ``format`` is the format
        specifier string and ``data`` is the raw data to convert.
        The dict may also be of the form
        ``{'format': format, 'uri': uri}``, where ``uri`` is the location of
        the data (see :py:mod:`girder_worker.uri` for URI formats).
        When ``fetch`` is true the fetched value is written back to
        ``input['data']``.
    :param output: A binding of the form ``{'format': format}``, where
        ``format`` is the format specifier string to convert the data to.
        The binding may also be of the form
        ``{'format': format, 'uri': uri}``, where ``uri`` specifies
        where to place the converted data.
    :param fetch: Whether to do an initial data fetch before conversion
        (default ``True``).
    :param status: Job status forwarded to each conversion task. When it
        equals ``JobStatus.CONVERTING_OUTPUT``, the job (looked up from the
        ``_job_manager`` keyword argument) is moved to
        ``JobStatus.PUSHING_OUTPUT`` just before the data is pushed.
    :param kwargs: Extra keyword arguments passed through to ``io.fetch``,
        ``run`` and ``io.push``.
    :returns: The ``output`` binding after ``io.push`` has handled it.
        According to the original documentation the binding gains a
        ``'data'`` field with the converted data, or, if ``'uri'`` is
        present, the data is saved to that URI instead; that behavior
        lives in ``io.push`` and is not visible here.
    :raises Exception: If no conversion path exists between the two formats.
    """
    if fetch:
        input['data'] = io.fetch(input, **kwargs)

    if input['format'] == output['format']:
        # Same format on both sides: pass the data through untouched.
        data = input['data']
    else:
        try:
            steps = converter_path(
                Validator(type, input['format']),
                Validator(type, output['format']))
        except NetworkXNoPath:
            raise Exception('No conversion path from %s/%s to %s/%s' %
                            (type, input['format'], type, output['format']))

        # Thread the binding through every converter on the path; each
        # converter's 'output' binding is the next converter's 'input'.
        current = input
        for step in steps:
            produced = run(step, {'input': current}, auto_convert=False,
                           status=status, **kwargs)
            current = produced['output']
        data = current['data']

    if status == JobStatus.CONVERTING_OUTPUT:
        manager = kwargs.get('_job_manager')
        _job_status(manager, JobStatus.PUSHING_OUTPUT)

    io.push(data, output, **kwargs)
    return output
Exemple #4
0
def run(task, inputs=None, outputs=None, fetch=True, status=None, **kwargs):
    """
    Execute a task using the given input and output bindings.

    The sequence is: trigger ``run.before``; fill in / validate inputs;
    fetch each non-stream input and trigger ``run.handle_input`` for it;
    make sure every declared output has a binding; set the job status;
    dispatch to the executor registered for the task's mode; trigger
    ``run.handle_output`` for each non-stream output and push it unless the
    event's default was prevented; trigger ``run.after``. ``run.finally``
    is always triggered last.

    :param task: Specification of the task to run.
    :type task: dict
    :param inputs: Specification of how input objects should be fetched
        into the runtime environment of this task. Falsy values are
        replaced by a new empty dict.
    :type inputs: dict
    :param outputs: Specification of what should be done with outputs
        of this task. Falsy values are replaced by a new empty dict.
    :type outputs: dict
    :param write_script: (Passed via ``**kwargs`` to the mode executor.)
        If ``True`` task scripts will be written to file before being
        passed to ``exec``. This improves interactive debugging with tools
        such as ``pdb`` at the cost of additional file I/O.
    :param fetch: If ``True`` will perform a fetch on the input before
        running the task (default ``True``).
    :param status: Job status to update to during execution of this task.
    :type status: girder_worker.utils.JobStatus
    :returns: A dictionary of the form ``name: binding`` where ``name`` is
        the name of the output and ``binding`` is an output binding of the
        form ``{'data': data}``. The ``'data'`` field may be absent if an
        output URI was provided. Instead, those outputs will be saved to
        that URI and the output binding will contain the location in the
        ``'uri'`` field. ``None`` is returned when a
        ``StateTransitionException`` is swallowed because the job is being
        canceled.
    :raises Exception: If the task's mode has no registered executor.
    :raises StateTransitionException: Re-raised unless a job manager is
        present and reports ``JobStatus.CANCELING``.
    """
    if not inputs:
        inputs = {}
    if not outputs:
        outputs = {}

    # Index the declared input/output specs by their identifier.
    task_inputs = dict(
        (_extractId(spec), spec) for spec in task.get('inputs', ()))
    task_outputs = dict(
        (_extractId(spec), spec) for spec in task.get('outputs', ()))

    mode = task.get('mode', 'python')
    if mode not in _task_map:
        raise Exception('Invalid mode: %s' % mode)

    job_mgr = kwargs.get('_job_manager')

    # Shared context object handed to every lifecycle event.
    info = {
        'task': task,
        'task_inputs': task_inputs,
        'task_outputs': task_outputs,
        'mode': mode,
        'inputs': inputs,
        'outputs': outputs,
        'status': status,
        'job_mgr': job_mgr,
        'kwargs': kwargs
    }
    events.trigger('run.before', info)

    try:
        # If some inputs are not there, fill in with defaults
        _validateInputs(task_inputs, inputs)

        for name, binding in inputs.iteritems():
            task_input = task_inputs[name]
            if task_input.get('stream'):
                # Stream inputs are fetched lazily by the executor.
                continue

            if fetch:
                # Only announce a fetch when there is no data in hand yet.
                if status == JobStatus.RUNNING and 'data' not in binding:
                    set_job_status(job_mgr, JobStatus.FETCHING_INPUT)
                # Caller-supplied kwargs take precedence over 'task_input'.
                fetch_opts = {'task_input': task_input}
                fetch_opts.update(kwargs)
                binding['data'] = io.fetch(binding, **fetch_opts)

            events.trigger('run.handle_input', {
                'info': info,
                'task_input': task_input,
                'input': binding,
                'name': name
            })

            # Event handlers may have supplied 'script_data' themselves;
            # otherwise the raw data is what the script sees.
            if 'script_data' not in binding:
                binding['script_data'] = binding['data']

        # Every declared output needs a binding for the executor to fill.
        for name in task_outputs:
            outputs.setdefault(name, {})

        # Set the appropriate job status flag
        set_job_status(job_mgr, status)

        # Actually run the task for the given mode
        executor = _task_map[mode]
        executor(
            task=task, inputs=inputs, outputs=outputs,
            task_inputs=task_inputs, task_outputs=task_outputs, **kwargs)

        for name, task_output in task_outputs.iteritems():
            if task_output.get('stream'):
                # This output has already been sent as a stream.
                continue

            binding = outputs[name]
            event = events.trigger('run.handle_output', {
                'info': info,
                'task_output': task_output,
                'output': binding,
                'outputs': outputs,
                'name': name
            })

            if not event.default_prevented:
                # Re-read outputs[name]: a handler may have replaced it.
                data = outputs[name]['script_data']

                if status == JobStatus.RUNNING:
                    set_job_status(job_mgr, JobStatus.PUSHING_OUTPUT)
                push_opts = {'task_output': task_output}
                push_opts.update(kwargs)
                io.push(data, outputs[name], **push_opts)

            # 'script_data' is internal and must not leak to the caller.
            binding.pop('script_data', None)

        events.trigger('run.after', info)

        return outputs
    except StateTransitionException:
        # Without a job manager there is no way to check for cancellation.
        if not job_mgr:
            raise
        # If we are canceling we want to stay in that state, otherwise
        # propagate the failed transition.
        if job_mgr.refreshStatus() != JobStatus.CANCELING:
            raise
    finally:
        events.trigger('run.finally', info)
def run(task, inputs=None, outputs=None, auto_convert=True, validate=True,
        fetch=True, status=None, **kwargs):
    """
    Run a task with the specified I/O bindings.

    :param task: Specification of the task to run.
    :type task: dict
    :param inputs: Specification of how input objects should be fetched
        into the runtime environment of this task.
    :type inputs: dict
    :param outputs: Speficiation of what should be done with outputs
        of this task.
    :type outputs: dict
    :param auto_convert: If ``True`` (the default), perform format conversions
        on inputs and outputs with :py:func:`convert` if they do not
        match the formats specified in the input and output bindings.
        If ``False``, an expection is raised for input or output bindings
        do not match the formats specified in the analysis.
    :param validate: If ``True`` (the default), perform input and output
        validation with :py:func:`isvalid` to ensure input bindings are in the
        appropriate format and outputs generated by the script are
        formatted correctly. This guards against dirty input as well as
        buggy scripts that do not produce the correct type of output. An
        invalid input or output will raise an exception. If ``False``, perform
        no validation.
    :param write_script: If ``True`` task scripts will be written to file before
        being passed to ``exec``. This improves interactive debugging with
        tools such as ``pdb`` at the cost of additional file I/O. Note that
        when passed to run *all* tasks will be written to file including
        validation and conversion tasks.
    :param fetch: If ``True`` will perform a fetch on the input before
        running the task (default ``True``).
    :param status: Job status to update to during execution of this task.
    :type status: girder_worker.utils.JobStatus
    :returns: A dictionary of the form ``name: binding`` where ``name`` is
        the name of the output and ``binding`` is an output binding of the form
        ``{'format': format, 'data': data}``. If the `outputs` param
        is specified, the formats of these bindings will match those given in
        `outputs`. Additionally, ``'data'`` may be absent if an output URI
        was provided. Instead, those outputs will be saved to that URI and
        the output binding will contain the location in the ``'uri'`` field.
    """
    def extractId(spec):
        return spec['id'] if 'id' in spec else spec['name']

    if inputs is None:
        inputs = {}

    task_inputs = {extractId(d): d for d in task.get('inputs', ())}
    task_outputs = {extractId(d): d for d in task.get('outputs', ())}
    mode = task.get('mode', 'python')

    if mode not in _task_map:
        raise Exception('Invalid mode: %s' % mode)

    job_mgr = kwargs.get('_job_manager')

    info = {
        'task': task,
        'task_inputs': task_inputs,
        'task_outputs': task_outputs,
        'mode': mode,
        'inputs': inputs,
        'outputs': outputs,
        'auto_convert': auto_convert,
        'validate': validate,
        'kwargs': kwargs
    }
    events.trigger('run.before', info)

    try:
        # If some inputs are not there, fill in with defaults
        for name, task_input in task_inputs.iteritems():
            if name not in inputs:
                if 'default' in task_input:
                    inputs[name] = task_input['default']
                else:
                    raise Exception(
                        'Required input \'%s\' not provided.' % name)

        for name, d in inputs.iteritems():
            task_input = task_inputs[name]
            if task_input.get('stream'):
                continue  # this input will be fetched as a stream

            # Fetch the input
            if fetch:
                if status == JobStatus.RUNNING and 'data' not in d:
                    _job_status(job_mgr, JobStatus.FETCHING_INPUT)
                d['data'] = io.fetch(
                    d, **dict({'task_input': task_input}, **kwargs))

            # Validate the input
            if validate and not isvalid(
                    task_input['type'], d,
                    **dict(
                        {'task_input': task_input, 'fetch': False}, **kwargs)):
                raise Exception(
                    'Input %s (Python type %s) is not in the expected type '
                    '(%s) and format (%s).' % (
                        name, type(d['data']), task_input['type'], d['format'])
                    )

            # Convert data
            if auto_convert:
                try:
                    converted = convert(
                        task_input['type'], d, {'format': task_input['format']},
                        status=JobStatus.CONVERTING_INPUT,
                        **dict(
                            {'task_input': task_input, 'fetch': False},
                            **kwargs))
                except Exception, e:
                    raise Exception('%s: %s' % (name, str(e)))

                d['script_data'] = converted['data']
            elif not validate or (d.get('format', task_input.get('format')) ==
                                  task_input.get('format')):
                d['script_data'] = d['data']
            else: