def _resolve_scripts(task): if task.get('mode') != 'workflow': if 'script_uri' in task and 'script' not in task: task['script'] = io.fetch({ 'url': task['script_uri'] }) elif 'steps' in task: for step in task['steps']: _resolve_scripts(step['task'])
def convert(type, input, output, fetch=True, status=None, **kwargs):
    """
    Convert data from one format to another.

    :param type: The type specifier string of the input data.
    :param input: A binding dict of the form
        ``{'format': format, 'data': data}``, where ``format`` is the format
        specifier string, and ``data`` is the raw data to convert.
        The dict may also be of the form ``{'format': format, 'uri': uri}``,
        where ``uri`` is the location of the data (see
        :py:mod:`girder_worker.uri` for URI formats).  When ``fetch`` is
        true the fetched data is stored back into this dict under
        ``'data'``.
    :param output: A binding of the form ``{'format': format}``, where
        ``format`` is the format specifier string to convert the data to.
        The binding may also be in the form
        ``{'format': format, 'uri': uri}``, where ``uri`` specifies where to
        place the converted data.
    :param fetch: Whether to do an initial data fetch before conversion
        (default ``True``).
    :param status: Job status handed on to every conversion task that is
        run.  When it equals ``JobStatus.CONVERTING_OUTPUT`` the job status
        is moved to ``JobStatus.PUSHING_OUTPUT`` (using the
        ``_job_manager`` keyword argument) just before the data is pushed.
    :param kwargs: Extra keyword arguments, forwarded unchanged to
        ``io.fetch``, to each conversion ``run`` and to ``io.push``.
    :returns: The ``output`` binding after it has been handed to
        ``io.push``: with an additional field ``'data'`` containing the
        converted data, or, if ``'uri'`` is present in the output binding,
        unchanged with the data saved to the specified URI instead.
    :raises Exception: If no conversion path exists between the two
        formats.
    """
    if fetch:
        input['data'] = io.fetch(input, **kwargs)

    # Looked up only after the fetch, in the same order as the comparison.
    source_format = input['format']
    target_format = output['format']

    if source_format == target_format:
        # Nothing to convert; pass the data straight through.
        data = input['data']
    else:
        try:
            path = converter_path(Validator(type, source_format),
                                  Validator(type, target_format))
        except NetworkXNoPath:
            raise Exception('No conversion path from %s/%s to %s/%s' % (
                type, source_format, type, target_format))

        # Feed the binding through every conversion on the path; each
        # step's 'output' binding becomes the next step's 'input'.
        binding = input
        for conversion in path:
            step_result = run(conversion, {'input': binding},
                              auto_convert=False, status=status, **kwargs)
            binding = step_result['output']
        data = binding['data']

    if status == JobStatus.CONVERTING_OUTPUT:
        _job_status(kwargs.get('_job_manager'), JobStatus.PUSHING_OUTPUT)

    io.push(data, output, **kwargs)
    return output
def run(task, inputs=None, outputs=None, fetch=True, status=None, **kwargs):
    """
    Run a task with the specified I/O bindings.

    The sequence is: trigger ``run.before``; fetch and prepare every
    non-stream input (triggering ``run.handle_input`` for each); make sure
    every declared output has a binding; set the job status to ``status``;
    execute the handler registered for the task's mode; push every
    non-stream output (unless a ``run.handle_output`` handler prevented the
    default); trigger ``run.after``.  ``run.finally`` is always triggered
    last, whether or not an exception occurred.

    :param task: Specification of the task to run.
    :type task: dict
    :param inputs: Specification of how input objects should be fetched
        into the runtime environment of this task.
    :type inputs: dict
    :param outputs: Specification of what should be done with outputs
        of this task.
    :type outputs: dict
    :param write_script: If ``True`` task scripts will be written to file
        before being passed to ``exec``. This improves interactive
        debugging with tools such as ``pdb`` at the cost of additional
        file I/O. Note that when passed to run *all* tasks will be written
        to file including validation and conversion tasks.  (Not read by
        this function itself; like every other extra keyword argument it
        is only forwarded.)
    :param fetch: If ``True`` will perform a fetch on the input before
        running the task (default ``True``).
    :param status: Job status to update to during execution of this task.
    :type status: girder_worker.utils.JobStatus
    :param kwargs: Extra keyword arguments, forwarded to ``io.fetch``,
        ``io.push`` and the mode's task handler.  ``_job_manager`` is read
        from here to report job status changes.
    :returns: A dictionary of the form ``name: binding`` where ``name`` is
        the name of the output and ``binding`` is an output binding of the
        form ``{'data': data}``. The ``'data'`` field may be absent if an
        output URI was provided. Instead, those outputs will be saved to
        that URI and the output binding will contain the location in the
        ``'uri'`` field.  ``None`` is returned when a
        ``StateTransitionException`` is suppressed because the job is
        being canceled.
    :raises Exception: If the task's mode has no registered handler.
    """
    inputs = inputs or {}
    outputs = outputs or {}

    task_inputs = dict(
        (_extractId(spec), spec) for spec in task.get('inputs', ()))
    task_outputs = dict(
        (_extractId(spec), spec) for spec in task.get('outputs', ()))

    mode = task.get('mode', 'python')
    if mode not in _task_map:
        raise Exception('Invalid mode: %s' % mode)

    job_mgr = kwargs.get('_job_manager')

    # Shared context object handed to every event handler.
    info = {
        'task': task,
        'task_inputs': task_inputs,
        'task_outputs': task_outputs,
        'mode': mode,
        'inputs': inputs,
        'outputs': outputs,
        'status': status,
        'job_mgr': job_mgr,
        'kwargs': kwargs
    }
    events.trigger('run.before', info)

    try:
        # If some inputs are not there, fill in with defaults
        _validateInputs(task_inputs, inputs)

        for name, binding in inputs.iteritems():
            task_input = task_inputs[name]
            if task_input.get('stream'):
                # This input will be fetched as a stream.
                continue

            if fetch:
                if status == JobStatus.RUNNING and 'data' not in binding:
                    set_job_status(job_mgr, JobStatus.FETCHING_INPUT)
                fetch_kwargs = dict({'task_input': task_input}, **kwargs)
                binding['data'] = io.fetch(binding, **fetch_kwargs)

            events.trigger('run.handle_input', {
                'info': info,
                'task_input': task_input,
                'input': binding,
                'name': name
            })

            # Only default to the raw data when 'script_data' has not
            # already been set on the binding.
            if 'script_data' not in binding:
                binding['script_data'] = binding['data']

        # Every declared output needs a binding for the task to fill in.
        for name in task_outputs:
            outputs.setdefault(name, {})

        # Set the appropriate job status flag
        set_job_status(job_mgr, status)

        # Actually run the task for the given mode
        handler = _task_map[mode]
        handler(
            task=task, inputs=inputs, outputs=outputs,
            task_inputs=task_inputs, task_outputs=task_outputs, **kwargs)

        for name, task_output in task_outputs.iteritems():
            if task_output.get('stream'):
                # This output has already been sent as a stream.
                continue

            output = outputs[name]
            handled = events.trigger('run.handle_output', {
                'info': info,
                'task_output': task_output,
                'output': output,
                'outputs': outputs,
                'name': name
            })

            if not handled.default_prevented:
                # NOTE(review): the entry is looked up in ``outputs`` again
                # instead of reusing ``output`` -- presumably because event
                # handlers receive ``outputs`` and may rebind it; confirm.
                data = outputs[name]['script_data']

                if status == JobStatus.RUNNING:
                    set_job_status(job_mgr, JobStatus.PUSHING_OUTPUT)

                push_kwargs = dict({'task_output': task_output}, **kwargs)
                io.push(data, outputs[name], **push_kwargs)

            # 'script_data' is internal and is dropped from the binding
            # captured before the event was triggered.
            output.pop('script_data', None)

        events.trigger('run.after', info)
        return outputs
    except StateTransitionException:
        # Without a job manager there is no way to check for cancellation.
        if not job_mgr:
            raise
        status = job_mgr.refreshStatus()
        # If we are canceling we want to stay in that state, otherwise
        # raise the exception
        if status != JobStatus.CANCELING:
            raise
    finally:
        events.trigger('run.finally', info)
def run(task, inputs=None, outputs=None, auto_convert=True, validate=True, fetch=True, status=None, **kwargs): """ Run a task with the specified I/O bindings. :param task: Specification of the task to run. :type task: dict :param inputs: Specification of how input objects should be fetched into the runtime environment of this task. :type inputs: dict :param outputs: Speficiation of what should be done with outputs of this task. :type outputs: dict :param auto_convert: If ``True`` (the default), perform format conversions on inputs and outputs with :py:func:`convert` if they do not match the formats specified in the input and output bindings. If ``False``, an expection is raised for input or output bindings do not match the formats specified in the analysis. :param validate: If ``True`` (the default), perform input and output validation with :py:func:`isvalid` to ensure input bindings are in the appropriate format and outputs generated by the script are formatted correctly. This guards against dirty input as well as buggy scripts that do not produce the correct type of output. An invalid input or output will raise an exception. If ``False``, perform no validation. :param write_script: If ``True`` task scripts will be written to file before being passed to ``exec``. This improves interactive debugging with tools such as ``pdb`` at the cost of additional file I/O. Note that when passed to run *all* tasks will be written to file including validation and conversion tasks. :param fetch: If ``True`` will perform a fetch on the input before running the task (default ``True``). :param status: Job status to update to during execution of this task. :type status: girder_worker.utils.JobStatus :returns: A dictionary of the form ``name: binding`` where ``name`` is the name of the output and ``binding`` is an output binding of the form ``{'format': format, 'data': data}``. If the `outputs` param is specified, the formats of these bindings will match those given in `outputs`. 
Additionally, ``'data'`` may be absent if an output URI was provided. Instead, those outputs will be saved to that URI and the output binding will contain the location in the ``'uri'`` field. """ def extractId(spec): return spec['id'] if 'id' in spec else spec['name'] if inputs is None: inputs = {} task_inputs = {extractId(d): d for d in task.get('inputs', ())} task_outputs = {extractId(d): d for d in task.get('outputs', ())} mode = task.get('mode', 'python') if mode not in _task_map: raise Exception('Invalid mode: %s' % mode) job_mgr = kwargs.get('_job_manager') info = { 'task': task, 'task_inputs': task_inputs, 'task_outputs': task_outputs, 'mode': mode, 'inputs': inputs, 'outputs': outputs, 'auto_convert': auto_convert, 'validate': validate, 'kwargs': kwargs } events.trigger('run.before', info) try: # If some inputs are not there, fill in with defaults for name, task_input in task_inputs.iteritems(): if name not in inputs: if 'default' in task_input: inputs[name] = task_input['default'] else: raise Exception( 'Required input \'%s\' not provided.' % name) for name, d in inputs.iteritems(): task_input = task_inputs[name] if task_input.get('stream'): continue # this input will be fetched as a stream # Fetch the input if fetch: if status == JobStatus.RUNNING and 'data' not in d: _job_status(job_mgr, JobStatus.FETCHING_INPUT) d['data'] = io.fetch( d, **dict({'task_input': task_input}, **kwargs)) # Validate the input if validate and not isvalid( task_input['type'], d, **dict( {'task_input': task_input, 'fetch': False}, **kwargs)): raise Exception( 'Input %s (Python type %s) is not in the expected type ' '(%s) and format (%s).' 
% ( name, type(d['data']), task_input['type'], d['format']) ) # Convert data if auto_convert: try: converted = convert( task_input['type'], d, {'format': task_input['format']}, status=JobStatus.CONVERTING_INPUT, **dict( {'task_input': task_input, 'fetch': False}, **kwargs)) except Exception, e: raise Exception('%s: %s' % (name, str(e))) d['script_data'] = converted['data'] elif not validate or (d.get('format', task_input.get('format')) == task_input.get('format')): d['script_data'] = d['data'] else: