def convert(type, input, output, fetch=True, status=None, **kwargs):
    """
    Convert data from one format to another.

    :param type: The type specifier string of the input data.
    :param input: A binding dict of the form
        ``{'format': format, 'data', data}``, where ``format`` is the format
        specifier string, and ``data`` is the raw data to convert.
        The dict may also be of the form
        ``{'format': format, 'uri', uri}``, where ``uri`` is the location of
        the data (see :py:mod:`girder_worker.uri` for URI formats).
    :param output: A binding of the form
        ``{'format': format}``, where ``format`` is the format
        specifier string to convert the data to.
        The binding may also be in the form
        ``{'format': format, 'uri', uri}``, where ``uri`` specifies
        where to place the converted data.
    :param fetch: Whether to do an initial data fetch before conversion
        (default ``True``).
    :returns: The output binding
        dict with an additional field ``'data'`` containing the converted data.
        If ``'uri'`` is present in the output binding, instead saves the
        data to the specified URI and returns the output binding unchanged.
    """
    if fetch:
        input['data'] = io.fetch(input, **kwargs)

    if input['format'] == output['format']:
        # Same format on both sides: nothing to convert, pass data through.
        data = input['data']
    else:
        # Find a conversion path through the converter graph; each hop is a
        # task that we execute via run() with auto-conversion disabled.
        try:
            path = converter_path(Validator(type, input['format']),
                                  Validator(type, output['format']))
        except NetworkXNoPath:
            raise Exception('No conversion path from %s/%s to %s/%s' % (
                type, input['format'], type, output['format']))

        descriptor = input
        for step in path:
            step_result = run(
                step, {'input': descriptor}, auto_convert=False,
                status=status, **kwargs)
            descriptor = step_result['output']
        data = descriptor['data']

    if status == JobStatus.CONVERTING_OUTPUT:
        # Conversion of an output is immediately followed by pushing it.
        _job_status(kwargs.get('_job_manager'), JobStatus.PUSHING_OUTPUT)

    io.push(data, output, **kwargs)
    return output
def run(task, inputs=None, outputs=None, fetch=True, status=None, **kwargs):
    """
    Run a task with the specified I/O bindings.

    :param task: Specification of the task to run.
    :type task: dict
    :param inputs: Specification of how input objects should be fetched
        into the runtime environment of this task.
    :type inputs: dict
    :param outputs: Specification of what should be done with outputs
        of this task.
    :type outputs: dict
    :param write_script: If ``True`` task scripts will be written to file before
        being passed to ``exec``. This improves interactive debugging with
        tools such as ``pdb`` at the cost of additional file I/O. Note that
        when passed to run *all* tasks will be written to file including
        validation and conversion tasks.
    :param fetch: If ``True`` will perform a fetch on the input before
        running the task (default ``True``).
    :param status: Job status to update to during execution of this task.
    :type status: girder_worker.utils.JobStatus
    :returns: A dictionary of the form ``name: binding`` where ``name`` is
        the name of the output and ``binding`` is an output binding of the
        form ``{'data': data}``. The ``'data'`` field may be absent if an
        output URI was provided. Instead, those outputs will be saved to
        that URI and the output binding will contain the location in the
        ``'uri'`` field.
    """
    inputs = inputs or {}
    outputs = outputs or {}

    task_inputs = {_extractId(d): d for d in task.get('inputs', ())}
    task_outputs = {_extractId(d): d for d in task.get('outputs', ())}
    mode = task.get('mode', 'python')

    if mode not in _task_map:
        raise Exception('Invalid mode: %s' % mode)

    job_mgr = kwargs.get('_job_manager')

    info = {
        'task': task,
        'task_inputs': task_inputs,
        'task_outputs': task_outputs,
        'mode': mode,
        'inputs': inputs,
        'outputs': outputs,
        'status': status,
        'job_mgr': job_mgr,
        'kwargs': kwargs
    }
    events.trigger('run.before', info)

    try:
        # If some inputs are not there, fill in with defaults
        _validateInputs(task_inputs, inputs)

        # FIX: dict.iteritems() exists only in Python 2 and raises
        # AttributeError under Python 3; items() is correct on both.
        for name, d in inputs.items():
            task_input = task_inputs[name]
            if task_input.get('stream'):
                continue  # this input will be fetched as a stream
            if fetch:
                if status == JobStatus.RUNNING and 'data' not in d:
                    set_job_status(job_mgr, JobStatus.FETCHING_INPUT)
                d['data'] = io.fetch(
                    d, **dict({'task_input': task_input}, **kwargs))
            events.trigger('run.handle_input', {
                'info': info,
                'task_input': task_input,
                'input': d,
                'name': name
            })
            if 'script_data' not in d:
                d['script_data'] = d['data']

        # Ensure every declared task output has a binding, even if the
        # caller did not supply one.
        for name, task_output in task_outputs.items():
            if name not in outputs:
                outputs[name] = {}

        # Set the appropriate job status flag
        set_job_status(job_mgr, status)

        # Actually run the task for the given mode
        _task_map[mode](
            task=task, inputs=inputs, outputs=outputs,
            task_inputs=task_inputs, task_outputs=task_outputs,
            **kwargs)

        for name, task_output in task_outputs.items():
            if task_output.get('stream'):
                continue  # this output has already been sent as a stream
            output = outputs[name]
            e = events.trigger('run.handle_output', {
                'info': info,
                'task_output': task_output,
                'output': output,
                'outputs': outputs,
                'name': name
            })
            # An event handler may claim the output (default_prevented),
            # in which case we skip the default push behavior.
            if not e.default_prevented:
                data = outputs[name]['script_data']
                if status == JobStatus.RUNNING:
                    set_job_status(job_mgr, JobStatus.PUSHING_OUTPUT)
                io.push(
                    data, outputs[name],
                    **dict({'task_output': task_output}, **kwargs))
            output.pop('script_data', None)

        events.trigger('run.after', info)
        return outputs
    except StateTransitionException:
        if job_mgr:
            status = job_mgr.refreshStatus()
            # If we are canceling we want to stay in that state, otherwise
            # raise the exception
            if status != JobStatus.CANCELING:
                raise
        else:
            raise
    finally:
        events.trigger('run.finally', info)
d['format']) ) # We should consider refactoring the logic below, reasoning about # the paths through this code is difficult, since this logic is # entered by 'run', 'isvalid', and 'convert'. if auto_convert: outputs[name] = convert( task_output['type'], script_output, d, status=JobStatus.CONVERTING_OUTPUT, **dict({'task_output': task_output}, **kwargs)) elif not validate or d['format'] == task_output['format']: data = d['script_data'] if status == JobStatus.RUNNING: _job_status(job_mgr, JobStatus.PUSHING_OUTPUT) io.push( data, d, **dict({'task_output': task_output}, **kwargs)) else: raise Exception('Expected exact format match but %s != %s.' % ( d['format'], task_output['format'])) if 'script_data' in outputs[name]: del outputs[name]['script_data'] events.trigger('run.after', info) return outputs finally: events.trigger('run.finally', info)