def test_workflow_with_generated_task_classes(self):
    """Assemble the reference workflow from generated task classes.

    Task classes are produced by ``spec_class_generator`` from the fixtures
    ``self.add_two``, ``self.add_three`` and ``self.multiply``. The workflow
    built from them must compare equal to ``self.workflow`` and yield the
    same result when both are run with identical inputs.
    """
    wf = specs.Workflow()

    AddTwo = spec_class_generator('AddTwo', self.add_two)
    AddThree = spec_class_generator('AddThree', self.add_three)
    Multiply = spec_class_generator('Multiply', self.multiply)

    wf.add_task(AddTwo(), 'a2')
    wf.add_task(AddThree(), 'a3')
    wf.add_task(Multiply(), 'm')

    wf.connect_tasks('a3', 'm', {'b': 'in1'})
    wf.connect_tasks('a2', 'm', {'b': 'in2'})

    # Add default as defined in self.workflow
    wf.set_default('a3.a', {'format': 'number', 'data': 10})

    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(wf, self.workflow)

    inputs = {'a2.a': {'format': 'json', 'data': '1'},
              'a3.a': {'format': 'number', 'data': 2}}

    ground = run(self.workflow, inputs=inputs)
    system = run(wf, inputs=inputs)

    self.assertEqual(system, ground)
def testOutputValidation(self, from_env):
    """Exercise validation of docker task outputs and their file paths.

    Steps, in order:

    * an output without a ``target`` is rejected with
      ``TaskSpecValidationError``;
    * a ``filepath`` output whose path is outside ``DATA_VOLUME`` is
      rejected with ``TaskSpecValidationError``;
    * a valid path whose file does not exist raises after the container ran;
    * a file present in the supplied ``_tempdir`` is returned as the output;
    * without an explicit ``path`` the expected file name is the output id.

    :param from_env: Mock injected by the test decorator; it is configured
        to return ``docker_client_mock``.
    """
    from_env.return_value = docker_client_mock

    task = {
        'mode': 'docker',
        'docker_image': 'test/test',
        'pull_image': True,
        'inputs': [],
        'outputs': [{
            'id': 'file_output_1',
            'format': 'text',
            'type': 'string'
        }]
    }

    # Every fragment of the patterns below is a raw string: ``\.`` inside a
    # plain literal is an invalid escape sequence on Python 3. The resulting
    # pattern text is unchanged.
    msg = (r'^Docker outputs must be either "_stdout", "_stderr", or '
           r'filepath-target outputs\.$')
    with self.assertRaisesRegexp(TaskSpecValidationError, msg):
        run(task)

    _reset_mocks()
    task['outputs'][0]['target'] = 'filepath'
    task['outputs'][0]['path'] = '/tmp/some/invalid/path'
    msg = (r'^Docker filepath output paths must either start with "%s/" '
           r'or be specified relative to that directory\.$' % DATA_VOLUME)
    with self.assertRaisesRegexp(TaskSpecValidationError, msg):
        run(task)

    task['outputs'][0]['path'] = '%s/valid_path.txt' % DATA_VOLUME
    path = os.path.join(_tmp, '.*', r'valid_path\.txt')
    msg = r'^Output filepath %s does not exist\.$' % path
    with self.assertRaisesRegexp(Exception, msg):
        run(task)
    # Make sure docker stuff actually got called in this case.
    self.assertEqual(docker_client_mock.containers.run.call_count, 2)

    # Simulate a task that has written into the temp dir
    tmp = os.path.join(_tmp, 'simulated_output')
    if not os.path.isdir(tmp):
        os.makedirs(tmp)
    path = os.path.join(tmp, 'valid_path.txt')
    with open(path, 'w') as f:
        f.write('simulated output')

    _reset_mocks()
    outputs = run(task, _tempdir=tmp)
    self.assertEqual(outputs, {
        'file_output_1': {
            'data': path,
            'format': 'text'
        }
    })

    _reset_mocks()
    # If no path is specified, we should fall back to the output id
    del task['outputs'][0]['path']
    path = os.path.join(_tmp, '.*', 'file_output_1')
    msg = r'^Output filepath %s does not exist\.$' % path
    with self.assertRaisesRegexp(Exception, msg):
        run(task)
def testOutputValidation(self, *args):
    """Exercise validation of docker task outputs and their file paths.

    Steps, in order:

    * an output without a ``target`` is rejected with
      ``TaskSpecValidationError``;
    * a ``filepath`` output whose path is outside ``DATA_VOLUME`` is
      rejected with ``TaskSpecValidationError``;
    * a valid path whose file does not exist raises after the container ran;
    * a file present in the supplied ``_tempdir`` is returned as the output;
    * without an explicit ``path`` the expected file name is the output id.

    :param args: Mocks injected by the test decorators; unused here.
    """
    task = {
        'mode': 'docker',
        'docker_image': 'test/test',
        'pull_image': True,
        'inputs': [],
        'outputs': [{
            'id': 'file_output_1',
        }]
    }

    # Every fragment of the patterns below is a raw string: ``\.`` inside a
    # plain literal is an invalid escape sequence on Python 3. The resulting
    # pattern text is unchanged.
    msg = (r'^Docker outputs must be either "_stdout", "_stderr", or '
           r'filepath-target outputs\.$')
    with self.assertRaisesRegexp(TaskSpecValidationError, msg):
        run(task)

    _reset_mocks()
    task['outputs'][0]['target'] = 'filepath'
    task['outputs'][0]['path'] = '/tmp/some/invalid/path'
    msg = (r'^Docker filepath output paths must either start with "%s/" '
           r'or be specified relative to that directory\.$' % DATA_VOLUME)
    with self.assertRaisesRegexp(TaskSpecValidationError, msg):
        run(task)

    task['outputs'][0]['path'] = '%s/valid_path.txt' % DATA_VOLUME
    path = os.path.join(_tmp, '.*', r'valid_path\.txt')
    msg = r'^Output filepath %s does not exist\.$' % path
    with self.assertRaisesRegexp(Exception, msg):
        run(task, _celery_task=celery_task)
    # Make sure docker stuff actually got called in this case.
    self.assertEqual(docker_client_mock.containers.run.call_count, 2)

    # Simulate a task that has written into the temp dir
    tmp = os.path.join(_tmp, 'simulated_output')
    if not os.path.isdir(tmp):
        os.makedirs(tmp)
    path = os.path.join(tmp, 'valid_path.txt')
    with open(path, 'w') as f:
        f.write('simulated output')

    _reset_mocks()
    outputs = run(task, _tempdir=tmp, _celery_task=celery_task)
    self.assertEqual(outputs, {
        'file_output_1': {
            'data': path
        }
    })

    _reset_mocks()
    # If no path is specified, we should fall back to the output id
    del task['outputs'][0]['path']
    path = os.path.join(_tmp, '.*', 'file_output_1')
    msg = r'^Output filepath %s does not exist\.$' % path
    with self.assertRaisesRegexp(Exception, msg):
        run(task, _celery_task=celery_task)
def testOutputTemplate(self, *args):
    """Check ``$output{...}`` substitution in container args and output path.

    The ``foo`` output is bound to an HTTP destination named ``file.txt``.
    The test expects exactly one POST to that destination and expects the
    container to be started with the data-volume path of ``file.txt`` as its
    only argument.

    :param args: Mocks injected by the test decorators; unused here.
    """
    # Pre-create the file the task is supposed to have produced.
    work_dir = os.path.join(_tmp, 'simulated_output')
    if not os.path.isdir(work_dir):
        os.makedirs(work_dir)
    with open(os.path.join(work_dir, 'file.txt'), 'w') as handle:
        handle.write('simulated output')

    foo_output_spec = {
        'id': 'foo',
        'target': 'filepath',
        'path': '$output{foo}'
    }
    task = {
        'mode': 'docker',
        'docker_image': 'test/test:latest',
        'container_args': ['$output{foo}'],
        'inputs': [],
        'outputs': [foo_output_spec]
    }
    output_bindings = {
        'foo': {
            'mode': 'http',
            'url': 'http://foo.com',
            'name': 'file.txt'
        }
    }

    # Record every outgoing HTTP request instead of performing it.
    captured = []

    @httmock.all_requests
    def record_request(url, request):
        captured.append(request)
        return ''

    with httmock.HTTMock(record_request):
        run(task, outputs=output_bindings, _tempdir=work_dir,
            _celery_task=celery_task)

    self.assertEqual(len(captured), 1)
    upload = captured[0]
    self.assertEqual(upload.method, 'POST')
    self.assertEqual(upload.url, 'http://foo.com/')

    container_run = docker_client_mock.containers.run
    self.assertGreater(len(container_run.mock_calls), 0)
    first_positional = container_run.call_args_list[0][0]
    expected_path = os.path.join(DATA_VOLUME, 'file.txt')
    self.assertEqual(first_positional, ('test/test:latest', [expected_path]))
def convert(type, input, output, fetch=True, status=None, **kwargs):
    """
    Convert data from one format to another and push it to the output.

    :param type: The type specifier string of the input data.
    :param input: A binding dict of the form
        ``{'format': format, 'data': data}``, where ``format`` is the format
        specifier string and ``data`` is the raw data to convert. The dict
        may also be of the form ``{'format': format, 'uri': uri}``, where
        ``uri`` is the location of the data (see
        :py:mod:`girder_worker.uri` for URI formats). When ``fetch`` is true
        the fetched data is stored back into this dict under ``'data'``.
    :param output: A binding of the form ``{'format': format}``, where
        ``format`` is the format specifier string to convert the data to.
        The binding may also be in the form
        ``{'format': format, 'uri': uri}``, where ``uri`` specifies where to
        place the converted data.
    :param fetch: Whether to do an initial data fetch before conversion
        (default ``True``).
    :param status: Forwarded to every conversion ``run`` call. When it equals
        ``utils.JobStatus.CONVERTING_OUTPUT`` the job status is switched to
        ``utils.JobStatus.PUSHING_OUTPUT`` before the data is pushed.
    :param kwargs: Extra options forwarded to ``io.fetch``, ``run`` and
        ``io.push``; ``auto_convert`` is always forced to ``False``. The
        ``_job_manager`` entry, if present, is used for the status update.
    :returns: The ``output`` binding object that was handed to ``io.push``
        together with the converted data.
    :raises Exception: If no conversion path exists between the formats.
    """
    options = dict(kwargs, auto_convert=False)

    if fetch:
        input['data'] = io.fetch(input, **options)

    if input['format'] != output['format']:
        try:
            steps = converter_path(Validator(type, input['format']),
                                   Validator(type, output['format']))
        except NetworkXNoPath:
            raise Exception('No conversion path from %s/%s to %s/%s' %
                            (type, input['format'], type, output['format']))

        # Feed the binding through each conversion of the path in turn.
        current = input
        for step in steps:
            current = run(step, {'input': current},
                          status=status, **options)['output']
        data = current['data']
    else:
        # Formats already agree; nothing to convert.
        data = input['data']

    if status == utils.JobStatus.CONVERTING_OUTPUT:
        set_job_status(options.get('_job_manager'),
                       utils.JobStatus.PUSHING_OUTPUT)

    io.push(data, output, **options)
    return output
def testDockerRunArgs(self, from_env):
    """Check how ``docker_run_args`` reach ``containers.run``.

    Custom arguments must be forwarded, while ``detach`` and ``tty`` must
    stay truthy even when the task asks for ``False``.

    :param from_env: Mock injected by the test decorator; it is configured
        to return ``docker_client_mock``.
    """
    from_env.return_value = docker_client_mock

    task = {
        'mode': 'docker',
        'docker_image': 'test/test:latest',
        'container_args': [
            '-f', '$input{foo}', '--temp-dir=$input{_tempdir}',
            '$flag{bar}'
        ],
        'docker_run_args': {'network_disabled': True},
        'pull_image': True,
        'inputs': [],
        'outputs': [
            {'id': '_stderr', 'format': 'string', 'type': 'string'}
        ]
    }

    def first_run_kwargs():
        # Run the task, then return the keyword arguments of the first
        # containers.run(...) call recorded by the mock.
        run(task, inputs={}, cleanup=False, validate=False,
            auto_convert=False)
        return docker_client_mock.containers.run.call_args_list[0][1]

    passed = first_run_kwargs()
    self.assertIn('network_disabled', passed)
    self.assertTrue(passed['network_disabled'])

    # Ensure we can't override detach and tty
    _reset_mocks()
    task['docker_run_args'] = {'detach': False, 'tty': False}

    passed = first_run_kwargs()
    for forced in ('detach', 'tty'):
        self.assertIn(forced, passed)
        self.assertTrue(passed[forced])
def testDockerRunArgs(self, *args):
    """Check how ``docker_run_args`` reach ``containers.run``.

    Custom arguments must be forwarded; ``detach`` is expected to stay
    truthy and ``tty`` is expected to be falsy when the task requests
    ``False`` for both.

    :param args: Mocks injected by the test decorators; unused here.
    """
    task = {
        'mode': 'docker',
        'docker_image': 'test/test:latest',
        'container_args': [
            '-f', '--temp-dir=$input{_tempdir}', '$flag{bar}'
        ],
        'docker_run_args': {'network_disabled': True},
        'pull_image': True,
        'inputs': [],
        'outputs': [{'id': '_stderr'}]
    }

    def first_run_kwargs():
        # Run the task, then return the keyword arguments of the first
        # containers.run(...) call recorded by the mock.
        run(task, inputs={}, cleanup=False, validate=False,
            auto_convert=False, _celery_task=celery_task)
        return docker_client_mock.containers.run.call_args_list[0][1]

    passed = first_run_kwargs()
    self.assertIn('network_disabled', passed)
    self.assertTrue(passed['network_disabled'])

    # Ensure we can't override detach and tty
    # NOTE(review): the tty assertion below expects False, which is also the
    # value requested by the task, so it cannot tell "not overridable" from
    # "overridden" -- confirm the intended behaviour.
    _reset_mocks()
    task['docker_run_args'] = {'detach': False, 'tty': False}

    passed = first_run_kwargs()
    self.assertIn('detach', passed)
    self.assertTrue(passed['detach'])
    self.assertIn('tty', passed)
    self.assertFalse(passed['tty'])
def testNamedPipes(self, from_env):
    """Check that a streaming filepath output is backed by a named pipe.

    After the task ran with ``_tempdir`` set, a FIFO named after the output
    id must exist inside that directory.

    :param from_env: Mock injected by the test decorator; it is configured
        to return ``docker_client_mock``.
    """
    from_env.return_value = docker_client_mock

    class DummyAdapter(girder_worker.core.utils.StreamPushAdapter):
        def write(self, buf):
            pass

    # Mock out the stream adapter
    io.register_stream_push_adapter('test_dummy', DummyAdapter)

    pipe_output_spec = {
        'id': 'named_pipe',
        'format': 'text',
        'type': 'string',
        'target': 'filepath',
        'stream': True
    }
    task = {
        'mode': 'docker',
        'docker_image': 'test/test',
        'pull_image': False,
        'inputs': [],
        'outputs': [pipe_output_spec]
    }
    output_bindings = {'named_pipe': {'mode': 'test_dummy'}}

    work_dir = os.path.join(_tmp, 'testing')
    if not os.path.isdir(work_dir):
        os.makedirs(work_dir)

    run(task, inputs={}, outputs=output_bindings, _tempdir=work_dir,
        cleanup=False)

    # Make sure pipe was created inside the temp dir
    fifo_path = os.path.join(work_dir, 'named_pipe')
    self.assertTrue(os.path.exists(fifo_path))
    fifo_mode = os.stat(fifo_path).st_mode
    self.assertTrue(stat.S_ISFIFO(fifo_mode))
def testNamedPipes(self, *args):
    """Check that a streaming filepath output is backed by a named pipe.

    After the task ran with ``_tempdir`` set, a FIFO named after the output
    id must exist inside that directory.

    :param args: Mocks injected by the test decorators; unused here.
    """
    class DummyAdapter(girder_worker.core.utils.StreamPushAdapter):
        def write(self, buf):
            pass

    # Mock out the stream adapter
    io.register_stream_push_adapter('test_dummy', DummyAdapter)

    pipe_output_spec = {
        'id': 'named_pipe',
        'target': 'filepath',
        'stream': True
    }
    task = {
        'mode': 'docker',
        'docker_image': 'test/test',
        'pull_image': False,
        'inputs': [],
        'outputs': [pipe_output_spec]
    }
    output_bindings = {
        'named_pipe': {
            'mode': 'test_dummy'
        }
    }

    work_dir = os.path.join(_tmp, 'testing')
    if not os.path.isdir(work_dir):
        os.makedirs(work_dir)

    run(task, inputs={}, outputs=output_bindings, _tempdir=work_dir,
        cleanup=False, _celery_task=celery_task)

    # Make sure pipe was created inside the temp dir
    fifo_path = os.path.join(work_dir, 'named_pipe')
    self.assertTrue(os.path.exists(fifo_path))
    fifo_mode = os.stat(fifo_path).st_mode
    self.assertTrue(stat.S_ISFIFO(fifo_mode))
def testDockerModeStdErrStdOut(self):
    """
    Test writing to stdout and stderr.

    Both ``_stdout`` and ``_stderr`` are bound as task outputs, so the text
    written to each stream is expected in the corresponding output binding.
    """
    def string_port(port_id):
        # Spec of an unnamed string port with the given id.
        return {
            'id': port_id,
            'name': '',
            'format': 'string',
            'type': 'string'
        }

    def string_stream(stream_id):
        # Spec of a string output bound to a standard stream.
        return {
            'id': stream_id,
            'format': 'string',
            'type': 'string'
        }

    task = {
        'mode': 'docker',
        'docker_image': TEST_IMAGE,
        'pull_image': True,
        'container_args': ['$input{test_mode}', '-m', '$input{message}'],
        'inputs': [string_port('test_mode'), string_port('message')],
        'outputs': [string_stream('_stdout'), string_stream('_stderr')]
    }

    bindings = {
        'test_mode': {'format': 'string', 'data': 'stdout_stderr'},
        'message': {'format': 'string', 'data': self._test_message}
    }

    result = run(task, inputs=bindings, _tempdir=self._tmp, cleanup=True,
                 validate=False, auto_convert=False)

    self.assertEqual(result['_stdout']['data'], 'this is stdout data\n')
    self.assertEqual(result['_stderr']['data'], 'this is stderr data\n')
def isvalid(type, binding, fetch=True, **kwargs):
    """
    Determine whether a data binding is of the appropriate type and format.

    The check is delegated to the validator analysis registered for
    ``(type, binding['format'])``, which is executed with ``run``.

    :param type: The expected type specifier string of the binding.
    :param binding: A binding dict of the form
        ``{'format': format, 'data': data}``, where ``format`` is the format
        specifier string and ``data`` is the raw data to test. The dict may
        also be of the form ``{'format': format, 'uri': uri}``, where
        ``uri`` is the location of the data (see
        :py:mod:`girder_worker.uri` for URI formats).
    :param fetch: Forwarded to ``run`` as its ``fetch`` option
        (default ``True``).
    :param kwargs: Extra options forwarded to ``run``; ``auto_convert`` and
        ``validate`` are always forced to ``False``.
    :returns: The ``'data'`` value of the analysis' ``'output'`` binding;
        ``True`` if the binding matches the type and format, ``False``
        otherwise.
    """
    # Disable conversion and validation for the validator run itself.
    options = dict(kwargs, auto_convert=False, validate=False)

    validator = Validator(type, binding['format'])
    analysis = get_validator_analysis(validator)

    result = run(analysis, {'input': binding}, fetch=fetch, **options)
    return result['output']['data']
def testDockerModeOutputPipes(self):
    """
    Test writing to named output pipe.

    The streaming output is bound to a capturing push adapter; after the run
    a FIFO named after the output id must exist in ``self._tmp`` and the
    adapter must have received ``self._test_message``.
    """
    class CaptureAdapter(girder_worker.core.utils.StreamPushAdapter):
        # Accumulates everything written by the stream, at class level.
        message = ''

        def write(self, buf):
            CaptureAdapter.message += buf

    # Mock out the stream adapter
    io.register_stream_push_adapter('capture', CaptureAdapter)

    def string_port(port_id):
        # Spec of an unnamed string input with the given id.
        return {
            'id': port_id,
            'name': '',
            'format': 'string',
            'type': 'string'
        }

    task = {
        'mode': 'docker',
        'docker_image': TEST_IMAGE,
        'pull_image': True,
        'container_args': ['$input{test_mode}', '-m', '$input{message}'],
        'inputs': [string_port('test_mode'), string_port('message')],
        'outputs': [{
            'id': 'output_pipe',
            'format': 'text',
            'type': 'string',
            'target': 'filepath',
            'stream': True
        }]
    }
    output_bindings = {'output_pipe': {'mode': 'capture'}}
    input_bindings = {
        'test_mode': {'format': 'string', 'data': 'write'},
        'message': {'format': 'string', 'data': self._test_message}
    }

    celery_task = mock.MagicMock()
    celery_task.canceled = False

    run(task, inputs=input_bindings, outputs=output_bindings,
        _tempdir=self._tmp, cleanup=False, _celery_task=celery_task)

    # Make sure pipe was created inside the temp dir
    fifo_path = os.path.join(self._tmp, 'output_pipe')
    self.assertTrue(os.path.exists(fifo_path))
    self.assertTrue(stat.S_ISFIFO(os.stat(fifo_path).st_mode))

    # Make sure piped output was written to the adapter
    self.assertEqual(CaptureAdapter.message, self._test_message)
def testDockerMode(self, *args):
    """End-to-end check of docker mode against the mocked docker client.

    Covers, in order: image pull and the two ``containers.run`` calls (the
    task container and the ``busybox`` chmod container), a custom
    entrypoint, direct-path inputs mounting an extra volume, empty and
    non-empty string arguments, and skipping the image pull.

    :param args: Mocks injected by the test decorators; unused here.
    """
    task = {
        'mode': 'docker',
        'docker_image': 'test/test:latest',
        'container_args': ['-f', '$input{foo}',
                           '--temp-dir=$input{_tempdir}', '$flag{bar}'],
        'pull_image': True,
        'inputs': [{
            'id': 'foo',
            'name': 'A variable',
            'target': 'filepath'
        }, {
            'id': 'bar',
            'name': 'Bar',
            'arg': '--bar'
        }],
        'outputs': [{
            'id': '_stderr'
        }]
    }

    inputs = {
        'foo': {
            'mode': 'http',
            'url': 'https://foo.com/file.txt'
        },
        'bar': {
            'mode': 'inline',
            'data': True
        },
    }

    @httmock.all_requests
    def fetchMock(url, request):
        if url.netloc == 'foo.com' and url.scheme == 'https':
            return 'dummy file contents'
        else:
            raise Exception('Unexpected url ' + repr(url))

    with httmock.HTTMock(fetchMock):
        # Use user-specified filename
        _old = sys.stdout
        mockedStdOut = six.StringIO()
        sys.stdout = mockedStdOut
        try:
            out = run(task, inputs=inputs, cleanup=False, validate=False,
                      auto_convert=False, _celery_task=celery_task)
        finally:
            # Always restore the real stdout, even if run() raises, so a
            # failure here cannot swallow the output of later tests.
            sys.stdout = _old

        # We didn't specify _stdout as an output, so it should just get
        # printed to sys.stdout (which we mocked)
        lines = mockedStdOut.getvalue().splitlines()
        # Remove log messages from output
        lines = [line for line in lines if '] INFO: ' not in line]
        self.assertEqual(lines, ['output message'])

        # We bound _stderr as a task output, so it should be in the output
        self.assertEqual(out, {'_stderr': {'data': 'error message\n'}})

        # We should have one call to images.pull(...)
        self.assertEqual(docker_client_mock.images.pull.call_count, 1)
        self.assertEqual(
            docker_client_mock.images.pull.call_args_list[0][0],
            ('test/test:latest', ))

        # We should have two calls to containers.run(...)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        run1, run2 = docker_client_mock.containers.run.call_args_list

        # list(d)[0] / next(iter(d.values())) work on both Python 2 and 3,
        # unlike d.keys()[0] and d.itervalues().next().
        args, kwargs = run1
        volumes = kwargs['volumes']
        self.assertEqual(args[0], 'test/test:latest')
        six.assertRegex(self, list(volumes)[0], _tmp + '/.*')
        self.assertEqual(next(iter(volumes.values()))['bind'], DATA_VOLUME)
        self.assertEqual(args[1][0:2], ['-f', '%s/file.txt' % DATA_VOLUME])
        self.assertEqual(args[1][-2], '--temp-dir=%s' % DATA_VOLUME)
        self.assertEqual(args[1][-1], '--bar')

        args, kwargs = run2
        volumes = kwargs['volumes']
        self.assertEqual(args[0], 'busybox:latest')
        self.assertTrue(kwargs['remove'])
        six.assertRegex(self, list(volumes)[0], _tmp + '/.*')
        self.assertTrue(
            next(iter(volumes.values()))['bind'].startswith(DATA_VOLUME))
        self.assertEqual(args[1][:-1], ['chmod', '-R', 'a+rw'])
        self.assertTrue(args[1][-1].startswith(DATA_VOLUME))

        # Make sure we can specify a custom entrypoint to the container
        _reset_mocks()
        task['entrypoint'] = '/bin/bash'

        inputs['foo'] = {'mode': 'http', 'url': 'https://foo.com/file.txt'}
        inputs['bar'] = {'mode': 'inline', 'data': False}
        run(task, inputs=inputs, validate=False, auto_convert=False,
            _celery_task=celery_task)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        args, kwargs = docker_client_mock.containers.run.call_args_list[0]
        self.assertEqual(args[0], 'test/test:latest')
        self.assertEqual(kwargs['entrypoint'], ['/bin/bash'])
        # Inspect the container argument list (args[1]); checking the
        # (image, arg_list) tuple itself could never find the flag.
        self.assertNotIn('--bar', args[1])
        self.assertEqual(args[1][0:2], ['-f', '%s/file.txt' % DATA_VOLUME])
        _reset_mocks()

        # direct mode files should mount an extra volume
        inputs['foo'] = {
            'mode': 'http',
            'url': 'https://foo.com/file.txt',
            'script_data': __file__,
            'direct_path': __file__
        }
        run(task, inputs=inputs, validate=False, auto_convert=False,
            _celery_task=celery_task)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        args, kwargs = docker_client_mock.containers.run.call_args_list[0]
        self.assertEqual(args[0], 'test/test:latest')
        self.assertEqual(kwargs['entrypoint'], ['/bin/bash'])
        self.assertIn(__file__, kwargs['volumes'])
        _reset_mocks()

        # Make sure custom config settings are respected
        girder_worker.config.set('docker', 'cache_timeout', '123456')
        girder_worker.config.set('docker', 'exclude_images',
                                 'test/test:latest')

        # Make sure we can pass empty values
        task['inputs'].append({'id': 'baz'})
        task['container_args'].extend(['--baz', '$input{baz}'])
        inputs['baz'] = {'data': '', 'mode': 'inline'}
        inputs['foo'] = {'mode': 'http', 'url': 'https://foo.com/file.txt'}
        run(task, inputs=inputs, validate=False, auto_convert=False,
            _celery_task=celery_task)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        args = docker_client_mock.containers.run.call_args_list[0][0]
        self.assertEqual(args[0], 'test/test:latest')
        self.assertEqual(args[1], [
            '-f',
            '/mnt/girder_worker/data/file.txt',
            '--temp-dir=/mnt/girder_worker/data',
            '--baz', ''
        ])
        self.assertNotIn('--bar', args[1])

        # And non-empty values
        _reset_mocks()
        inputs['baz'] = {'data': 'parameter1', 'mode': 'inline'}
        inputs['foo'] = {'mode': 'http', 'url': 'https://foo.com/file.txt'}
        run(task, inputs=inputs, validate=False, auto_convert=False,
            _celery_task=celery_task)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        args = docker_client_mock.containers.run.call_args_list[0][0]
        self.assertEqual(args[0], 'test/test:latest')
        self.assertEqual(args[1], [
            '-f',
            '/mnt/girder_worker/data/file.txt',
            '--temp-dir=/mnt/girder_worker/data',
            '--baz', 'parameter1'
        ])

        # Clean up
        del inputs['baz']
        task['inputs'].pop()
        task['container_args'].pop()
        task['container_args'].pop()

        # Make sure we can skip pulling the image
        _reset_mocks()
        task['pull_image'] = False
        inputs['foo'] = {'mode': 'http', 'url': 'https://foo.com/file.txt'}
        run(task, inputs=inputs, validate=False, auto_convert=False,
            _celery_task=celery_task)

        # Assert no call to images.pull
        self.assertEqual(docker_client_mock.images.pull.call_count, 0)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
def test_workflow_with_task_classes(self):
    """Assemble the reference workflow from hand-written task classes.

    Three ``specs.Task`` subclasses (add two, add three, multiply) are
    declared inline, wired into a ``specs.Workflow`` and compared against
    ``self.workflow``, both structurally and by running the two workflows
    with identical inputs.
    """
    class AddTwo(specs.Task):
        __inputs__ = specs.PortList([
            {'name': 'a', 'type': 'number', 'format': 'number'}
        ])
        __outputs__ = specs.PortList([
            {'name': 'b', 'type': 'number', 'format': 'number'}
        ])

        def __init__(self, spec=None, **kw):
            super(AddTwo, self).__init__(spec, **kw)
            self.mode = 'python'
            self.script = 'b = a + 2'

    class AddThree(specs.Task):
        __inputs__ = specs.PortList([
            {'name': 'a', 'type': 'number', 'format': 'number'}
        ])
        __outputs__ = specs.PortList([
            {'name': 'b', 'type': 'number', 'format': 'number'}
        ])

        def __init__(self, spec=None, **kw):
            super(AddThree, self).__init__(spec, **kw)
            self.mode = 'python'
            self.script = 'b = a + 3'

    class Multiply(specs.Task):
        __inputs__ = specs.PortList([
            {'name': 'in1', 'type': 'number', 'format': 'number'},
            {'name': 'in2', 'type': 'number', 'format': 'number'}
        ])
        __outputs__ = specs.PortList([
            {'name': 'out', 'type': 'number', 'format': 'number'}
        ])

        def __init__(self, spec=None, **kw):
            super(Multiply, self).__init__(spec, **kw)
            self.mode = 'python'
            self.script = 'out = in1 * in2'

    wf = specs.Workflow()

    wf.add_task(AddTwo(), 'a2')
    wf.add_task(AddThree(), 'a3')
    wf.add_task(Multiply(), 'm')

    wf.connect_tasks('a3', 'm', {'b': 'in1'})
    wf.connect_tasks('a2', 'm', {'b': 'in2'})

    # Add default as defined in self.workflow
    wf.set_default('a3.a', {'format': 'number', 'data': 10})

    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(wf, self.workflow)

    inputs = {'a2.a': {'format': 'json', 'data': '1'},
              'a3.a': {'format': 'number', 'data': 2}}

    ground = run(self.workflow, inputs=inputs)
    system = run(wf, inputs=inputs)

    self.assertEqual(system, ground)
def testDockerModeStdio(self):
    """
    Test writing to stdout.

    Three runs are performed: with an explicit ``_tempdir``, without one,
    and with ``_stdout`` bound as a task output. In the first two the
    message must be the last line printed to ``sys.stdout``; in the third it
    must be returned in the ``_stdout`` binding and not printed.
    """
    def _new_task():
        # Fresh task spec with no bound outputs.
        return {
            'mode': 'docker',
            'docker_image': TEST_IMAGE,
            'pull_image': True,
            'container_args': ['$input{test_mode}', '-m',
                               '$input{message}'],
            'inputs': [{
                'id': 'test_mode',
                'name': '',
                'format': 'string',
                'type': 'string'
            }, {
                'id': 'message',
                'name': '',
                'format': 'string',
                'type': 'string'
            }],
            'outputs': []
        }

    inputs = {
        'test_mode': {
            'format': 'string',
            'data': 'stdio'
        },
        'message': {
            'format': 'string',
            'data': self._test_message
        }
    }

    celery_task = mock.MagicMock()
    celery_task.canceled = False

    def _run_capturing_stdout(task, **run_kwargs):
        # Run the task with sys.stdout redirected; return (outputs, lines).
        stdout_captor = six.StringIO()
        _old = sys.stdout
        sys.stdout = stdout_captor
        try:
            out = run(
                task, inputs=inputs, validate=False, auto_convert=False,
                _celery_task=celery_task, **run_kwargs)
        finally:
            # Restore the real stdout even when run() raises, so a failure
            # cannot leave the process with a redirected stdout.
            sys.stdout = _old
        return out, stdout_captor.getvalue().splitlines()

    # With an explicit temp dir
    task = _new_task()
    _, lines = _run_capturing_stdout(
        task, _tempdir=self._tmp, cleanup=True)
    self.assertEqual(lines[-1], self._test_message)

    # Without a temp dir
    task = _new_task()
    _, lines = _run_capturing_stdout(task, cleanup=True)
    self.assertEqual(lines[-1], self._test_message)

    # Test _stdout
    task['outputs'] = [{
        'id': '_stdout',
        'format': 'string',
        'type': 'string'
    }]
    out, lines = _run_capturing_stdout(task, cleanup=False)

    message = '%s\n' % self._test_message
    # splitlines() strips the newline, so compare against the bare message;
    # testing the newline-terminated string could never fail.
    self.assertNotIn(self._test_message, lines)
    self.assertEqual(out['_stdout']['data'], message)
def testDockerMode(self, from_env):
    """End-to-end check of docker mode against the mocked docker client.

    Covers, in order: image pull and the two ``containers.run`` calls (the
    task container and the ``busybox`` chmod container), a custom
    entrypoint, empty and non-empty string arguments, and skipping the image
    pull.

    :param from_env: Mock injected by the test decorator; it is configured
        to return ``docker_client_mock``.
    """
    from_env.return_value = docker_client_mock

    task = {
        'mode': 'docker',
        'docker_image': 'test/test:latest',
        'container_args': [
            '-f', '$input{foo}', '--temp-dir=$input{_tempdir}',
            '$flag{bar}'
        ],
        'pull_image': True,
        'inputs': [{
            'id': 'foo',
            'name': 'A variable',
            'format': 'string',
            'type': 'string',
            'target': 'filepath'
        }, {
            'id': 'bar',
            'name': 'Bar',
            'format': 'boolean',
            'type': 'boolean',
            'arg': '--bar',
        }],
        'outputs': [{
            'id': '_stderr',
            'format': 'string',
            'type': 'string'
        }]
    }

    inputs = {
        'foo': {
            'mode': 'http',
            'url': 'https://foo.com/file.txt'
        },
        'bar': {
            'mode': 'inline',
            'data': True
        },
    }

    @httmock.all_requests
    def fetchMock(url, request):
        if url.netloc == 'foo.com' and url.scheme == 'https':
            return 'dummy file contents'
        else:
            raise Exception('Unexpected url ' + repr(url))

    with httmock.HTTMock(fetchMock):
        # Use user-specified filename
        _old = sys.stdout
        mockedStdOut = six.StringIO()
        sys.stdout = mockedStdOut
        try:
            out = run(
                task, inputs=inputs, cleanup=False, validate=False,
                auto_convert=False)
        finally:
            # Always restore the real stdout, even if run() raises, so a
            # failure here cannot swallow the output of later tests.
            sys.stdout = _old

        # We didn't specify _stdout as an output, so it should just get
        # printed to sys.stdout (which we mocked)
        lines = mockedStdOut.getvalue().splitlines()
        # Remove log messages from output
        lines = [line for line in lines if '] INFO: ' not in line]
        self.assertEqual(lines, ['output message'])

        # We bound _stderr as a task output, so it should be in the output
        self.assertEqual(out, {
            '_stderr': {
                'data': 'error message\n',
                'format': 'string'
            }
        })

        # We should have one call to images.pull(...)
        self.assertEqual(docker_client_mock.images.pull.call_count, 1)
        self.assertEqual(
            docker_client_mock.images.pull.call_args_list[0][0],
            ('test/test:latest', ))

        # We should have two calls to containers.run(...)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        run1, run2 = docker_client_mock.containers.run.call_args_list

        # list(d)[0] / next(iter(d.values())) work on both Python 2 and 3,
        # unlike d.keys()[0] and d.itervalues().next().
        args, kwargs = run1
        volumes = kwargs['volumes']
        self.assertEqual(args[0], 'test/test:latest')
        six.assertRegex(self, list(volumes)[0], _tmp + '/.*')
        self.assertEqual(next(iter(volumes.values()))['bind'], DATA_VOLUME)
        self.assertEqual(args[1][0:2], ['-f', '%s/file.txt' % DATA_VOLUME])
        self.assertEqual(args[1][-2], '--temp-dir=%s' % DATA_VOLUME)
        self.assertEqual(args[1][-1], '--bar')

        args, kwargs = run2
        volumes = kwargs['volumes']
        self.assertEqual(args[0], 'busybox:latest')
        self.assertTrue(kwargs['remove'])
        six.assertRegex(self, list(volumes)[0], _tmp + '/.*')
        self.assertEqual(next(iter(volumes.values()))['bind'], DATA_VOLUME)
        self.assertEqual(args[1], ['chmod', '-R', 'a+rw', DATA_VOLUME])

        # Make sure we can specify a custom entrypoint to the container
        _reset_mocks()
        task['entrypoint'] = '/bin/bash'

        inputs['foo'] = {
            'mode': 'http',
            'url': 'https://foo.com/file.txt'
        }
        inputs['bar'] = {
            'mode': 'inline',
            'data': False
        }
        run(task, inputs=inputs, validate=False, auto_convert=False)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        args, kwargs = docker_client_mock.containers.run.call_args_list[0]
        self.assertEqual(args[0], 'test/test:latest')
        self.assertEqual(kwargs['entrypoint'], ['/bin/bash'])
        # Inspect the container argument list (args[1]); checking the
        # (image, arg_list) tuple itself could never find the flag.
        self.assertNotIn('--bar', args[1])
        self.assertEqual(args[1][0:2], ['-f', '%s/file.txt' % DATA_VOLUME])
        _reset_mocks()

        # Make sure custom config settings are respected
        girder_worker.config.set('docker', 'cache_timeout', '123456')
        girder_worker.config.set(
            'docker', 'exclude_images', 'test/test:latest')

        # Make sure we can pass empty values
        task['inputs'].append({
            'id': 'baz',
            'format': 'string',
            'type': 'string',
        })
        task['container_args'].extend(['--baz', '$input{baz}'])
        inputs['baz'] = {
            'data': '',
            'format': 'string',
            'mode': 'inline',
            'type': 'string'
        }
        run(task, inputs=inputs, validate=False, auto_convert=False)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        args = docker_client_mock.containers.run.call_args_list[0][0]
        self.assertEqual(args[0], 'test/test:latest')
        self.assertEqual(args[1], [
            '-f',
            '/mnt/girder_worker/data/file.txt',
            '--temp-dir=/mnt/girder_worker/data',
            '--baz', ''
        ])
        self.assertNotIn('--bar', args[1])

        # And non-empty values
        _reset_mocks()
        inputs['baz']['data'] = 'parameter1'
        run(task, inputs=inputs, validate=False, auto_convert=False)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
        args = docker_client_mock.containers.run.call_args_list[0][0]
        self.assertEqual(args[0], 'test/test:latest')
        self.assertEqual(args[1], [
            '-f',
            '/mnt/girder_worker/data/file.txt',
            '--temp-dir=/mnt/girder_worker/data',
            '--baz', 'parameter1'
        ])

        # Clean up
        del inputs['baz']
        task['inputs'].pop()
        task['container_args'].pop()
        task['container_args'].pop()

        # Make sure we can skip pulling the image
        _reset_mocks()
        task['pull_image'] = False
        inputs['foo'] = {
            'mode': 'http',
            'url': 'https://foo.com/file.txt'
        }
        run(task, inputs=inputs, validate=False, auto_convert=False)

        # Assert no call to images.pull
        self.assertEqual(docker_client_mock.images.pull.call_count, 0)
        self.assertEqual(docker_client_mock.containers.run.call_count, 2)
def testDockerModeRemoveContainer(self):
    """
    Test automatic container removal

    A first run must leave no new container behind. A second run, made with
    container removal disabled, must leave exactly one container created
    from ``test_image``; that container is removed at the end.
    """
    task = {
        'mode': 'docker',
        'docker_image': test_image,
        'pull_image': True,
        'container_args': ['$input{test_mode}', '$input{message}'],
        'inputs': [{
            'id': 'test_mode',
            'name': '',
            'format': 'string',
            'type': 'string'
        }, {
            'id': 'message',
            'name': '',
            'format': 'string',
            'type': 'string'
        }],
        'outputs': []
    }

    inputs = {
        'test_mode': {
            'format': 'string',
            'data': 'stdio'
        },
        'message': {
            'format': 'string',
            'data': self._test_message
        }
    }

    docker_client = docker.from_env()
    # Remember the most recent container so that only containers created
    # after this point are inspected below.
    containers = docker_client.containers.list(limit=1)
    last_container_id = containers[0].id if len(containers) > 0 else None

    run(
        task, inputs=inputs, _tempdir=self._tmp, cleanup=True,
        validate=False, auto_convert=False)

    def _fetch_new_containers(last_container_id):
        # List containers created since ``last_container_id`` (all
        # containers when there was none beforehand).
        if last_container_id:
            filters = {
                'since': last_container_id
            }
            new_containers = docker_client.containers.list(
                all=True, filters=filters)
        else:
            new_containers = docker_client.containers.list(all=True)

        return new_containers

    new_containers = _fetch_new_containers(last_container_id)
    # Now assert that the container was removed
    self.assertEqual(len(new_containers), 0)

    # Now confirm that the container doesn't get removed if we set
    # _rm_container = False
    girder_worker.config.set('docker', 'gc', 'True')
    # Stop GC removing anything
    # sys.maxsize replaces sys.maxint, which does not exist on Python 3.
    girder_worker.config.set('docker', 'cache_timeout', str(sys.maxsize))

    # NOTE(review): the task key is ``_rm_container`` while the run() option
    # is ``_rm_containers`` -- confirm which spelling run() honours.
    task['_rm_container'] = False
    run(
        task, inputs=inputs, _tempdir=self._tmp, cleanup=True,
        validate=False, auto_convert=False, _rm_containers=False)

    new_containers = _fetch_new_containers(last_container_id)
    try:
        self.assertEqual(len(new_containers), 1)
        self.assertEqual(
            new_containers[0].attrs.get('Config', {})['Image'], test_image)
    finally:
        # Clean it up, even when one of the assertions above failed, so no
        # container is leaked into later tests.
        if new_containers:
            new_containers[0].remove()
def testDockerModeOutputPipes(self):
    """
    Test writing to named output pipe.

    The streaming output is bound to a capturing push adapter; after the run
    a FIFO named after the output id must exist in ``self._tmp`` and the
    adapter must have received ``self._test_message``.
    """
    class CaptureAdapter(girder_worker.core.utils.StreamPushAdapter):
        # Accumulates everything written by the stream, at class level.
        message = ''

        def write(self, buf):
            CaptureAdapter.message += buf

    # Mock out the stream adapter
    io.register_stream_push_adapter('capture', CaptureAdapter)

    def string_port(port_id):
        # Spec of an unnamed string input with the given id.
        return {
            'id': port_id,
            'name': '',
            'format': 'string',
            'type': 'string'
        }

    task = {
        'mode': 'docker',
        'docker_image': test_image,
        'pull_image': True,
        'container_args': ['$input{test_mode}', '$input{message}'],
        'inputs': [string_port('test_mode'), string_port('message')],
        'outputs': [{
            'id': 'output_pipe',
            'format': 'text',
            'type': 'string',
            'target': 'filepath',
            'stream': True
        }]
    }
    output_bindings = {'output_pipe': {'mode': 'capture'}}
    input_bindings = {
        'test_mode': {'format': 'string', 'data': 'output_pipe'},
        'message': {'format': 'string', 'data': self._test_message}
    }

    run(task, inputs=input_bindings, outputs=output_bindings,
        _tempdir=self._tmp, cleanup=False)

    # Make sure pipe was created inside the temp dir
    fifo_path = os.path.join(self._tmp, 'output_pipe')
    self.assertTrue(os.path.exists(fifo_path))
    self.assertTrue(stat.S_ISFIFO(os.stat(fifo_path).st_mode))

    # Make sure piped output was written to the adapter
    self.assertEqual(CaptureAdapter.message, self._test_message)
def testDockerMode(self, mockPopen, checkOutput):
    """
    Test docker mode by inspecting the commands passed to a mocked Popen.

    Three scenarios are run with the HTTP fetch of the ``foo`` input
    mocked out:

    1. Default run: expects ``docker pull``, ``docker run`` and a
       ``docker-gc`` invocation, stdout printed to ``sys.stdout`` and
       ``_stderr`` captured as a task output.
    2. Custom ``entrypoint`` plus extra ``docker_run_args``.
    3. ``pull_image`` disabled with custom 'docker' config settings,
       which must show up in the environment passed to docker-gc.

    :param mockPopen: mock whose ``return_value`` is set to
        ``processMock`` and whose ``call_args_list`` is inspected
        (presumably injected by a ``mock.patch`` decorator outside this
        view).
    :param checkOutput: mock whose ``return_value`` is set to
        ``inspectOutput``.
    """
    mockPopen.return_value = processMock
    checkOutput.return_value = inspectOutput

    task = {
        'mode': 'docker',
        'docker_image': 'test/test:latest',
        'container_args': [
            '-f', '$input{foo}', '--temp-dir=$input{_tempdir}'],
        'pull_image': True,
        'inputs': [{
            'id': 'foo',
            'name': 'A variable',
            'format': 'string',
            'type': 'string',
            'target': 'filepath'
        }],
        'outputs': [{
            'id': '_stderr',
            'format': 'string',
            'type': 'string'
        }]
    }

    inputs = {
        'foo': {
            'mode': 'http',
            'url': 'https://foo.com/file.txt'
        }
    }

    @httmock.all_requests
    def fetchMock(url, request):
        if url.netloc == 'foo.com' and url.scheme == 'https':
            return 'dummy file contents'
        else:
            raise Exception('Unexpected url ' + repr(url))

    with httmock.HTTMock(fetchMock):
        # Use user-specified filename
        _old = sys.stdout
        mockedStdOut = six.StringIO()
        sys.stdout = mockedStdOut
        try:
            out = run(
                task, inputs=inputs, cleanup=False, validate=False,
                auto_convert=False)
        finally:
            # Always restore the real stdout, even if run() raises;
            # otherwise later output would go to the StringIO.
            sys.stdout = _old

        # We didn't specify _stdout as an output, so it should just get
        # printed to sys.stdout (which we mocked)
        lines = mockedStdOut.getvalue().splitlines()
        self.assertEqual(lines[0],
                         'Pulling Docker image: test/test:latest')
        self.assertEqual(lines[-2], 'output message')
        self.assertEqual(
            lines[-1], 'Garbage collecting old containers and images.')

        # We bound _stderr as a task output, so it should be in the output
        self.assertEqual(out, {
            '_stderr': {
                'data': 'error message\n',
                'format': 'string'
            }
        })

        # Expected Popen calls, in order: pull, run, garbage collection
        self.assertEqual(mockPopen.call_count, 3)
        cmd1, cmd2, cmd3 = [x[1]['args'] for x in mockPopen.call_args_list]

        self.assertEqual(cmd1, ('docker', 'pull', 'test/test:latest'))
        self.assertEqual(cmd2[:3], ['docker', 'run', '-v'])
        six.assertRegex(self, cmd2[3], _tmp + '/.*:%s' % DATA_VOLUME)
        self.assertEqual(cmd2[4], '-v')
        six.assertRegex(self, cmd2[5],
                        '%s:%s:ro' % (SCRIPTS_DIR, SCRIPTS_VOLUME))
        self.assertEqual(cmd2[6:9], [
            '--entrypoint',
            '%s/entrypoint.sh' % SCRIPTS_VOLUME,
            'test/test:latest'
        ])
        self.assertEqual(cmd2[9:15], [
            str(os.getuid()), str(os.getgid()),
            '/usr/bin/foo', '--flag', '-f', '%s/file.txt' % DATA_VOLUME])
        self.assertEqual(cmd2[-1], '--temp-dir=%s' % DATA_VOLUME)
        self.assertEqual(len(cmd2), 16)

        self.assertEqual(len(cmd3), 1)
        six.assertRegex(self, cmd3[0], 'docker-gc$')

        # Make sure we can specify a custom entrypoint to the container
        mockPopen.reset_mock()
        task['entrypoint'] = '/bin/bash'

        # Make sure additional docker run args work
        task['docker_run_args'] = ['--net', 'none']

        # The input binding is rebuilt before each run, as in the
        # original test (presumably because run() may modify it).
        inputs['foo'] = {
            'mode': 'http',
            'url': 'https://foo.com/file.txt'
        }
        out = run(task, inputs=inputs, validate=False, auto_convert=False)
        self.assertEqual(mockPopen.call_count, 3)
        cmd2 = mockPopen.call_args_list[1][1]['args']
        self.assertEqual(cmd2[6:11], [
            '--entrypoint',
            '%s/entrypoint.sh' % SCRIPTS_VOLUME,
            '--net',
            'none',
            'test/test:latest'
        ])
        self.assertEqual(cmd2[11:16], [
            str(os.getuid()), str(os.getgid()),
            '/bin/bash', '-f', '%s/file.txt' % DATA_VOLUME])

        mockPopen.reset_mock()
        # Make sure custom config settings are respected
        girder_worker.config.set('docker', 'cache_timeout', '123456')
        girder_worker.config.set(
            'docker', 'exclude_images', 'test/test:latest')

        # Make sure we can skip pulling the image
        task['pull_image'] = False
        inputs['foo'] = {
            'mode': 'http',
            'url': 'https://foo.com/file.txt'
        }
        out = run(task, inputs=inputs, validate=False, auto_convert=False)
        # No pull this time: only run and garbage collection
        self.assertEqual(mockPopen.call_count, 2)
        cmd1, cmd2 = [x[1]['args'] for x in mockPopen.call_args_list]
        self.assertEqual(tuple(cmd1[:2]), ('docker', 'run'))
        self.assertEqual(cmd1[8:10], ['--net', 'none'])
        six.assertRegex(self, cmd2[0], 'docker-gc$')
        # The config values set above must reach docker-gc via its env
        env = mockPopen.call_args_list[1][1]['env']
        self.assertEqual(env['GRACE_PERIOD_SECONDS'], '123456')
        six.assertRegex(self, env['EXCLUDE_FROM_GC'],
                        'docker_gc_scratch/.docker-gc-exclude$')
def testDockerModeInputPipes(self):
    """
    Test reading from a named input pipe.

    The ``input_pipe`` input is a streaming ``filepath`` input fed by a
    'static' stream fetch adapter registered by this test. The task's
    captured ``_stdout`` (trailing whitespace stripped) must equal
    ``self._test_message``, and a FIFO named ``input_pipe`` must exist
    in the temp dir.
    """
    mode_input_spec = {
        'id': 'test_mode',
        'name': '',
        'format': 'string',
        'type': 'string'
    }
    pipe_input_spec = {
        'id': 'input_pipe',
        'format': 'string',
        'type': 'string',
        'target': 'filepath',
        'stream': True
    }
    stdout_output_spec = {
        'id': '_stdout',
        'format': 'string',
        'type': 'string'
    }

    task = {
        'mode': 'docker',
        'docker_image': TEST_IMAGE,
        'pull_image': True,
        'container_args': ['$input{test_mode}'],
        'inputs': [mode_input_spec, pipe_input_spec],
        'outputs': [stdout_output_spec]
    }

    input_bindings = {
        'test_mode': {'format': 'string', 'data': 'read'},
        'input_pipe': {'mode': 'static', 'data': self._test_message}
    }

    class StaticAdapter(girder_worker.core.utils.StreamFetchAdapter):
        """Fetch adapter that serves the bytes given in the spec."""

        def __init__(self, spec):
            self._data = six.BytesIO(spec['data'])

        def read(self, buf_len):
            return self._data.read(buf_len)

    # Route the 'static' input mode to the adapter defined above
    io.register_stream_fetch_adapter('static', StaticAdapter)

    celery_task = mock.MagicMock()
    celery_task.canceled = False

    result = run(
        task, inputs=input_bindings, outputs={}, _tempdir=self._tmp,
        cleanup=True, _celery_task=celery_task)

    # The pipe must have been created inside the temp dir, as a FIFO
    pipe_path = os.path.join(self._tmp, 'input_pipe')
    self.assertTrue(os.path.exists(pipe_path))
    pipe_mode = os.stat(pipe_path).st_mode
    self.assertTrue(stat.S_ISFIFO(pipe_mode))

    captured_stdout = result['_stdout']['data']
    self.assertEqual(captured_stdout.rstrip(), self._test_message)
def testDockerModeRemoveContainer(self):
    """
    Test automatic container removal.

    The container is given a fixed name through ``docker_run_args`` so it
    can be looked up by name afterwards. The first run (with the 'docker'
    'gc' setting at 'False') must leave no container with that name. The
    second run sets ``_rm_container``/``_rm_containers`` to False and
    must leave exactly one. Whatever containers were found are removed
    in ``finally`` blocks.

    NOTE(review): this test changes the 'docker' section of
    ``girder_worker.config`` ('gc' and 'cache_timeout') and does not
    restore the previous values.
    """
    container_name = 'testDockerModeRemoveContainer'
    task = {
        'mode': 'docker',
        'docker_image': TEST_IMAGE,
        'pull_image': True,
        'container_args': ['$input{test_mode}', '-m', '$input{message}'],
        'inputs': [{
            'id': 'test_mode',
            'name': '',
            'format': 'string',
            'type': 'string'
        }, {
            'id': 'message',
            'name': '',
            'format': 'string',
            'type': 'string'
        }],
        'outputs': [],
        'docker_run_args': {
            'name': container_name
        }
    }

    inputs = {
        'test_mode': {
            'format': 'string',
            'data': 'stdio'
        },
        'message': {
            'format': 'string',
            'data': self._test_message
        }
    }

    docker_client = docker.from_env()
    celery_task = mock.MagicMock()
    celery_task.canceled = False

    containers = []

    def _cleanup():
        # Reads the enclosing ``containers`` at call time, so it removes
        # whatever the most recent listing returned.
        for container in containers:
            container.remove()

    try:
        girder_worker.config.set('docker', 'gc', 'False')
        run(
            task, inputs=inputs, _tempdir=self._tmp, cleanup=True,
            validate=False, auto_convert=False, _celery_task=celery_task)

        containers = docker_client.containers.list(all=True, filters={
            'name': container_name
        })
        # Now assert that the container was removed
        self.assertEqual(len(containers), 0)
    finally:
        _cleanup()

    try:
        # Now confirm that the container doesn't get removed if we set
        # _rm_container = False
        girder_worker.config.set('docker', 'gc', 'True')
        # Stop GC removing anything. sys.maxsize is used instead of
        # sys.maxint, which does not exist on Python 3.
        girder_worker.config.set(
            'docker', 'cache_timeout', str(sys.maxsize))
        task['_rm_container'] = False
        run(
            task, inputs=inputs, _tempdir=self._tmp, cleanup=True,
            validate=False, auto_convert=False, _rm_containers=False,
            _celery_task=celery_task)
        containers = docker_client.containers.list(all=True, filters={
            'name': container_name
        })
        self.assertEqual(len(containers), 1)
    finally:
        _cleanup()
def testDockerModeInputPipes(self):
    """
    Test reading from a named input pipe.

    The ``input_pipe`` input is a streaming ``filepath`` input fed by a
    'static' stream fetch adapter registered by this test. The task's
    captured ``_stdout`` (trailing whitespace stripped) must equal
    ``self._test_message``, and a FIFO named ``input_pipe`` must exist
    in the temp dir.
    """
    def _string_input(port_id):
        # The two plain string inputs share this spec; only the id
        # differs.
        return {
            'id': port_id,
            'name': '',
            'format': 'string',
            'type': 'string'
        }

    pipe_input_spec = {
        'id': 'input_pipe',
        'format': 'string',
        'type': 'string',
        'target': 'filepath',
        'stream': True
    }
    stdout_output_spec = {
        'id': '_stdout',
        'format': 'string',
        'type': 'string'
    }

    task = {
        'mode': 'docker',
        'docker_image': test_image,
        'pull_image': True,
        'container_args': ['$input{test_mode}', '$input{message}'],
        'inputs': [
            _string_input('test_mode'),
            _string_input('message'),
            pipe_input_spec
        ],
        'outputs': [stdout_output_spec]
    }

    input_bindings = {
        'test_mode': {'format': 'string', 'data': 'input_pipe'},
        'message': {'format': 'string', 'data': self._test_message},
        'input_pipe': {'mode': 'static', 'data': self._test_message}
    }

    class StaticAdapter(girder_worker.core.utils.StreamFetchAdapter):
        """Fetch adapter that serves the bytes given in the spec."""

        def __init__(self, spec):
            self._data = six.BytesIO(spec['data'])

        def read(self, buf_len):
            return self._data.read(buf_len)

    # Route the 'static' input mode to the adapter defined above
    io.register_stream_fetch_adapter('static', StaticAdapter)

    result = run(
        task, inputs=input_bindings, outputs={}, _tempdir=self._tmp,
        cleanup=True)

    # The pipe must have been created inside the temp dir, as a FIFO
    pipe_path = os.path.join(self._tmp, 'input_pipe')
    self.assertTrue(os.path.exists(pipe_path))
    pipe_mode = os.stat(pipe_path).st_mode
    self.assertTrue(stat.S_ISFIFO(pipe_mode))

    captured_stdout = result['_stdout']['data']
    self.assertEqual(captured_stdout.rstrip(), self._test_message)
def _run():
    """Run ``task`` with the enclosing test's inputs and fixed options.

    ``task``, ``inputs``, ``self`` and ``celery_task`` are taken from the
    enclosing scope. The result of ``run`` is discarded.
    """
    run_options = {
        'inputs': inputs,
        '_tempdir': self._tmp,
        'cleanup': True,
        'validate': False,
        'auto_convert': False,
        '_celery_task': celery_task,
    }
    run(task, **run_options)
def testDockerModeStdio(self):
    """
    Test writing to stdout.

    Three runs are performed with ``sys.stdout`` temporarily replaced by
    a ``StringIO``:

    1. No outputs, explicit ``_tempdir``: the last captured line must be
       the test message.
    2. A fresh, identical task without ``_tempdir``: same expectation.
    3. ``_stdout`` bound as a task output: the message (followed by
       ``'\\r\\n'``) must be returned in ``out['_stdout']['data']``.
    """
    def _make_task():
        # A fresh dict per call, as the original test built the task
        # literal twice rather than reusing the first one.
        return {
            'mode': 'docker',
            'docker_image': test_image,
            'pull_image': True,
            'container_args': ['$input{test_mode}', '$input{message}'],
            'inputs': [{
                'id': 'test_mode',
                'name': '',
                'format': 'string',
                'type': 'string'
            }, {
                'id': 'message',
                'name': '',
                'format': 'string',
                'type': 'string'
            }],
            'outputs': []
        }

    inputs = {
        'test_mode': {
            'format': 'string',
            'data': 'stdio'
        },
        'message': {
            'format': 'string',
            'data': self._test_message
        }
    }

    def _run_capturing_stdout(task, **kwargs):
        """Run ``task`` and return ``(result, captured stdout lines)``."""
        _old = sys.stdout
        stdout_captor = six.StringIO()
        sys.stdout = stdout_captor
        try:
            result = run(
                task, inputs=inputs, validate=False, auto_convert=False,
                **kwargs)
        finally:
            # Always restore the real stdout, even if run() raises;
            # otherwise later output would go to the StringIO.
            sys.stdout = _old
        return result, stdout_captor.getvalue().splitlines()

    task = _make_task()
    _, lines = _run_capturing_stdout(
        task, _tempdir=self._tmp, cleanup=True)
    self.assertEqual(lines[-1], self._test_message)

    task = _make_task()
    _, lines = _run_capturing_stdout(task, cleanup=True)
    self.assertEqual(lines[-1], self._test_message)

    # Test _stdout
    task['outputs'] = [{
        'id': '_stdout',
        'format': 'string',
        'type': 'string'
    }]

    out, lines = _run_capturing_stdout(task, cleanup=False)
    message = '%s\r\n' % self._test_message
    # NOTE(review): ``lines`` comes from splitlines(), so no element can
    # contain '\r\n' and this assertion always passes. If the intent is
    # "the message was not printed to stdout", it should probably compare
    # against self._test_message - confirm before tightening.
    self.assertTrue(message not in lines)
    self.assertEqual(out['_stdout']['data'], message)
def test_workflow(self):
    """
    Build a Workflow step by step and compare it with ``self.workflow``.

    The workflow is assembled from ``self.add_two``, ``self.add_three``
    and ``self.multiply``. Each of its components is checked against the
    corresponding entry of ``self.workflow``, then the whole object is
    compared, and finally both are run on the same inputs and must
    produce equal results.
    """
    # Task Graph 2
    # ==============
    # This is the same task graph from workflow_test.py with minor
    # alterations (specifically the task names).
    #
    #    +                   +
    # {a}|                   | {a}
    #    |                   |
    # +--^----+           +--^----+
    # |       |           |       |
    # |  A3   |           |  A2   |
    # |       |           |       |
    # +---+---+           +---+---+
    # {b} |                   | {b}
    #     |                   |
    #     |    +---------+    |
    #     |    |         |    |
    #     +--->|    M    |<---+
    #    {in1} |         | {in2}
    #          +----+----+
    #               |
    #               | {out}
    #               v

    wf = specs.Workflow()

    wf.add_task(self.add_two, 'a2')
    wf.add_task(self.add_three, 'a3')
    wf.add_task(self.multiply, 'm')

    wf.connect_tasks('a3', 'm', {'b': 'in1'})
    wf.connect_tasks('a2', 'm', {'b': 'in2'})

    # Add default as defined in self.workflow
    wf.set_default('a3.a', {'format': 'number', 'data': 10})

    # Assert that the components are equal and consistent, through both
    # attribute access (wf.inputs) and item access (wf['inputs']).
    component_types = (
        ('inputs', specs.Port),
        ('outputs', specs.Port),
        ('steps', specs.StepSpec),
        ('connections', specs.ConnectionSpec),
    )
    for component, spec_type in component_types:
        self.assertConsistent(getattr(wf, component),
                              self.workflow[component],
                              type_spec=spec_type)
        self.assertConsistent(wf[component],
                              self.workflow[component],
                              type_spec=spec_type)

    # Assert the equality of the Workflow object and the workflow dict.
    # assertEqual is used because the assertEquals alias is deprecated
    # and was removed in Python 3.12.
    self.assertEqual(wf, self.workflow)

    inputs = {'a2.a': {'format': 'json', 'data': '1'},
              'a3.a': {'format': 'number', 'data': 2}}

    ground = run(self.workflow, inputs=inputs)
    system = run(wf, inputs=inputs)

    self.assertEqual(system, ground)