def run(self, step: ContainerStep, env: Dict, rundir: str) -> ExecResult:
    """Run the shell commands of a workflow step synchronously using the
    subprocess package.

    Execution stops as soon as one of the commands exits with a non-zero
    return code. The combined result of all executed commands is returned.

    Parameters
    ----------
    step: flowserv.controller.serial.workflow.ContainerStep
        Step in a serial workflow.
    env: dict, default=None
        Default settings for environment variables when executing workflow
        steps. May be None.
    rundir: string
        Path to the working directory of the workflow run.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    logging.info('run step with subprocess worker')
    # The result object accumulates the STDOUT and STDERR output of every
    # command that gets executed.
    result = ExecResult(step=step)
    # Windows-specific fix. Based on https://github.com/appveyor/ci/issues/1995
    if 'SYSTEMROOT' in os.environ:
        env = dict(env) if env else dict()
        env['SYSTEMROOT'] = os.environ.get('SYSTEMROOT')
    try:
        # Each command in the workflow step is a shell command. It is run
        # via subprocess.run(), which is preferred for capturing output.
        for command in step.commands:
            logging.info('{}'.format(command))
            completed = subprocess.run(
                command,
                cwd=rundir,
                shell=True,
                capture_output=True,
                env=env
            )
            # Collect the captured STDOUT and STDERR output.
            append(result.stdout, completed.stdout.decode('utf-8'))
            append(result.stderr, completed.stderr.decode('utf-8'))
            if completed.returncode != 0:
                # Terminate early if the command failed.
                result.returncode = completed.returncode
                break
    except Exception as ex:
        logging.error(ex)
        strace = '\n'.join(util.stacktrace(ex))
        logging.debug(strace)
        result.stderr.append(strace)
        result.exception = ex
        result.returncode = 1
    return result
def run(self, step: ContainerStep, env: Dict, rundir: str) -> ExecResult:
    """Execute the commands of a workflow step synchronously using the
    Docker engine.

    Execution stops if one of the commands fails. The combined result of
    all executed commands is returned.

    Parameters
    ----------
    step: flowserv.controller.serial.workflow.ContainerStep
        Step in a serial workflow.
    env: dict, default=None
        Default settings for environment variables when executing workflow
        steps. May be None.
    rundir: string
        Path to the working directory of the workflow run that this step
        belongs to.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    logging.info('run step with Docker worker')
    # Delegate execution to the shared docker_run() helper.
    return docker_run(
        image=step.image,
        commands=step.commands,
        env=env,
        rundir=rundir,
        result=ExecResult(step=step)
    )
def exec(self, step: NotebookStep, context: Dict, store: FileSystemStorage) -> ExecResult:
    """Execute a given notebook workflow step in the current workflow
    context.

    The notebook engine expects a file system storage volume that provides
    access to the notebook file and any other additional input files.

    Parameters
    ----------
    step: flowserv.model.workflow.step.NotebookStep
        Notebook step in a serial workflow.
    context: dict
        Dictionary of variables that represent the current workflow state.
    store: flowserv.volume.fs.FileSystemStorage
        Storage volume that contains the workflow run files.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    # Delegate to the execute method of the NotebookEngine, passing the
    # argument values from the workflow context.
    step.exec(context=context, rundir=store.basedir)
    return ExecResult(step=step)
def exec(self, step: CodeStep, context: Dict, store: FileSystemStorage) -> ExecResult:
    """Execute a workflow step of type
    :class:`flowserv.model.workflow.step.CodeStep` in a given context.

    Captures output to STDOUT and STDERR and includes them in the returned
    execution result. Note that the code worker expects a file system
    storage volume.

    Parameters
    ----------
    step: flowserv.model.workflow.step.CodeStep
        Code step in a serial workflow.
    context: dict
        Context for the executed code.
    store: flowserv.volume.fs.FileSystemStorage
        Storage volume that contains the workflow run files.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    result = ExecResult(step=step)
    # Redirect STDOUT and STDERR so that all output produced by the step
    # is captured in the result object.
    orig_out, orig_err = sys.stdout, sys.stderr
    sys.stdout = OutputStream(stream=result.stdout)
    sys.stderr = OutputStream(stream=result.stderr)
    # Switch into the run directory for the duration of the step.
    prev_dir = os.getcwd()
    os.chdir(store.basedir)
    try:
        step.exec(context=context)
    except Exception as ex:
        logging.error(ex, exc_info=True)
        strace = '\n'.join(util.stacktrace(ex))
        logging.debug(strace)
        result.stderr.append(strace)
        result.exception = ex
        result.returncode = 1
    finally:
        # Undo the stream redirection and restore the working directory.
        sys.stdout = orig_out
        sys.stderr = orig_err
        os.chdir(prev_dir)
    return result
def test_error_run_result():
    """Test results of an erroneous workflow run."""
    run_result = RunResult(arguments={})
    run_result.add(
        ExecResult(step=ContainerStep(identifier='s1', image='test'), returncode=0)
    )
    assert run_result.exception is None
    assert run_result.returncode == 0
    # Adding a failed step (with an exception) makes the run erroneous.
    run_result.add(
        ExecResult(
            step=ContainerStep(identifier='s2', image='test'),
            returncode=1,
            stderr=['e1', 'e2'],
            exception=ValueError()
        )
    )
    with pytest.raises(ValueError):
        run_result.raise_for_status()
    assert run_result.exception is not None
    assert run_result.returncode == 1
    assert run_result.stdout == []
    assert run_result.stderr == ['e1', 'e2']
    # A failed step without an exception raises a FlowservError instead.
    run_result = RunResult(arguments={})
    run_result.add(
        ExecResult(
            step=ContainerStep(identifier='s3', image='test'),
            returncode=1,
            stderr=['e1', 'e2']
        )
    )
    with pytest.raises(err.FlowservError):
        run_result.raise_for_status()
def exec(self, step: NotebookStep, context: Dict, store: FileSystemStorage) -> ExecResult:
    """Execute a given notebook workflow step in the current workflow
    context.

    The notebook engine expects a file system storage volume that provides
    access to the notebook file and any other additional input files.

    Parameters
    ----------
    step: flowserv.model.workflow.step.NotebookStep
        Notebook step in a serial workflow.
    context: dict
        Dictionary of variables that represent the current workflow state.
    store: flowserv.volume.fs.FileSystemStorage
        Storage volume that contains the workflow run files.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    result = ExecResult(step=step)
    # Build a Docker image that includes papermill and the requirements
    # of the notebook step. A build failure terminates the step.
    try:
        image, logs = docker_build(name=step.name, requirements=step.requirements)
        if logs:
            result.stdout.append('\n'.join(logs))
    except Exception as ex:
        logging.error(ex, exc_info=True)
        strace = '\n'.join(util.stacktrace(ex))
        logging.debug(strace)
        result.stderr.append(strace)
        result.exception = ex
        result.returncode = 1
        return result
    # Execute the notebook inside a container running the built image.
    cmd = step.cli_command(context=context)
    result.stdout.append(f'run: {cmd}')
    return docker_run(
        image=image,
        commands=[cmd],
        env=self.env,
        rundir=store.basedir,
        result=result
    )
def test_successful_run_result():
    """Test results of a successful workflow run."""
    run_result = RunResult(arguments={'a': 1})
    run_result.add(
        ExecResult(
            step=ContainerStep(identifier='s1', image='test1'),
            returncode=0,
            stdout=['o1']
        )
    )
    run_result.context['a'] = 2
    run_result.add(
        ExecResult(
            step=ContainerStep(identifier='s2', image='test2'),
            returncode=0,
            stdout=['o2', 'o3']
        )
    )
    run_result.context['b'] = 1
    # A successful run has no exception and a zero return code.
    assert run_result.exception is None
    assert run_result.returncode == 0
    run_result.raise_for_status()
    # Outputs are concatenated in step order; context reflects updates.
    assert run_result.stdout == ['o1', 'o2', 'o3']
    assert run_result.stderr == []
    assert run_result.get('a') == 2
    assert run_result.get('b') == 1
    step_result = run_result.steps[0]
    assert step_result.step.image == 'test1'
    assert step_result.stdout == ['o1']
    step_result = run_result.steps[1]
    assert step_result.step.image == 'test2'
    assert step_result.stdout == ['o2', 'o3']
def exec_func(step: FunctionStep, context: Dict, rundir: str) -> ExecResult:
    """Execute a workflow step of type
    :class:`flowserv.model.workflow.step.FunctionStep` in a given context.

    Captures output to STDOUT and STDERR and includes them in the returned
    execution result.

    Parameters
    ----------
    step: flowserv.model.workflow.step.FunctionStep
        Code step in a serial workflow.
    context: dict
        Context for the executed code.
    rundir: string
        Path to the working directory of the workflow run.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    result = ExecResult(step=step)
    out = sys.stdout
    err = sys.stderr
    # Redirect STDOUT and STDERR so that output produced by the executed
    # function is captured in the result object.
    sys.stdout = OutputStream(stream=result.stdout)
    sys.stderr = OutputStream(stream=result.stderr)
    # Change working directory temporarily.
    cwd = os.getcwd()
    os.chdir(rundir)
    try:
        step.exec(context=context)
    except Exception as ex:
        # Include the traceback in the log for consistency with the
        # class-based code worker (exc_info=True).
        logging.error(ex, exc_info=True)
        strace = '\n'.join(util.stacktrace(ex))
        logging.debug(strace)
        result.stderr.append(strace)
        result.exception = ex
        result.returncode = 1
    finally:
        # Make sure to reverse redirection of output streams
        sys.stdout = out
        sys.stderr = err
        # Reset working directory.
        os.chdir(cwd)
    return result
def run(self, step: ContainerStep, env: Dict, rundir: str) -> ExecResult:
    """Execute a list of commands from a workflow steps synchronously using
    the Docker engine.

    Stops execution if one of the commands fails. Returns the combined
    result from all the commands that were executed.

    Parameters
    ----------
    step: flowserv.controller.serial.workflow.ContainerStep
        Step in a serial workflow.
    env: dict, default=None
        Default settings for environment variables when executing workflow
        steps. May be None.
    rundir: string
        Path to the working directory of the workflow run that this step
        belongs to.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    logging.info('run step with Docker worker')
    # Keep output to STDOUT and STDERR for all executed commands in the
    # respective attributes of the returned execution result.
    result = ExecResult(step=step)
    # Setup the workflow environment by obtaining volume information for
    # all directories in the run folder. Each directory is mounted at the
    # container root under its own name.
    volumes = dict()
    for filename in os.listdir(rundir):
        abs_file = os.path.abspath(os.path.join(rundir, filename))
        if os.path.isdir(abs_file):
            volumes[abs_file] = {
                'bind': '/{}'.format(filename),
                'mode': 'rw'
            }
    # Run the individual commands using the local Docker daemon. Import
    # docker package here to avoid errors for installations that do not
    # intend to use Docker and therefore did not install the package.
    import docker
    from docker.errors import ContainerError, ImageNotFound, APIError
    client = docker.from_env()
    try:
        for cmd in step.commands:
            logging.info('{}'.format(cmd))
            logs = client.containers.run(
                image=step.image,
                command=cmd,
                volumes=volumes,
                remove=True,
                environment=env,
                stdout=True
            )
            if logs:
                result.stdout.append(logs.decode('utf-8'))
    except (ContainerError, ImageNotFound, APIError) as ex:
        logging.error(ex)
        strace = '\n'.join(util.stacktrace(ex))
        logging.debug(strace)
        result.stderr.append(strace)
        result.exception = ex
        result.returncode = 1
    finally:
        # Close the Docker client to release its connection resources
        # (previously leaked on every call).
        client.close()
    return result
def docker_run(image: str, commands: List[str], env: Dict, rundir: str, result: ExecResult) -> ExecResult:
    """Helper function that executes a list of commands inside a Docker
    container.

    Parameters
    ----------
    image: string
        Identifier of the Docker image to run.
    commands: list of string
        Commands that are executed inside the Docker container.
    env: dict
        Environment variables for the container run. May be None.
    rundir: string
        Path to the working directory of the workflow run. All
        sub-directories are mounted as read-write volumes at the container
        root.
    result: flowserv.controller.serial.workflow.result.ExecResult
        Result object that will contain the run outputs and status code.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    # Setup the workflow environment by obtaining volume information for
    # all directories in the run folder.
    volumes = dict()
    for filename in os.listdir(rundir):
        abs_file = os.path.abspath(os.path.join(rundir, filename))
        if os.path.isdir(abs_file):
            volumes[abs_file] = {'bind': '/{}'.format(filename), 'mode': 'rw'}
    # Run the individual commands using the local Docker daemon. Import
    # docker package here to avoid errors for installations that do not
    # intend to use Docker and therefore did not install the package.
    import docker
    from docker.errors import ContainerError, ImageNotFound, APIError
    client = docker.from_env()
    try:
        for cmd in commands:
            logging.info('{}'.format(cmd))
            # Run detached container to be able to capture output to
            # both, STDOUT and STDERR. DO NOT auto-remove the container
            # (remove=False) so that the captured outputs can still be
            # read after it has finished.
            container = client.containers.run(
                image=image,
                command=cmd,
                volumes=volumes,
                remove=False,
                environment=env,
                detach=True
            )
            # Wait for container to finish. The returned dictionary will
            # contain the container's exit code ('StatusCode').
            r = container.wait()
            # Add container logs to the standard outputs for the workflow
            # results.
            logs = container.logs()
            if logs:
                result.stdout.append(logs.decode('utf-8'))
            # Remove the container now that its logs have been read.
            container.remove()
            # Check exit code for the container. If the code is not zero
            # an error occurred and we exit the commands loop.
            status_code = r.get('StatusCode')
            if status_code != 0:
                result.returncode = status_code
                break
    except (ContainerError, ImageNotFound, APIError) as ex:
        logging.error(ex, exc_info=True)
        strace = '\n'.join(util.stacktrace(ex))
        logging.debug(strace)
        result.stderr.append(strace)
        result.exception = ex
        result.returncode = 1
    finally:
        # Always release the Docker client connection, even if an
        # unexpected exception type is raised.
        client.close()
    return result