async def _undeploy_node(self, name: Text, job_id: Text):
    """Kill the Occam job `job_id` running on deployment `name` via SSH."""
    kill_command = "occam-kill {job_id}".format(job_id=job_id)
    logger.debug("Executing {command}".format(command=kill_command))
    async with self._get_ssh_client(name) as ssh_client:
        await ssh_client.run(kill_command)
    logger.info("Killed {resource}".format(resource=job_id))
async def _deploy_node(self, name: Text, service: MutableMapping[Text, Any], node: Text):
    """Deploy `service` on the Occam `node` via SSH and record its job id.

    Builds an ``occam-run`` command line from the service options, executes it
    remotely, then parses the assigned job id (``<node>-<number>``) from the
    command's stdout and appends it to ``self.jobs_table[name]``.

    :param name: deployment name, used as key into ``self.jobs_table``
    :param service: service description (image, command, workdir, x11, ...)
    :param node: target Occam node name
    :raises Exception: if no job id can be parsed from the command output
    """
    # Each get_option() fragment is assumed to carry its own trailing spacing
    # (original code concatenated them with no separators) — TODO confirm.
    deploy_command = (
        "{workdir}occam-run {x11}{node}{stdin}{jobidFile}{shmSize}{volumes}{image} {command}"
    ).format(
        workdir="cd {workdir} && ".format(
            workdir=service.get('workdir')) if 'workdir' in service else "",
        x11=self.get_option("x", service.get('x11')),
        node=self.get_option("n", node),
        stdin=self.get_option("i", service.get('stdin')),
        jobidFile=self.get_option("c", service.get('jobidFile')),
        shmSize=self.get_option("s", service.get('shmSize')),
        volumes=self.get_option("v", service.get('volumes')),
        image=service['image'],
        command=" ".join(service.get('command', "")))
    logger.debug("Executing {command}".format(command=deploy_command))
    async with self._get_ssh_client(name) as ssh_client:
        result = await ssh_client.run(deploy_command)
    output = result.stdout
    # NOTE(review): `node` is interpolated into the pattern unescaped — assumes
    # node names contain no regex metacharacters; verify against callers.
    search_result = re.findall('({node}-[0-9]+).*'.format(node=node), output, re.MULTILINE)
    if search_result:
        if name not in self.jobs_table:
            self.jobs_table[name] = []
        self.jobs_table[name].append(search_result[0])
        logger.info("Deployed {name} on {resource}".format(
            name=name, resource=search_result[0]))
    else:
        # Fail with context instead of the original bare `raise Exception`,
        # keeping the exception type callers may already catch.
        raise Exception(
            "Failed to deploy {name} on node {node}: no job id found in output".format(
                name=name, node=node))
async def transfer_data(self,
                        src: Text,
                        src_job: Optional[Job],
                        dst: Text,
                        dst_job: Optional[Job],
                        writable: bool = False):
    """Transfer `src` to `dst`, locating the source among the jobs' resources.

    The source path is probed on each of the source job's resources (or on the
    local file-system when there are none) and transferred from every resource
    that holds it. If the path is found nowhere on the source side, the
    destination resources are searched instead and the first match is used.
    """
    # Resolve connectors and resource lists; None / [] denote the local host.
    src_connector = src_job.step.get_connector() if src_job is not None else None
    src_resources = src_job.get_resources() if src_job is not None else []
    dst_connector = dst_job.step.get_connector() if dst_job is not None else None
    dst_resources = dst_job.get_resources() if dst_job is not None else []
    # Probe every candidate source location; an empty resource list collapses
    # to a single local-file-system probe (resource=None).
    found = False
    for candidate in (src_resources or [None]):
        if await remotepath.exists(src_connector, candidate, src):
            found = True
            await self._transfer_from_resource(
                src_job, candidate, src,
                dst_job, dst_resources, dst,
                writable)
    if not found:
        # Fallback: the path may already live on a destination resource, in
        # which case the destination side acts as the source.
        for dst_resource in dst_resources:
            if await remotepath.exists(dst_connector, dst_resource, src):
                logger.debug("Path {path} found {resource}.".format(
                    path=src,
                    resource="on resource {resource}".format(
                        resource=dst_resource) if dst_resource is not None else "on local file-system"))
                await self._transfer_from_resource(
                    dst_job, dst_resource, src,
                    dst_job, dst_resources, dst,
                    writable)
                break
def _get_command(resource: Text,
                 command: MutableSequence[Text],
                 environment: MutableMapping[Text, Text] = None,
                 workdir: Optional[Text] = None,
                 stdin: Optional[Union[int, Text]] = None,
                 stdout: Union[int, Text] = asyncio.subprocess.STDOUT,
                 stderr: Union[int, Text] = asyncio.subprocess.STDOUT,
                 job_name: Optional[Text] = None,
                 encode: bool = True):
    """Assemble the shell command to run on `resource`, optionally encoded.

    Delegates the actual command construction to ``utils.create_command`` and,
    when `encode` is true, returns the ``utils.encode_command`` form instead of
    the plain string.
    """
    shell_command = utils.create_command(
        command=command,
        environment=environment,
        workdir=workdir,
        stdin=stdin,
        stdout=stdout,
        stderr=stderr)
    job_tag = "for job {job}".format(job=job_name) if job_name else ""
    logger.debug("Executing command {command} on {resource} {job}".format(
        command=shell_command,
        resource=resource,
        job=job_tag))
    if encode:
        return utils.encode_command(shell_command)
    return shell_command
async def _run(
        self,
        resource: Text,
        command: MutableSequence[Text],
        environment: MutableMapping[Text, Text] = None,
        workdir: Optional[Text] = None,
        stdin: Optional[Union[int, Text]] = None,
        stdout: Union[int, Text] = asyncio.subprocess.STDOUT,
        stderr: Union[int, Text] = asyncio.subprocess.STDOUT,
        capture_output: bool = False,
        job_name: Optional[Text] = None,
        encode: bool = True,
        interactive: bool = False,
        stream: bool = False
) -> Union[Optional[Tuple[Optional[Any], int]], asyncio.subprocess.Process]:
    """Execute `command` on `resource` through a local subprocess.

    Returns the raw ``Process`` when `stream` is true, a
    ``(stripped stdout, returncode)`` tuple when `capture_output` is true, and
    ``None`` otherwise (the call just waits for completion).
    """
    cmd = utils.create_command(command, environment, workdir, stdin, stdout, stderr)
    logger.debug("Executing command {command} on {resource} {job}".format(
        command=cmd,
        resource=resource,
        job="for job {job}".format(job=job_name) if job_name else ""))
    if encode:
        cmd = utils.encode_command(cmd)
    run_command = self._get_run_command(cmd, resource, interactive=interactive)
    # Wire the subprocess streams: stdin only when interactive, output pipes
    # only when the caller wants them captured.
    if interactive:
        proc_stdin = asyncio.subprocess.PIPE
    else:
        proc_stdin = None
    if capture_output:
        proc_stdout = proc_stderr = asyncio.subprocess.PIPE
    else:
        proc_stdout = proc_stderr = asyncio.subprocess.DEVNULL
    proc = await asyncio.create_subprocess_exec(
        *shlex.split(run_command),
        stdin=proc_stdin,
        stdout=proc_stdout,
        stderr=proc_stderr)
    if stream:
        return proc
    if capture_output:
        out, _ = await proc.communicate()
        return out.decode().strip(), proc.returncode
    await proc.wait()
async def _run(
        self,
        resource: Text,
        command: MutableSequence[Text],
        environment: MutableMapping[Text, Text] = None,
        workdir: Optional[Text] = None,
        stdin: Optional[Union[int, Text]] = None,
        stdout: Union[int, Text] = asyncio.subprocess.STDOUT,
        stderr: Union[int, Text] = asyncio.subprocess.STDOUT,
        job_name: Optional[Text] = None,
        capture_output: bool = False,
        encode: bool = True,
        interactive: bool = False,
        stream: bool = False
) -> Union[Optional[Tuple[Optional[Any], int]], asyncio.subprocess.Process]:
    """Run `command`, either as a batch-scheduler job (when `job_name` is
    given) or by delegating to the parent connector's `_run`.

    In the batch path the method submits a helper script, polls the scheduler
    until the job id disappears from the running set, then returns an
    ``(output, returncode)`` tuple.
    """
    # TODO: find a smarter way to identify detachable jobs when implementing stacked connectors
    if job_name:
        command = utils.create_command(command=command, environment=environment, workdir=workdir)
        logger.debug(
            "Executing command {command} on {resource} {job}".format(
                command=command,
                resource=resource,
                job="for job {job}".format(
                    job=job_name) if job_name else ""))
        # Wrap the command into a helper script that the scheduler can submit.
        helper_file = await self._build_helper_file(
            command, resource, environment, workdir)
        job_id = await self._run_batch_command(
            helper_file=helper_file,
            job_name=job_name,
            resource=resource,
            workdir=workdir,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr)
        logger.info("Scheduled job {job} with job id {job_id}".format(
            job=job_name, job_id=job_id))
        self.scheduledJobs.append(job_id)
        # Invalidate the cache so the freshly submitted job is visible to the
        # first poll below.
        self.jobsCache.clear()
        # Poll the scheduler until the job leaves the running set; the lock
        # serializes concurrent cache refreshes across coroutines.
        while True:
            async with self.jobsCacheLock:
                running_jobs = await self._get_running_jobs(resource)
            if job_id not in running_jobs:
                break
            await asyncio.sleep(self.pollingInterval)
        self.scheduledJobs.remove(job_id)
        # Output is fetched only when stdout was left at the STDOUT sentinel;
        # otherwise it went to a caller-specified destination.
        return (await self._get_output(job_id, resource) if stdout == STDOUT else None,
                await self._get_returncode(job_id, resource))
    else:
        # No job name: treat as a plain (non-detachable) command and defer to
        # the base connector implementation.
        return await super()._run(
            resource=resource,
            command=command,
            environment=environment,
            workdir=workdir,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
            job_name=job_name,
            capture_output=capture_output,
            encode=encode,
            interactive=interactive,
            stream=stream)
async def execute(self, job: Job) -> CWLCommandOutput:
    """Execute `job`'s command either locally (no step target) or on a remote
    resource through the step's connector, and map the exit code to a status.

    :param job: the job carrying inputs, directories and resources
    :return: a ``CWLCommandOutput`` with the captured output, a
        COMPLETED/FAILED status and the process exit code
    """
    context = utils.build_context(job)
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Job {job} inputs: {inputs}".format(
            job=job.name,
            inputs=json.dumps(context['inputs'], indent=4, sort_keys=True)))
    if self.initial_work_dir is not None:
        await self._prepare_work_dir(job, context, self.initial_work_dir)
    cmd = self._get_executable_command(context)
    # Evaluate every declared environment variable as a CWL expression.
    parsed_env = {
        k: str(
            eval_expression(expression=v,
                            context=context,
                            full_js=self.full_js,
                            expression_lib=self.expression_lib))
        for (k, v) in self.environment.items()
    }
    if 'HOME' not in parsed_env:
        parsed_env['HOME'] = job.output_directory
    if 'TMPDIR' not in parsed_env:
        parsed_env['TMPDIR'] = job.tmp_directory
    if self.step.target is None:
        # --- Local execution path ---
        if self.is_shell_command:
            cmd = ["/bin/sh", "-c", " ".join(cmd)]
        # Open streams.
        # NOTE(review): stdout falls back to sys.stderr (not sys.stdout) when
        # no stdout target is configured — looks deliberate, since the close
        # logic below also compares stdout against sys.stderr; confirm intent.
        stderr = self._get_stream(job, context, self.stderr, sys.stderr)
        stdin = self._get_stream(job, context, self.stdin, sys.stdin, is_input=True)
        stdout = self._get_stream(job, context, self.stdout, sys.stderr)
        # Execute command
        logger.info(
            'Executing job {job} into directory {outdir}: \n{command}'.format(
                job=job.name,
                outdir=job.output_directory,
                command=' \\\n\t'.join(cmd)))
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=job.output_directory,
            env=parsed_env,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr)
        # `error` (the stderr capture) is currently unused.
        result, error = await asyncio.wait_for(
            proc.communicate(), self._get_timeout(job))
        exit_code = proc.returncode
        # Close streams (only those opened by _get_stream, never the
        # process-wide standard streams).
        if stdin is not sys.stdin:
            stdin.close()
        if stdout is not sys.stderr:
            stdout.close()
        if stderr is not sys.stderr:
            stderr.close()
    else:
        # --- Remote execution path ---
        connector = self.step.get_connector()
        resources = job.get_resources()
        logger.info(
            'Executing job {job} on resource {resource} into directory {outdir}:\n{command}'.format(
                job=job.name,
                resource=resources[0] if resources else None,
                outdir=job.output_directory,
                command=' \\\n\t'.join([
                    "/bin/sh", "-c", "\"{cmd}\"".format(cmd=" ".join(cmd))
                ] if self.is_shell_command else cmd)))
        if self.is_shell_command:
            # Base64-encode the shell command so quoting survives the remote
            # shell; it is decoded on the remote side before evaluation.
            cmd = [
                "/bin/sh", "-c",
                "\"$(echo {command} | base64 -d)\"".format(
                    command=base64.b64encode(" ".join(cmd).encode(
                        'utf-8')).decode('utf-8'))
            ]
        # If step is assigned to multiple resources, add the STREAMFLOW_HOSTS environment variable
        if len(resources) > 1:
            available_resources = await connector.get_available_resources(
                self.step.target.service)
            hosts = {
                k: v.hostname
                for k, v in available_resources.items() if k in resources
            }
            parsed_env['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
        # Process streams: stdout/stderr fall back to the STDOUT sentinel
        # (stderr additionally falls back to whatever stdout resolved to).
        stdin = eval_expression(expression=self.stdin,
                                context=context,
                                full_js=self.full_js,
                                expression_lib=self.expression_lib)
        stdout = eval_expression(expression=self.stdout,
                                 context=context,
                                 full_js=self.full_js,
                                 expression_lib=self.expression_lib
                                 ) if self.stdout is not None else STDOUT
        stderr = eval_expression(expression=self.stderr,
                                 context=context,
                                 full_js=self.full_js,
                                 expression_lib=self.expression_lib
                                 ) if self.stderr is not None else stdout
        # Execute remote command; the job runs on the first assigned resource.
        result, exit_code = await asyncio.wait_for(
            connector.run(resources[0] if resources else None,
                          cmd,
                          environment=parsed_env,
                          workdir=job.output_directory,
                          stdin=stdin,
                          stdout=stdout,
                          stderr=stderr,
                          capture_output=True,
                          job_name=job.name),
            self._get_timeout(job))
    # Handle exit codes: explicit failure codes win over success codes, and a
    # zero exit code always counts as success.
    if self.failure_codes is not None and exit_code in self.failure_codes:
        status = Status.FAILED
    elif (self.success_codes is not None and exit_code in self.success_codes) or exit_code == 0:
        status = Status.COMPLETED
        if result:
            logger.info(result)
    else:
        status = Status.FAILED
    return CWLCommandOutput(value=result, status=status, exit_code=exit_code)