def create_task_msg(self):
    input_parameters = self.get_inputs()
    output_parameters = []

    if self.stdout is not None:
        parameter = tes.Output(
            name="stdout",
            url=self.output2url(self.stdout),
            path=self.output2path(self.stdout)
        )
        output_parameters.append(parameter)

    if self.stderr is not None:
        parameter = tes.Output(
            name="stderr",
            url=self.output2url(self.stderr),
            path=self.output2path(self.stderr)
        )
        output_parameters.append(parameter)

    output_parameters.append(
        tes.Output(
            name="workdir",
            url=self.output2url(""),
            path=self.builder.outdir,
            type="DIRECTORY"
        )
    )

    container = self.get_container()

    res_reqs = self.builder.resources
    # CWL expresses ram and disk in mebibytes; TES expects gigabytes.
    # 1 GB == 953.674 MiB, hence the conversion factor.
    ram = res_reqs['ram'] / 953.674
    disk = (res_reqs['outdirSize'] + res_reqs['tmpdirSize']) / 953.674
    cpus = res_reqs['cores']

    docker_req, _ = self.get_requirement("DockerRequirement")
    if docker_req and hasattr(docker_req, "dockerOutputDirectory"):
        output_parameters.append(
            tes.Output(
                name="dockerOutputDirectory",
                url=self.output2url(""),
                path=docker_req.dockerOutputDirectory,
                type="DIRECTORY"
            )
        )

    create_body = tes.Task(
        name=self.name,
        description=self.spec.get("doc", ""),
        executors=[
            tes.Executor(
                command=self.command_line,
                image=container,
                workdir=self.builder.outdir,
                stdout=self.output2path(self.stdout),
                stderr=self.output2path(self.stderr),
                stdin=self.stdin,
                env=self.get_envvars()
            )
        ],
        inputs=input_parameters,
        outputs=output_parameters,
        resources=tes.Resources(
            cpu_cores=cpus,
            ram_gb=ram,
            disk_gb=disk
        ),
        tags={"CWLDocumentId": self.spec.get("id")}
    )
    return create_body
def _create_task_msg(self, input_cp_url, output_cp_url):
    runner = pkg_resources.resource_string(__name__, "resources/runner.py")
    if isinstance(runner, bytes):
        runner = runner.decode("utf8")

    cmd_install_reqs = "pip install %s" % (" ".join(self.libraries))
    cmd_tesseract = "python tesseract.py func.pickle"
    if len(self.libraries) == 0:
        cmd = cmd_tesseract
    else:
        cmd = cmd_install_reqs + " && " + cmd_tesseract

    task = tes.Task(
        name="tesseract remote execution",
        inputs=self.input_files + [
            tes.Input(
                name="pickled function",
                url=input_cp_url,
                path="/tmp/tesseract/func.pickle",
                type="FILE"
            ),
            tes.Input(
                name="tesseract runner script",
                path="/tmp/tesseract/tesseract.py",
                type="FILE",
                content=str(runner)
            )
        ],
        outputs=self.output_files + [
            tes.Output(
                name="pickled result",
                url=output_cp_url,
                path="/tmp/tesseract/result.pickle",
                type="FILE"
            )
        ],
        resources=tes.Resources(
            cpu_cores=self.cpu_cores,
            ram_gb=self.ram_gb,
            disk_gb=self.disk_gb
        ),
        executors=[
            tes.Executor(
                image=self.docker,
                command=["sh", "-c", cmd],
                stdout="/tmp/tesseract/stdout",
                stderr="/tmp/tesseract/stderr",
                workdir="/tmp/tesseract"
            )
        ]
    )
    return task
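A minimal sketch of how a task message built this way might be submitted and awaited with the py-tes client; the endpoint URL and timeout below are placeholders for illustration, not part of the original module.

# Hypothetical usage -- "http://funnel.example.com:8000" is a placeholder endpoint.
import tes

client = tes.HTTPClient("http://funnel.example.com:8000", timeout=10)
task_id = client.create_task(task)   # `task` as returned by _create_task_msg
client.wait(task_id)                 # blocks until the task reaches a terminal state
result = client.get_task(task_id, view="FULL")
print(result.state)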
def _get_task(self, job, jobscript):
    import tes

    checkdir, _ = os.path.split(self.snakefile)

    task = {}
    task["name"] = job.format_wildcards(self.jobname)
    task["description"] = self._get_task_description(job)
    task["inputs"] = self._get_task_inputs(job, jobscript, checkdir)
    task["outputs"] = self._get_task_outputs(job, checkdir)
    task["executors"] = self._get_task_executors()
    task["resources"] = tes.models.Resources()

    # define resources; tes.models.Resources is an attrs class, so fields
    # are set via attribute access rather than item assignment
    if "_cores" in job.resources:
        task["resources"].cpu_cores = job.resources["_cores"]
    if "mem_mb" in job.resources:
        task["resources"].ram_gb = job.resources["mem_mb"] / 1000
    if "disk_mb" in job.resources:
        task["resources"].disk_gb = job.resources["disk_mb"] / 1000

    tes_task = tes.Task(**task)
    logger.debug("[TES] Built task: {task}".format(task=tes_task))
    return tes_task
def create_task_msg(self):
    input_parameters = self.collect_input_parameters()
    output_parameters = []

    if self.stdout is not None:
        parameter = tes.Output(
            name="stdout",
            url=self.output2url(self.stdout),
            path=self.output2path(self.stdout)
        )
        output_parameters.append(parameter)

    if self.stderr is not None:
        parameter = tes.Output(
            name="stderr",
            url=self.output2url(self.stderr),
            path=self.output2path(self.stderr)
        )
        output_parameters.append(parameter)

    output_parameters.append(
        tes.Output(
            name="workdir",
            url=self.output2url(""),
            path=self.docker_workdir,
            type="DIRECTORY"
        )
    )

    container = self.find_docker_requirement()

    cpus = None
    ram = None
    disk = None
    for i in self.requirements:
        if i.get("class", "NA") == "ResourceRequirement":
            cpus = i.get("coresMin", i.get("coresMax", None))
            # CWL ram/disk hints are in mebibytes; TES expects gigabytes
            # (1 GB == 953.674 MiB).
            ram = i.get("ramMin", i.get("ramMax", None))
            ram = ram / 953.674 if ram is not None else None
            disk = i.get("outdirMin", i.get("outdirMax", None))
            disk = disk / 953.674 if disk is not None else None
        elif i.get("class", "NA") == "DockerRequirement":
            if i.get("dockerOutputDirectory", None) is not None:
                output_parameters.append(
                    tes.Output(
                        name="dockerOutputDirectory",
                        url=self.output2url(""),
                        path=i.get("dockerOutputDirectory"),
                        type="DIRECTORY"
                    )
                )

    create_body = tes.Task(
        name=self.name,
        description=self.spec.get("doc", ""),
        executors=[
            tes.Executor(
                command=self.command_line,
                image=container,
                workdir=self.docker_workdir,
                stdout=self.output2path(self.stdout),
                stderr=self.output2path(self.stderr),
                stdin=self.stdin,
                env=self.environment
            )
        ],
        inputs=input_parameters,
        outputs=output_parameters,
        resources=tes.Resources(
            cpu_cores=cpus,
            ram_gb=ram,
            disk_gb=disk
        ),
        tags={"CWLDocumentId": self.spec.get("id")}
    )
    return create_body
def _send_task(
    tes_uris: List[str],
    request: Dict,
    token: str,
    timeout: float = 5
) -> Tuple[str, str]:
    """Send task to TES instance."""
    # Process/sanitize request for use with py-tes
    time_now = datetime.now().strftime("%m-%d-%Y %H:%M:%S")
    if 'creation_time' not in request:
        request['creation_time'] = parse_time(time_now)
    if 'inputs' in request:
        request['inputs'] = [
            tes.models.Input(**input) for input in request['inputs']
        ]
    if 'outputs' in request:
        request['outputs'] = [
            tes.models.Output(**output) for output in request['outputs']
        ]
    if 'resources' in request:
        request['resources'] = tes.models.Resources(**request['resources'])
    if 'executors' in request:
        request['executors'] = [
            tes.models.Executor(**executor) for executor in request['executors']
        ]
    if 'logs' in request:
        for log in request['logs']:
            log['start_time'] = time_now
            log['end_time'] = time_now
            if 'logs' in log:
                for inner_log in log['logs']:
                    inner_log['start_time'] = time_now
                    inner_log['end_time'] = time_now
                log['logs'] = [
                    tes.models.ExecutorLog(**inner_log)
                    for inner_log in log['logs']
                ]
            if 'outputs' in log:
                for output in log['outputs']:
                    output['size_bytes'] = 0
                log['outputs'] = [
                    tes.models.OutputFileLog(**output)
                    for output in log['outputs']
                ]
            if 'system_logs' in log:
                log['system_logs'] = [
                    tes.models.SystemLog(**system_log)
                    for system_log in log['system_logs']
                ]
        request['logs'] = [
            tes.models.TaskLog(**log) for log in request['logs']
        ]

    # Create Task object
    try:
        task = tes.Task(**request)
    except Exception as e:
        logger.error(
            (
                "Task object could not be created. Original error message: "
                "{type}: {msg}"
            ).format(
                type=type(e).__name__,
                msg=e,
            )
        )
        raise BadRequest

    # Iterate over known TES URIs
    for tes_uri in tes_uris:
        # Try to submit task to TES instance
        try:
            cli = tes.HTTPClient(tes_uri, timeout=timeout)
            task_id = cli.create_task(task)
        # Issue warning and try next TES instance if task submission failed
        except Exception as e:
            logger.warning(
                (
                    "Task could not be submitted to TES instance '{tes_uri}'. "
                    'Trying next TES instance in list. Original error '
                    "message: {type}: {msg}"
                ).format(
                    tes_uri=tes_uri,
                    type=type(e).__name__,
                    msg=e,
                )
            )
            continue
        # Return task ID and URL of TES instance
        return (task_id, tes_uri)

    # Log error if no suitable TES instance was found
    raise ConnectionError(
        'Task could not be submitted to any known TES instance.'
    )
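For illustration, a minimal request payload that this sanitizer would accept might look like the following; the field values and the TES URI are placeholders, not taken from the original code.

# Hypothetical minimal request -- all values are placeholders.
request = {
    "name": "md5sum example",
    "executors": [
        {
            "image": "alpine:latest",
            "command": ["md5sum", "/data/input.txt"],
        }
    ],
    "inputs": [
        {"url": "s3://bucket/input.txt", "path": "/data/input.txt"}
    ],
    "resources": {"cpu_cores": 1, "ram_gb": 1},
}
task_id, tes_uri = _send_task(
    tes_uris=["https://tes.example.org"],
    request=request,
    token="<jwt>",
)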
def issueBatchJob(self, job_desc: JobDescription, job_environment: Optional[Dict[str, str]] = None) -> int:
    # TODO: get a sensible self.maxCores, etc. so we can checkResourceRequest.
    # How do we know if the cluster will autoscale?

    # Try the job as local
    local_id = self.handleLocalJob(job_desc)
    if local_id is not None:
        # It is a local job
        return local_id
    else:
        # We actually want to send to the cluster

        # Check resource requirements (managed by BatchSystemSupport)
        self.checkResourceRequest(job_desc.memory, job_desc.cores, job_desc.disk)

        # Make a batch system scope job ID
        bs_id = self.getNextJobID()
        # Make a vaguely human-readable name.
        # TES does not require it to be unique.
        # We could add a per-workflow prefix to use with ListTasks, but
        # ListTasks doesn't let us filter for newly done tasks, so it's not
        # actually useful for us over polling each task.
        job_name = str(job_desc)

        # Launch the job on TES

        # Determine job environment
        environment = self.environment.copy()
        if job_environment:
            environment.update(job_environment)
        if 'TOIL_WORKDIR' not in environment:
            # The appliance container defaults TOIL_WORKDIR to
            # /var/lib/toil, but TES doesn't (always?) give us a writable
            # /, so we need to use the writable space in /tmp by default
            # instead when running on TES.
            environment['TOIL_WORKDIR'] = '/tmp'

        # Make a command to run it in the executor
        command_list = pack_job(job_desc, self.user_script)

        # Make the sequence of TES containers ("executors") to run.
        # We just run one which is the Toil executor to grab the user
        # script and do the job.
        task_executors = [
            tes.Executor(
                image=self.docker_image,
                command=command_list,
                env=environment
            )
        ]

        # Prepare inputs.
        task_inputs = list(self.mounts)
        # If we had any per-job input files they would come in here.

        # Prepare resource requirements
        task_resources = tes.Resources(
            cpu_cores=math.ceil(job_desc.cores),
            ram_gb=job_desc.memory / (1024**3),
            disk_gb=job_desc.disk / (1024**3),
            # TODO: py-tes spells this differently than Toil
            preemptible=job_desc.preemptable
        )

        # Package into a TES Task
        task = tes.Task(
            name=job_name,
            executors=task_executors,
            inputs=task_inputs,
            resources=task_resources
        )

        # Launch it and get back the TES ID that we can use to poll the task
        tes_id = self.tes.create_task(task)

        # Tie it to the numeric ID
        self.bs_id_to_tes_id[bs_id] = tes_id
        self.tes_id_to_bs_id[tes_id] = bs_id

        logger.debug('Launched job: %s', job_name)

        return bs_id
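The comments above favor polling each task over ListTasks; a minimal sketch of what such a per-task poll could look like, reusing the `self.tes` client and the ID maps from issueBatchJob. The method name and state handling are hypothetical, not Toil's actual implementation.

# Hypothetical polling helper -- not Toil's actual implementation.
def _get_task_state(self, bs_id: int) -> str:
    tes_id = self.bs_id_to_tes_id[bs_id]
    # The MINIMAL view returns only the task ID and state, which is all
    # a polling loop needs.
    task = self.tes.get_task(tes_id, view="MINIMAL")
    return task.state  # e.g. QUEUED, RUNNING, COMPLETE, EXECUTOR_ERROR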
def run_workflow(
    config: Dict,
    body: Dict,
    sender: str,
    *args,
    **kwargs
) -> Dict:
    """Relays task to best TES instance; returns universally unique task ID."""
    # Get config parameters
    authorization_required = get_conf(
        config,
        'security',
        'authorization_required'
    )
    endpoint_params = get_conf_type(
        config,
        'tes',
        'endpoint_params',
        types=(list),
    )
    security_params = get_conf_type(
        config,
        'security',
        'jwt',
    )
    remote_urls = get_conf_type(
        config,
        'tes',
        'service-list',
        types=(list),
    )

    # Get associated workflow run
    # TODO: get run_id, task_id and user_id

    # Set initial task state
    # TODO:

    # Set access token
    if authorization_required:
        try:
            access_token = request_access_token(
                user_id=document['user_id'],
                token_endpoint=endpoint_params['token_endpoint'],
                timeout=endpoint_params['timeout_token_request'],
            )
            validate_token(
                token=access_token,
                key=security_params['public_key'],
                identity_claim=security_params['identity_claim'],
            )
        except Exception as e:
            logger.exception(
                (
                    'Could not get access token from token endpoint '
                    "'{token_endpoint}'. Original error message {type}: "
                    '{msg}'
                ).format(
                    token_endpoint=endpoint_params['token_endpoint'],
                    type=type(e).__name__,
                    msg=e,
                )
            )
            raise Forbidden
    else:
        access_token = None

    # Order TES instances by priority
    testribute = TEStribute_Interface()
    remote_urls_ordered = testribute.order_endpoint_list(
        tes_json=body,
        endpoints=remote_urls,
        access_token=access_token,
        method=endpoint_params['tes_distribution_method'],
    )

    # Send task to best TES instance
    try:
        remote_id, remote_url = __send_task(
            urls=remote_urls_ordered,
            body=body,
            access_token=access_token,
            timeout=endpoint_params['timeout_tes_submission'],
        )
    except Exception as e:
        logger.exception('{type}: {msg}'.format(
            type=type(e).__name__,
            msg=e,
        ))
        raise InternalServerError

    # Poll TES instance for state updates
    __initiate_state_polling(
        task_id=remote_id,
        run_id=document['run_id'],
        url=remote_url,
        interval_polling=endpoint_params['interval_polling'],
        timeout_polling=endpoint_params['timeout_polling'],
        max_time_polling=endpoint_params['max_time_polling'],
    )

    # Generate universally unique ID
    local_id = __amend_task_id(
        remote_id=remote_id,
        remote_url=remote_url,
        separator=endpoint_params['id_separator'],
        encoding=endpoint_params['id_encoding'],
    )

    # Format and return response
    response = {'id': local_id}
    return response


def request_access_token(
    user_id: str,
    token_endpoint: str,
    timeout: int = 5
) -> str:
    """Get access token from token endpoint."""
    try:
        response = post(
            token_endpoint,
            data={'user_id': user_id},
            timeout=timeout
        )
    except Exception:
        raise
    if response.status_code != 200:
        raise ConnectionError(
            (
                "Could not access token endpoint '{endpoint}'. Received "
                "status code '{code}'."
            ).format(
                endpoint=token_endpoint,
                code=response.status_code
            )
        )
    return response.json()['access_token']


def validate_token(
    token: str,
    key: str,
    identity_claim: str,
) -> None:
    """Validate JWT access token."""
    # Decode token
    try:
        token_data = decode(
            jwt=token,
            key=get_conf(
                current_app.config,
                'security',
                'jwt',
                'public_key'
            ),
            algorithms=get_conf(
                current_app.config,
                'security',
                'jwt',
                'algorithm'
            ),
            verify=True,
        )
    except Exception as e:
        raise ValueError(
            (
                'Authentication token could not be decoded. Original '
                'error message: {type}: {msg}'
            ).format(
                type=type(e).__name__,
                msg=e,
            )
        )

    # Validate claims
    identity_claim = get_conf(
        current_app.config,
        'security',
        'jwt',
        'identity_claim'
    )
    validate_claims(
        token_data=token_data,
        required_claims=[identity_claim],
    )


def __send_task(
    urls: List[str],
    body: Dict,
    access_token: Optional[str] = None,
    timeout: int = 5
) -> Tuple[str, str]:
    """Send task to TES instance."""
    task = tes.Task(body)  # TODO: implement this properly
    for url in urls:
        # Try to submit task to TES instance
        try:
            cli = tes.HTTPClient(url, timeout=timeout)
            task_id = cli.create_task(task)  # TODO: fix problem with marshaling
        # Issue warning and try next TES instance if task submission failed
        except Exception as e:
            logger.warning(
                (
                    "Task could not be submitted to TES instance '{url}'. "
                    'Trying next TES instance in list. Original error '
                    "message: {type}: {msg}"
                ).format(
                    url=url,
                    type=type(e).__name__,
                    msg=e,
                )
            )
            continue
        # Return task ID and URL of TES instance
        return (task_id, url)

    # Log error if no suitable TES instance was found
    raise ConnectionError(
        'Task could not be submitted to any known TES instance.'
    )


def __initiate_state_polling(
    task_id: str,
    run_id: str,
    url: str,
    interval_polling: int = 2,
    timeout_polling: int = 1,
    max_time_polling: Optional[int] = None
) -> None:
    """Initiate polling of TES instance for task state."""
    celery_id = uuid()
    logger.debug(
        (
            "Starting polling of TES task '{task_id}' in "
            "background task '{celery_id}'..."
        ).format(
            task_id=task_id,
            celery_id=celery_id,
        )
    )
    task__poll_task_state.apply_async(
        None,
        {
            'task_id': task_id,
            'run_id': run_id,
            'url': url,
            'interval': interval_polling,
            'timeout': timeout_polling,
        },
        task_id=celery_id,
        soft_time_limit=max_time_polling,
    )
    return None


def __amend_task_id(
    remote_id: str,
    remote_url: str,
    separator: str = '@',  # TODO: add to config
    encoding: str = 'utf-8'  # TODO: add to config
) -> str:
    """Appends base64-encoded TES instance URL to remote task ID."""
    # b64encode returns bytes; decode so the join produces a str
    append = base64.b64encode(remote_url.encode(encoding)).decode(encoding)
    return separator.join([remote_id, append])
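For completeness, a minimal sketch of the inverse operation, splitting a local ID back into the remote task ID and TES instance URL; this helper is hypothetical and not part of the original module.

# Hypothetical inverse of __amend_task_id -- not in the original module.
def __split_task_id(
    local_id: str,
    separator: str = '@',
    encoding: str = 'utf-8'
) -> Tuple[str, str]:
    """Splits amended task ID into remote task ID and TES instance URL."""
    # rsplit in case the remote ID itself contains the separator;
    # the base64 alphabet never does.
    remote_id, append = local_id.rsplit(separator, 1)
    remote_url = base64.b64decode(append.encode(encoding)).decode(encoding)
    return (remote_id, remote_url)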