def write_function_dockercall(self, job):
    '''
    Writes a string containing the apiDockerCall() that will run the job.

    :param job: The name of the job/task calling docker; the corresponding
                docker image (e.g. "ubuntu:latest") is looked up from the
                tasks dictionary.
    :return: A string containing the apiDockerCall() that will run the job.
    '''
    docker_dict = {"docker_image": self.tasks_dictionary[job]['runtime']['docker'],
                   "job_task_reference": job,
                   "docker_user": str(self.docker_user)}
    docker_template = heredoc_wdl('''
        stdout = apiDockerCall(self,
                               image={docker_image},
                               working_dir=tempDir,
                               parameters=["/root/{job_task_reference}_script.sh"],
                               entrypoint="/bin/bash",
                               user={docker_user},
                               volumes={{tempDir: {{"bind": "/root"}}}})
        ''', docker_dict, indent='        ')[1:]
    return docker_template
def write_function_dockercall(self, job):
    '''
    Writes a string containing the apiDockerCall() that will run the job.

    :param job: The name of the job/task calling docker; the corresponding
                docker image (e.g. "ubuntu:latest") is looked up from the
                tasks dictionary.
    :return: A string containing the apiDockerCall() that will run the job.
    '''
    docker_dict = {"docker_image": self.tasks_dictionary[job]['runtime']['docker'],
                   "job_task_reference": job,
                   "docker_user": str(self.docker_user)}
    docker_template = heredoc_wdl('''
        stdout = apiDockerCall(self,
                               image={docker_image},
                               working_dir=tempDir,
                               parameters=[os.path.join(tempDir, "{job_task_reference}_script.sh")],
                               entrypoint="/bin/bash",
                               user={docker_user},
                               stderr=True,
                               volumes={{tempDir: {{"bind": tempDir}}}})
        writetype = 'wb' if isinstance(stdout, bytes) else 'w'
        with open(os.path.join(asldijoiu23r8u34q89fho934t8u34fcurrentworkingdir, '{job_task_reference}.log'), writetype) as f:
            f.write(stdout)
        ''', docker_dict, indent='        ')[1:]
    return docker_template
def write_scatterfunction_header(self, scattername):
    """
    Writes the header of the Toil job class that will run a WDL scatter
    block, including its __init__() and the start of its run() method.

    :return: A string representing this.
    """
    scatter_inputs = self.fetch_scatter_inputs(scattername)

    fn_section = '\n\nclass {jobname}Cls(Job):\n'.format(jobname=scattername)
    fn_section += '    def __init__(self, '
    for input in scatter_inputs:
        fn_section += '{input}=None, '.format(input=input)
    fn_section += '*args, **kwargs):\n'
    fn_section += '        Job.__init__(self)\n\n'

    for input in scatter_inputs:
        fn_section += '        self.id_{input} = {input}\n'.format(input=input)

    fn_section += heredoc_wdl('''
        def run(self, fileStore):
            fileStore.logToMaster("{jobname}")
            tempDir = fileStore.getLocalTempDir()

            try:
                os.makedirs(os.path.join(tempDir, 'execution'))
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        ''', {'jobname': scattername}, indent='    ')[1:]

    for input in scatter_inputs:
        fn_section += '        {input} = self.id_{input}\n'.format(input=input)

    return fn_section
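# For illustration only: given a hypothetical scatter named "scatter0" with a
# single gathered input "sample" (names invented here, not from the source),
# the method above would emit roughly:
#
#     class scatter0Cls(Job):
#         def __init__(self, sample=None, *args, **kwargs):
#             Job.__init__(self)
#
#             self.id_sample = sample
#
#         def run(self, fileStore):
#             fileStore.logToMaster("scatter0")
#             tempDir = fileStore.getLocalTempDir()
#
#             try:
#                 os.makedirs(os.path.join(tempDir, 'execution'))
#             except OSError as e:
#                 if e.errno != errno.EEXIST:
#                     raise
#             sample = self.id_sample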
def write_function_dockercall(self, job):
    '''
    Writes a string containing the apiDockerCall() that will run the job.

    :param job: The name of the job/task calling docker; the corresponding
                docker image (e.g. "ubuntu:latest") is looked up from the
                tasks dictionary.
    :return: A string containing the apiDockerCall() that will run the job.
    '''
    docker_dict = {"docker_image": self.tasks_dictionary[job]['runtime']['docker'],
                   "job_task_reference": job,
                   "docker_user": str(self.docker_user)}
    docker_template = heredoc_wdl('''
        # apiDockerCall() with demux=True returns a tuple of bytes objects (stdout, stderr).
        _toil_wdl_internal__stdout, _toil_wdl_internal__stderr = \\
            apiDockerCall(self,
                          image={docker_image},
                          working_dir=tempDir,
                          parameters=[os.path.join(tempDir, "{job_task_reference}_script.sh")],
                          entrypoint="/bin/bash",
                          user={docker_user},
                          stderr=True,
                          demux=True,
                          volumes={{tempDir: {{"bind": tempDir}}}})

        with open(os.path.join(_toil_wdl_internal__current_working_dir, '{job_task_reference}.log'), 'wb') as f:
            if _toil_wdl_internal__stdout:
                f.write(_toil_wdl_internal__stdout)
            if _toil_wdl_internal__stderr:
                f.write(_toil_wdl_internal__stderr)
        ''', docker_dict, indent='        ')[1:]
    return docker_template
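# A sketch of the snippet this final version emits for a hypothetical task
# "helloWorld" (the image and user values are illustrative; the real values
# come from the task's runtime section and self.docker_user):
#
#     _toil_wdl_internal__stdout, _toil_wdl_internal__stderr = \
#         apiDockerCall(self,
#                       image="ubuntu:latest",
#                       working_dir=tempDir,
#                       parameters=[os.path.join(tempDir, "helloWorld_script.sh")],
#                       entrypoint="/bin/bash",
#                       user="root",
#                       stderr=True,
#                       demux=True,
#                       volumes={tempDir: {"bind": tempDir}})
#
#     with open(os.path.join(_toil_wdl_internal__current_working_dir, 'helloWorld.log'), 'wb') as f:
#         if _toil_wdl_internal__stdout:
#             f.write(_toil_wdl_internal__stdout)
#         if _toil_wdl_internal__stderr:
#             f.write(_toil_wdl_internal__stderr)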
def write_modules(self):
    # string used to write imports to the file
    module_string = heredoc_wdl('''
        from toil.job import Job
        from toil.common import Toil
        from toil.lib.docker import apiDockerCall
        from toil.wdl.wdl_functions import generate_docker_bashscript_file
        from toil.wdl.wdl_functions import generate_stdout_file
        from toil.wdl.wdl_functions import select_first
        from toil.wdl.wdl_functions import sub
        from toil.wdl.wdl_functions import size
        from toil.wdl.wdl_functions import glob
        from toil.wdl.wdl_functions import process_and_read_file
        from toil.wdl.wdl_functions import process_infile
        from toil.wdl.wdl_functions import process_outfile
        from toil.wdl.wdl_functions import abspath_file
        from toil.wdl.wdl_functions import combine_dicts
        from toil.wdl.wdl_functions import parse_memory
        from toil.wdl.wdl_functions import parse_cores
        from toil.wdl.wdl_functions import parse_disk
        from toil.wdl.wdl_functions import read_lines
        from toil.wdl.wdl_functions import read_tsv
        from toil.wdl.wdl_functions import read_csv
        from toil.wdl.wdl_functions import read_json
        from toil.wdl.wdl_functions import read_map
        from toil.wdl.wdl_functions import read_int
        from toil.wdl.wdl_functions import read_string
        from toil.wdl.wdl_functions import read_float
        from toil.wdl.wdl_functions import read_boolean
        from toil.wdl.wdl_functions import write_lines
        from toil.wdl.wdl_functions import write_tsv
        from toil.wdl.wdl_functions import write_json
        from toil.wdl.wdl_functions import write_map
        from toil.wdl.wdl_functions import defined
        from toil.wdl.wdl_functions import basename
        from toil.wdl.wdl_functions import floor
        from toil.wdl.wdl_functions import ceil
        from toil.wdl.wdl_functions import wdl_range
        from toil.wdl.wdl_functions import transpose
        from toil.wdl.wdl_functions import length
        import fnmatch
        import textwrap
        import subprocess
        import os
        import errno
        import time
        import shutil
        import shlex
        import uuid
        import logging

        _toil_wdl_internal__current_working_dir = os.getcwd()

        logger = logging.getLogger(__name__)
        ''', {'jobstore': self.jobstore})[1:]
    return module_string
def write_main_header(self):
    main_header = heredoc_wdl('''
        if __name__=="__main__":
            options = Job.Runner.getDefaultOptions("./toilWorkflowRun")
            options.clean = 'always'
            with Toil(options) as toil:
        ''')
    return main_header
def write_main_header(self):
    main_header = heredoc_wdl('''
        if __name__=="__main__":
            options = Job.Runner.getDefaultOptions("{jobstore}")
            options.clean = 'always'
            with Toil(options) as fileStore:
        ''', {'jobstore': self.jobstore})
    return main_header
def write_main_header(self):
    main_header = heredoc_wdl('''
        if __name__=="__main__":
            parser = Job.Runner.getDefaultArgumentParser()
            options = parser.parse_args()
            options.clean = 'always'
            with Toil(options) as fileStore:
        ''', {'jobstore': self.jobstore})
    return main_header
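# All three variants above emit the same shape of main section; the last one,
# for example, expands to (sketch):
#
#     if __name__=="__main__":
#         parser = Job.Runner.getDefaultArgumentParser()
#         options = parser.parse_args()
#         options.clean = 'always'
#         with Toil(options) as fileStore: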
def write_function_outputreturn(self, job, docker=False):
    '''
    Find the output values that this function needs and write them out as a
    string.

    :param job: The name of the job to look up output values for.
    :return: A string representing this.
    '''
    fn_section = ''
    if 'outputs' in self.tasks_dictionary[job]:
        return_values = []
        for output in self.tasks_dictionary[job]['outputs']:
            output_name = output[0]
            output_type = output[1]
            output_value = output[2]

            if output_type == 'File':
                nonglob_dict = {
                    "output_name": output_name,
                    "expression": output_value,
                    "out_dir": self.output_directory,
                    "output_type": output_type}

                nonglob_template = heredoc_wdl('''
                    # output-type: {output_type}
                    output_filename = {expression}
                    {output_name} = process_outfile(output_filename, fileStore, tempDir, '{out_dir}')
                    ''', nonglob_dict, indent='        ')[1:]
                fn_section += nonglob_template
                return_values.append(output_name)
            else:
                fn_section += '        {} = {}\n'.format(output_name, output_value)
                return_values.append(output_name)

        if return_values:
            fn_section += '        rvDict = {'
            for return_value in return_values:
                fn_section += '"{rv}": {rv}, '.format(rv=return_value)
            if fn_section.endswith(', '):
                fn_section = fn_section[:-2]
            fn_section = fn_section + '}\n'
            fn_section += '        print(rvDict)\n'
            fn_section += '        return rvDict\n\n'

    return fn_section
def write_function_subprocesspopen(self):
    '''
    Write a subprocess.Popen() call for this function and write it out as a
    string.

    :return: A string representing this.
    '''
    fn_section = heredoc_wdl('''
        this_process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = this_process.communicate()\n''', indent='        ')
    return fn_section
def write_main_destbucket(self):
    '''
    Writes out a loop for exporting outputs to a cloud bucket.

    :return: A string representing this.
    '''
    main_section = heredoc_wdl('''
        outdir = '{outdir}'
        onlyfiles = [os.path.join(outdir, f) for f in os.listdir(outdir) if os.path.isfile(os.path.join(outdir, f))]
        for output_f_path in onlyfiles:
            output_file = fileStore.writeGlobalFile(output_f_path)
            preserveThisFilename = os.path.basename(output_f_path)
            destUrl = '/'.join(s.strip('/') for s in [destBucket, preserveThisFilename])
            fileStore.exportFile(output_file, destUrl)
        ''', {'outdir': self.output_directory}, indent='        ')
    return main_section
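# Worked example of the generated destUrl join (values are hypothetical): with
# destBucket = 's3://my-bucket/outputs/' and a file named 'results.txt',
# '/'.join(s.strip('/') for s in [destBucket, preserveThisFilename]) yields
# 's3://my-bucket/outputs/results.txt'; stripping slashes from each piece
# before joining means stray slashes cannot produce a double slash at the join
# point.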
def write_function_cmdline(self, job):
    """
    Write a series of commandline variables that will later be concatenated
    and run with either subprocess.Popen(), or with apiDockerCall() if a
    docker image is called for.

    :param job: The name of the job to write the command section for.
    :return: A string representing this.
    """
    fn_section = '\n'
    cmd_array = []
    if 'raw_commandline' in self.tasks_dictionary[job]:
        for cmd in self.tasks_dictionary[job]['raw_commandline']:
            if not cmd.startswith("r'''"):
                cmd = 'str({i} if not isinstance({i}, WDLFile) else process_and_read_file({i}, tempDir, fileStore)).strip("{nl}")'.format(i=cmd, nl=r"\n")
            fn_section = fn_section + heredoc_wdl('''
                try:
                    # Intended to deal with "optional" inputs that may not exist
                    # TODO: handle this better
                    command{num} = {cmd}
                except:
                    command{num} = ''\n''', {'cmd': cmd, 'num': self.cmd_num}, indent='        ')
            cmd_array.append('command' + str(self.cmd_num))
            self.cmd_num = self.cmd_num + 1

    if cmd_array:
        fn_section += '\n        cmd = '
        for command in cmd_array:
            fn_section += '{command} + '.format(command=command)
        if fn_section.endswith(' + '):
            fn_section = fn_section[:-3]
        fn_section += '\n        cmd = textwrap.dedent(cmd.strip("{nl}"))\n'.format(nl=r"\n")
    else:
        # empty command section
        fn_section += '        cmd = ""'

    return fn_section
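# Sketch of the generated command section for a task with two raw command
# chunks, one literal and one interpolated (chunk contents and the variable
# name "infile" are invented for illustration):
#
#     try:
#         # Intended to deal with "optional" inputs that may not exist
#         # TODO: handle this better
#         command0 = r'''echo '''
#     except:
#         command0 = ''
#     try:
#         # Intended to deal with "optional" inputs that may not exist
#         # TODO: handle this better
#         command1 = str(infile if not isinstance(infile, WDLFile) else process_and_read_file(infile, tempDir, fileStore)).strip("\n")
#     except:
#         command1 = ''
#
#     cmd = command0 + command1
#     cmd = textwrap.dedent(cmd.strip("\n"))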
def write_modules(self):
    # string used to write imports to the file
    module_string = heredoc_wdl('''
        from toil.job import Job
        from toil.common import Toil
        from toil.lib.docker import apiDockerCall
        from toil.wdl.wdl_functions import generate_docker_bashscript_file
        from toil.wdl.wdl_functions import select_first
        from toil.wdl.wdl_functions import sub
        from toil.wdl.wdl_functions import size
        from toil.wdl.wdl_functions import glob
        from toil.wdl.wdl_functions import process_and_read_file
        from toil.wdl.wdl_functions import process_infile
        from toil.wdl.wdl_functions import process_outfile
        from toil.wdl.wdl_functions import abspath_file
        from toil.wdl.wdl_functions import combine_dicts
        from toil.wdl.wdl_functions import parse_memory
        from toil.wdl.wdl_functions import parse_cores
        from toil.wdl.wdl_functions import parse_disk
        from toil.wdl.wdl_functions import read_string
        from toil.wdl.wdl_functions import read_int
        from toil.wdl.wdl_functions import read_float
        from toil.wdl.wdl_functions import read_tsv
        from toil.wdl.wdl_functions import read_csv
        from toil.wdl.wdl_functions import defined
        from os.path import basename
        import fnmatch
        import textwrap
        import subprocess
        import os
        import errno
        import time
        import shutil
        import shlex
        import uuid
        import logging

        asldijoiu23r8u34q89fho934t8u34fcurrentworkingdir = os.getcwd()

        logger = logging.getLogger(__name__)
        ''', {'jobstore': self.jobstore})[1:]
    return module_string
def write_function_outputreturn(self, job, docker=False):
    """
    Find the output values that this function needs and write them out as a
    string.

    :param job: The name of the job to look up output values for.
    :return: A string representing this.
    """
    fn_section = ''

    fn_section += heredoc_wdl('''
        _toil_wdl_internal__stdout_file = generate_stdout_file(_toil_wdl_internal__stdout,
                                                               tempDir,
                                                               fileStore=fileStore)
        _toil_wdl_internal__stderr_file = generate_stdout_file(_toil_wdl_internal__stderr,
                                                               tempDir,
                                                               fileStore=fileStore,
                                                               stderr=True)
        ''', indent='        ')[1:]

    if 'outputs' in self.tasks_dictionary[job]:
        return_values = []
        for output in self.tasks_dictionary[job]['outputs']:
            output_name = output[0]
            output_type = output[1]
            output_value = output[2]

            if self.needs_file_import(output_type):
                nonglob_dict = {
                    "output_name": output_name,
                    "output_type": self.write_declaration_type(output_type),
                    "expression": output_value,
                    "out_dir": self.output_directory}

                nonglob_template = heredoc_wdl('''
                    {output_name} = {output_type}.create(
                        {expression}, output=True)
                    {output_name} = process_outfile({output_name}, fileStore, tempDir, '{out_dir}')
                    ''', nonglob_dict, indent='        ')[1:]
                fn_section += nonglob_template
                return_values.append(output_name)
            else:
                fn_section += '        {} = {}\n'.format(output_name, output_value)
                return_values.append(output_name)

        if return_values:
            fn_section += '        rvDict = {'
            for return_value in return_values:
                fn_section += '"{rv}": {rv}, '.format(rv=return_value)
            if fn_section.endswith(', '):
                fn_section = fn_section[:-2]
            fn_section = fn_section + '}\n'
            fn_section += '        return rvDict\n\n'

    return fn_section
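# Sketch of the output section this generates for a hypothetical task with one
# File output "outfile" and one Int output "count" (the File wrapper class
# name is whatever write_declaration_type() returns; "WDLFileType" below is
# only a stand-in, and the literal values are invented):
#
#     outfile = WDLFileType.create(
#         "results.txt", output=True)
#     outfile = process_outfile(outfile, fileStore, tempDir, '/output/dir')
#     count = 42
#     rvDict = {"outfile": outfile, "count": count}
#     return rvDict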
def write_function_header(self, job):
    """
    Writes the header that starts each job class: the class declaration, its
    __init__() with resource requirements and input declarations, and the
    start of its run() method.

    :param job: The name of the task to look up inputs and runtime
                requirements for.
    :return: A string representing this.
    """
    fn_section = '\n\nclass {jobname}Cls(Job):\n'.format(jobname=job)
    fn_section += '    def __init__(self, '
    if 'inputs' in self.tasks_dictionary[job]:
        for i in self.tasks_dictionary[job]['inputs']:
            var = i[0]
            vartype = i[1]
            if vartype == 'String':
                fn_section += '{input}="", '.format(input=var)
            else:
                fn_section += '{input}=None, '.format(input=var)
    fn_section += '*args, **kwargs):\n'
    fn_section += '        super({jobname}Cls, self).__init__(*args, **kwargs)\n'.format(jobname=job)

    # TODO: Resolve inherent problems resolving resource requirements
    # In WDL, "local-disk " + 500 + " HDD" cannot be directly converted to python.
    # This needs a special handler.
    if 'runtime' in self.tasks_dictionary[job]:
        runtime_resources = []
        if 'memory' in self.tasks_dictionary[job]['runtime']:
            runtime_resources.append('memory=memory')
            memory = self.tasks_dictionary[job]['runtime']['memory']
            fn_section += '        memory=parse_memory({})\n'.format(memory)
        if 'cpu' in self.tasks_dictionary[job]['runtime']:
            runtime_resources.append('cores=cores')
            cores = self.tasks_dictionary[job]['runtime']['cpu']
            fn_section += '        cores=parse_cores({})\n'.format(cores)
        if 'disks' in self.tasks_dictionary[job]['runtime']:
            runtime_resources.append('disk=disk')
            disk = self.tasks_dictionary[job]['runtime']['disks']
            fn_section += '        disk=parse_disk({})\n'.format(disk)
        runtime_resources = ['self'] + runtime_resources
        fn_section += '        Job.__init__({})\n\n'.format(', '.join(runtime_resources))

    if 'inputs' in self.tasks_dictionary[job]:
        for i in self.tasks_dictionary[job]['inputs']:
            var = i[0]
            var_type = i[1]
            var_expressn = i[2]
            json_expressn = self.json_var(task=job, var=var)

            # json declarations have priority and can overwrite
            # whatever is in the wdl file
            if json_expressn is not None:
                var_expressn = json_expressn

            if var_expressn is None:
                # declarations from workflow
                fn_section += '        self.id_{} = {}\n'.format(var, var)
            else:
                # declarations from a WDL or JSON file
                fn_section += '        self.id_{} = {}.create(\n            {})\n'\
                    .format(var, self.write_declaration_type(var_type), var_expressn)

    fn_section += heredoc_wdl('''
        def run(self, fileStore):
            fileStore.logToMaster("{jobname}")
            tempDir = fileStore.getLocalTempDir()

            _toil_wdl_internal__stdout_file = os.path.join(tempDir, 'stdout')
            _toil_wdl_internal__stderr_file = os.path.join(tempDir, 'stderr')

            try:
                os.makedirs(os.path.join(tempDir, 'execution'))
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        ''', {'jobname': job}, indent='    ')[1:]

    if 'inputs' in self.tasks_dictionary[job]:
        for i in self.tasks_dictionary[job]['inputs']:
            var = i[0]
            var_type = i[1]
            docker_bool = str(self.needsdocker(job))
            if self.needs_file_import(var_type):
                args = ', '.join([
                    f'abspath_file(self.id_{var}, _toil_wdl_internal__current_working_dir)',
                    'tempDir',
                    'fileStore',
                    f'docker={docker_bool}'])
                fn_section += '        {} = process_and_read_file({})\n'.format(var, args)
            else:
                fn_section += '        {} = self.id_{}\n'.format(var, var)

    return fn_section
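# Sketch of the class skeleton the version above generates for a hypothetical
# task "helloWorld" with one workflow-supplied String input "name" and no
# runtime section (all names invented for illustration):
#
#     class helloWorldCls(Job):
#         def __init__(self, name="", *args, **kwargs):
#             super(helloWorldCls, self).__init__(*args, **kwargs)
#             self.id_name = name
#
#         def run(self, fileStore):
#             fileStore.logToMaster("helloWorld")
#             tempDir = fileStore.getLocalTempDir()
#
#             _toil_wdl_internal__stdout_file = os.path.join(tempDir, 'stdout')
#             _toil_wdl_internal__stderr_file = os.path.join(tempDir, 'stderr')
#             ...
#             name = self.id_name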
def updateStaticEC2Instances():
    """
    Generates a new python file of fetchable EC2 Instances by region with
    current prices and specs.

    Takes a few (~3+) minutes to run (you'll need decent internet).

    :return: Nothing.  Writes a new 'generatedEC2Lists.py' file.
    """
    logger.info("Updating Toil's EC2 lists to the most current version from AWS's bulk API.  "
                "This may take a while, depending on your internet connection.")

    dirname = os.path.dirname(__file__)
    # the file Toil uses to get info about EC2 instance types
    origFile = os.path.join(dirname, 'generatedEC2Lists.py')
    assert os.path.exists(origFile)
    # use a temporary file until all info is fetched
    genFile = os.path.join(dirname, 'generatedEC2Lists_tmp.py')
    assert not os.path.exists(genFile)
    # will be used to save a copy of the original when finished
    oldFile = os.path.join(dirname, 'generatedEC2Lists_old.py')

    # provenance note, copyright and imports
    with open(genFile, 'w') as f:
        f.write(heredoc_wdl('''
            # !!! AUTOGENERATED FILE !!!
            # Update with: src/toil/utils/toilUpdateEC2Instances.py
            #
            # Copyright (C) 2015-{year} UCSC Computational Genomics Lab
            #
            # Licensed under the Apache License, Version 2.0 (the "License");
            # you may not use this file except in compliance with the License.
            # You may obtain a copy of the License at
            #
            #     http://www.apache.org/licenses/LICENSE-2.0
            #
            # Unless required by applicable law or agreed to in writing, software
            # distributed under the License is distributed on an "AS IS" BASIS,
            # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
            # See the License for the specific language governing permissions and
            # limitations under the License.
            from six import iteritems
            from toil.lib.ec2nodes import InstanceType\n\n\n''',
            dictionary={'year': datetime.date.today().strftime("%Y")}))

    currentEC2List = []
    instancesByRegion = {}
    for regionNickname, _ in iteritems(EC2Regions):
        currentEC2Dict = fetchEC2InstanceDict(regionNickname=regionNickname)
        for instanceName, instanceTypeObj in iteritems(currentEC2Dict):
            if instanceTypeObj not in currentEC2List:
                currentEC2List.append(instanceTypeObj)
            instancesByRegion.setdefault(regionNickname, []).append(instanceName)

    # write header of total EC2 instance type list
    genString = "# {num} Instance Types.  Generated {date}.\n".format(
        num=str(len(currentEC2List)), date=str(datetime.datetime.now()))
    genString = genString + "E2Instances = {\n"
    sortedCurrentEC2List = sorted(currentEC2List, key=lambda x: x.name)

    # write the list of all instances types
    for i in sortedCurrentEC2List:
        z = "    '{name}': InstanceType(name='{name}', cores={cores}, memory={memory}, disks={disks}, disk_capacity={disk_capacity})," \
            "\n".format(name=i.name, cores=i.cores, memory=i.memory, disks=i.disks, disk_capacity=i.disk_capacity)
        genString = genString + z
    genString = genString + '}\n\n'

    genString = genString + 'regionDict = {\n'
    for regionName, instanceList in iteritems(instancesByRegion):
        genString = genString + "    '{regionName}': [".format(regionName=regionName)
        for instance in sorted(instanceList):
            genString = genString + "'{instance}', ".format(instance=instance)
        if genString.endswith(', '):
            genString = genString[:-2]
        genString = genString + '],\n'
    if genString.endswith(',\n'):
        genString = genString[:-len(',\n')]
    genString = genString + '}\n'
    with open(genFile, 'a+') as f:
        f.write(genString)

    # append key for fetching at the end
    regionKey = '\nec2InstancesByRegion = dict((region, [E2Instances[i] for i in instances]) for region, instances in iteritems(regionDict))\n'
    with open(genFile, 'a+') as f:
        f.write(regionKey)

    # back up the original file unless a backup already exists
    if not os.path.exists(oldFile):
        os.rename(origFile, oldFile)
    # delete the original file if it's still there
    if os.path.exists(origFile):
        os.remove(origFile)
    # replace the instance list with a current list
    os.rename(genFile, origFile)
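# Sketch of the generatedEC2Lists.py layout this function writes (the instance
# entries and counts below are invented placeholders, not real AWS data):
#
#     # 200 Instance Types.  Generated 2019-01-01 00:00:00.000000.
#     E2Instances = {
#         'm5.large': InstanceType(name='m5.large', cores=2, memory=8.0, disks=0, disk_capacity=0),
#         ...
#     }
#
#     regionDict = {
#         'us-west-2': ['m5.large', ...],
#         ...
#     }
#
#     ec2InstancesByRegion = dict((region, [E2Instances[i] for i in instances]) for region, instances in iteritems(regionDict))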
def write_function_header(self, job):
    '''
    Writes the header that starts each job class: the class declaration, its
    __init__() with resource requirements and input declarations, and the
    start of its run() method.

    :param job: The name of the task to look up inputs and runtime
                requirements for.
    :return: A string representing this.
    '''
    fn_section = '\n\nclass {jobname}Cls(Job):\n'.format(jobname=job)
    fn_section += '    def __init__(self, '
    if 'inputs' in self.tasks_dictionary[job]:
        for i in self.tasks_dictionary[job]['inputs']:
            var = i[0]
            vartype = i[1]
            if vartype == 'String':
                fn_section += '{input}="", '.format(input=var)
            else:
                fn_section += '{input}=None, '.format(input=var)
    fn_section += '*args, **kwargs):\n'

    if 'runtime' in self.tasks_dictionary[job]:
        runtime_resources = []
        if 'memory' in self.tasks_dictionary[job]['runtime']:
            runtime_resources.append('memory=memory')
            memory = self.tasks_dictionary[job]['runtime']['memory']
            fn_section += '        memory=parse_memory({})\n'.format(memory)
        if 'cpu' in self.tasks_dictionary[job]['runtime']:
            runtime_resources.append('cores=cores')
            cores = self.tasks_dictionary[job]['runtime']['cpu']
            fn_section += '        cores=parse_cores({})\n'.format(cores)
        if 'disks' in self.tasks_dictionary[job]['runtime']:
            runtime_resources.append('disk=disk')
            disk = self.tasks_dictionary[job]['runtime']['disks']
            fn_section += '        disk=parse_disk({})\n'.format(disk)
        runtime_resources = ['self'] + runtime_resources
        fn_section += '        Job.__init__({})\n\n'.format(', '.join(runtime_resources))

    if 'inputs' in self.tasks_dictionary[job]:
        for i in self.tasks_dictionary[job]['inputs']:
            var = i[0]
            var_expressn = i[2]
            json_expressn = self.json_var(task=job, var=var)

            # json declarations have priority and can overwrite
            # whatever is in the wdl file
            if json_expressn:
                var_expressn = json_expressn
            if not var_expressn:
                var_expressn = var

            fn_section += '        self.id_{} = {}\n'.format(var, var_expressn)

    fn_section += heredoc_wdl('''
            super({jobname}Cls, self).__init__(*args, **kwargs)

        def run(self, fileStore):
            fileStore.logToMaster("{jobname}")
            tempDir = fileStore.getLocalTempDir()

            try:
                os.makedirs(os.path.join(tempDir, 'execution'))
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        ''', {'jobname': job}, indent='    ')[1:]

    if 'inputs' in self.tasks_dictionary[job]:
        for i in self.tasks_dictionary[job]['inputs']:
            var = i[0]
            var_type = i[1]
            docker_bool = str(self.needsdocker(job))
            if var_type == 'File':
                fn_section += '        {} = process_and_read_file(abspath_file(self.id_{}, asldijoiu23r8u34q89fho934t8u34fcurrentworkingdir), tempDir, fileStore, docker={})\n'.format(
                    var, var, docker_bool)
            else:
                fn_section += '        {} = self.id_{}\n'.format(var, var)

    return fn_section