Example 1
    def getLogicalRecordCount(self, arg_filename):
        """
        get an approximate logical record count for a fasta file 
        """
        filenames = [arg_filename]
        if self.isListFile(arg_filename):
            with open(arg_filename, "r") as file_list:
                filenames = [record.strip() for record in file_list]

        record_count = 0
        for filename in filenames:
            try:
                count_command = ["kseq_count", "-a", filename]
                self.logWriter.info("getLogicalRecordCount executing: %s" %
                                    " ".join(count_command))
                proc = subprocess.Popen(count_command,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                (stdout, stderr) = proc.communicate()
                record_count += int(stdout)
            except Exception as e:
                self.logWriter.info(
                    "getLogicalRecordCount -failed getting logical record count : %s"
                    % str(e))
                raise tutils.tardisException(
                    "getLogicalRecordCount -failed getting logical record count : %s"
                    % str(e))

        self.logWriter.info(
            "getLogicalRecordCount estimates there are %d records in %s" %
            (record_count, arg_filename))
        return record_count
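
The method above shells out to an external kseq_count tool to count logical records per file. As a hedged aside (not part of tardis, and assuming that for FASTA a logical record is simply one '>' header), a pure-Python stand-in for that count might look like this:

# Hedged sketch: approximate what "kseq_count -a" is assumed to report for a
# FASTA file, i.e. the number of '>' header lines (one per logical record).
def count_fasta_records(filename):
    count = 0
    with open(filename, "r") as f:
        for line in f:
            if line.startswith(">"):
                count += 1
    return count

# e.g. count_fasta_records("reads.fa") returns 125000 for a 125000-sequence file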
Example 2
    def runCommand(self, argCommand=None):
        command = argCommand
        if argCommand is None:
            command = self.command
        
        if len(command) > 0:
            self.logWriter.info("condorhpcJob : running %s"%str(command))

            # set up the shell scriptfile(s) (one per chunk) (unless this is a rerun, in which case it's already been done)
            if self.submitCount == 0:
                runtime_environmentcode = self.runtime_config_template.safe_substitute() # currently no templating actually done here                
                shellcode = self.shell_script_template.safe_substitute(configure_runtime_environment=runtime_environmentcode,\
                                                                       hpcdir=self.workingRoot,command=string.join(self.command," "),\
                                                                       startdir=self.controller.options["startdir"],
                                                                       input_conditioning=str(self.controller.options["input_conditioning"]))
                
                self.scriptfilename = os.path.join(self.workingRoot, "run%d.sh"%self.jobNumber)
                if os.path.isfile(self.scriptfilename):
                    raise tutils.tardisException("error %s already exists"%self.scriptfilename)
                f=open(self.scriptfilename,"w")
                self.logWriter.info("condorhpcJob : condor shell script wrapper is %s"%self.scriptfilename)
                f.writelines(shellcode)
                f.close()
                os.chmod(self.scriptfilename, stat.S_IRWXU | stat.S_IRGRP |  stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH )


            # set up the condor jobfile  (one per chunk) (unless already done)
            if self.submitCount == 0:
                self.logname=re.sub("\.sh$",".log",self.scriptfilename)
                self.stderrnamepattern = "%s\.err\.\S+$"%re.escape(os.path.basename(self.scriptfilename))
                self.stdoutnamepattern = "%s\.out\.\S+$"%re.escape(os.path.basename(self.scriptfilename))
                self.jobfilename=re.sub("\.sh$",".job",self.scriptfilename)
                jobcode = self.job_template.safe_substitute(script=self.scriptfilename,log=self.logname,rundir=self.workingRoot)
                self.logWriter.info("condorhpcJob : condor job file is %s"%self.jobfilename)
                f=open(self.jobfilename,"w")
                f.writelines(jobcode)
                f.close()

            # submit the condor job 
            condor_submit = ["condor_submit", self.jobfilename]
            if self.controller.options["dry_run"] :
                self.logWriter.info("condorhpcJob : this is a dry run - not launching the job")
            else:
                self.logWriter.info("condorhpcJob : launching using %s"%str(condor_submit))
                self.proc = subprocess.Popen(condor_submit,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                self.submitCount += 1
                (self.stdout, self.stderr) = self.proc.communicate()
                self.submitreturncode = self.proc.returncode                
                self.logWriter.info("condorhpcJob : %s has returned (status %s) - here is its output (but now we wait for the real output !)"%(str(condor_submit), self.submitreturncode))
                self.logWriter.info("condorhpcJob : stdout : \n%s"%self.stdout)
                self.logWriter.info("condorhpcJob : stderr : \n%s"%self.stderr)
                    
        else:
	    self.logWriter.info("condorhpcJob : nothing to do")
Example 3
def tardis_main():
    parser = argparse.ArgumentParser(description='Condition a command for execution on a cluster.')
    parser.add_argument('-w', '--in-workflow', dest='in_workflow', action='store_const', const=True, help='Run the command as part of a workflow. After launching all of the jobs, tardis waits for all outputs, which are then collated and merged into a single output file, as specified by the output file path in the original command; all of the temporary input files (for example chunks of uncompressed fastq) are deleted provided all prior steps completed without error (if there was an error they are left there to assist with debugging). Without this option, the program exits immediately after launching all of the jobs, and output is left un-collated in the scratch folder created by this script, and no cleanup is done.')
    parser.add_argument('-c', '--chunksize', dest='chunksize', type=int, metavar='N', help='When conditioning the input file(s), split into files each containing N logical records. (A logical record for a sequence file is a complete sequence; for a text file it is a line of text.) If the -s option is used to sample the inputs, the chunksize relates to the full un-sampled file, so the same chunk-size can be used whether random sampling or not. For example, if a chunksize of 1,000,000 is specified in combination with a sampling rate of .0001, then each chunk would contain 100 sequences, i.e. you should not adjust the chunk-size for the sampling rate. Note that to avoid a race condition that could be caused by a very small chunk-size resulting in launching a very large number of jobs, tardis will throw an exception if the chunk-size used would result in launching more than MAX_DIMENSION jobs (currently 5000).')
    parser.add_argument('--from', '--from-record', dest='from_record', type=int, metavar='N', help='When conditioning the input file(s), only use records from the input file at or after record N (where N is the logical record number, e.g. in a fastq file, starting from record number N means starting from sequence N). By combining this option with --to, you can process slices of a file. Note that this option has no effect when processing a list-file.')
    parser.add_argument('--to', '--to-record', dest='to_record', type=int, metavar='N', help='When conditioning the input file(s), only use records up to and including record N (where N is the logical record number, e.g. in a fastq file, processing up to record number N means processing up to and including sequence N). By combining this option with --from, you can process slices of a file. Note that this option has no effect when processing a list-file.')
    parser.add_argument('-s', dest='samplerate', type=float, metavar='RATE', help='Rather than process the entire input file(s), a random sample of the records is processed. RATE is the probability that a given record will be sampled. For example, -s .001 will result in roughly 1 in every 1000 logical records being sampled. When the -s option is specified, tardis does not clean up the conditioned input and output, e.g. all of the uncompressed fastq sample fragments would be retained. These are retained to assist with the Q/C work that is normally associated with a sampled run. Paired fastq input files are sampled in lock-step, provided the paired fastq conditioning directive is used for both files.')
    parser.add_argument('-d', '--rootdir', dest='rootdir', type=str, metavar='DIR', help='create the tardis working folder under DIR. If no working root is specified, a default location is used.')
    parser.add_argument('--dry-run', dest='dry_run', action='store_const', const=True, help='validate the run by doing a dry run. This means that the chunks, job scripts and job files etc. are all generated but the jobs are not launched. The user can start then kill (CTRL-C) the run, inspect the script and job files that were generated to check that their command has been conditioned as envisaged.')
    parser.add_argument('-k', '--keep-conditioned-data', dest='keep_conditioned_data', action='store_const', const=True, help='keep the conditioned input and output - i.e. the input and output fragments. Normally in workflow mode these are deleted after the output is successfully "unconditioned" - i.e. joined back together')
    parser.add_argument('--job-file', dest='jobtemplatefile', type=str, metavar='FILE', help='optionally supply a job template - tardis will read the contents of FILE and use this as the job template.')
    parser.add_argument('--templatedir', dest='templatedir', type=str, metavar='DIR', help='template directory')
    parser.add_argument('--job-template-name', dest='job_template_name', type=str, metavar='NAME', help='job template name, resolved in template directory')
    parser.add_argument('--hpctype', dest='hpctype', type=str, help='indicate the hpc environment. Currently the only supported values are: condor which results in tardis attempting to set up and launch condor jobs; local which results in each job being launched by tardis itself on the local machine, using the native python sub-process API. The maximum number of processes it will run at a time is controlled by a global variable in the script MAX_PROCESSES, which is initially 10; slurm which results in tardis attempting to set up and launch slurm jobs.')
    parser.add_argument('--batonfile', dest='batonfile', type=str, metavar='FILE', help='if you supply a "baton file" FILE, tardis will write the process exit code to this file after all processing has completed. This can be useful to preserve synchronous execution of a workflow, even if tardis is started in the background - the workflow can test the existence of the batonfile - if it exists then the corresponding tardis processing step has completed (i.e. another way of each step in a workflow "passing the baton" to the next step)')
    parser.add_argument('--shell-include-file', '--runtimeconfigsourcefile', dest='runtimeconfigsourcefile', type=str, metavar='FILE', help='shell script fragment included in jobs')
    parser.add_argument('-q', '--quiet', dest='quiet', action='store_const', const=True, help='run quietly')
    parser.add_argument('--userconfig', dest='userconfig', metavar='FILE', help='user configuration file')
    parser.add_argument('--no-sysconfig', dest='no_sysconfig', action='store_true', default=False, help='ignore the system configuration file')
    parser.add_argument('command', help='command to run')
    parser.add_argument('arg', nargs=argparse.REMAINDER, help='command arguments')

    args = parser.parse_args()
    options = dict((k,v) for k,v in vars(args).iteritems() if v is not None and k != 'command')

    # filter command args
    command_args = [args.command] + args.arg
    for arg in command_args:
        if re.search("[\!\&]|(?<!\w)rm(?!\w)|(?<!\w)mv(?!\w)|(?<!\w)cp(?!\w)", arg) != None: # do not allow irrelevant/dangerous shell chars
            raise tardisException("error : dangerous argument to shell ( %s ) - will not run this"%arg)
            #args.remove(arg)
    if len(args.command) < 1:
        raise tardisException("please supply a valid command to condition and run (type tardis -h for usage")

    try:
        options = tutils.mergeOptionsWithConfig(options)
    except tutils.tardisException, msg:

        print >> sys.stderr, msg
        return 2
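
The argument screen above rejects shell metacharacters and bare rm/mv/cp words. A standalone check of the same pattern shows which arguments pass:

# Standalone check of the argument screen used in tardis_main (same pattern).
import re

DANGEROUS = r"[\!\&]|(?<!\w)rm(?!\w)|(?<!\w)mv(?!\w)|(?<!\w)cp(?!\w)"

for arg in ["bwa", "format", "rm", "cp -r", "a&&b"]:
    print("%-8s rejected=%s" % (arg, re.search(DANGEROUS, arg) is not None))
# "format" passes (rm only matches as a whole word); "rm", "cp -r" and "a&&b" are rejected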
Example 4
def run(toolargs, options, stdout = sys.stdout, stderr=sys.stderr, checkCommandIsValid = True):
    # some merging / prioritisation of options is needed in some cases.
    msg_for_log=None # we don't have a logger yet - will log this later when we do
    if options.get("job_template_name",None) is not None and options.get("jobtemplatefile",None) is not None:
        msg_for_log= "warning - a job template filename was specified (%s) - this overrides the job template name specified (%s)"%(options.get("jobtemplatefile",None), options.get("job_template_name",None))
        del options["job_template_name"]


    # optionally check that the command is supported by this tardis
    if checkCommandIsValid:
        if not isCommandValid(toolargs, options["valid_command_patterns"]):
            print >> stderr, "%s is not supported by this tardis engine"%toolargs[0]            
            return 2


    #print "using %s"%str(options)
    if not options["quiet"]:
        print "tool args = %s"%str(toolargs)


    # set up logging and working folder for this run
    (l,workingRoot) = factory.hpcConditioner.getLogger(options)
    logger = tutils.tardisLogger(l)

    # log msg_for_log if we have one 
    if msg_for_log is not None:
        logger.info(msg_for_log)


    if not options["quiet"]:                                                     
        print "tardis.py : logging this session to %s"%workingRoot

    logger.info("tardis options : " + str(options))
    
    c = factory.hpcConditioner(logger,workingRoot,options,toolargs)
    c.options = options
    c.logWriter.info("tardis.py : logging this session to %s"%workingRoot)        
    #c.logWriter.info("using %s"%str(options))
    c.logWriter.info("tool args = %s"%str(toolargs))
    

    # create a prototype data conditioner. This won't actually do any data conditioning,
    # but will be used to induct subsequent conditioners, by passing on
    # shared objects
    dcPrototype=data.dataConditioner()
    dcPrototype.logWriter = logger 
    dcPrototype.workingRoot = workingRoot
    dcPrototype.jobcontroller = c    
    dcPrototype.logWriter.info("prototype dataConditioner created")
    dcPrototype.options = options

    #hpcConditioner.logWriter.info("main : requesting conditioned commands")
    conditionedCommandIter = c.getConditionedCommandGenerator(dcPrototype)
    conditionedInputGenerators = dcPrototype.getConditionedInputGenerators()    
    for conditionedInputs in conditionedInputGenerators:
        dcPrototype.distributeAvailableInputs(conditionedInputs)
        cmd = conditionedCommandIter.next()

        c.logWriter.info("setting up job for conditioned command : %s"%str(cmd)) 
        job = c.gethpcJob(cmd)
        job.runCommand()


        # check for partially submitted jobs here in case we are rate limited - otherwise we will have to wait until all chunks
        # have been written. This will also do a wait on the jobs that are running
        if c.hpcClass == local.localhpcJob:
            c.logWriter.info("(running jobs locally and there are %d partially submitted jobs)"%len(c.getUnsubmittedJobs()))
            if len(c.getUnsubmittedJobs()) > 0:
                c.retryJobSubmission(maxRetries = 1, retryPause = 1)

    # for some hpc types (e.g. slurm array jobs), runCommand does not actually run the command, it
    # just sets it up. These are then all batch-submitted here:
    c.launchArrayJobs()
    
                    

    # if in a workflow, or conditioning output, and not a dry run , poll for results
    if (options["in_workflow"] or len(dcPrototype.outputUnconditioners) > 0) and not options["dry_run"] :
        c.logWriter.info("tardis.py : done setting up jobs - polling for results (and submitting any queued jobs)")
        for dc in dcPrototype.outputUnconditioners:    # (if in a workflow and no unconditioners were specified, then a default one
                                                           # will have been created)

            # results are sent to each output unconditioner
            # clear sent flag
            for job in c.jobList:
                job.sent = False

            poll_count = 0
            while True:
                poll_count +=1
                if poll_count * hpc.hpcJob.POLL_INTERVAL > hpc.hpcJob.POLL_DURATION:
                    raise tardisException("error in tardis.py session - bailing out as we have been hanging around waiting for output for far too long ! ")
                
                
                unsentJobs = [ job for job in c.jobList if not job.sent ]
                if len(unsentJobs) == 0:
                    break

                # retry jobs here in case we are rate limited
                if len(c.getUnsubmittedJobs()) > 0:
                    c.logWriter.info("(there are %d partially submitted jobs)"%len(c.getUnsubmittedJobs()))
                    c.retryJobSubmission(maxRetries = hpc.hpcJob.SUBMIT_RETRIES, retryPause = hpc.hpcJob.SUBMIT_RETRY_PAUSE)

                sent_count = 0 # count how many jobs just finished 
                for unsentJob in unsentJobs:
                    unsentJob.sendAvailableOutput(dc.outputCollector, dc.productCollector)
                    if unsentJob.sent:
                        sent_count += 1

                # if no jobs just finished, wait for a while, otherwise go back for more output immediately
                if sent_count == 0:
                    time.sleep(hpc.hpcJob.POLL_INTERVAL)
                    



        c.logWriter.info("%s output unconditioners are unconditioning"%len(dcPrototype.outputUnconditioners))
        # uncondition all output 
        for dc in dcPrototype.outputUnconditioners:
            dc.unconditionOutput()

            
        # only remove the conditioned output if we are in a workflow and we are not sampling  and no error state was set , and KEEP_CONDITIONED_DATA is
        # not set
        if options["in_workflow"] and options["samplerate"] is None and dcPrototype.getDataResultState() == data.dataConditioner.OK and \
                   c.getJobResultState()  == hpc.hpcJob.OK and not options["keep_conditioned_data"]:
            for dc in dcPrototype.outputUnconditioners:            
                dc.logWriter.info("removing conditioned output") 
                dc.removeConditionedOutput()
        else:
            c.logWriter.info("either not in workflow or sampling , or error state set , not removing conditioned output")


        # stream the output from all jobs to stdout of this job
        c.unconditionJobStreams(stdout,stderr)

                                               
        # do not uncondition input if sampling , or if options["keep_conditioned_data"] is set, or if an error state has been set
        if options["samplerate"] is None and  dcPrototype.getDataResultState() == data.dataConditioner.OK \
                   and c.getJobResultState()  == hpc.hpcJob.OK and not options["keep_conditioned_data"]:

            c.logWriter.info("%s input conditioners are unconditioning the following files : %s"%\
                                           (len(dcPrototype.getDistinctInputConditioners()),\
                                            string.join([dc.inputFileName for dc in dcPrototype.getDistinctInputConditioners()]," , ")\
                                            )\
                                           )
            c.logWriter.info("unconditioning input")
            
            for dc in dcPrototype.getDistinctInputConditioners():
                dc.removeConditionedInput()
        else:
            c.logWriter.info("not unconditioning input as either sampling was specified, or keep conditioned input was set, or error state is set due to a previous error")


    else:
        c.logWriter.info("tardis.py : not in a workflow and no output unconditioners (or this is a dry run) - exiting")


    if dcPrototype.getDataResultState() == data.dataConditioner.OK and c.getJobResultState()  == hpc.hpcJob.OK :
        c.logWriter.info("tardis.py : done logging this session to %s , no errors detected"%workingRoot)
        if not options["quiet"]:
            print "tardis.py : done logging this session to %s , no errors detected"%workingRoot
        if len(c.getJobResultStateDescription()) > 0:
            c.logWriter.info(c.getJobResultStateDescription())
            if not options["quiet"]:
                print c.getJobResultStateDescription()
                print >> stderr, c.getJobResultStateDescription()        
        if len(dcPrototype.getDataResultStateDescription()) > 0:
            c.logWriter.info(dcPrototype.getDataResultStateDescription())
            if not options["quiet"]:
                print dcPrototype.getDataResultStateDescription()
                print >> stderr, dcPrototype.getDataResultStateDescription()
        return (0,c)
    else:
        c.logWriter.info("tardis.py : done logging this session to %s. NOTE : some errors were logged"%workingRoot)
        if not options["quiet"]:
            print "tardis.py : done logging this session to %s. NOTE : some errors were logged"%workingRoot
        if len(c.getJobResultStateDescription()) > 0:
            c.logWriter.info(c.getJobResultStateDescription())
            if not options["quiet"]:
                print c.getJobResultStateDescription()
                print >> stderr, c.getJobResultStateDescription()        
        if len(dcPrototype.getDataResultStateDescription()) > 0:
            c.logWriter.info(dcPrototype.getDataResultStateDescription())
            if not options["quiet"]:
                print dcPrototype.getDataResultStateDescription()        
                print >> stderr, dcPrototype.getDataResultStateDescription()
            
        return (2,c)
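
The polling loop above is bounded by POLL_INTERVAL and POLL_DURATION on hpc.hpcJob. A stripped-down sketch of that guard, with the constants assumed rather than taken from the real class:

# Sketch only - the interval and duration values are assumptions.
import time

POLL_INTERVAL = 10        # seconds between polls (assumed)
POLL_DURATION = 3 * 3600  # give up after this many seconds in total (assumed)

def wait_until(predicate):
    """poll predicate() until it returns True, or bail out after POLL_DURATION"""
    poll_count = 0
    while not predicate():
        poll_count += 1
        if poll_count * POLL_INTERVAL > POLL_DURATION:
            raise RuntimeError("gave up waiting for output")
        time.sleep(POLL_INTERVAL)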
Example 5
    def launchArrayJobs(self):
        """
        this is only applicable to slurm jobs. This is called after the job scripts have all been
        created. One or more array jobs are launched (more than one if the number of jobs is
        > SLURM_MAXARRAYSIZE=1000). The array job looks roughly like this:
#!/bin/bash -e

#SBATCH -J $tardis_job_moniker
#SBATCH -A $tardis_account_moniker        # Project Account
#SBATCH --time=20:00:00            # Walltime
#SBATCH --ntasks=1                 # number of parallel processes
#SBATCH --ntasks-per-socket=1      # number of processes allowed on a socket
#SBATCH --cpus-per-task=4          #number of threads per process
#SBATCH --hint=multithread         # enable hyperthreading
#SBATCH --mem-per-cpu=8G
#SBATCH --partition=inv-iranui     # Use nodes in the IRANUI partition
#SBATCH --array=1-$array_size%50          # Iterate 1 to N, but only run up to 50 concurrent runs at once
#SBATCH --error=$script-%A_%a.err
#SBATCH --output=$script-%A_%a.out

srun --cpu_bind=v,threads ${SLURM_ARRAY_TASK_ID}        
        """
        # "slurm_array_job" is launched by sbatch , and internally launches a shim script, passing
        # to it the index of the job to run. The shim then just executes run1.sh, run2.sh
        # - which are instances of "slurm_shell".

        if self.hpcClass != slurm.slurmhpcJob:
            return

        # write the slurm array shim to the working folder
        slurm_array_shim = string.Template(
            tutils.getTemplateContent(self.options,
                                      "slurm_array_shim",
                                      logWriter=self.logWriter))
        shimcode = slurm_array_shim.safe_substitute(hpcdir=self.workingRoot)
        shim_file_name = os.path.join(self.workingRoot, "slurm_array_shim.sh")
        f = open(shim_file_name, "w")
        self.logWriter.info("hpcConditioner : writing array shim")
        f.writelines(shimcode)
        f.close()
        os.chmod(
            shim_file_name, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
            | stat.S_IROTH | stat.S_IXOTH)

        # write one or more array job files
        # figure out a job template from the options. (You can specify one of the inbuilt templates by name, or
        # supply a file containing a custom template). If nothing is supplied at all we use the hard-coded default
        job_template_name = self.options.get("job_template_name", None)
        job_template_filename = self.options.get("jobtemplatefile", None)
        if job_template_name is None and job_template_filename is None:
            # use the default slurm array job template
            job_template_name = "default_slurm_array_job"

        if job_template_name is not None and job_template_filename is not None:
            raise tutils.tardisException(
                "error both job_template_name (%s) and job_template_filename (%s) defined - only define one of these"
                % (job_template_name, job_template_filename))

        if job_template_name is not None:
            job_template = tutils.getTemplateContent(self.options,
                                                     job_template_name,
                                                     logWriter=self.logWriter)
        else:
            if not os.path.isfile(job_template_filename):
                raise tutils.tardisException(
                    "error job template file %s not found" %
                    job_template_filename)
            job_template = string.join(file(job_template_filename, "r"), "")

        if job_template is None:
            raise tutils.tardisException(
                "hpcConditioner: Error job template is null after templating")
        job_template = string.Template(job_template)

        n_launched = 0
        while n_launched < len(self.jobList):
            n_launch = min(SLURM_MAXARRAYSIZE, len(self.jobList) - n_launched)

            arraycode  = job_template.safe_substitute(tardis_job_moniker=self.toolargv[0], tardis_account_moniker=os.environ['LOGNAME'],\
                                                                 array_start=str(n_launched+1),array_stop=str(n_launched+n_launch),\
                                                                 hpcdir=self.workingRoot)
            array_jobfile_name = os.path.join(
                self.workingRoot,
                "array_%d-%d.slurm" % (n_launched + 1, n_launched + n_launch))
            f = open(array_jobfile_name, "w")
            self.logWriter.info("hpcConditioner : writing array job %s" %
                                array_jobfile_name)
            f.writelines(arraycode)
            f.close()
            os.chmod(
                array_jobfile_name, stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP
                | stat.S_IROTH | stat.S_IXOTH)

            # launch if we need to
            if self.options["dry_run"]:
                self.logWriter.info(
                    "slurmhpcJob : this is a dry run - not launching the job")
            else:
                slurm_submit = ["sbatch", "-v", array_jobfile_name]
                self.logWriter.info("slurmhpcJob : launching using %s" %
                                    str(slurm_submit))
                proc = subprocess.Popen(slurm_submit,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                (stdout, stderr) = proc.communicate()
                submitreturncode = proc.returncode
                if submitreturncode == 0:
                    self.logWriter.info(
                        "slurmhpcJob : %s has returned (status %s) - here is its output (but now we wait for the real output !)"
                        % (str(slurm_submit), submitreturncode))
                    self.logWriter.info("slurmhpcJob : stdout : \n%s" % stdout)
                    self.logWriter.info("slurmhpcJob : stderr : \n%s" % stderr)
                else:
                    self.logWriter.info(
                        "slurmhpcJob : error %s has returned status %s !)" %
                        (str(slurm_submit), submitreturncode))
                    self.logWriter.info("slurmhpcJob : stdout : \n%s" % stdout)
                    self.logWriter.info("slurmhpcJob : stderr : \n%s" % stderr)
                    self.logWriter.info(
                        "slurmhpcJob : giving up, the array job spec may have bugs ?"
                    )
                    raise tutils.tardisException("hpcConditioner : %s" %
                                                 stderr)

            n_launched += n_launch
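
The while loop above walks the job list in batches of at most SLURM_MAXARRAYSIZE tasks per sbatch array job. The batching arithmetic in isolation (the value 1000 is taken from the docstring):

SLURM_MAXARRAYSIZE = 1000

def array_batches(n_jobs, max_array=SLURM_MAXARRAYSIZE):
    """yield 1-based inclusive (array_start, array_stop) ranges, one per array job"""
    n_launched = 0
    while n_launched < n_jobs:
        n_launch = min(max_array, n_jobs - n_launched)
        yield (n_launched + 1, n_launched + n_launch)
        n_launched += n_launch

# list(array_batches(2500)) == [(1, 1000), (1001, 2000), (2001, 2500)]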
Example 6
def _slow_get_conditioned_filenames(caller, filename1, argchunksize, outdir, informat = "text", outformat = "text", samplerate = None ,filename2=None,  pairBond = None,\
                            listfilename1 = None, listfilename2 = None, length_bounds = (None,None), record_filter_func=None, from_record = None, to_record = None):
    """
    A generator.
    Split up a generic or structured text file, and return fragments as they become available, via yield.
    Structures supported are fasta, fastq (and text). Based on an original stand-alone
    script "slice_fastq.py" - updated to support paired fastq files and brought "in-house" to tardis, and
    implemented as a generator so that we can get chunknames and launch jobs , as the chunks become
    available. 
    Returns a tuple : ((inputfilename1, inputfilename2), (fragmentname1, fragmentname2))
    (The input names are returned as well, so that consumers of this generator know which
    original name each fragment relates to)
    The first element of the sub-tuples contains original / fragment-filenames obtained by (optionally uncompressing and)
    splitting up the first file.
    The second element of the sub-tuple contains either None, if there was only one file to process, or corresponding fragment
    filenames obtained by splitting up the second file in synch with the first file.
    
    The pairBond argument is a function (usually a lambda), which is applied to each pair of records
    from filename1 and filename2 , when processing two files. It tests whether they are in synch. For example
    for paired fastq files, this function could be "lambda x,y: x.name == y.name". The function should return True
    if a pair of records are in synch, or False if not. If the function returns False, then an exception is raised
    as this is unrecoverable - it indicates the pair of files are incompatible (e.g. - may indicate an upstream
    error in trimming of paired read files )
    """

    #named indexes
    LOWER = 0
    UPPER = 1

    # if chunksize zero yield empty chunknames and stop
    if argchunksize == 0:
        yield ((filename1, filename2), (None, None))
        raise StopIteration

    # some arg checks
    if filename2 is None and pairBond != None:
        caller.logWriter.info(
            "getConditionedFilenames : warning pairBond function ignored, no second file"
        )


    caller.logWriter.info("getConditionedFilenames : conditioning %s to %s chunksize %d informat %s outformat %s samplerate %s from %s to %s file2 %s"%(filename1, outdir, \
                                                                                            argchunksize , informat, outformat, samplerate, from_record, to_record , filename2))

    chunknames1 = []
    chunknames2 = []

    # adjust chunksize if we are sampling
    chunksize = argchunksize
    if samplerate != None:
        chunksize = int(.5 + samplerate * argchunksize)
        if argchunksize > 0 and chunksize == 0:
            caller.error(
                "error - chunksize was rounded to zero after adjusting for sampling - please specify a chunksize which ignores your sampling rate (it will be adjusted later)"
            )
            raise StopIteration

    # open infiles
    (infile1, uncompressedName1
     ) = textDataConditioner.getUncompressedFilestream(filename1)
    infile2 = None

    if filename2 != None:
        (infile2, uncompressedName2
         ) = textDataConditioner.getUncompressedFilestream(filename2)

    #if chunksize != 0:
    chunk = 1
    chunksYieldedCount = 0

    chunkname1 = os.path.basename(uncompressedName1)
    if filename2 != None:
        chunkname2 = os.path.basename(uncompressedName2)

    #print "DEBUG %s %s"%(uncompressedName1, chunkname1)

    # set up iterators over structured input records
    iter1 = None
    iter2 = None
    if informat in ("fastq", "fasta"):
        from Bio import SeqIO
        iter1 = SeqIO.parse(infile1, informat)
        if infile2 != None:
            iter2 = SeqIO.parse(infile2, informat)
    elif informat == "text":
        iter1 = infile1
        if infile2 != None:
            iter2 = infile2
    else:
        caller.error("unsupported input file format : %s" % informat)
        caller.logWriter.info("unsupported input file format : %s" % informat)
        raise StopIteration
        #raise tardisException("unsupported input file format : %s"%informat)

    # if we have a record filter, make iterators to apply this
    # ( currently only support a single filter - i.e. can't specify a different one for each pair)
    if record_filter_func is not None:
        caller.logWriter.info("inserting filter function")
        iter1 = (record_filter_func(unfiltered) for unfiltered in iter1)
        if iter2 is not None:
            iter2 = (record_filter_func(unfiltered) for unfiltered in iter2)

    # if there are two iterators zip them up to make an iterator over paired input. Else
    # make a paired iterator with the second iterator being a dummy repeat returning None
    piter = iter1
    if iter1 != None and iter2 != None:
        piter = itertools.izip(iter1, iter2)
    else:
        piter = itertools.izip(iter1, itertools.repeat(None))

    # output  !
    output_count = 0
    input_count = 0
    outfile1 = None
    outfile2 = None
    record1 = None
    record2 = None
    try:
        for (record1, record2) in piter:
            input_count += 1

            # will sample the output if needed
            sampleBool = tutils.getSampleBool(
                samplerate)  # 1 or 0 (always 1 if not sampling)

            # will length-filter the output if needed.
            if length_bounds != (None, None):
                for check_record in (record1, record2):
                    if check_record is not None:
                        if length_bounds[LOWER] is not None:
                            if len(check_record) < length_bounds[LOWER]:
                                sampleBool = 0
                        if length_bounds[UPPER] is not None:
                            if len(check_record) > length_bounds[UPPER]:
                                sampleBool = 0

            # will slice the file(s) if required
            if from_record is not None:
                if input_count < from_record:
                    sampleBool = 0

            if to_record is not None:
                if input_count > to_record:
                    sampleBool = 0

            if sampleBool != 1:
                continue

            output_count += sampleBool

            if chunksize > 0:
                mychunk = 1 + int(output_count / (1.0 * chunksize))
            else:
                mychunk = chunk

            # open a chunkfile if we need one
            if outfile1 is None:
                #outfilename1 =  os.path.join(outdir, "%s.%05d.%s"%(chunkname1, chunk, outformat))
                #outfilename1 =  os.path.join(outdir, "%s.%05d"%(chunkname1, chunk))
                name_parts = os.path.splitext(chunkname1)
                outfilename1 = os.path.join(
                    outdir,
                    "%s.%05d%s" % (name_parts[0], chunk, name_parts[1]))

                #print "DEBUG : %s %s"%(outdir, outfilename1)
                if os.path.exists(outfilename1):
                    #raise tardisException("getConditionedFilenames : error - %s already exists"% outfilename1)
                    caller.error(
                        "getConditionedFilenames : error - %s already exists" %
                        outfilename1)
                    caller.logWriter.info(
                        "the last sequences encountered before the error were : %s, %s"
                        % (record1, record2))
                    raise StopIteration

                outfile1 = open(outfilename1, "w")
                chunknames1.append(outfilename1)

                if filename2 != None:
                    #outfilename2 =  os.path.join(outdir, "%s.%05d.%s"%(chunkname2, chunk, outformat))
                    #outfilename2 =  os.path.join(outdir, "%s.%05d"%(chunkname2, chunk))
                    name_parts = os.path.splitext(chunkname2)
                    outfilename2 = os.path.join(
                        outdir,
                        "%s.%05d%s" % (name_parts[0], chunk, name_parts[1]))
                    if os.path.exists(outfilename2):
                        #raise tardisException("getConditionedFilenames : error - %s already exists"% outfilename2)
                        caller.error(
                            "getConditionedFilenames : error - %s already exists"
                            % outfilename2)
                        caller.logWriter.info(
                            "the last sequences encountered before the error were : %s, %s"
                            % (record1, record2))
                        raise StopIteration

                    outfile2 = open(outfilename2, "w")
                    chunknames2.append(outfilename2)

            # if two files, check pair-bonding and if OK output both records
            if outfile1 != None and outfile2 != None and pairBond != None:
                if not pairBond(record1, record2):
                    #raise tardisException("pair bonding error - %s does not bond with %s"%(str(record1), str(record2)))
                    caller.error(
                        "pair bonding error - %s does not bond with %s" %
                        (str(record1), str(record2)))
                    caller.logWriter.info(
                        "the last sequences encountered before the error were : %s, %s"
                        % (record1, record2))
                    raise StopIteration

                if outformat in ("fasta", "fastq"):
                    outfile1.write(record1.format(outformat))
                    outfile2.write(record2.format(outformat))
                else:
                    outfile1.write(record1)
                    outfile2.write(record2)

            elif outfile1 != None:
                if outformat in ("fasta", "fastq"):
                    outfile1.write(record1.format(outformat))
                else:
                    outfile1.write(record1)

            # if need a new chunk, close and yield the old one (if there is one)
            if mychunk > chunk:
                chunkInfo = [None, None]
                if outfile1 != None:
                    outfile1.close()
                    outfile1 = None
                    chunkInfo[0] = outfilename1
                if outfile2 != None:
                    outfile2.close()
                    outfile2 = None
                    chunkInfo[1] = outfilename2

                if listfilename1 is not None and listfilename2 is not None:
                    yield ((listfilename1, listfilename2), chunkInfo)
                elif listfilename1 is not None:
                    yield ((listfilename1, filename2), chunkInfo)
                elif listfilename2 is not None:
                    yield ((filename1, listfilename2), chunkInfo)
                else:
                    yield ((filename1, filename2), chunkInfo)

                chunksYieldedCount += 1

                #yield ((filename1, filename2),chunkInfo)

                chunk = mychunk

                if chunk > MAX_DIMENSION:
                    #raise tardisException("error - too many chunks - please adjust chunksize to yield no more than %d chunks"%MAX_DIMENSION)
                    caller.error(
                        "error - too many chunks - please adjust chunksize to yield no more than %d chunks"
                        % MAX_DIMENSION)
                    caller.logWriter.info(
                        "the last sequences encountered before the error were : %s, %s"
                        % (record1, record2))
                    raise tutils.tardisException(
                        "error - too many chunks - please adjust chunksize to yield no more than %d chunks"
                        % MAX_DIMENSION)

    # handle exceptions that relate to problems with the data so we can report
    # where we are, then re-raise so we bail out.
    except ValueError, e:
        caller.error(e)
        caller.logWriter.info(
            "the last sequences encountered before the error were : %s, %s" %
            (record1, record2))
        #
        #raise e
        raise StopIteration
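
The pairBond argument described in the docstring is just a two-argument predicate. A tiny self-contained illustration, using plain objects in place of Bio.SeqIO records:

class FakeRecord(object):
    def __init__(self, name):
        self.name = name

pair_bond = lambda x, y: x.name == y.name   # the form suggested in the docstring

print(pair_bond(FakeRecord("read_1"), FakeRecord("read_1")))   # True  - records in synch
print(pair_bond(FakeRecord("read_1"), FakeRecord("read_7")))   # False - files out of synch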
Example 7
    def advance_chunk(caller, chunk, total_chunks_in, batonfile1, batonfile2,
                      chunkbase1, chunkbase2, outdir, chunk_info):
        # if total_chunks has been set, and we are being asked
        # for a chunk number greater than this, raise StopIteration
        total_chunks = total_chunks_in
        if total_chunks is not None:
            if chunk > total_chunks:
                raise StopIteration

        # in a wait loop, poll
        wait_duration = 0
        exception_count = 0
        while True:
            # if we find either baton file , try reading the total number of chunks from it if we haven't already obtained this
            if total_chunks is None:
                if os.path.isfile(batonfile1):
                    try:
                        with open(batonfile1, "r") as bf:
                            for record in bf:
                                total_chunks = int(
                                    re.split("=", record.strip())[1])
                                caller.logWriter.info(
                                    "%d chunks in total were written (according to %s)"
                                    % (total_chunks, batonfile1))
                                break
                    except Exception, e:
                        # this could happen if we try to read the baton file at the same time as
                        # the chunk-writer writes it. No action needed - will get it
                        # next pass. But if we fail more than 50 times give up.
                        caller.logWriter.info(
                            "warning - exception (%s) reading batonfile %s" %
                            (str(e), batonfile1))
                        exception_count += 1
                        if exception_count >= 50:
                            caller.logWriter.info(
                                "error - too many failed attempts to parse batonfile %s - giving up"
                                % batonfile1)
                            raise tutils.tardisException(
                                "error - too many failed attempts to parse batonfile %s - giving up"
                                % batonfile1)

            if total_chunks is None and batonfile2 is not None:
                if os.path.isfile(batonfile2):
                    try:
                        with open(batonfile2, "r") as bf:
                            for record in bf:
                                total_chunks = int(
                                    re.split("=", record.strip())[1])
                                caller.logWriter.info(
                                    "%d chunks in total were written (according to %s)"
                                    % (total_chunks, batonfile2))
                                break
                    except Exception, e:
                        # this could happen if we try to read the baton file at the same time as
                        # the chunk-writer writes it. No action needed - will get it
                        # next pass. But if we fail more than 50 times give up.
                        caller.logWriter.info(
                            "warning - exception (%s) reading batonfile %s" %
                            (str(e), batonfile2))
                        exception_count += 1
                        if exception_count >= 50:
                            caller.logWriter.info(
                                "error - too many failed attempts to parse batonfile %s - giving up"
                                % batonfile2)
                            raise tutils.tardisException(
                                "error - too many failed attempts to parse batonfile %s - giving up"
                                % batonfile2)
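
The baton file read above is assumed to carry a key=value line such as total_chunks=42; the loop keeps polling until it can parse that value. The parsing step in isolation:

# Hedged sketch - the exact baton-file line format is an assumption based on the
# re.split("=", ...) parsing above.
import re

def read_total_chunks(batonfile):
    with open(batonfile, "r") as bf:
        for record in bf:
            return int(re.split("=", record.strip())[1])
    return None   # empty file - caller keeps polling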
Example 8
def _fast_get_conditioned_filenames(caller, filename1, argchunksize, outdir, informat, outformat, samplerate,filename2,\
                            listfilename1 = None, listfilename2 = None, length_bounds = (None, None) , from_record = None, to_record = None):
    """
    A generator.

    See also below, _slow_get_conditioned_filenames. This was the original version. Cloned and hacked to
    make this version. 

    This method forks and calls one or more subprocesses to do the actual split - the parent
    polls for the split files.
    
    This means that e.g. record_filter_func is not supported as this method does not have access to sequence
    objects.
    
    Other record-oriented filters (e.g. by length, from-to, samplerate) are planned but not yet supported.
    """

    #named indexes
    LOWER = 0
    UPPER = 1

    # if chunksize zero yield empty chunknames and stop
    if argchunksize == 0:
        yield ((filename1, filename2), (None, None))
        raise StopIteration

    caller.logWriter.info("_fast_get_conditioned_filenames : conditioning %s to %s chunksize %d informat %s outformat %s samplerate %s from %s to %s file2 %s"%(filename1, outdir, \
                                                                                            argchunksize , informat, outformat, samplerate, from_record, to_record , filename2))

    chunknames1 = []
    chunknames2 = []

    # (we don't adjust chunksize if we are sampling as kseq does it)
    chunksize = argchunksize

    # set various filenames that will be needed
    uncompressedName1 = textDataConditioner.getUncompressedBaseName(filename1)
    batonfile1 = os.path.join(
        outdir, "%s.chunk_stats$" % os.path.basename(uncompressedName1))
    chunkbase1 = os.path.basename(uncompressedName1)
    name_parts = os.path.splitext(chunkbase1)
    chunktemplate1 = os.path.join(outdir,
                                  name_parts[0] + ".%05d" + name_parts[1])

    uncompressedName2 = None
    batonfile2 = None
    chunkbase2 = None
    chunktemplate2 = None
    if filename2 is not None:
        uncompressedName2 = textDataConditioner.getUncompressedBaseName(
            filename2)
        batonfile2 = os.path.join(
            outdir, "%s.chunk_stats$" % os.path.basename(uncompressedName2))
        chunkbase2 = os.path.basename(uncompressedName2)
        name_parts = os.path.splitext(chunkbase2)
        chunktemplate2 = os.path.join(outdir,
                                      name_parts[0] + ".%05d" + name_parts[1])

    split_logfile = os.path.join(outdir, "split_processing.log")

    # fork a process to kick off split of file 1
    try:
        split_command = [
            "kseq_split", "-f", batonfile1, "-o", outformat, filename1,
            str(chunksize), chunktemplate1
        ]
        if samplerate is not None:
            split_command = [
                "kseq_split", "-f", batonfile1, "-o", outformat, "-s",
                str(samplerate), filename1,
                str(chunksize), chunktemplate1
            ]

        caller.logWriter.info(
            "fast input conditioner forking split process : %s" %
            " ".join(split_command))
        me = os.fork()
        if me == 0:  # child
            mypid = os.getpid()

            # kick off the splitting process and wait for it to finish
            proc = subprocess.Popen(split_command,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            (stdout, stderr) = proc.communicate()

            with open(split_logfile, "a") as l:
                print >> l, "child process %d started split subprocess %d" % (
                    mypid, proc.pid)
                print >> l, "split subprocess stdout : \n%s" % stdout
                print >> l, "split subprocess stderr : \n%s" % stderr
                print >> l, "split subprocess %d terminated with return value %d" % (
                    proc.pid, proc.returncode)
                print >> l, "child process %d exiting with status %s" % (
                    mypid, proc.returncode)
            sys.exit(proc.returncode)
    except OSError, e:
        caller.logWriter.info(
            "fast input conditioner : error - fork of %s failed with OSError : %s"
            % (" ".join(split_command), e))
        raise tutils.tardisException(
            "fast input conditioner : error - fork of %s failed with OSError : %s"
            % (" ".join(split_command), e))
Example 9
    # parent
    if filename2 != None:
        try:
            split_command = [
                "kseq_split", "-f", batonfile2, "-o", outformat, filename2,
                str(chunksize), chunktemplate2
            ]
            if samplerate is not None:
                raise tutils.tardisException(
                    "this input conditioner does not support paired random sampling ! - should not be executing this code block !?"
                )

            caller.logWriter.info(
                "fast input conditioner forking split process : %s" %
                " ".join(split_command))
            me = os.fork()
            if me == 0:  # child
                mypid = os.getpid()

                # kick off the splitting process and wait for it to finish
                proc = subprocess.Popen(split_command,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                (stdout, stderr) = proc.communicate()
Example 10
    def runCommand(self, argCommand=None):
        command = argCommand
        if argCommand is None:
            command = self.command

        if len(command) > 0:
            self.logWriter.info("localhpcJob : running %s" % str(command))

            # set up the shell scriptfile(s) (one per chunk) (unless this is a rerun, in which case it's already been done)
            if self.submitCount == 0:
                runtime_environmentcode = self.runtime_config_template.safe_substitute(
                )  # currently no templating actually done here
                shellcode = self.shell_script_template.safe_substitute(configure_runtime_environment=runtime_environmentcode,\
                                                                       hpcdir=self.workingRoot,command=string.join(self.command," "),\
                                                                       startdir=self.controller.options["startdir"],\
                                                                       input_conditioning=str(self.controller.options["input_conditioning"]))

                self.scriptfilename = os.path.join(self.workingRoot,
                                                   "run%d.sh" % self.jobNumber)
                if os.path.isfile(self.scriptfilename):
                    raise tutils.tardisException("error %s already exists" %
                                                 self.scriptfilename)
                f = open(self.scriptfilename, "w")
                self.logWriter.info(
                    "localhpcJob : local shell script wrapper is %s" %
                    self.scriptfilename)
                f.writelines(shellcode)
                f.close()
                os.chmod(
                    self.scriptfilename, stat.S_IRWXU | stat.S_IRGRP
                    | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)

                self.stdoutfilename = "%s.stdout" % self.scriptfilename
                self.stderrfilename = "%s.stderr" % self.scriptfilename
                self.stdoutnamepattern = os.path.basename(self.stdoutfilename)
                self.stderrnamepattern = os.path.basename(self.stderrfilename)

                self.logname = "%s.log" % self.scriptfilename
                self.submitCount += 1

            # launch the job if we can.
            # we can launch the job if jobs running < max_processes
            # first update process statuses
            self.waitOnChildren()

            running_processes = self.getRunningProcesses()
            #self.logWriter.info("running jobs : %s"%str(running_processes))
            #jobs_running = hpcConditioner.getJobSubmittedCount() - hpcConditioner.getResultsSentCount()
            if len(running_processes
                   ) < self.controller.options["max_processes"]:
                self.logWriter.info("localhpcJob : launching %s" %
                                    self.scriptfilename)
                self.jobHeld = False
            else:
                self.logWriter.info(
                    "localhpcJob : not launching %s (jobs_running = %s)" %
                    (self.scriptfilename, str(running_processes)))
                #self.logWriter.info("DEBUGx : job list is %s"%str(hpcConditioner.jobList))
                #self.logWriter.info("DEBUGx : job submit counts are  %s"%str([j.submitCount for j in hpcConditioner.jobList]))
                #self.logWriter.info("DEBUGx : jobs sent are  %d"%hpcConditioner.getResultsSentCount())
                self.jobHeld = True
                return

            local_submit = [self.scriptfilename]
            if self.controller.options["dry_run"]:
                self.logWriter.info(
                    "localhpcJob : this is a dry run - not launching the job")
            else:
                self.logWriter.info("localhpcJob : forking to execute %s" %
                                    str(local_submit))
                self.jobHeld = False

                # now fork - if we are the parent, return; if we are the child, execute the job and then exit
                # before forking, however, do an asynchronous waitpid to clean up defunct processes
                # (currently we don't do anything with these results - we just want to
                # do a wait so that the child processes can be removed from the process table)
                #self.logWriter.info("localhpcJob : checking waits")
                #try:
                #    pidresults = os.waitpid(0, os.WNOHANG)
                #    self.logWriter.info("wait returned : %s"%str(pidresults))  # returns [(pid, status), (pid,status),...]
                #    pidresultsDict = dict([(pidresult[0], pidresult) for pidresult in [pidresults]])
                #    self.workerList.update(pidresultsDict)
                #except OSError as inst:
                #    if inst.errno == 10:
                #        self.logWriter.info("(no child processes)")
                #    else:
                #        self.logWriter.info("(unknown OSError - re-raising)")
                #        raise inst

                try:
                    me = os.fork()
                    if me == 0:
                        mypid = os.getpid()
                        with open(self.logname, "w") as l:
                            print >> l, "job starting pid %d" % mypid

                        fstdout = open(self.stdoutfilename, "w")
                        fstderr = open(self.stderrfilename, "w")

                        self.proc = subprocess.Popen(local_submit,
                                                     stdout=fstdout,
                                                     stderr=fstderr)
                        self.proc.communicate()
                        fstdout.close()
                        fstderr.close()
                        self.submitreturncode = self.proc.returncode
                        self.logWriter.info(
                            "localhpcJob : %s (pid %d) has returned (status %s)"
                            % (str(local_submit), os.getpid(),
                               self.submitreturncode))
                        self.logWriter.info(
                            "localhpcJob : stdout was written to %s" %
                            self.stdoutfilename)
                        self.logWriter.info(
                            "localhpcJob : stderr was written to %s" %
                            self.stderrfilename)
                        self.logWriter.info("localhpcJob : child %d exiting" %
                                            os.getpid())
                        with open(self.logname, "a") as l:
                            print >> l, "pid %d job terminated return value %d" % (
                                os.getpid(), self.submitreturncode)
                        sys.exit(0)
                    else:
                        self.workerList[me] = (0, 0)
                        self.submitCount += 1
                        self.submitreturncode = 0
                        self.logWriter.info("localhpcJob : parent returning")
                        return
                except OSError, e:
                    self.logWriter.info(
                        "localhpcJob : warning - fork of %s failed with OSError : %s"
                        % (self.scriptfilename, e))
                    self.logWriter.info("localhpcJob : job %s held " %
                                        self.scriptfilename)
                    self.jobHeld = True
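The forking logic above is easy to lose among the logging calls. As a rough stand-alone sketch of the same pattern (the helper names and the non-blocking reaper below are illustrative, not part of tardis):

import os
import subprocess
import sys

def launch_detached(script, stdout_path, stderr_path):
    # parent: fork, remember the child pid and return it immediately
    pid = os.fork()
    if pid != 0:
        return pid

    # child: run the wrapper script, capture its output, then exit -
    # the child must never fall back into the caller's control flow
    with open(stdout_path, "w") as out, open(stderr_path, "w") as err:
        proc = subprocess.Popen([script], stdout=out, stderr=err)
        proc.communicate()
    sys.exit(0)

def reap_children():
    # non-blocking reap of any finished children (the role of the
    # commented-out waitpid block above), e.g. called from a polling loop
    try:
        while True:
            pid, status = os.waitpid(-1, os.WNOHANG)
            if pid == 0:
                break
    except OSError:
        pass  # no child processes left to wait for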
Example n. 11
0
    def get_templates(self,default_job_template_name, default_shell_template_name, default_runtime_config_template_name):
        """
        this method examines the run-time arguments supplied to tardis and from these figures
        out a job template, a shell template and a runtime config template.
        The job template is used to create a job file for the scheduler (e.g. slurm), for each job to be launched.
        The shell template is used to create a wrapper shell script (i.e. run1.sh, run2.sh etc.), for each task.
        The runtime config template is used to generate source to be included in the wrapper shell - i.e. in run1.sh, run2.sh etc.

        The run-time arguments examined by this method specify whether the user wants to

        a) use one of the named, hard-coded (in tutils.py) templates
        b) supply the name of a file containing a custom template
        c) supply neither a nor b, in which case a default is used.

        (specifying both a and b is an error)
        """
        (job_template, shell_script_template, runtime_config_template) = (None, None, None)
        
        if self.controller.options is not None:

            # figure out a job template from the options. (You can specify one of the inbuilt templates by name, or
            # supply a file containing a custom template)
            job_template_name = self.controller.options.get("job_template_name",None)
            job_template_filename = self.controller.options.get("jobtemplatefile",None)        
            
            # use the default if there is one and it's needed
            if default_job_template_name is not None:
                if job_template_name is None and job_template_filename is None:
                    #use the default job template
                    job_template_name = default_job_template_name

            # check we have either a named template or a template file, but not both
            if job_template_name is not None and job_template_filename is not None:
                raise tutils.tardisException("error both job_template_name (%s) and job_template_filename (%s) defined - only define one of these"%(job_template_name,job_template_filename) )
            elif job_template_name is None and job_template_filename is None:
                raise tutils.tardisException("error neither  job_template_name nor job_template_filename are defined (and no default available")

            if job_template_name is not None:
                job_template = tutils.getTemplateContent(self.controller.options, job_template_name, logWriter=self.logWriter)
            else:
                if not os.path.isfile(job_template_filename):
                    raise tutils.tardisException("error job template file %s not found"%job_template_filename )    
                job_template = string.join(file(job_template_filename,"r"),"")
                
            if job_template is None:
                raise tutils.tardisException("hpcJob: Error job template is null after templating")
            job_template = string.Template(job_template)



            # figure out a shell template from the options. (You can specify one of the inbuilt templates by name, or
            # supply a file containing a custom template)
            shell_template_name = self.controller.options.get("shell_template_name",None)
            shell_template_filename = self.controller.options.get("shelltemplatefile",None)        
            if shell_template_name is None and shell_template_filename is None:
                #use the default local shell template
                shell_template_name = default_shell_template_name
            if shell_template_name is not None and shell_template_filename is not None:
                raise tutils.tardisException("error both shell_template_name (%s) and shell_template_filename (%s) defined - only define one of these"%(shell_template_name,shell_template_filename) )

            if shell_template_name is not None:
                shell_script_template = tutils.getTemplateContent(self.controller.options, shell_template_name, logWriter=self.logWriter)
            else:
                shell_script_template = string.join(file(shell_template_filename,"r"),"")
                
            if shell_script_template is None:
                raise tutils.tardisException("hpcJob : Error shell template is null after templating")
            shell_script_template = string.Template(shell_script_template)


            # figure out run-time configuration code (You can specify one of the inbuilt configs by name, or
            # supply a file containing a custom config)
            runtime_config_template_name = self.controller.options.get("runtime_config_name",None)
            runtime_config_template_filename = self.controller.options.get("runtimeconfigsourcefile",None)

            # use the default if available and needed. Note this logic means that if you supply a run-time config, the
            # default will not be used - so if, for example, the default loads a base env and you supply your own config,
            # you will need to explicitly load the base yourself before doing your own setup.
            # This is based on the assumption that it's easier to do than to undo.
            if default_runtime_config_template_name is not None:
                if runtime_config_template_name is None and runtime_config_template_filename is None:
                    #use the default - for example this might load a default conda env or load a default module (site dependent)
                    runtime_config_template_name = default_runtime_config_template_name

            # don't allow both a named template and a template file
            if runtime_config_template_name is not None and runtime_config_template_filename is not None:
                raise tutils.tardisException("error both runtime_config_template_name (%s) and runtime_config_template_filename (%s) defined - only define one of these"%(runtime_config_template_name,runtime_config_template_filename) )

            if runtime_config_template_name is not None:
                runtime_config_template = tutils.getTemplateContent(self.controller.options, runtime_config_template_name, logWriter=self.logWriter)
            else:
                runtime_config_template = string.join(file(runtime_config_template_filename,"r"),"")
                
            if runtime_config_template is None:
                raise tutils.tardisException("hpcJob : Error config template is null after templating")
            
            runtime_config_template = string.Template(runtime_config_template)


        return (job_template, shell_script_template, runtime_config_template)
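The three returned values are plain string.Template objects, which downstream code fills in with safe_substitute. A minimal illustration of that final step (the template text and substitution values below are made up for the example, not the real tardis templates):

import string

# hypothetical template text - real templates come from tutils or a user-supplied file
shell_script_template = string.Template(
    "#!/bin/bash\n"
    "$configure_runtime_environment\n"
    "cd $hpcdir\n"
    "$command\n"
)

shellcode = shell_script_template.safe_substitute(
    configure_runtime_environment="module load example/1.0",  # illustrative value
    hpcdir="/tmp/work",                                       # illustrative value
    command="echo hello",                                     # illustrative value
)
print(shellcode)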