def update(self, options, saveconf, tag=None):
    """Validate *options* against this cab's parameter schema, resolve io
    parameters to their in-container destinations, and save the result.

    options  : dict mapping a parameter name (or its mapping alias) to a value
    saveconf : path of the JSON file the validated configuration is written to
    tag      : image tag reported in the log (defaults to self.tag)

    Raises StimelaCabParameterError on missing/unknown parameters or bad
    io locations.
    """
    tag = tag or self.tag
    # Every required parameter must be present under its name or its mapping.
    for needed in filter(lambda a: a.required, self.parameters):
        if needed.name not in options.keys() and needed.mapping not in options.keys():
            raise StimelaCabParameterError(
                "Parameter {} is required but has not been specified".format(needed.name))
    self.log.info(f"Validating parameters for cab {self.task} ({self.base}:{tag})")

    def location_prefix(location):
        # Map a location keyword to its in-container destination directory,
        # verifying that the matching host-side folder has been configured.
        if location in ("input", "msfile"):
            if location == "input" and self.indir is None:
                raise StimelaCabParameterError(
                    "You have specified input files, but have not specified an input folder")
            if location == "msfile" and self.msdir is None:
                raise StimelaCabParameterError(
                    "You have specified MS files, but have not specified an MS folder")
        elif self.outdir is None:
            raise StimelaCabParameterError(
                "You have specified output files, but have not specified an output folder")
        return IODEST[location]

    for name, value in options.items():
        matched = False
        for param in self.parameters:
            if name not in (param.name, param.mapping):
                continue
            matched = True
            if param.deprecated:
                self.log.warning(f"Parameter {name} for cab {self.task} is deprecated, and will be removed in a future release")
            if not param.io:
                # Plain (non-file) parameter: validate and store verbatim.
                if isinstance(value, pathformatter):
                    raise StimelaCabParameterError("Path formatter type specified, but {} is not io".format(param.name))
                self.log.debug(
                    "Validating parameter {}".format(param.name))
                param.validate(value)
                param.value = value
                continue
            # io parameter: resolve each entry to "<destination>/<name>".
            if value is None:
                continue
            param.validate(value)
            param.value = []
            items = value if isinstance(value, (list, tuple)) else [value]
            for item in items:
                if isinstance(item, pathformatter):
                    if param.check_io:
                        raise StimelaCabParameterError("Pathformatters cannot be used on io parameters where io has to be checked")
                    # Render the formatter: placeholders expand to destination
                    # prefixes, literal pieces are copied through unchanged.
                    rendered = ""
                    for piece in item():
                        if not isinstance(piece, placeholder):
                            rendered += piece
                            continue
                        if piece() not in IODEST.keys():
                            raise StimelaCabParameterError('The location \'{0}\' specified for parameter \'{1}\', is unknown. Choices are {2}'.format(
                                piece(), param.name, IODEST.keys()))
                        rendered += "{0}/".format(location_prefix(piece()))
                    param.value.append(rendered)
                elif isinstance(item, str):
                    # A "value:location" suffix overrides the parameter's
                    # default io destination.
                    pieces = item.split(":")
                    if len(pieces) == 2:
                        if pieces[1] not in IODEST.keys():
                            raise StimelaCabParameterError('The location \'{0}\' specified for parameter \'{1}\', is unknown. Choices are {2}'.format(
                                pieces[1], param.name, IODEST.keys()))
                        self.log.info("Location of '{0}' was specified as '{1}'. Will overide default.".format(
                            param.name, pieces[1]))
                        item, location = pieces
                    else:
                        location = param.io
                    dest = location_prefix(location)
                    if location in ("input", "msfile"):
                        # Optionally confirm the file exists on the host side.
                        host_path = "{0}/{1}".format(self.indir if location == "input" else self.msdir, item)
                        if param.check_io and not os.path.exists(host_path):
                            raise StimelaCabParameterError("File '{0}' for parameter '{1}' could not be located at '{2}'.".format(
                                item, param.name, host_path))
                    param.value.append("{0}/{1}".format(dest, item))
                else:
                    raise StimelaCabParameterError("io parameter must either be a pathformatter object or a string")
            # Collapse a single-element result back to a scalar.
            if len(param.value) == 1:
                param.value = param.value[0]
        if not matched:
            raise StimelaCabParameterError(
                "Parameter {0} is unknown. Run 'stimela cabs -i {1}' to get help on this cab".format(name, self.task))
    conf = {}
    conf.update(self.toDict())
    utils.writeJson(saveconf, conf)
    self.log.info(f"Parameters validated and saved to {saveconf}")
def run(self, steps=None, resume=False, redo=None):
    """
    Run a Stimela recipe.

    steps : recipe steps to run
    resume : resume recipe from last run
    redo : Re-run an old recipe from a .last file
    """
    recipe = {"name": self.name, "steps": []}
    # redo/resume are no longer supported in this implementation.
    if redo:
        self.log.error("This feature has been depricated")
        raise SystemExit
    elif resume:
        # TODO(sphe) Need to re-think how best to do this
        self.log.error("This feature has been depricated")
        raise SystemExit
    if getattr(steps, '__iter__', False):
        if isinstance(steps[0], str):
            # Translate step labels into their 1-based step numbers.
            labels = [job.label.split('::')[0] for job in self.jobs]
            numbered = []
            for step in steps:
                try:
                    numbered.append(labels.index(step) + 1)
                except ValueError:
                    raise StimelaCabParameterError(
                        'Recipe label ID [{0}] doesn\'t exist'.format(step))
            steps = numbered
    else:
        steps = range(1, len(self.jobs) + 1)
    jobs = [(step, self.jobs[step - 1]) for step in steps]

    for i, (step, job) in enumerate(jobs):
        start_time = datetime.now()
        job.log.info('job started at {}'.format(start_time),
                     # the extra attributes are filtered by e.g. the CARACal logger
                     extra=dict(stimela_job_state=(job.name, "running")))
        self.log.info('STEP {0} :: {1}'.format(i + 1, job.label))
        self.active = job
        try:
            # Stamp a small header into the job's log file before running.
            with open(job.logfile, 'a') as astd:
                astd.write('\n-----------------------------------\n')
                astd.write('Stimela version : {}\n'.format(version))
                astd.write('Cab name : {}\n'.format(job.image))
                astd.write('-------------------------------------\n')
            job.run_job()
            # raise exception if wranglers declared the job a failure
            if job.declare_status is False:
                raise StimelaRecipeExecutionError("job declared as failed")
            self.log2recipe(job, recipe, step, 'completed')
            self.completed.append(job)
            finished_time = datetime.now()
            job.log.info('job complete at {} after {}'.format(finished_time, finished_time - start_time),
                         # the extra attributes are filtered by e.g. the CARACal logger
                         extra=dict(stimela_job_state=(job.name, "complete")))
        except (utils.StimelaCabRuntimeError, StimelaRecipeExecutionError, StimelaCabParameterError) as exc:
            # ignore exceptions if wranglers declared the job a success
            if job.declare_status is True:
                finished_time = datetime.now()
                job.log.info('job complete (declared successful) at {} after {}'.format(finished_time, finished_time - start_time),
                             # the extra attributes are filtered by e.g. the CARACal logger
                             extra=dict(stimela_job_state=(job.name, "complete")))
                continue
            self.remaining = [jb[1] for jb in jobs[i + 1:]]
            self.failed = job
            finished_time = datetime.now()
            job.log.error(str(exc), extra=dict(stimela_job_state=(job.name, "failed"), boldface=True))
            job.log.error('job failed at {} after {}'.format(finished_time, finished_time - start_time),
                          extra=dict(stimela_job_state=(job.name, "failed"), color=None))
            for line in traceback.format_exc().splitlines():
                job.log.error(line, extra=dict(traceback_report=True))
            self.log.info('Completed jobs : {}'.format(
                [c.name for c in self.completed]))
            self.log.info('Remaining jobs : {}'.format(
                [c.name for c in self.remaining]))
            self.log2recipe(job, recipe, step, 'failed')
            for step, jb in jobs[i + 1:]:
                self.log.info('Logging remaining task: {}'.format(jb.label))
                self.log2recipe(jb, recipe, step, 'remaining')
            self.log.info('Saving pipeline information in {}'.format(self.resume_file))
            utils.writeJson(self.resume_file, recipe)
            # raise pipeline exception. Original exception context is discarded by "from None" (since we've already
            # logged it above, we don't need to include it with the new exception)
            raise PipelineException(exc, self.completed, job, self.remaining) from None

    self.log.info('Saving pipeline information in {}'.format(self.resume_file))
    utils.writeJson(self.resume_file, recipe)
    self.log.info('Recipe executed successfully')
    return 0
def run(self, steps=None, resume=False, redo=None):
    """
    Run a Stimela recipe.

    steps : recipe steps to run
    resume : resume recipe from last run
    redo : Re-run an old recipe from a .last file
    """
    recipe = {"name": self.name, "steps": []}
    if redo:
        # Rebuild the job list from a previously saved recipe file.
        recipe = utils.readJson(redo)
        self.log.info('Rerunning recipe {0} from {1}'.format(recipe['name'], redo))
        self.log.info('Recreating recipe instance..')
        self.jobs = []
        for step in recipe['steps']:
            # add I/O folders to the json file
            # add a string describing the contents of these folders
            # The user has to ensure that these folders exist, and have the required content
            if step['jtype'] == 'docker':
                self.log.info('Adding job \'{0}\' to recipe. The container will be named \'{1}\''.format(
                    step['cab'], step['name']))
                cont = docker.Container(step['cab'], step['name'],
                                        label=step['label'], logger=self.log,
                                        shared_memory=step['shared_memory'],
                                        workdir=WORKDIR)
                self.log.debug('Adding volumes {0} and environmental variables {1}'.format(
                    step['volumes'], step['environs']))
                cont.volumes = step['volumes']
                cont.environs = step['environs']
                cont.shared_memory = step['shared_memory']
                cont.input_content = step['input_content']
                cont.msdir_content = step['msdir_content']
                cont.logfile = step['logfile']
                job = StimelaJob(step['name'], recipe=self,
                                 label=step['label'], cabpath=self.cabpath)
                job.job = cont
                job.jtype = 'docker'
            elif step['jtype'] == 'function':
                # Look up the python callable by name in the caller's locals.
                func = inspect.currentframe().f_back.f_locals[step['function']]
                job = StimelaJob(step['name'], recipe=self, label=step['label'])
                job.python_job(func, step['parameters'])
                job.jtype = 'function'
            self.jobs.append(job)
    elif resume:
        self.log.info("Resuming recipe from last run.")
        try:
            recipe = utils.readJson(self.resume_file)
        except IOError:
            raise StimelaRecipeExecutionError(
                "Cannot resume pipeline, resume file '{}' not found".format(self.resume_file))
        steps_ = recipe.pop('steps')
        recipe['steps'] = []
        _steps = []
        for step in steps_:
            # Completed steps are carried over; the rest are re-scheduled.
            if step['status'] == 'completed':
                recipe['steps'].append(step)
                continue
            label = step['label']
            number = step['number']
            # Check if the recipe flow has changed
            if label == self.jobs[number - 1].label:
                self.log.info(
                    'recipe step \'{0}\' is fit for re-execution. Label = {1}'.format(number, label))
                _steps.append(number)
            else:
                raise StimelaRecipeExecutionError(
                    'Recipe flow, or task scheduling has changed. Cannot resume recipe. Label = {0}'.format(label))
        # Check whether there are steps to resume
        if len(_steps) == 0:
            self.log.info('All the steps were completed. No steps to resume')
            sys.exit(0)
        steps = _steps
    if getattr(steps, '__iter__', False):
        if isinstance(steps[0], str):
            # Translate step labels into their 1-based step numbers.
            labels = [job.label.split('::')[0] for job in self.jobs]
            numbered = []
            for step in steps:
                try:
                    numbered.append(labels.index(step) + 1)
                except ValueError:
                    raise StimelaCabParameterError(
                        'Recipe label ID [{0}] doesn\'t exist'.format(step))
            steps = numbered
    else:
        steps = range(1, len(self.jobs) + 1)
    jobs = [(step, self.jobs[step - 1]) for step in steps]

    for i, (step, job) in enumerate(jobs):
        start_time = datetime.now()
        job.log.info('job started at {}'.format(start_time),
                     # the extra attributes are filtered by e.g. the CARACal logger
                     extra=dict(stimela_job_state=(job.name, "running")))
        self.log.info('STEP {0} :: {1}'.format(i + 1, job.label))
        self.active = job
        try:
            # Stamp a small header into the job's log file before running.
            with open(job.logfile, 'a') as astd:
                astd.write('\n-----------------------------------\n')
                astd.write('Stimela version : {}\n'.format(version))
                astd.write('Cab name : {}\n'.format(job.image))
                astd.write('-------------------------------------\n')
            job.run_job()
            self.log2recipe(job, recipe, step, 'completed')
            self.completed.append(job)
            finished_time = datetime.now()
            job.log.info('job complete at {} after {}'.format(finished_time, finished_time - start_time),
                         # the extra attributes are filtered by e.g. the CARACal logger
                         extra=dict(stimela_job_state=(job.name, "complete")))
        except (utils.StimelaCabRuntimeError, StimelaRecipeExecutionError, StimelaCabParameterError) as e:
            self.remaining = [jb[1] for jb in jobs[i + 1:]]
            self.failed = job
            finished_time = datetime.now()
            job.log.error(str(e), extra=dict(stimela_job_state=(job.name, "failed"), boldface=True))
            job.log.error('job failed at {} after {}'.format(finished_time, finished_time - start_time),
                          extra=dict(stimela_job_state=(job.name, "failed"), color=None))
            for line in traceback.format_exc().splitlines():
                job.log.error(line, extra=dict(traceback_report=True))
            self.log.info('Completed jobs : {}'.format(
                [c.name for c in self.completed]))
            self.log.info('Remaining jobs : {}'.format(
                [c.name for c in self.remaining]))
            self.log2recipe(job, recipe, step, 'failed')
            for step, jb in jobs[i + 1:]:
                self.log.info('Logging remaining task: {}'.format(jb.label))
                self.log2recipe(jb, recipe, step, 'remaining')
            self.log.info('Saving pipeline information in {}'.format(self.resume_file))
            utils.writeJson(self.resume_file, recipe)
            # raise pipeline exception. Original exception context is discarded by "from None" (since we've already
            # logged it above, we don't need to include it with the new exception)
            raise PipelineException(e, self.completed, job, self.remaining) from None

    self.log.info('Saving pipeline information in {}'.format(self.resume_file))
    utils.writeJson(self.resume_file, recipe)
    self.log.info('Recipe executed successfully')
    return 0
def run(self, steps=None, resume=False, redo=None):
    """
    Run a Stimela recipe.

    steps : recipe steps to run
    resume : resume recipe from last run
    redo : Re-run an old recipe from a .last file
    """
    recipe = {"name": self.name, "steps": []}
    if redo:
        # Rebuild the job list from a previously saved recipe file.
        recipe = utils.readJson(redo)
        self.log.info('Rerunning recipe {0} from {1}'.format(recipe['name'], redo))
        self.log.info('Recreating recipe instance..')
        self.jobs = []
        for step in recipe['steps']:
            # add I/O folders to the json file
            # add a string describing the contents of these folders
            # The user has to ensure that these folders exist, and have the required content
            if step['jtype'] == 'docker':
                self.log.info('Adding job \'{0}\' to recipe. The container will be named \'{1}\''.format(
                    step['cab'], step['name']))
                cont = docker.Container(step['cab'], step['name'],
                                        label=step['label'], logger=self.log,
                                        shared_memory=step['shared_memory'])
                self.log.debug('Adding volumes {0} and environmental variables {1}'.format(
                    step['volumes'], step['environs']))
                cont.volumes = step['volumes']
                cont.environs = step['environs']
                cont.shared_memory = step['shared_memory']
                cont.input_content = step['input_content']
                cont.msdir_content = step['msdir_content']
                cont.logfile = step['logfile']
                job = StimelaJob(step['name'], recipe=self, label=step['label'])
                job.job = cont
                job.jtype = 'docker'
            elif step['jtype'] == 'function':
                # Look up the python callable by name in the caller's locals.
                func = inspect.currentframe().f_back.f_locals[step['function']]
                job = StimelaJob(step['name'], recipe=self, label=step['label'])
                job.python_job(func, step['parameters'])
                job.jtype = 'function'
            self.jobs.append(job)
    elif resume:
        self.log.info("Resuming recipe from last run.")
        try:
            recipe = utils.readJson(self.resume_file)
        except IOError:
            raise StimelaRecipeExecutionError(
                "Cannot resume pipeline, resume file '{}' not found".format(self.resume_file))
        steps_ = recipe.pop('steps')
        recipe['steps'] = []
        _steps = []
        for step in steps_:
            # Completed steps are carried over; the rest are re-scheduled.
            if step['status'] == 'completed':
                recipe['steps'].append(step)
                continue
            label = step['label']
            number = step['number']
            # Check if the recipe flow has changed
            if label == self.jobs[number - 1].label:
                self.log.info(
                    'recipe step \'{0}\' is fit for re-execution. Label = {1}'.format(number, label))
                _steps.append(number)
            else:
                raise StimelaRecipeExecutionError(
                    'Recipe flow, or task scheduling has changed. Cannot resume recipe. Label = {0}'.format(label))
        # Check whether there are steps to resume
        if len(_steps) == 0:
            self.log.info('All the steps were completed. No steps to resume')
            sys.exit(0)
        steps = _steps
    if getattr(steps, '__iter__', False):
        if isinstance(steps[0], str):
            # Translate step labels into their 1-based step numbers.
            labels = [job.label.split('::')[0] for job in self.jobs]
            numbered = []
            for step in steps:
                try:
                    numbered.append(labels.index(step) + 1)
                except ValueError:
                    raise StimelaCabParameterError(
                        'Recipe label ID [{0}] doesn\'t exist'.format(step))
            steps = numbered
    else:
        steps = range(1, len(self.jobs) + 1)
    jobs = [(step, self.jobs[step - 1]) for step in steps]

    for i, (step, job) in enumerate(jobs):
        self.log.info('Running job {}'.format(job.name))
        self.log.info('STEP {0} :: {1}'.format(i + 1, job.label))
        self.active = job
        try:
            if job.jtype == 'function':
                job.run_python_job()
            elif job.jtype in ['docker', 'singularity', 'udocker', 'podman']:
                # Stamp a small header into the job's log file before running.
                with open(job.job.logfile, 'a') as astd:
                    astd.write('\n-----------------------------------\n')
                    astd.write('Stimela version : {}\n'.format(version))
                    astd.write('Cab name : {}\n'.format(job.job.image))
                    astd.write('-------------------------------------\n')
                # Dispatch to run_docker_job / run_singularity_job / etc.
                run_job = getattr(job, "run_{0:s}_job".format(job.jtype))
                run_job()
            self.log2recipe(job, recipe, step, 'completed')
        except (utils.StimelaCabRuntimeError, StimelaRecipeExecutionError, StimelaCabParameterError) as e:
            self.completed = [jb[1] for jb in jobs[:i]]
            self.remaining = [jb[1] for jb in jobs[i + 1:]]
            self.failed = job
            self.log.info('Recipe execution failed while running job {}'.format(job.name))
            self.log.info('Completed jobs : {}'.format(
                [c.name for c in self.completed]))
            self.log.info('Remaining jobs : {}'.format(
                [c.name for c in self.remaining]))
            self.log2recipe(job, recipe, step, 'failed')
            for step, jb in jobs[i + 1:]:
                self.log.info('Logging remaining task: {}'.format(jb.label))
                self.log2recipe(jb, recipe, step, 'remaining')
            self.log.info('Saving pipeline information in {}'.format(self.resume_file))
            utils.writeJson(self.resume_file, recipe)
            # Re-raise as a PipelineException while keeping the original traceback.
            pe = PipelineException(e, self.completed, job, self.remaining)
            raise_(pe, None, sys.exc_info()[2])
        except:
            import traceback
            traceback.print_exc()
            raise RuntimeError(
                "An unhandled exception has occured. This is a bug, please report")
        finally:
            # Singularity containers need an explicit stop even on success.
            if job.jtype == 'singularity' and job.created:
                job.job.stop()
    self.log.info('Saving pipeline information in {}'.format(self.resume_file))
    utils.writeJson(self.resume_file, recipe)
    self.log.info('Recipe executed successfully')
    return 0
def update(self, options, saveconf):
    """Validate *options* against this cab's parameter schema and save the
    validated configuration to *saveconf* as JSON.

    options  : dict mapping a parameter name (or its mapping alias) to a value
    saveconf : path of the JSON file the validated parameters are written to

    Raises RuntimeError for missing-required or unknown parameters and
    IOError for io-location problems.
    """
    # Every required parameter must be supplied under its name or its mapping.
    # BUGFIX: the previous test was "param0.name in options.keys() == False",
    # which Python chains as "(name in keys) and (keys == False)" and is
    # therefore always False -- the required check never fired.
    required = filter(lambda a: a.required, self.parameters)
    for param0 in required:
        if param0.name not in options and param0.mapping not in options:
            raise RuntimeError("Parameter {} is required but has not been specified".format(param0.name))
    self.log.info("Validating parameters... CAB = {0}".format(self.task))
    for name, value in options.items():
        found = False
        for param in self.parameters:
            if name in [param.name, param.mapping]:
                found = True
                if param.io:
                    if value is None:
                        continue
                    param.validate(value)
                    param.value = []
                    # Normalise scalars to a one-element list; strings are
                    # iterable but must be treated as single values.
                    # (Leftover debug print() calls removed here.)
                    if not (hasattr(value, "__iter__") and not isinstance(value, str)):
                        value = [value]
                    for _value in value:
                        # A "value:location" suffix overrides the parameter's
                        # default io destination.
                        val = _value.split(":")
                        if len(val) == 2:
                            if val[1] not in IODEST.keys():
                                raise IOError('The location \'{0}\' specified for parameter \'{1}\', is unknown. Choices are {2}'.format(val[1], param.name, IODEST.keys()))
                            self.log.info("Location of '{0}' was specified as '{1}'. Will overide default.".format(param.name, val[1]))
                            _value = val[0]
                            location = val[1]
                        else:
                            location = param.io
                        if location in ["input", "msfile"]:
                            if location == "input" and self.indir is None:
                                raise IOError("You have specified input files, but have not specified an input folder")
                            if location == "msfile" and self.msdir is None:
                                raise IOError("You have specified MS files, but have not specified an MS folder")
                            # Optionally confirm the file exists on the host side.
                            path = "{0}/{1}".format(self.indir if location == "input" else self.msdir, _value)
                            if param.check_io and not os.path.exists(path):
                                raise IOError("File '{0}' for parameter '{1}' could not be located at '{2}'.".format(_value, param.name, path))
                            param.value.append("{0}/{1}".format(IODEST[location], _value))
                        else:
                            if self.outdir is None:
                                raise IOError("You have specified output files, but have not specified an output folder")
                            param.value.append("{0}/{1}".format(IODEST[location], _value))
                    # Collapse a single-element result back to a scalar.
                    if len(param.value) == 1:
                        param.value = param.value[0]
                else:
                    # Plain (non-file) parameter: validate and store verbatim.
                    self.log.debug("Validating paramter {}".format(param.name))
                    param.validate(value)
                    param.value = value
        if not found:
            raise RuntimeError("Parameter {0} is unknown. Run 'stimela cabs -i {1}' to get help on this cab".format(name, self.task))
    conf = {}
    conf.update(self.toDict())
    utils.writeJson(saveconf, conf)
    self.log.info("Parameters validated and saved. Parameter file is: {}".format(saveconf))