def __init__(self,
             indir=None,    # input directory
             outdir=None,   # output directory
             msdir=None,    # MS directory
             parameter_file=None,
             task=None,
             base=None,
             binary=None,
             description=None,
             tag=None,
             prefix=None,
             parameters=None,
             version=None):
    """Set up a cab definition, either from a JSON file or from keyword arguments.

    When ``parameter_file`` is given it is read with ``utils.readJson`` and
    the ``task``, ``base``, ``binary``, ``tag``, ``description``, ``prefix``
    and ``parameters`` entries of that file are used; the keyword arguments
    of the same names are then ignored.  ``version`` falls back to
    ``"x.x.x"`` when the file has no ``"version"`` entry.  ``msdir`` is only
    kept when the file's ``"msdir"`` entry is truthy, otherwise it is set to
    ``None``.

    Without ``parameter_file`` every attribute is taken directly from the
    corresponding keyword argument.

    ``parameters`` defaults to ``None`` and is replaced by a fresh empty
    list, so that instances created with the default never share one list
    object.  A list passed in by the caller is stored as-is (not copied).
    """
    self.indir = indir
    self.outdir = outdir

    if parameter_file:
        cab = utils.readJson(parameter_file)
        self.task = cab["task"]
        self.base = cab["base"]
        self.binary = cab["binary"]
        self.tag = cab["tag"]
        self.version = cab.get("version", "x.x.x")
        # The file only says *whether* the cab uses an MS directory; the
        # actual location still comes from the caller.
        self.msdir = msdir if cab["msdir"] else None
        self.description = cab["description"]
        self.prefix = cab["prefix"]
        self.parameters = [
            Parameter(name=param["name"],
                      dtype=param["dtype"],
                      io=param.get("io", None),
                      info=param.get(
                          "info", None) or "No documentation. Bad! Very bad...",
                      # "default" wins over the alternative "value" key
                      default=param.get("default", param.get("value", None)),
                      mapping=param.get("mapping", None),
                      required=param.get("required", False),
                      choices=param.get("choices", False),
                      check_io=param.get("check_io", True))
            for param in cab["parameters"]
        ]
    else:
        self.task = task
        self.base = base
        self.binary = binary
        self.prefix = prefix
        # Fresh list per instance instead of a shared mutable default.
        self.parameters = [] if parameters is None else parameters
        self.description = description
        self.msdir = msdir
        self.tag = tag
        self.version = version

    self.log = stimela.logger()
def __init__(self,
             indir=None,    # input directory
             outdir=None,   # output directory
             msdir=None,    # MS directory
             parameter_file=None,
             task=None,
             base=None,
             binary=None,
             description=None,
             tag=None,
             prefix=None,
             loglevel='INFO',
             parameters=None):
    """Set up a cab definition, either from a JSON file or from keyword arguments.

    ``loglevel`` is the name of a ``logging`` level (e.g. ``'INFO'``); it is
    looked up on the ``logging`` module and passed to
    ``logging.basicConfig``.  The ``logging`` module itself is stored as
    ``self.log``.

    When ``parameter_file`` is given it is read with ``utils.readJson`` and
    the ``task``, ``base``, ``binary``, ``tag``, ``description``, ``prefix``
    and ``parameters`` entries of that file are used; the keyword arguments
    of the same names are then ignored.  ``msdir`` is only kept when the
    file's ``"msdir"`` entry is truthy; otherwise ``self.msdir`` is set to
    ``None`` so the attribute always exists.

    Without ``parameter_file`` every attribute is taken directly from the
    corresponding keyword argument.

    ``parameters`` defaults to ``None`` and is replaced by a fresh empty
    list, so that instances created with the default never share one list
    object.  A list passed in by the caller is stored as-is (not copied).
    """
    logging.basicConfig(level=getattr(logging, loglevel))
    self.log = logging
    self.indir = indir
    self.outdir = outdir

    if parameter_file:
        cab = utils.readJson(parameter_file)
        self.task = cab["task"]
        self.base = cab["base"]
        self.binary = cab["binary"]
        self.tag = cab["tag"]
        # Always define the attribute, even when the cab does not use an MS
        # directory; previously it was simply left unset in that case.
        self.msdir = msdir if cab["msdir"] else None
        self.description = cab["description"]
        self.prefix = cab["prefix"]
        self.parameters = [
            Parameter(name=param["name"],
                      dtype=param["dtype"],
                      io=param.get("io", None),
                      info=param.get(
                          "info", None) or "No documentation. Bad! Very bad...",
                      # "default" wins over the alternative "value" key
                      default=param.get("default", param.get("value", None)),
                      mapping=param.get("mapping", None),
                      required=param.get("required", False),
                      choices=param.get("choices", False),
                      check_io=param.get("check_io", True))
            for param in cab["parameters"]
        ]
    else:
        self.task = task
        self.base = base
        self.binary = binary
        self.prefix = prefix
        # Fresh list per instance instead of a shared mutable default.
        self.parameters = [] if parameters is None else parameters
        self.description = description
        self.msdir = msdir
        self.tag = tag
def pull(argv):
    """Pull stimela base images with docker, podman, singularity or udocker.

    ``argv`` is the list of command-line arguments (without the program
    name).  The images to pull are chosen in this order:

    1. the base images of the cabs named with ``--cab-base`` (cabs that are
       not in ``CAB`` are skipped),
    2. the images named with ``--image``,
    3. otherwise the base image of every cab in ``CAB``, as read from each
       cab's Dockerfile.

    With ``--singularity`` images are written into the pull folder
    (``--pull-folder``, else ``$SINGULARITY_PULLFOLDER``, else the current
    directory).  With any other backend the image is pulled and recorded in
    the stimela log as ``'pulled'``.  ``--force`` is passed to the backend in
    all cases.

    The caller's ``argv`` list is not modified.
    """
    # Arguments that look like "-<digit>..." get a leading space, presumably
    # so argparse does not mistake a value for an option flag.  Slicing (rather
    # than indexing) keeps "" and "-" from raising IndexError.
    argv = [
        ' ' + arg if arg[:1] == '-' and arg[1:2].isdigit() else arg
        for arg in argv
    ]

    parser = ArgumentParser(description='Pull docker stimela base images')
    add = parser.add_argument

    add("-im", "--image", nargs="+", metavar="IMAGE[:TAG]",
        help="Pull base image along with its tag (or version). Can be called multiple times")

    add("-f", "--force", action="store_true",
        help="force pull if image already exists")

    add("-s", "--singularity", action="store_true",
        help="Pull base images using singularity."
        "Images will be pulled into the directory specified by the enviroment varaible, SINGULARITY_PULLFOLDER. $PWD by default")

    add("-d", "--docker", action="store_true",
        help="Pull base images using docker.")

    add("-p", "--podman", action="store_true",
        help="Pull base images using podman.")

    add("-cb", "--cab-base", nargs="+",
        help="Pull base image for specified cab")

    add("-pf", "--pull-folder",
        help="Images will be placed in this folder. Else, if the environmnental variable 'SINGULARITY_PULLFOLDER' is set, then images will be placed there. "
        "Else, images will be placed in the current directory")

    args = parser.parse_args(argv)

    pull_folder = args.pull_folder or os.environ.get(
        "SINGULARITY_PULLFOLDER", ".")

    # Job type recorded in the stimela log.
    # NOTE(review): the precedence here (docker > podman > singularity)
    # differs from the pull dispatch below (singularity first) when several
    # backend flags are combined -- confirm which one is intended.
    if args.docker:
        jtype = "docker"
    elif args.podman:
        jtype = "podman"
    elif args.singularity:
        jtype = "singularity"
    else:
        jtype = "udocker"

    log = logger.StimelaLogger(LOG_FILE, jtype=jtype)
    # The returned 'images' entry is not used here; the read is kept in case
    # it primes the logger's state before log.write() -- TODO confirm.
    log.read()['images']

    # Base images of the explicitly requested cabs take precedence over --image.
    cab_images = []
    for cab in args.cab_base or []:
        if cab in CAB:
            filename = "/".join([stimela.CAB_PATH, cab, "parameters.json"])
            param = utils.readJson(filename)
            cab_images.append(":".join([param["base"], param["tag"]]))

    targets = cab_images or args.image
    if not targets:
        # Nothing requested: collect the (de-duplicated) base image of every cab.
        targets = set(
            utils.get_Dockerfile_base_image(
                "{:s}/{:s}".format(stimela.CAB_PATH, cab)).split()[-1]
            for cab in CAB
        )

    # Backend used when singularity is not selected.
    if args.docker:
        backend = docker
    elif args.podman:
        backend = podman
    else:
        backend = udocker

    for image in targets:
        if args.singularity:
            simage = image.replace("/", "_").replace(":", "_") + ".img"
            singularity.pull(
                image, simage, directory=pull_folder, force=args.force)
        else:
            # --force is honoured whether or not images were named explicitly.
            backend.pull(image, force=args.force)
            log.log_image(image, 'pulled')

    log.write()
def pull(argv):
    """Pull stimela base images with docker, podman or singularity.

    ``argv`` is the list of command-line arguments (without the program
    name).  The images to pull are chosen in this order:

    1. the base images (one per tag) of the cabs named with ``--cab-base``
       (cabs that are not in ``CAB`` are skipped),
    2. the images named with ``--image``,
    3. otherwise the base image (one per tag) of every cab in ``CAB``.

    With ``--singularity`` images are written into the pull folder
    (``--pull-folder``, else ``$STIMELA_PULLFOLDER``, else the current
    directory).  Otherwise podman is used when ``--podman`` is given without
    ``--docker``, and docker in every other case.  ``--force`` is passed to
    the backend in all cases.

    The caller's ``argv`` list is not modified.
    """
    # Arguments that look like "-<digit>..." get a leading space, presumably
    # so argparse does not mistake a value for an option flag.  Slicing (rather
    # than indexing) keeps "" and "-" from raising IndexError.
    argv = [
        ' ' + arg if arg[:1] == '-' and arg[1:2].isdigit() else arg
        for arg in argv
    ]

    parser = ArgumentParser(description='Pull docker stimela base images')
    add = parser.add_argument

    add("-im", "--image", nargs="+", metavar="IMAGE[:TAG]",
        help="Pull base image along with its tag (or version). Can be called multiple times")

    add("-f", "--force", action="store_true",
        help="force pull if image already exists")

    add("-s", "--singularity", action="store_true",
        help="Pull base images using singularity."
        "Images will be pulled into the directory specified by the enviroment varaible, STIMELA_PULLFOLDER. $PWD by default")

    add("-d", "--docker", action="store_true",
        help="Pull base images using docker.")

    add("-p", "--podman", action="store_true",
        help="Pull base images using podman.")

    add("-cb", "--cab-base", nargs="+",
        help="Pull base image for specified cab")

    add("-pf", "--pull-folder",
        help="Images will be placed in this folder. Else, if the environmnental variable 'STIMELA_PULLFOLDER' is set, then images will be placed there. "
        "Else, images will be placed in the current directory")

    args = parser.parse_args(argv)

    pull_folder = args.pull_folder or os.environ.get(
        "STIMELA_PULLFOLDER", ".")

    def as_tag_list(tags):
        # A cab may declare a single tag or a list of tags.
        return tags if isinstance(tags, list) else [tags]

    # Base images of the explicitly requested cabs take precedence over --image.
    cab_images = []
    for cab in args.cab_base or []:
        if cab in CAB:
            filename = "/".join([stimela.CAB_PATH, cab, "parameters.json"])
            param = utils.readJson(filename)
            cab_images.extend(
                ":".join([param["base"], tag])
                for tag in as_tag_list(param["tag"]))

    targets = cab_images or args.image
    if not targets:
        # Nothing requested: collect the (de-duplicated) base images of every cab.
        targets = set()
        for cab_ in CAB:
            cabdir = "{:s}/{:s}".format(stimela.CAB_PATH, cab_)
            _cab = info(cabdir, display=False)
            targets.update(
                f"{_cab.base}:{tag}" for tag in as_tag_list(_cab.tag))

    # Docker is the default backend; podman is only used when it is requested
    # and docker is not (same precedence as the original if/elif chain).
    backend = podman if (args.podman and not args.docker) else docker

    for image in targets:
        if args.singularity:
            simage = image.replace("/", "_").replace(":", "_") + singularity.suffix
            singularity.pull(
                image, simage, directory=pull_folder, force=args.force)
        else:
            # --force is honoured whether or not images were named explicitly.
            backend.pull(image, force=args.force)
def run(self, steps=None, resume=False, redo=None):
    """
    Run a Stimela recipe.

        steps   :   recipe steps to run; either 1-based step numbers or step
                    label IDs (the part of a job label before '::').
                    None runs every job, an empty sequence runs nothing.
        resume  :   resume recipe from last run, using self.resume_file.
                    Steps marked 'completed' there are skipped.
        redo    :   Re-run an old recipe from a .last file. The jobs are
                    rebuilt from that file and replace self.jobs.

    Returns 0 when all selected jobs ran. The recipe state is written to
    self.resume_file on success and on a handled job failure.

    Raises:
        PipelineException           : a job raised one of the Stimela
                                      runtime/parameter/execution errors.
        RuntimeError                : a job raised any other Exception.
        StimelaRecipeExecutionError : the resume file is missing, the recipe
                                      no longer matches it, or a redo step has
                                      an unsupported job type.
        StimelaCabParameterError    : a label in steps does not exist.
        SystemExit                  : resume was requested but every step was
                                      already completed (exit code 0).
    """

    recipe = {
        "name":   self.name,
        "steps":   []
    }

    if redo:
        recipe = utils.readJson(redo)
        self.log.info('Rerunning recipe {0} from {1}'.format(
            recipe['name'], redo))
        self.log.info('Recreating recipe instance..')
        self.jobs = []
        for step in recipe['steps']:
            # add I/O folders to the json file
            # add a string describing the contents of these folders
            # The user has to ensure that these folders exist, and have the required content
            if step['jtype'] == 'docker':
                self.log.info('Adding job \'{0}\' to recipe. The container will be named \'{1}\''.format(
                    step['cab'], step['name']))
                cont = docker.Container(step['cab'], step['name'],
                                        label=step['label'], logger=self.log,
                                        shared_memory=step['shared_memory'])

                self.log.debug('Adding volumes {0} and environmental variables {1}'.format(
                    step['volumes'], step['environs']))
                cont.volumes = step['volumes']
                cont.environs = step['environs']
                cont.shared_memory = step['shared_memory']
                cont.input_content = step['input_content']
                cont.msdir_content = step['msdir_content']
                cont.logfile = step['logfile']
                job = StimelaJob(
                    step['name'], recipe=self, label=step['label'])
                job.job = cont
                job.jtype = 'docker'

            elif step['jtype'] == 'function':
                name = step['name']
                # The function is looked up by name in the *caller's* local
                # variables, so this must stay directly inside run().
                func = inspect.currentframe(
                ).f_back.f_locals[step['function']]
                job = StimelaJob(name, recipe=self, label=step['label'])
                job.python_job(func, step['parameters'])
                job.jtype = 'function'

            else:
                # Without this, 'job' would be unbound (or a stale job from
                # the previous iteration would be appended a second time).
                raise StimelaRecipeExecutionError(
                    "Cannot redo step '{0}': unsupported job type '{1}'".format(
                        step['name'], step['jtype']))

            self.jobs.append(job)

    elif resume:
        self.log.info("Resuming recipe from last run.")
        try:
            recipe = utils.readJson(self.resume_file)
        except IOError:
            raise StimelaRecipeExecutionError(
                "Cannot resume pipeline, resume file '{}' not found".format(self.resume_file))

        steps_ = recipe.pop('steps')
        recipe['steps'] = []
        _steps = []
        for step in steps_:
            if step['status'] == 'completed':
                # Completed steps are carried over unchanged and not re-run.
                recipe['steps'].append(step)
                continue

            label = step['label']
            number = step['number']

            # Check if the recipe flow has changed. A step number outside the
            # current job list also counts as a changed flow.
            in_range = 0 < number <= len(self.jobs)
            if in_range and label == self.jobs[number-1].label:
                self.log.info(
                    'recipe step \'{0}\' is fit for re-execution. Label = {1}'.format(number, label))
                _steps.append(number)
            else:
                raise StimelaRecipeExecutionError(
                    'Recipe flow, or task scheduling has changed. Cannot resume recipe. Label = {0}'.format(label))

        # Check whether there are steps to resume
        if len(_steps) == 0:
            self.log.info(
                'All the steps were completed. No steps to resume')
            sys.exit(0)
        steps = _steps

    if getattr(steps, '__iter__', False):
        _steps = []
        # Labels are translated to 1-based step numbers. The emptiness check
        # keeps an empty selection from raising IndexError.
        if steps and isinstance(steps[0], str):
            labels = [job.label.split('::')[0] for job in self.jobs]

            for step in steps:
                try:
                    _steps.append(labels.index(step)+1)
                except ValueError:
                    raise StimelaCabParameterError(
                        'Recipe label ID [{0}] doesn\'t exist'.format(step))
            steps = _steps
    else:
        steps = range(1, len(self.jobs)+1)

    jobs = [(step, self.jobs[step-1]) for step in steps]

    for i, (step, job) in enumerate(jobs):
        self.log.info('Running job {}'.format(job.name))
        self.log.info('STEP {0} :: {1}'.format(i+1, job.label))
        self.active = job
        try:
            if job.jtype == 'function':
                job.run_python_job()
            elif job.jtype in ['docker', 'singularity', 'udocker', 'podman']:
                with open(job.job.logfile, 'a') as astd:
                    astd.write('\n-----------------------------------\n')
                    astd.write(
                        'Stimela version : {}\n'.format(version))
                    astd.write(
                        'Cab name : {}\n'.format(job.job.image))
                    astd.write('-------------------------------------\n')

                run_job = getattr(job, "run_{0:s}_job".format(job.jtype))
                run_job()

            self.log2recipe(job, recipe, step, 'completed')

        except (utils.StimelaCabRuntimeError,
                StimelaRecipeExecutionError,
                StimelaCabParameterError) as e:
            self.completed = [jb[1] for jb in jobs[:i]]
            self.remaining = [jb[1] for jb in jobs[i+1:]]
            self.failed = job

            self.log.info(
                'Recipe execution failed while running job {}'.format(job.name))
            self.log.info('Completed jobs : {}'.format(
                [c.name for c in self.completed]))
            self.log.info('Remaining jobs : {}'.format(
                [c.name for c in self.remaining]))

            self.log2recipe(job, recipe, step, 'failed')
            for pending_step, jb in jobs[i+1:]:
                self.log.info(
                    'Logging remaining task: {}'.format(jb.label))
                self.log2recipe(jb, recipe, pending_step, 'remaining')

            self.log.info(
                'Saving pipeline information in {}'.format(self.resume_file))
            utils.writeJson(self.resume_file, recipe)

            # Re-raise as PipelineException while keeping the original traceback.
            pe = PipelineException(e, self.completed, job, self.remaining)
            raise_(pe, None, sys.exc_info()[2])
        except Exception:
            # Only genuine errors are reported as bugs; KeyboardInterrupt and
            # SystemExit are no longer swallowed and propagate unchanged.
            import traceback
            traceback.print_exc()
            raise RuntimeError(
                "An unhandled exception has occured. This is a bug, please report")
        finally:
            # Stop a singularity job that was created, whatever the outcome.
            if job.jtype == 'singularity' and job.created:
                job.job.stop()

    self.log.info(
        'Saving pipeline information in {}'.format(self.resume_file))
    utils.writeJson(self.resume_file, recipe)

    self.log.info('Recipe executed successfully')

    return 0
def run(self, steps=None, resume=False, redo=None):
    """
    Run a Stimela recipe.

        steps   :   recipe steps to run; either 1-based step numbers or step
                    label IDs (the part of a job label before '::').
                    None runs every job, an empty sequence runs nothing.
        resume  :   resume recipe from last run, using self.resume_file.
                    Steps marked 'completed' there are skipped.
        redo    :   Re-run an old recipe from a .last file. The jobs are
                    rebuilt from that file and replace self.jobs.

    Returns 0 when all selected jobs ran. The recipe state is written to
    self.resume_file on success and on a handled job failure. Jobs that
    finish are appended to self.completed.

    Raises:
        PipelineException           : a job raised one of the Stimela
                                      runtime/parameter/execution errors.
        StimelaRecipeExecutionError : the resume file is missing, the recipe
                                      no longer matches it, or a redo step has
                                      an unsupported job type.
        StimelaCabParameterError    : a label in steps does not exist.
        SystemExit                  : resume was requested but every step was
                                      already completed (exit code 0).
    """

    recipe = {
        "name":   self.name,
        "steps":   []
    }

    if redo:
        recipe = utils.readJson(redo)
        self.log.info('Rerunning recipe {0} from {1}'.format(
            recipe['name'], redo))
        self.log.info('Recreating recipe instance..')
        self.jobs = []
        for step in recipe['steps']:
            # add I/O folders to the json file
            # add a string describing the contents of these folders
            # The user has to ensure that these folders exist, and have the required content
            if step['jtype'] == 'docker':
                self.log.info('Adding job \'{0}\' to recipe. The container will be named \'{1}\''.format(
                    step['cab'], step['name']))
                cont = docker.Container(step['cab'], step['name'],
                                        label=step['label'], logger=self.log,
                                        shared_memory=step['shared_memory'],
                                        workdir=WORKDIR)

                self.log.debug('Adding volumes {0} and environmental variables {1}'.format(
                    step['volumes'], step['environs']))
                cont.volumes = step['volumes']
                cont.environs = step['environs']
                cont.shared_memory = step['shared_memory']
                cont.input_content = step['input_content']
                cont.msdir_content = step['msdir_content']
                cont.logfile = step['logfile']
                job = StimelaJob(
                    step['name'], recipe=self, label=step['label'],
                    cabpath=self.cabpath)
                job.job = cont
                job.jtype = 'docker'

            elif step['jtype'] == 'function':
                name = step['name']
                # The function is looked up by name in the *caller's* local
                # variables, so this must stay directly inside run().
                func = inspect.currentframe(
                ).f_back.f_locals[step['function']]
                job = StimelaJob(name, recipe=self, label=step['label'])
                job.python_job(func, step['parameters'])
                job.jtype = 'function'

            else:
                # Without this, 'job' would be unbound (or a stale job from
                # the previous iteration would be appended a second time).
                raise StimelaRecipeExecutionError(
                    "Cannot redo step '{0}': unsupported job type '{1}'".format(
                        step['name'], step['jtype']))

            self.jobs.append(job)

    elif resume:
        self.log.info("Resuming recipe from last run.")
        try:
            recipe = utils.readJson(self.resume_file)
        except IOError:
            raise StimelaRecipeExecutionError(
                "Cannot resume pipeline, resume file '{}' not found".format(self.resume_file))

        steps_ = recipe.pop('steps')
        recipe['steps'] = []
        _steps = []
        for step in steps_:
            if step['status'] == 'completed':
                # Completed steps are carried over unchanged and not re-run.
                recipe['steps'].append(step)
                continue

            label = step['label']
            number = step['number']

            # Check if the recipe flow has changed. A step number outside the
            # current job list also counts as a changed flow.
            in_range = 0 < number <= len(self.jobs)
            if in_range and label == self.jobs[number-1].label:
                self.log.info(
                    'recipe step \'{0}\' is fit for re-execution. Label = {1}'.format(number, label))
                _steps.append(number)
            else:
                raise StimelaRecipeExecutionError(
                    'Recipe flow, or task scheduling has changed. Cannot resume recipe. Label = {0}'.format(label))

        # Check whether there are steps to resume
        if len(_steps) == 0:
            self.log.info(
                'All the steps were completed. No steps to resume')
            sys.exit(0)
        steps = _steps

    if getattr(steps, '__iter__', False):
        _steps = []
        # Labels are translated to 1-based step numbers. The emptiness check
        # keeps an empty selection from raising IndexError.
        if steps and isinstance(steps[0], str):
            labels = [job.label.split('::')[0] for job in self.jobs]

            for step in steps:
                try:
                    _steps.append(labels.index(step)+1)
                except ValueError:
                    raise StimelaCabParameterError(
                        'Recipe label ID [{0}] doesn\'t exist'.format(step))
            steps = _steps
    else:
        steps = range(1, len(self.jobs)+1)

    jobs = [(step, self.jobs[step-1]) for step in steps]

    for i, (step, job) in enumerate(jobs):
        start_time = datetime.now()
        job.log.info('job started at {}'.format(start_time),
                     # the extra attributes are filtered by e.g. the CARACal logger
                     extra=dict(stimela_job_state=(job.name, "running")))

        self.log.info('STEP {0} :: {1}'.format(i+1, job.label))
        self.active = job
        try:
            with open(job.logfile, 'a') as astd:
                astd.write('\n-----------------------------------\n')
                astd.write(
                    'Stimela version : {}\n'.format(version))
                astd.write(
                    'Cab name : {}\n'.format(job.image))
                astd.write('-------------------------------------\n')

            job.run_job()

            self.log2recipe(job, recipe, step, 'completed')
            self.completed.append(job)

            finished_time = datetime.now()
            job.log.info('job complete at {} after {}'.format(finished_time, finished_time-start_time),
                         # the extra attributes are filtered by e.g. the CARACal logger
                         extra=dict(stimela_job_state=(job.name, "complete")))

        except (utils.StimelaCabRuntimeError,
                StimelaRecipeExecutionError,
                StimelaCabParameterError) as e:
            self.remaining = [jb[1] for jb in jobs[i+1:]]
            self.failed = job

            finished_time = datetime.now()
            job.log.error(str(e), extra=dict(stimela_job_state=(job.name, "failed"), boldface=True))
            job.log.error('job failed at {} after {}'.format(finished_time, finished_time-start_time),
                          extra=dict(stimela_job_state=(job.name, "failed"), color=None))
            # Log the traceback line by line, tagged so log handlers can
            # recognise it as a traceback report.
            for line in traceback.format_exc().splitlines():
                job.log.error(line, extra=dict(traceback_report=True))

            self.log.info('Completed jobs : {}'.format(
                [c.name for c in self.completed]))
            self.log.info('Remaining jobs : {}'.format(
                [c.name for c in self.remaining]))

            self.log2recipe(job, recipe, step, 'failed')
            for pending_step, jb in jobs[i+1:]:
                self.log.info(
                    'Logging remaining task: {}'.format(jb.label))
                self.log2recipe(jb, recipe, pending_step, 'remaining')

            self.log.info(
                'Saving pipeline information in {}'.format(self.resume_file))
            utils.writeJson(self.resume_file, recipe)

            # raise pipeline exception. Original exception context is discarded by "from None" (since we've already
            # logged it above, we don't need to include it with the new exception)
            raise PipelineException(e, self.completed, job, self.remaining) from None

    self.log.info(
        'Saving pipeline information in {}'.format(self.resume_file))
    utils.writeJson(self.resume_file, recipe)

    self.log.info('Recipe executed successfully')

    return 0