Example #1
    def __init__(self,
                 indir=None,  # input directory
                 outdir=None,  # output directory
                 msdir=None,  # MS directory
                 parameter_file=None,
                 task=None,
                 base=None,
                 binary=None,
                 description=None,
                 tag=None,
                 prefix=None,
                 parameters=[],
                 version=None):

        self.indir = indir
        self.outdir = outdir

        if parameter_file:
            cab = utils.readJson(parameter_file)
            self.task = cab["task"]
            self.base = cab["base"]
            self.binary = cab["binary"]
            self.tag = cab["tag"]
            self.version = cab.get("version", "x.x.x")
            if cab["msdir"]:
                self.msdir = msdir
            else:
                self.msdir = None
            self.description = cab["description"]
            self.prefix = cab["prefix"]
            parameters0 = cab["parameters"]
            self.parameters = []

            for param in parameters0:
                default = param.get("default", param.get("value", None))
                addme = Parameter(name=param["name"],
                                  dtype=param["dtype"],
                                  io=param.get("io", None),
                                  info=param.get(
                                      "info", None) or "No documentation. Bad! Very bad...",
                                  default=default,
                                  mapping=param.get("mapping", None),
                                  required=param.get("required", False),
                                  choices=param.get("choices", False),
                                  check_io=param.get("check_io", True))
                self.parameters.append(addme)

        else:
            self.task = task
            self.base = base
            self.binary = binary
            self.prefix = prefix
            self.parameters = parameters
            self.description = description
            self.msdir = msdir
            self.tag = tag
            self.version = version

        self.log = stimela.logger()
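This constructor builds a cab (task wrapper) either from a parameters.json file or from explicit keyword arguments. A minimal usage sketch follows, assuming the enclosing class is stimela's CabDefinition (the class name and import path are not shown in the excerpt) and that the JSON file contains the fields read above; the file path is illustrative only.

from stimela.cargo.cab import CabDefinition   # assumed import path, not shown in the excerpt

# Illustrative only: the parameter_file path below is an assumption.
cab = CabDefinition(indir="input", outdir="output", msdir="msdir",
                    parameter_file="cargo/cab/casa_flagdata/parameters.json")

# Fields populated from the JSON file by the constructor above
print(cab.task, cab.base, cab.tag, cab.version)
for param in cab.parameters:
    print(param.name, param.dtype, param.default, param.required)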
Example #2
    def __init__(self,
                 indir=None,  # input directory
                 outdir=None,  # output directory
                 msdir=None,  # MS directory
                 parameter_file=None,
                 task=None,
                 base=None,
                 binary=None,
                 description=None,
                 tag=None,
                 prefix=None, loglevel='INFO',
                 parameters=[]):

        logging.basicConfig(level=getattr(logging, loglevel))
        self.log = logging
        self.indir = indir
        self.outdir = outdir

        if parameter_file:
            cab = utils.readJson(parameter_file)
            self.task = cab["task"]
            self.base = cab["base"]
            self.binary = cab["binary"]
            self.tag = cab["tag"]
            if cab["msdir"]:
                self.msdir = msdir
            self.description = cab["description"]
            self.prefix = cab["prefix"]
            parameters0 = cab["parameters"]
            self.parameters = []

            import sys
            for param in parameters0:
                default = param.get("default", param.get("value", None))
                addme = Parameter(name=param["name"],
                                  dtype=param["dtype"],
                                  io=param.get("io", None),
                                  info=param.get(
                                      "info", None) or "No documentation. Bad! Very bad...",
                                  default=default,
                                  mapping=param.get("mapping", None),
                                  #delimiter=param.get("delimiter", None),
                                  required=param.get("required", False),
                                  choices=param.get("choices", False),
                                  check_io=param.get("check_io", True))
                self.parameters.append(addme)

        else:
            self.task = task
            self.base = base
            self.binary = binary
            self.prefix = prefix
            self.parameters = parameters
            self.description = description
            self.msdir = msdir
            self.tag = tag
Example #3
def pull(argv):
    for i, arg in enumerate(argv):
        if (arg[0] == '-') and arg[1].isdigit():
            argv[i] = ' ' + arg

    parser = ArgumentParser(description='Pull docker stimela base images')

    add = parser.add_argument

    add("-im", "--image", nargs="+", metavar="IMAGE[:TAG]",
        help="Pull base image along with its tag (or version). Can be called multiple times")

    add("-f", "--force", action="store_true",
        help="force pull if image already exists")

    add("-s", "--singularity", action="store_true",
        help="Pull base images using singularity."
        "Images will be pulled into the directory specified by the enviroment varaible, SINGULARITY_PULLFOLDER. $PWD by default")

    add("-d", "--docker", action="store_true",
        help="Pull base images using docker.")

    add("-p", "--podman", action="store_true",
        help="Pull base images using podman.")

    add("-cb", "--cab-base", nargs="+",
        help="Pull base image for specified cab")

    add("-pf", "--pull-folder",
        help="Images will be placed in this folder. Else, if the environmnental variable 'SINGULARITY_PULLFOLDER' is set, then images will be placed there. "
        "Else, images will be placed in the current directory")

    args = parser.parse_args(argv)

    if args.pull_folder:
        pull_folder = args.pull_folder
    else:
        try:
            pull_folder = os.environ["SINGULARITY_PULLFOLDER"]
        except KeyError:
            pull_folder = "."

    if args.docker:
        jtype = "docker"
    elif args.podman:
        jtype = "podman"
    elif args.singularity:
        jtype = "singularity"
    else:
        jtype = "udocker"

    log = logger.StimelaLogger(LOG_FILE, jtype=jtype)
    images = log.read()['images']

    images_ = []
    for cab in args.cab_base or []:
        if cab in CAB:
            filename = "/".join([stimela.CAB_PATH, cab, "parameters.json"])
            param = utils.readJson(filename)
            images_.append(":".join([param["base"], param["tag"]]))

    args.image = images_ or args.image
    if args.image:
        for image in args.image:
            simage = image.replace("/", "_")
            simage = simage.replace(":", "_") + ".img"
            if args.singularity:
                singularity.pull(
                    image, simage, directory=pull_folder, force=args.force)
            elif args.docker:
                docker.pull(image)
                log.log_image(image, 'pulled')
            elif args.podman:
                podman.pull(image)
                log.log_image(image, 'pulled')
            else:
                udocker.pull(image)
                log.log_image(image, 'pulled')
    else:

        base = []
        for cab in CAB:
            image = "{:s}/{:s}".format(stimela.CAB_PATH, cab)
            base.append(utils.get_Dockerfile_base_image(image).split()[-1])

        base = set(base)

        for image in base:
            if args.singularity:
                simage = image.replace("/", "_")
                simage = simage.replace(":", "_") + ".img"
                singularity.pull(
                    image, simage, directory=pull_folder, force=args.force)
            elif args.docker:
                docker.pull(image, force=args.force)
                log.log_image(image, 'pulled')
            elif args.podman:
                podman.pull(image, force=args.force)
                log.log_image(image, 'pulled')
            else:
                udocker.pull(image, force=args.force)
                log.log_image(image, 'pulled')

    log.write()
Example #4
def pull(argv):
    for i, arg in enumerate(argv):
        if (arg[0] == '-') and arg[1].isdigit():
            argv[i] = ' ' + arg

    parser = ArgumentParser(description='Pull docker stimela base images')

    add = parser.add_argument

    add("-im", "--image", nargs="+", metavar="IMAGE[:TAG]",
        help="Pull base image along with its tag (or version). Can be called multiple times")

    add("-f", "--force", action="store_true",
        help="force pull if image already exists")

    add("-s", "--singularity", action="store_true",
        help="Pull base images using singularity."
        "Images will be pulled into the directory specified by the enviroment varaible, STIMELA_PULLFOLDER. $PWD by default")

    add("-d", "--docker", action="store_true",
        help="Pull base images using docker.")

    add("-p", "--podman", action="store_true",
        help="Pull base images using podman.")

    add("-cb", "--cab-base", nargs="+",
        help="Pull base image for specified cab")

    add("-pf", "--pull-folder",
        help="Images will be placed in this folder. Else, if the environmnental variable 'STIMELA_PULLFOLDER' is set, then images will be placed there. "
        "Else, images will be placed in the current directory")

    args = parser.parse_args(argv)

    if args.pull_folder:
        pull_folder = args.pull_folder
    else:
        try:
            pull_folder = os.environ["STIMELA_PULLFOLDER"]
        except KeyError:
            pull_folder = "."

    if args.podman:
        jtype = "podman"
    elif args.singularity:
        jtype = "singularity"
    elif args.docker:
        jtype = "docker"
    else:
        jtype = "docker"



    images_ = []
    for cab in args.cab_base or []:
        if cab in CAB:
            filename = "/".join([stimela.CAB_PATH, cab, "parameters.json"])
            param = utils.readJson(filename)
            tags = param["tag"]
            if not isinstance(tags, list):
                tags = [tags]
            for tag in tags:
                images_.append(":".join([param["base"], tag]))

    args.image = images_ or args.image
    if args.image:
        for image in args.image:
            simage = image.replace("/", "_")
            simage = simage.replace(":", "_") + singularity.suffix
            if args.singularity:
                singularity.pull(
                    image, simage, directory=pull_folder, force=args.force)
            elif args.docker:
                docker.pull(image)
            elif args.podman:
                podman.pull(image)
            else:
                docker.pull(image)
    else:
        base = []
        for cab_ in CAB:
            cabdir = "{:s}/{:s}".format(stimela.CAB_PATH, cab_)
            _cab = info(cabdir, display=False)
            tags = _cab.tag
            if not isinstance(tags, list):
                tags = [tags]
            for tag in tags:
                base.append(f"{_cab.base}:{tag}")
        base = set(base)

        for image in base:
            if args.singularity:
                simage = image.replace("/", "_")
                simage = simage.replace(":", "_") + singularity.suffix
                singularity.pull(
                    image, simage, directory=pull_folder, force=args.force)
            elif args.docker:
                docker.pull(image, force=args.force)
            elif args.podman:
                podman.pull(image, force=args.force)
            else:
                docker.pull(image, force=args.force)
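Because pull() parses a raw argv list, it can be driven from a script as well as from the stimela command line. A hedged sketch follows, assuming pull is importable from stimela.main (the module is not shown here) and that the image name and pull folder are valid on the host; all flags used are defined by the parser above.

from stimela.main import pull   # assumed import path, not shown in the excerpt

# Illustrative only: image name and pull folder are assumptions.
pull(["--singularity", "--image", "stimela/base:1.2.0",
      "--pull-folder", "/scratch/singularity_images"])

# With no --image or --cab-base given, the function falls back to pulling the
# base image of every known cab (the else branch above).
pull(["--docker", "--force"])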
Example #5
    def run(self, steps=None, resume=False, redo=None):
        """
        Run a Stimela recipe. 

        steps   :   recipe steps to run
        resume  :   resume recipe from last run
        redo    :   Re-run an old recipe from a .last file
        """

        recipe = {
            "name":   self.name,
            "steps":   []
        }
        start_at = 0

        if redo:
            recipe = utils.readJson(redo)
            self.log.info('Rerunning recipe {0} from {1}'.format(
                recipe['name'], redo))
            self.log.info('Recreating recipe instance..')
            self.jobs = []
            for step in recipe['steps']:

                # add I/O folders to the json file
                # add a string describing the contents of these folders
                # The user has to ensure that these folders exist, and have the required content
                if step['jtype'] == 'docker':
                    self.log.info('Adding job \'{0}\' to recipe. The container will be named \'{1}\''.format(
                        step['cab'], step['name']))
                    cont = docker.Container(step['cab'], step['name'],
                                            label=step['label'], logger=self.log,
                                            shared_memory=step['shared_memory'])

                    self.log.debug('Adding volumes {0} and environmental variables {1}'.format(
                        step['volumes'], step['environs']))
                    cont.volumes = step['volumes']
                    cont.environs = step['environs']
                    cont.shared_memory = step['shared_memory']
                    cont.input_content = step['input_content']
                    cont.msdir_content = step['msdir_content']
                    cont.logfile = step['logfile']
                    job = StimelaJob(
                        step['name'], recipe=self, label=step['label'])
                    job.job = cont
                    job.jtype = 'docker'

                elif step['jtype'] == 'function':
                    name = step['name']
                    func = inspect.currentframe(
                    ).f_back.f_locals[step['function']]
                    job = StimelaJob(name, recipe=self, label=step['label'])
                    job.python_job(func, step['parameters'])
                    job.jtype = 'function'

                self.jobs.append(job)

        elif resume:
            self.log.info("Resuming recipe from last run.")
            try:
                recipe = utils.readJson(self.resume_file)
            except IOError:
                raise StimelaRecipeExecutionError(
                    "Cannot resume pipeline, resume file '{}' not found".format(self.resume_file))

            steps_ = recipe.pop('steps')
            recipe['steps'] = []
            _steps = []
            for step in steps_:
                if step['status'] == 'completed':
                    recipe['steps'].append(step)
                    continue

                label = step['label']
                number = step['number']

                # Check if the recipe flow has changed
                if label == self.jobs[number-1].label:
                    self.log.info(
                        'recipe step \'{0}\' is fit for re-execution. Label = {1}'.format(number, label))
                    _steps.append(number)
                else:
                    raise StimelaRecipeExecutionError(
                        'Recipe flow, or task scheduling has changed. Cannot resume recipe. Label = {0}'.format(label))

            # Check whether there are steps to resume
            if len(_steps) == 0:
                self.log.info(
                    'All the steps were completed. No steps to resume')
                sys.exit(0)
            steps = _steps

        if getattr(steps, '__iter__', False):
            _steps = []
            if isinstance(steps[0], str):
                labels = [job.label.split('::')[0] for job in self.jobs]

                for step in steps:
                    try:
                        _steps.append(labels.index(step)+1)
                    except ValueError:
                        raise StimelaCabParameterError(
                            'Recipe label ID [{0}] doesn\'t exist'.format(step))
                steps = _steps
        else:
            steps = range(1, len(self.jobs)+1)

        jobs = [(step, self.jobs[step-1]) for step in steps]

        for i, (step, job) in enumerate(jobs):

            self.log.info('Running job {}'.format(job.name))
            self.log.info('STEP {0} :: {1}'.format(i+1, job.label))
            self.active = job
            try:
                if job.jtype == 'function':
                    job.run_python_job()
                elif job.jtype in ['docker', 'singularity', 'udocker', 'podman']:
                    with open(job.job.logfile, 'a') as astd:
                        astd.write('\n-----------------------------------\n')
                        astd.write(
                            'Stimela version     : {}\n'.format(version))
                        astd.write(
                            'Cab name            : {}\n'.format(job.job.image))
                        astd.write('-------------------------------------\n')

                    run_job = getattr(job, "run_{0:s}_job".format(job.jtype))
                    run_job()

                self.log2recipe(job, recipe, step, 'completed')

            except (utils.StimelaCabRuntimeError,
                    StimelaRecipeExecutionError,
                    StimelaCabParameterError) as e:
                self.completed = [jb[1] for jb in jobs[:i]]
                self.remaining = [jb[1] for jb in jobs[i+1:]]
                self.failed = job

                self.log.info(
                    'Recipe execution failed while running job {}'.format(job.name))
                self.log.info('Completed jobs : {}'.format(
                    [c.name for c in self.completed]))
                self.log.info('Remaining jobs : {}'.format(
                    [c.name for c in self.remaining]))

                self.log2recipe(job, recipe, step, 'failed')
                for step, jb in jobs[i+1:]:
                    self.log.info(
                        'Logging remaining task: {}'.format(jb.label))
                    self.log2recipe(jb, recipe, step, 'remaining')

                self.log.info(
                    'Saving pipeline information in {}'.format(self.resume_file))
                utils.writeJson(self.resume_file, recipe)

                pe = PipelineException(e, self.completed, job, self.remaining)
                raise_(pe, None, sys.exc_info()[2])
            except:
                import traceback
                traceback.print_exc()
                raise RuntimeError(
                    "An unhandled exception has occured. This is a bug, please report")

            finally:
                if job.jtype == 'singularity' and job.created:
                    job.job.stop()

        self.log.info(
            'Saving pipeline information in {}'.format(self.resume_file))
        utils.writeJson(self.resume_file, recipe)

        self.log.info('Recipe executed successfully')

        return 0
Example #6
    def run(self, steps=None, resume=False, redo=None):
        """
        Run a Stimela recipe. 

        steps   :   recipe steps to run
        resume  :   resume recipe from last run
        redo    :   Re-run an old recipe from a .last file
        """

        recipe = {
            "name":   self.name,
            "steps":   []
        }
        start_at = 0

        if redo:
            recipe = utils.readJson(redo)
            self.log.info('Rerunning recipe {0} from {1}'.format(
                recipe['name'], redo))
            self.log.info('Recreating recipe instance..')
            self.jobs = []
            for step in recipe['steps']:

                # add I/O folders to the json file
                # add a string describing the contents of these folders
                # The user has to ensure that these folders exist, and have the required content
                if step['jtype'] == 'docker':
                    self.log.info('Adding job \'{0}\' to recipe. The container will be named \'{1}\''.format(
                        step['cab'], step['name']))
                    cont = docker.Container(step['cab'], step['name'],
                                            label=step['label'], logger=self.log,
                                            shared_memory=step['shared_memory'],
                                            workdir=WORKDIR)

                    self.log.debug('Adding volumes {0} and environmental variables {1}'.format(
                        step['volumes'], step['environs']))
                    cont.volumes = step['volumes']
                    cont.environs = step['environs']
                    cont.shared_memory = step['shared_memory']
                    cont.input_content = step['input_content']
                    cont.msdir_content = step['msdir_content']
                    cont.logfile = step['logfile']
                    job = StimelaJob(
                        step['name'], recipe=self, label=step['label'], cabpath=self.cabpath)
                    job.job = cont
                    job.jtype = 'docker'
                elif step['jtype'] == 'function':
                    name = step['name']
                    func = inspect.currentframe(
                    ).f_back.f_locals[step['function']]
                    job = StimelaJob(name, recipe=self, label=step['label'])
                    job.python_job(func, step['parameters'])
                    job.jtype = 'function'

                self.jobs.append(job)

        elif resume:
            self.log.info("Resuming recipe from last run.")
            try:
                recipe = utils.readJson(self.resume_file)
            except IOError:
                raise StimelaRecipeExecutionError(
                    "Cannot resume pipeline, resume file '{}' not found".format(self.resume_file))

            steps_ = recipe.pop('steps')
            recipe['steps'] = []
            _steps = []
            for step in steps_:
                if step['status'] == 'completed':
                    recipe['steps'].append(step)
                    continue

                label = step['label']
                number = step['number']

                # Check if the recipe flow has changed
                if label == self.jobs[number-1].label:
                    self.log.info(
                        'recipe step \'{0}\' is fit for re-execution. Label = {1}'.format(number, label))
                    _steps.append(number)
                else:
                    raise StimelaRecipeExecutionError(
                        'Recipe flow, or task scheduling has changed. Cannot resume recipe. Label = {0}'.format(label))

            # Check whether there are steps to resume
            if len(_steps) == 0:
                self.log.info(
                    'All the steps were completed. No steps to resume')
                sys.exit(0)
            steps = _steps

        if getattr(steps, '__iter__', False):
            _steps = []
            if isinstance(steps[0], str):
                labels = [job.label.split('::')[0] for job in self.jobs]

                for step in steps:
                    try:
                        _steps.append(labels.index(step)+1)
                    except ValueError:
                        raise StimelaCabParameterError(
                            'Recipe label ID [{0}] doesn\'t exist'.format(step))
                steps = _steps
        else:
            steps = range(1, len(self.jobs)+1)
        jobs = [(step, self.jobs[step-1]) for step in steps]

        # TIMESTR = "%Y-%m-%d %H:%M:%S"
        # TIMESTR = "%H:%M:%S"

        for i, (step, job) in enumerate(jobs):
            start_time = datetime.now()
            job.log.info('job started at {}'.format(start_time),
                          # the extra attributes are filtered by e.g. the CARACal logger
                          extra=dict(stimela_job_state=(job.name, "running")))

            self.log.info('STEP {0} :: {1}'.format(i+1, job.label))
            self.active = job
            try:
                with open(job.logfile, 'a') as astd:
                    astd.write('\n-----------------------------------\n')
                    astd.write(
                        'Stimela version     : {}\n'.format(version))
                    astd.write(
                        'Cab name            : {}\n'.format(job.image))
                    astd.write('-------------------------------------\n')
                job.run_job()

                self.log2recipe(job, recipe, step, 'completed')
                self.completed.append(job)

                finished_time = datetime.now()
                job.log.info('job complete at {} after {}'.format(finished_time, finished_time-start_time),
                              # the extra attributes are filtered by e.g. the CARACal logger
                              extra=dict(stimela_job_state=(job.name, "complete")))

            except (utils.StimelaCabRuntimeError,
                    StimelaRecipeExecutionError,
                    StimelaCabParameterError) as e:
                self.remaining = [jb[1] for jb in jobs[i+1:]]
                self.failed = job

                finished_time = datetime.now()
                job.log.error(str(e), extra=dict(stimela_job_state=(job.name, "failed"), boldface=True))
                job.log.error('job failed at {} after {}'.format(finished_time, finished_time-start_time),
                                extra=dict(stimela_job_state=(job.name, "failed"), color=None))
                for line in traceback.format_exc().splitlines():
                    job.log.error(line, extra=dict(traceback_report=True))

                self.log.info('Completed jobs : {}'.format(
                    [c.name for c in self.completed]))
                self.log.info('Remaining jobs : {}'.format(
                    [c.name for c in self.remaining]))

                self.log2recipe(job, recipe, step, 'failed')
                for step, jb in jobs[i+1:]:
                    self.log.info(
                        'Logging remaining task: {}'.format(jb.label))
                    self.log2recipe(jb, recipe, step, 'remaining')

                self.log.info(
                    'Saving pipeline information in {}'.format(self.resume_file))
                utils.writeJson(self.resume_file, recipe)

                # raise pipeline exception. Original exception context is discarded by "from None" (since we've already
                # logged it above, we don't need to include it with the new exception)
                raise PipelineException(e, self.completed, job, self.remaining) from None

        self.log.info(
            'Saving pipeline information in {}'.format(self.resume_file))
        utils.writeJson(self.resume_file, recipe)
        self.log.info('Recipe executed successfully')

        return 0
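The run() method above accepts steps either as 1-based step numbers or as label strings (matched against the part of each job's label before '::'), a resume flag that picks up from the saved resume file, or a redo path pointing at a previously written .last file. A minimal usage sketch, assuming rec is an already-constructed stimela Recipe with labelled jobs added to it (the recipe-building API is not shown in this excerpt):

# Illustrative only; `rec` is assumed to be a fully built Recipe instance
# whose jobs carry labels such as "flag:: ..." and "image:: ...".
rec.run()                              # run every step in order
rec.run(steps=[2, 3])                  # run only steps 2 and 3 (1-based numbers)
rec.run(steps=["flag", "image"])       # run steps by label (text before '::')
rec.run(resume=True)                   # continue from the saved resume file
rec.run(redo="demo.last")              # replay a recipe recorded in a .last file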