Example #1
0
 def setup_output_wranglers(self, wranglers):
     """
     Compile the output wrangler specification into self._wranglers

     wranglers   :   Dictionary mapping a regular expression (string) to either
                     a single action or a list of actions. An action is either
                     'replace:<text>' or one of the keys of the module-level
                     _actions table. A false value (None, empty dict) leaves
                     self._wranglers empty.

     Every entry appended to self._wranglers is a tuple of
     (compiled pattern, replacement text or None, list of _actions values).

     Raises StimelaCabRuntimeError if the specification is not a dict, if an
     entry is neither a string nor a list, or if an action is not a string or
     is unknown.
     """
     self._wranglers = []
     if not wranglers:
         return
     # isinstance (rather than an exact type comparison) so that dict/str/list
     # subclasses, e.g. OrderedDict, are accepted as well
     if not isinstance(wranglers, dict):
         raise utils.StimelaCabRuntimeError("wranglers: dict expected")
     for match, actions in wranglers.items():
         replace = None
         if isinstance(actions, str):
             actions = [actions]
         if not isinstance(actions, list):
             raise utils.StimelaCabRuntimeError(f"wrangler entry {match}: expected action or list of action")
         for action in actions:
             # Reject non-string actions explicitly; they would otherwise fail
             # with an AttributeError on .startswith()
             if not isinstance(action, str):
                 raise utils.StimelaCabRuntimeError(
                     f"wrangler entry {match}: action must be a string, got {type(action).__name__}")
             if action.startswith("replace:"):
                 # Everything after the first colon is the replacement text;
                 # if several replace actions are given, the last one wins
                 replace = action.split(":", 1)[1]
             elif action not in _actions:
                 raise utils.StimelaCabRuntimeError(f"wrangler entry {match}: unknown action '{action}'")
         # Only actions known to the _actions table are kept; 'replace:' entries
         # are carried separately through `replace`
         actions = [_actions[act] for act in actions if act in _actions]
         self._wranglers.append((re.compile(match), replace, actions))
Example #2
0
    def python_job(self, function, parameters=None):
        """
        Register a python callable as the job to run

        function    :   Python callable to execute. Its __name__ is used as
                        the job name when self.name is None
        parameters  :   Parameters to pass to the function; stored as given

        Returns 0 after storing the callable and its parameters in self.job.
        Raises StimelaCabRuntimeError if function is not callable.
        """

        if callable(function):
            # Keep a name that was already set; otherwise borrow the function's
            if self.name is None:
                self.name = function.__name__

            self.job = dict(function=function, parameters=parameters)

            return 0

        raise utils.StimelaCabRuntimeError(
            'Object given as function is not callable')
Example #3
0
    def setup_job(self, image, config,
                   indir=None, outdir=None, msdir=None,
                   singularity_image_dir=None):
        """
            Setup job

        image   :   For container jobs, the stimela cab name, e.g. 'cab/simms'.
                    For python jobs (self.jtype == "python"), the Python
                    callable to execute
        config  :   Dictionary of options to parse to the task. This will modify
                    the parameters in the default parameter file which
                    can be viewed by running 'stimela cabs -i <cab name>', e.g 'stimela cabs -i simms'.
                    For python jobs, these are the parameters stored for the callable
        indir   :   input directory for cab
        outdir  :   output directory for cab. It is created if missing, and a
                    'tmp' sub-directory is created inside it
        msdir   :   MS directory for cab. Only specify if different from recipe ms_dir
        singularity_image_dir   :   Directory that holds singularity images.
                                    Only used when self.jtype is "singularity"

        Returns 0 once self.job has been set: a dictionary with the keys
        'function' and 'parameters' for python jobs, the configured container
        object otherwise.

        Raises StimelaCabRuntimeError (python job given a non-callable),
        StimelaCabParameterError (invalid characters in self.name),
        ValueError (unknown cab version), StimelaBaseImageError (unverified
        tag without force) and OSError (stimela_runscript not found).
        """

        if self.jtype == "python":
            # Validate before touching image.__name__: a non-callable such as a
            # string has no __name__ and would fail with an AttributeError
            # instead of the intended error
            if not callable(image):
                raise utils.StimelaCabRuntimeError(
                    'Object given as function is not callable')
            self.image = image.__name__

            if self.name is None:
                self.name = image.__name__

            self.job = {
                'function':   image,
                'parameters':   config,
            }
            self.setup_job_log()

            return 0

        # check if name has any offending characters
        offenders = re.findall(r'[^\w .-]', self.name)
        if offenders:
            raise StimelaCabParameterError('The cab name \'{:s}\' contains invalid characters.'
                                           ' Allowed charcaters are alphanumeric, plus [-_. ].'.format(self.name))

        self.setup_job_log()

        # make name palatable as container name
        pausterized_name = re.sub(r"[\W]", "_", self.name)

        # id() and the current time are appended to the container name
        name = '{0}-{1}{2}'.format(pausterized_name, id(image),
                                   str(time.time()).replace('.', ''))

        cont = CONT_MOD[self.jtype].Container(image, name,
                                              logger=self.log,
                                              workdir=CONT_IO["output"],
                                              time_out=self.time_out)

        # A user-specified custom cab location takes precedence over the default one
        cab_dirname = image.split("/")[1]
        cabpath = os.path.join(self.cabpath or CAB_PATH, cab_dirname)
        parameter_file = os.path.join(cabpath, 'parameters.json')
        _cab = cab.CabDefinition(indir=indir, outdir=outdir,
                                 msdir=msdir, parameter_file=parameter_file)
        self.setup_output_wranglers(_cab.wranglers)
        cont.IODEST = CONT_IO
        cont.cabname = _cab.task

        # Resolve the tag/version pair from the job's own settings. _cab.tag and
        # _cab.version are indexed in parallel; the last entry is the default.
        if self.tag or self.version:
            tvi = None
            if self.tag:
                try:
                    tvi = _cab.tag.index(self.tag)
                except ValueError:
                    # NOTE(review): an unknown tag is not an error here; the
                    # last tag/version pair is used instead -- confirm intended
                    pass
            elif self.version:
                try:
                    tvi = _cab.version.index(self.version)
                except ValueError:
                    msg = f"The version, {self.version}, specified for cab '{_cab.task}' is unknown. Available versions are {_cab.version}"
                    self.log.error(msg)
                    raise ValueError(msg)
            if tvi is None:
                tvi = -1
            self.tag = _cab.tag[tvi]
            self.version = _cab.version[tvi]
        else:
            self.tag = _cab.tag[-1]
            self.version = _cab.version[-1]

        # Per-cab overrides given in the recipe's cabspecs. Example
        # ----------------
        # casa_listobs:
        #   tag: <tag>               ## optional
        #   version: <version>       ## optional. If version is a dict, then ignore tag and priority and use <tag>:<version> pairs in dict
        #   force: true              ## Continue (with a warning) even if the tag is not among the cab's known tags
        cabspecs = self.recipe.cabspecs.get(cont.cabname, None)
        if cabspecs:
            _tag = cabspecs.get("tag", None)
            _version = cabspecs.get("version", None)
            _force_tag = cabspecs.get("force", False)
            if isinstance(_version, dict):
                # the dict maps a version to the tag to use for it
                if self.version in _version:
                    self.tag = _version[self.version]
            elif _version:
                self.version = _version
            else:
                self.tag = _tag
            if self.version and self.version not in _cab.version:
                msg = f"The version, {self.version}, specified for cab '{_cab.task}' is unknown. Available versions are {_cab.version}"
                self.log.error(msg)
                raise ValueError(msg)
            if not _tag:
                # no explicit tag: use the tag that belongs to the chosen version
                idx = _cab.version.index(self.version)
                self.tag = _cab.tag[idx]
            self.force_tag = _force_tag

        if self.tag not in _cab.tag:
            if self.force_tag:
                self.log.warning(f"You have chosen to use an unverified base image '{_cab.base}:{self.tag}'. May the force be with you.")
            else:
                raise StimelaBaseImageError(f"The base image '{_cab.base}' with tag '{self.tag}' has not been verified. If you wish to continue with it, please add the 'force_tag' when adding it to your recipe")

        if self.jtype == "singularity":
            simage = _cab.base.replace("/", "_")
            cont.image = '{0:s}/{1:s}_{2:s}{3:s}'.format(singularity_image_dir,
                    simage, self.tag, singularity.suffix)
            cont.image = os.path.abspath(cont.image)
            # pull the image only when it is not already on disk
            if not os.path.exists(cont.image):
                singularity.pull(":".join([_cab.base, self.tag]),
                        os.path.basename(cont.image), directory=singularity_image_dir)
        else:
            cont.image = ":".join([_cab.base, self.tag])

        # Container parameter file will be updated and validated before the container is executed
        cont._cab = _cab
        cont.parameter_file_name = '{0}/{1}.json'.format(
            self.recipe.parameter_file_dir, name)

        self.image = str(cont.image)

        # Remove dismissable kw arguments: each one is called, and its result
        # is put back under the same key unless it is None.
        # The keys are collected first because config is modified below.
        ops_to_pop = []
        for op in config:
            if isinstance(config[op], dismissable):
                ops_to_pop.append(op)
        for op in ops_to_pop:
            arg = config.pop(op)()
            if arg is not None:
                config[op] = arg
        cont.config = config

        # These are standard volumes and
        # environmental variables. These will be
        # always exist in a cab container
        cont.add_volume(cont.parameter_file_name,
                        f'{cab.MOUNT}/configfile', perm='ro', noverify=True)
        cont.add_volume(os.path.join(cabpath, "src"), f"{cab.MOUNT}/code", "ro")

        cont.add_volume(os.path.join(self.workdir, "passwd"), "/etc/passwd")
        cont.add_volume(os.path.join(self.workdir, "group"), "/etc/group")

        if self.jtype == "singularity":
            cont.RUNSCRIPT = f"/{self.jtype}"
            if _cab.base.startswith("stimela/casa") or _cab.base.startswith("stimela/simms"):
                cont.add_environ("LANGUAGE", "en_US.UTF-8")
                cont.add_environ("LANG", "en_US.UTF-8")
                cont.add_environ("LC_ALL", "en_US.UTF-8")
            cont.execdir = self.workdir
        else:
            cont.RUNSCRIPT = f"/{self.jtype}_run"

        # The run script found on the host's PATH is mounted into the container
        runscript = shutil.which("stimela_runscript")
        if runscript:
            cont.add_volume(runscript,
                    cont.RUNSCRIPT, perm="ro")
        else:
            msg = ("Stimela container runscript could not be found. "
                   "This may be due to conflicting python or stimela installations in your $PATH.")
            self.log.error(msg)
            raise OSError(msg)

        cont.add_environ('CONFIG', f'{cab.MOUNT}/configfile')
        cont.add_environ('HOME', cont.IODEST["output"])
        cont.add_environ('STIMELA_MOUNT', cab.MOUNT)

        if msdir:
            md = cont.IODEST["msfile"]
            os.makedirs(msdir, exist_ok=True)
            cont.add_volume(msdir, md)
            cont.add_environ("MSDIR", md)
            # Keep a record of the content of the
            # volume (top level only)
            dirname, dirs, files = next(os.walk(msdir))
            cont.msdir_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug(
                'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(msdir, md))

        if indir:
            cont.add_volume(indir, cont.IODEST["input"], perm='ro')
            cont.add_environ("INPUT", cont.IODEST["input"])
            # Keep a record of the content of the
            # volume (top level only)
            dirname, dirs, files = next(os.walk(indir))
            cont.input_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug('Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(
                indir, cont.IODEST["input"]))

        os.makedirs(outdir, exist_ok=True)

        od = cont.IODEST["output"]

        cont.logfile = self.logfile
        cont.add_volume(outdir, od, "rw")
        cont.add_environ("OUTPUT", od)

        # temp files go into output
        tmpfol = os.path.join(outdir, "tmp")
        os.makedirs(tmpfol, exist_ok=True)
        cont.add_volume(tmpfol, cont.IODEST["tmp"], "rw")
        cont.add_environ("TMPDIR", cont.IODEST["tmp"])

        self.log.debug(
            'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(outdir, od))

        # Added and ready for execution
        self.job = cont

        return 0
Example #4
0
    def setup_job(self, image, config,
                   indir=None, outdir=None, msdir=None,
                   build_label=None, singularity_image_dir=None,
                   **kw):
        """
            Setup job

        image   :   For container jobs, the stimela cab name, e.g. 'cab/simms'.
                    For python jobs (self.jtype == "python"), the Python
                    callable to execute
        config  :   Dictionary of options to parse to the task. This will modify
                    the parameters in the default parameter file which
                    can be viewed by running 'stimela cabs -i <cab name>', e.g 'stimela cabs -i simms'.
                    For python jobs, these are the parameters stored for the callable
        indir   :   input directory for cab
        outdir  :   output directory for cab. It is created if missing, and a
                    'tmp' sub-directory is created inside it
        msdir   :   MS directory for cab. Only specify if different from recipe ms_dir
        build_label :   Label of the stimela build; for docker jobs it is used to
                        locate the cab build log and to name the image
        singularity_image_dir   :   Directory that holds singularity images
                                    (default: ./stimela_singularity_images)
        kw      :   Additional keyword arguments are accepted and ignored

        For container jobs, indir, outdir, msdir and build_label are overridden
        by the '_STIMELA_INPUT', '_STIMELA_OUTPUT', '_STIMELA_MSDIR' and
        '_STIMELA_BUILD_LABEL' entries of self.recipe.stimela_context when
        those are set.

        Returns 0 once self.job has been set: a dictionary with the keys
        'function' and 'parameters' for python jobs, the configured container
        object otherwise.

        Raises StimelaCabRuntimeError (python job given a non-callable) and
        StimelaCabParameterError (invalid characters in self.name).
        """

        if self.jtype == "python":
            # Validate before touching image.__name__: a non-callable such as a
            # string has no __name__ and would fail with an AttributeError
            # instead of the intended error
            if not callable(image):
                raise utils.StimelaCabRuntimeError(
                    'Object given as function is not callable')
            self.image = image.__name__

            if self.name is None:
                self.name = image.__name__

            self.job = {
                'function':   image,
                'parameters':   config,
            }

            return 0

        # check if name has any offending characters
        offenders = re.findall(r'[^\w .-]', self.name)
        if offenders:
            raise StimelaCabParameterError('The cab name \'{:s}\' contains invalid characters.'
                                           ' Allowed charcaters are alphanumeric, plus [-_. ].'.format(self.name))

        # Update I/O with values specified on command line
        script_context = self.recipe.stimela_context
        indir = script_context.get('_STIMELA_INPUT', None) or indir
        outdir = script_context.get('_STIMELA_OUTPUT', None) or outdir
        msdir = script_context.get('_STIMELA_MSDIR', None) or msdir
        build_label = script_context.get(
            '_STIMELA_BUILD_LABEL', None) or build_label

        self.setup_job_log()

        # make name palatable as container name
        pausterized_name = re.sub(r"[\W]", "_", self.name)

        # id() and the current time are appended to the container name
        name = '{0}-{1}{2}'.format(pausterized_name, id(image),
                                   str(time.time()).replace('.', ''))

        cont = CONT_MOD[self.jtype].Container(image, name,
                                              logger=self.log,
                                              workdir=WORKDIR,
                                              time_out=self.time_out)
        if self.jtype == "docker":
            # Get location of template parameters file
            cabs_loc = f"{stimela.LOG_HOME}/{build_label}_stimela_logfile.json"
            cabs_logger = get_cabs(cabs_loc)
            cab_key = f'{build_label}_{cont.image}'
            try:
                cabpath = cabs_logger[cab_key]['DIR']
            except KeyError:
                # The cab is not in the build log yet: build it, then re-read the log
                main.build(["--us-only", cont.image.split("/")[-1],
                        "--no-cache", "--build-label", build_label])
                cabs_logger = get_cabs(cabs_loc)
                cabpath = cabs_logger[cab_key]['DIR']

        else:
            cabpath = os.path.join(CAB_PATH, image.split("/")[1])

        # In case the user specified a custom cab
        if self.cabpath:
            cabpath = os.path.join(self.cabpath, image.split("/")[1])
        parameter_file = os.path.join(cabpath, 'parameters.json')
        _cab = cab.CabDefinition(indir=indir, outdir=outdir,
                                 msdir=msdir, parameter_file=parameter_file)
        cont.IODEST = CONT_IO
        cont.cabname = _cab.task

        if self.jtype == "docker":
            cont.image = '{0}_{1}'.format(build_label, image)
        elif self.jtype == "singularity":
            simage = _cab.base.replace("/", "_")
            if singularity_image_dir is None:
                singularity_image_dir = os.path.join(".", "stimela_singularity_images")
            cont.image = '{0:s}/{1:s}_{2:s}{3:s}'.format(singularity_image_dir,
                    simage, _cab.tag, singularity.suffix)
            # pull the image only when it is not already on disk
            if not os.path.exists(cont.image):
                main.pull(f"-s -cb {cont.cabname} -pf {singularity_image_dir}".split())
        else:
            cont.image = ":".join([_cab.base, _cab.tag])

        # Container parameter file will be updated and validated before the container is executed
        cont._cab = _cab
        cont.parameter_file_name = '{0}/{1}.json'.format(
            self.recipe.parameter_file_dir, name)

        self.image = str(cont.image)

        # Remove dismissable kw arguments: each one is called, and its result
        # is put back under the same key unless it is None.
        # The keys are collected first because config is modified below.
        ops_to_pop = []
        for op in config:
            if isinstance(config[op], dismissable):
                ops_to_pop.append(op)
        for op in ops_to_pop:
            arg = config.pop(op)()
            if arg is not None:
                config[op] = arg
        cont.config = config

        # These are standard volumes and
        # environmental variables. These will be
        # always exist in a cab container
        cont.add_volume(self.recipe.stimela_path,
                        '/scratch/stimela', perm='ro')
        cont.add_volume(cont.parameter_file_name,
                        '/scratch/configfile', perm='ro', noverify=True)
        cont.add_volume(os.path.join(cabpath, "src"), "/scratch/code", "ro")

        if self.jtype == "singularity":
            cont.RUNSCRIPT = f"/{self.jtype}"
        else:
            cont.RUNSCRIPT = f"/{self.jtype}_run"

        cont.add_volume(f"{BIN}/stimela_runscript",
                cont.RUNSCRIPT, perm="ro")

        cont.add_environ('CONFIG', '/scratch/configfile')
        cont.add_environ('HOME', WORKDIR)

        if msdir:
            md = cont.IODEST["msfile"]
            cont.add_volume(msdir, md)
            cont.add_environ("MSDIR", md)
            # Keep a record of the content of the
            # volume (top level only)
            dirname, dirs, files = next(os.walk(msdir))
            cont.msdir_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug(
                'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(msdir, md))

        if indir:
            cont.add_volume(indir, cont.IODEST["input"], perm='ro')
            cont.add_environ("INPUT", cont.IODEST["input"])
            # Keep a record of the content of the
            # volume (top level only)
            dirname, dirs, files = next(os.walk(indir))
            cont.input_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug('Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(
                indir, cont.IODEST["input"]))

        # makedirs(exist_ok=True) avoids the exists()/mkdir() race and also
        # creates missing parent directories
        os.makedirs(outdir, exist_ok=True)

        od = cont.IODEST["output"]

        cont.logfile = self.logfile
        cont.add_volume(outdir, od, "rw")
        cont.add_environ("OUTPUT", od)

        # temp files go into output
        tmpfol = os.path.join(outdir, "tmp")
        os.makedirs(tmpfol, exist_ok=True)
        cont.add_volume(tmpfol, cont.IODEST["tmp"], "rw")
        cont.add_environ("TMPDIR", cont.IODEST["tmp"])

        self.log.debug(
            'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(outdir, od))

        # Added and ready for execution
        self.job = cont

        return 0