Ejemplo n.º 1
0
    def docker_job(self, image, config=None,
                   input=None, output=None, msdir=None,
                   shared_memory='1gb', build_label=None,
                   **kw):
        """
        Set up a docker container for this task and store it in self.job

        image   :   stimela cab name, e.g. 'cab/simms'
        config  :   Dictionary of options to parse to the task. This will modify
                    the parameters in the default parameter file which
                    can be viewed by running 'stimela cabs -i <cab name>', e.g 'stimela cabs -i simms'.
                    Values that are `dismissable` instances are called; the option is
                    dropped when the call returns None, otherwise it is replaced by the
                    returned value. The dictionary is modified in place.
                    None is treated as an empty dictionary.
        input   :   input directory for cab (mounted read-only)
        output  :   output directory for cab. Created if it does not exist
        msdir   :   MS directory for cab. Only specify if different from recipe ms_dir
        shared_memory   :   forwarded to docker.Container
        build_label     :   label used to look the cab up in the stimela build log
        kw      :   accepted but not used by this method

        input, output, msdir and build_label are replaced by the corresponding
        _STIMELA_* entries of the recipe's stimela_context when those are set.

        Returns 0. Raises StimelaCabParameterError if self.name contains
        non-word characters or if the cab is not found in the build log.
        """

        # No config means "no parameter overrides"; the loop below needs a dict
        if config is None:
            config = {}

        # check if name has any offending characters
        offenders = re.findall(r'\W', self.name)
        if offenders:
            raise StimelaCabParameterError('The cab name \'{:s}\' has some non-alphanumeric characters.'
                                           ' Charecters making up this name must be in [a-z,A-Z,0-9,_]'.format(self.name))

        # Update I/O with values specified on command line
        # TODO (sphe) I think this feature should be removed
        script_context = self.recipe.stimela_context
        input = script_context.get('_STIMELA_INPUT', None) or input
        output = script_context.get('_STIMELA_OUTPUT', None) or output
        output = os.path.abspath(output)
        msdir = script_context.get('_STIMELA_MSDIR', None) or msdir
        build_label = script_context.get(
            '_STIMELA_BUILD_LABEL', None) or build_label

        # Get location of template parameters file
        cabs_logger = get_cabs(
            '{0:s}/{1:s}_stimela_logfile.json'.format(stimela.LOG_HOME, build_label))
        try:
            cabpath = cabs_logger['{0:s}_{1:s}'.format(
                build_label, image)]['DIR']
        except KeyError as err:
            raise StimelaCabParameterError(
                'Cab {} has is uknown to stimela. Was it built?'.format(image)) from err
        parameter_file = cabpath+'/parameters.json'

        # Container name: job name, then id(image) and the current time
        name = '{0}-{1}{2}'.format(self.name, id(image),
                                   str(time.time()).replace('.', ''))

        _cab = cab.CabDefinition(indir=input, outdir=output,
                                 msdir=msdir, parameter_file=parameter_file)

        cont = docker.Container(image, name,
                                label=self.label, logger=self.log,
                                shared_memory=shared_memory,
                                log_container=stimela.LOG_FILE,
                                time_out=self.time_out)

        # Container parameter file will be updated and validated before the container is executed
        cont._cab = _cab
        cont.parameter_file_name = '{0}/{1}.json'.format(
            self.recipe.parameter_file_dir, name)

        # Resolve dismissable options. The keys are collected first because
        # the dictionary is modified while they are being processed.
        dismissable_ops = [op for op in config
                           if isinstance(config[op], dismissable)]
        for op in dismissable_ops:
            arg = config.pop(op)()
            if arg is not None:
                config[op] = arg
        cont.config = config

        cont.add_volume(
            "{0:s}/cargo/cab/docker_run".format(self.recipe.stimela_path), "/docker_run", perm="ro")
        cont.COMMAND = "/bin/sh -c /docker_run"
        # These are standard volumes and
        # environmental variables. These will
        # always exist in a cab container
        cont.add_volume(self.recipe.stimela_path,
                        '/scratch/stimela', perm='ro')
        cont.add_volume(self.recipe.parameter_file_dir, '/configs', perm='ro')
        cont.add_environ('CONFIG', '/configs/{}.json'.format(name))

        cab.IODEST = CONT_IO["docker"]

        if msdir:
            md = cab.IODEST["msfile"]
            cont.add_volume(msdir, md)
            cont.add_environ('MSDIR', md)
            # Keep a record of the (top level) content of the volume
            dirname, dirs, files = next(os.walk(msdir))
            cont.msdir_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug(
                'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(msdir, md))

        if input:
            cont.add_volume(input, cab.IODEST["input"], perm='ro')
            cont.add_environ('INPUT', cab.IODEST["input"])
            # Keep a record of the (top level) content of the volume
            dirname, dirs, files = next(os.walk(input))
            cont.input_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug('Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(
                input, cab.IODEST["input"]))

        if not os.path.exists(output):
            os.mkdir(output)

        od = cab.IODEST["output"]
        cont.add_environ('HOME', od)
        cont.add_environ('OUTPUT', od)

        # Logs go to self.log_dir when it is set, otherwise to the output directory
        self.log_dir = os.path.abspath(self.log_dir or output)
        logfile_name = 'log-{0:s}.txt'.format(name.split('-')[0])
        self.logfile = cont.logfile = '{0:s}/{1:s}'.format(
            self.log_dir, logfile_name)
        cont.add_volume(output, od, "rw")

        # Make sure the log file exists on the host before it is mounted
        if not os.path.exists(self.logfile):
            with open(self.logfile, "w"):
                pass
        logfile_mount = "{0:s}/logfile".format(self.log_dir)
        cont.add_volume(self.logfile, logfile_mount, "rw")
        cont.add_environ('LOGFILE', logfile_mount)
        self.log.debug(
            'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(output, od))

        cont.image = '{0}_{1}'.format(build_label, image)
        # Added and ready for execution
        self.job = cont

        return 0
Ejemplo n.º 2
0
    def singularity_job(self,
                        image,
                        config,
                        singularity_image_dir,
                        input=None,
                        output=None,
                        msdir=None,
                        **kw):
        """
        Set up a singularity container for this task and store it in self.job

        image   :   stimela cab name, e.g. 'cab/simms'
        config  :   Dictionary of options to parse to the task. This will modify
                    the parameters in the default parameter file which
                    can be viewed by running 'stimela cabs -i <cab name>', e.g 'stimela cabs -i simms'.
                    Values that are `dismissable` instances are called; the option is
                    dropped when the call returns None, otherwise it is replaced by the
                    returned value. The dictionary is modified in place.
        singularity_image_dir   :   directory holding the singularity image files
        input   :   input directory for cab (mounted read-only)
        output  :   output directory for cab. Created if it does not exist,
                    together with a 'tmp' sub-directory for temporary files
        msdir   :   MS directory for cab. Only specify if different from recipe ms_dir
        kw      :   accepted but not used by this method

        input, output and msdir are replaced by the corresponding _STIMELA_*
        entries of the recipe's stimela_context when those are set.

        Returns 0. Raises StimelaCabParameterError if self.name contains
        non-word characters.
        """

        # check if name has any offending characters
        offenders = re.findall(r'\W', self.name)
        if offenders:
            raise StimelaCabParameterError(
                'The cab name \'{:s}\' has some non-alphanumeric characters.'
                ' Charecters making up this name must be in [a-z,A-Z,0-9,_]'.
                format(self.name))

        # Update I/O with values specified on command line
        # TODO (sphe) I think this feature should be removed
        script_context = self.recipe.stimela_context
        input = script_context.get('_STIMELA_INPUT', None) or input
        output = script_context.get('_STIMELA_OUTPUT', None) or output
        msdir = script_context.get('_STIMELA_MSDIR', None) or msdir

        # Get location of template parameters file. A user supplied
        # self.cabpath takes precedence over the cabs shipped with stimela.
        cab_dirname = image.split("/")[1]
        if self.cabpath:
            cabpath = os.path.join(self.cabpath, cab_dirname)
        else:
            cabpath = self.recipe.stimela_path + \
                "/cargo/cab/{0:s}/".format(cab_dirname)
        parameter_file = cabpath + '/parameters.json'

        # Container name: job name, then id(image) and the current time
        name = '{0}-{1}{2}'.format(self.name, id(image),
                                   str(time.time()).replace('.', ''))

        _cab = cab.CabDefinition(indir=input,
                                 outdir=output,
                                 msdir=msdir,
                                 parameter_file=parameter_file)

        cab.IODEST = CONT_IO["singularity"]

        self.setup_job_log()
        cont = singularity.Container(image,
                                     name,
                                     logger=self.log,
                                     time_out=self.time_out)

        # Container parameter file will be updated and validated before the container is executed
        cont._cab = _cab
        cont.parameter_file_name = '{0}/{1}.json'.format(
            self.recipe.parameter_file_dir, name)

        # Resolve dismissable options. The keys are collected first because
        # the dictionary is modified while they are being processed.
        dismissable_ops = [op for op in config
                           if isinstance(config[op], dismissable)]
        for op in dismissable_ops:
            arg = config.pop(op)()
            if arg is not None:
                config[op] = arg
        cont.config = config

        # These are standard volumes and
        # environmental variables. These will
        # always exist in a cab container
        cont.add_volume(self.recipe.stimela_path,
                        '/scratch/stimela',
                        perm='ro')
        cont.add_volume(cont.parameter_file_name,
                        '/scratch/configfile',
                        perm='ro',
                        noverify=True)
        cont.add_volume(
            "{0:s}/{1:s}/src/".format(
                self.cabpath
                or "{0:s}/cargo/cab".format(self.recipe.stimela_path),
                _cab.task), "/scratch/code", "ro")
        cont.add_volume(os.path.join(BIN, "stimela_singularity_run"),
                        "/singularity")

        if msdir:
            md = cab.IODEST["msfile"]
            cont.add_volume(msdir, md)
            # Keep a record of the (top level) content of the volume
            dirname, dirs, files = next(os.walk(msdir))
            cont.msdir_content = {
                "volume": dirname,
                "dirs": dirs,
                "files": files,
            }

            self.log.debug(
                'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'
                .format(msdir, md))

        if input:
            cont.add_volume(input, cab.IODEST["input"], perm='ro')
            # Keep a record of the (top level) content of the volume
            dirname, dirs, files = next(os.walk(input))
            cont.input_content = {
                "volume": dirname,
                "dirs": dirs,
                "files": files,
            }

            self.log.debug(
                'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'
                .format(input, cab.IODEST["input"]))

        if not os.path.exists(output):
            os.mkdir(output)

        od = cab.IODEST["output"]

        # Logs go to self.log_dir when it is set, otherwise to the output directory
        self.log_dir = os.path.abspath(self.log_dir or output)
        cont.logfile = self.tmp_logfile
        cont.add_volume(cont.logfile, "/scratch/logfile", "rw")
        cont.add_volume(output, od, "rw")

        # temp files go into output
        tmpfol = os.path.join(output, "tmp")
        if not os.path.exists(tmpfol):
            os.mkdir(tmpfol)
        cont.add_volume(tmpfol, cab.IODEST["tmp"], "rw")

        self.log.debug(
            'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'
            .format(output, od))

        # Image file name is built from the cab's base image and tag
        simage = _cab.base.replace("/", "_")
        cont.image = '{0:s}/{1:s}_{2:s}.img'.format(singularity_image_dir,
                                                    simage, _cab.tag)
        # Added and ready for execution
        self.job = cont

        return 0
Ejemplo n.º 3
0
    def udocker_job(self, image, config,
                    input=None, output=None, msdir=None,
                    **kw):
        """
        Set up a udocker container for this task and store it in self.job

        image   :   stimela cab name, e.g. 'cab/simms'
        config  :   Dictionary of options to parse to the task. This will modify
                    the parameters in the default parameter file which
                    can be viewed by running 'stimela cabs -i <cab name>', e.g 'stimela cabs -i simms'.
                    Values that are `dismissable` instances are called; the option is
                    dropped when the call returns None, otherwise it is replaced by the
                    returned value. The dictionary is modified in place.
        input   :   input directory for cab
        output  :   output directory for cab. Created if it does not exist
        msdir   :   MS directory for cab. Only specify if different from recipe ms_dir
        kw      :   accepted but not used by this method

        input, output and msdir are replaced by the corresponding _STIMELA_*
        entries of the recipe's stimela_context when those are set.

        Returns 0. Raises StimelaCabParameterError if self.name contains
        non-word characters.
        """

        # check if name has any offending characters
        offenders = re.findall(r'\W', self.name)
        if offenders:
            raise StimelaCabParameterError('The cab name \'{:s}\' has some non-alphanumeric characters.'
                                           ' Charecters making up this name must be in [a-z,A-Z,0-9,_]'.format(self.name))

        # Update I/O with values specified on command line
        # TODO (sphe) I think this feature should be removed
        script_context = self.recipe.stimela_context
        input = script_context.get('_STIMELA_INPUT', None) or input
        output = script_context.get('_STIMELA_OUTPUT', None) or output
        msdir = script_context.get('_STIMELA_MSDIR', None) or msdir

        # Get location of template parameters file
        cabpath = self.recipe.stimela_path + \
            "/cargo/cab/{0:s}/".format(image.split("/")[1])
        parameter_file = cabpath+'/parameters.json'

        # Container name: job name, then id(image) and the current time
        name = '{0}-{1}{2}'.format(self.name, id(image),
                                   str(time.time()).replace('.', ''))

        _cab = cab.CabDefinition(indir=input, outdir=output,
                                 msdir=msdir, parameter_file=parameter_file)

        cab.IODEST = CONT_IO["udocker"]

        cont = udocker.Container(image, name,
                                 logger=self.log, time_out=self.time_out)

        cont.add_volume(
            "{0:s}/cargo/cab/docker_run".format(self.recipe.stimela_path), "/udocker_run")
        cont.COMMAND = "/bin/sh -c /udocker_run"

        # Container parameter file will be updated and validated before the container is executed
        cont._cab = _cab
        cont.parameter_file_name = '{0}/{1}.json'.format(
            self.recipe.parameter_file_dir, name)

        # Resolve dismissable options. The keys are collected first because
        # the dictionary is modified while they are being processed.
        dismissable_ops = [op for op in config
                           if isinstance(config[op], dismissable)]
        for op in dismissable_ops:
            arg = config.pop(op)()
            if arg is not None:
                config[op] = arg
        cont.config = config

        # These are standard volumes and
        # environmental variables. These will
        # always exist in a cab container
        cont.add_volume(self.recipe.stimela_path, '/scratch/stimela')
        cont.add_volume(cont.parameter_file_name,
                        '/scratch/configfile', noverify=True)
        cont.add_volume("{0:s}/cargo/cab/{1:s}/src/".format(
            self.recipe.stimela_path, _cab.task), "/scratch/code")

        # The parameter file is always mounted at this fixed location
        cont.add_environ('CONFIG', '/scratch/configfile')

        if msdir:
            md = cab.IODEST["msfile"]
            cont.add_volume(msdir, md)
            cont.add_environ("MSDIR", md)
            # Keep a record of the (top level) content of the volume
            dirname, dirs, files = next(os.walk(msdir))
            cont.msdir_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug(
                'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(msdir, md))

        if input:
            cont.add_volume(input, cab.IODEST["input"])
            cont.add_environ("INPUT", cab.IODEST["input"])
            # Keep a record of the (top level) content of the volume
            dirname, dirs, files = next(os.walk(input))
            cont.input_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug('Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(
                input, cab.IODEST["input"]))

        if not os.path.exists(output):
            os.mkdir(output)

        od = cab.IODEST["output"]
        cont.WORKDIR = od

        # Logs go to self.log_dir when it is set, otherwise to the output directory
        self.log_dir = os.path.abspath(self.log_dir or output)
        logfile_name = 'log-{0:s}.txt'.format(name.split('-')[0])
        self.logfile = cont.logfile = '{0:s}/{1:s}'.format(
            self.log_dir, logfile_name)

        # Make sure the log file exists on the host before it is mounted
        if not os.path.exists(self.logfile):
            with open(self.logfile, 'w'):
                pass
        cont.add_environ("LOGFILE", "/scratch/logfile")
        cont.add_volume(self.logfile, "/scratch/logfile")
        cont.add_volume(output, od)
        cont.add_environ("OUTPUT", od)
        self.log.debug(
            'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(output, od))

        # Propagate the cab's graphics flag, when it has one, to the container
        if hasattr(cont._cab, "use_graphics") and cont._cab.use_graphics:
            cont.use_graphics = True
        cont.image = '{0:s}:{1:s}'.format(_cab.base, _cab.tag)
        # Added and ready for execution
        self.job = cont

        return 0
Ejemplo n.º 4
0
    def setup_job(self, image, config,
                   indir=None, outdir=None, msdir=None, 
                   build_label=None, singularity_image_dir=None,
                   **kw):
        """
        Set up this job and store the result in self.job

        When self.jtype is "python", `image` is the callable to execute and
        `config` the parameters to parse to it; self.job becomes a dictionary
        with the keys 'function' and 'parameters', and self.name defaults to
        the callable's __name__.

        For every other job type a container is configured:

        image   :   stimela cab name, e.g. 'cab/simms'
        config  :   Dictionary of options to parse to the task. This will modify
                    the parameters in the default parameter file which
                    can be viewed by running 'stimela cabs -i <cab name>', e.g 'stimela cabs -i simms'.
                    Values that are `dismissable` instances are called; the option is
                    dropped when the call returns None, otherwise it is replaced by the
                    returned value. The dictionary is modified in place.
        indir   :   input directory for cab (mounted read-only)
        outdir  :   output directory for cab. Created if it does not exist,
                    together with a 'tmp' sub-directory for temporary files
        msdir   :   MS directory for cab. Only specify if different from recipe ms_dir
        build_label             :   label used to look the cab up in the stimela
                                    build log (docker jobs)
        singularity_image_dir   :   directory holding singularity images; defaults
                                    to './stimela_singularity_images'
        kw      :   accepted but not used by this method

        indir, outdir, msdir and build_label are replaced by the corresponding
        _STIMELA_* entries of the recipe's stimela_context when those are set.

        Returns 0. Raises utils.StimelaCabRuntimeError if a python job is given
        a non-callable, and StimelaCabParameterError if self.name contains
        characters other than alphanumerics and [-_. ].
        """

        if self.jtype == "python":
            # Validate before touching image.__name__, so that a non-callable
            # is reported with the intended error rather than an AttributeError
            if not callable(image):
                raise utils.StimelaCabRuntimeError(
                    'Object given as function is not callable')

            self.image = image.__name__
            if self.name is None:
                self.name = image.__name__

            self.job = {
                'function':   image,
                'parameters':   config,
            }

            return 0

        # check if name has any offending characters
        offenders = re.findall(r'[^\w .-]', self.name)
        if offenders:
            raise StimelaCabParameterError('The cab name \'{:s}\' contains invalid characters.'
                                           ' Allowed charcaters are alphanumeric, plus [-_. ].'.format(self.name))

        # Update I/O with values specified on command line
        script_context = self.recipe.stimela_context
        indir = script_context.get('_STIMELA_INPUT', None) or indir
        outdir = script_context.get('_STIMELA_OUTPUT', None) or outdir
        msdir = script_context.get('_STIMELA_MSDIR', None) or msdir
        build_label = script_context.get(
            '_STIMELA_BUILD_LABEL', None) or build_label

        self.setup_job_log()

        # make name palatable as container name
        pausterized_name = re.sub(r"[\W]", "_", self.name)

        # Container name: sanitised job name, then id(image) and the current time
        name = '{0}-{1}{2}'.format(pausterized_name, id(image),
                                   str(time.time()).replace('.', ''))

        cont = CONT_MOD[self.jtype].Container(image, name,
                                              logger=self.log,
                                              workdir=WORKDIR,
                                              time_out=self.time_out)
        if self.jtype == "docker":
            # Get location of template parameters file
            cabs_loc = f"{stimela.LOG_HOME}/{build_label}_stimela_logfile.json"
            cabs_logger = get_cabs(cabs_loc)
            cab_key = f'{build_label}_{cont.image}'
            try:
                cabpath = cabs_logger[cab_key]['DIR']
            except KeyError:
                # The cab is not in the build log: build it, then look again
                main.build(["--us-only", cont.image.split("/")[-1],
                            "--no-cache", "--build-label", build_label])
                cabs_logger = get_cabs(cabs_loc)
                cabpath = cabs_logger[cab_key]['DIR']
        else:
            cabpath = os.path.join(CAB_PATH, image.split("/")[1])

        # In case the user specified a custom cab
        if self.cabpath:
            cabpath = os.path.join(self.cabpath, image.split("/")[1])
        parameter_file = os.path.join(cabpath, 'parameters.json')
        _cab = cab.CabDefinition(indir=indir, outdir=outdir,
                                 msdir=msdir, parameter_file=parameter_file)
        cont.IODEST = CONT_IO
        cont.cabname = _cab.task

        if self.jtype == "docker":
            cont.image = '{0}_{1}'.format(build_label, image)
        elif self.jtype == "singularity":
            simage = _cab.base.replace("/", "_")
            if singularity_image_dir is None:
                singularity_image_dir = os.path.join(".", "stimela_singularity_images")
            cont.image = '{0:s}/{1:s}_{2:s}{3:s}'.format(singularity_image_dir,
                    simage, _cab.tag, singularity.suffix)
            if not os.path.exists(cont.image):
                # Explicit argument list, so that an image directory
                # containing whitespace stays a single argument
                main.pull(["-s", "-cb", str(cont.cabname),
                           "-pf", str(singularity_image_dir)])
        else:
            cont.image = ":".join([_cab.base, _cab.tag])

        # Container parameter file will be updated and validated before the container is executed
        cont._cab = _cab
        cont.parameter_file_name = '{0}/{1}.json'.format(
            self.recipe.parameter_file_dir, name)

        self.image = str(cont.image)

        # Resolve dismissable options. The keys are collected first because
        # the dictionary is modified while they are being processed.
        dismissable_ops = [op for op in config
                           if isinstance(config[op], dismissable)]
        for op in dismissable_ops:
            arg = config.pop(op)()
            if arg is not None:
                config[op] = arg
        cont.config = config

        # These are standard volumes and
        # environmental variables. These will
        # always exist in a cab container
        cont.add_volume(self.recipe.stimela_path,
                        '/scratch/stimela', perm='ro')
        cont.add_volume(cont.parameter_file_name,
                        '/scratch/configfile', perm='ro', noverify=True)
        cont.add_volume(os.path.join(cabpath, "src"), "/scratch/code", "ro")

        # The run script is mounted at "/singularity" for singularity jobs
        # and at "/<jtype>_run" for every other container type
        if self.jtype == "singularity":
            cont.RUNSCRIPT = f"/{self.jtype}"
        else:
            cont.RUNSCRIPT = f"/{self.jtype}_run"

        cont.add_volume(f"{BIN}/stimela_runscript", 
                cont.RUNSCRIPT, perm="ro")

        cont.add_environ('CONFIG', '/scratch/configfile')
        cont.add_environ('HOME', WORKDIR)

        if msdir:
            md = cont.IODEST["msfile"]
            cont.add_volume(msdir, md)
            cont.add_environ("MSDIR", md)
            # Keep a record of the (top level) content of the volume
            dirname, dirs, files = next(os.walk(msdir))
            cont.msdir_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug(
                'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(msdir, md))

        if indir:
            cont.add_volume(indir, cont.IODEST["input"], perm='ro')
            cont.add_environ("INPUT", cont.IODEST["input"])
            # Keep a record of the (top level) content of the volume
            dirname, dirs, files = next(os.walk(indir))
            cont.input_content = {
                "volume":   dirname,
                "dirs":   dirs,
                "files":   files,
            }

            self.log.debug('Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(
                indir, cont.IODEST["input"]))

        if not os.path.exists(outdir):
            os.mkdir(outdir)

        od = cont.IODEST["output"]

        cont.logfile = self.logfile
        cont.add_volume(outdir, od, "rw")
        cont.add_environ("OUTPUT", od)

        # temp files go into output
        tmpfol = os.path.join(outdir, "tmp")
        if not os.path.exists(tmpfol):
            os.mkdir(tmpfol)
        cont.add_volume(tmpfol, cont.IODEST["tmp"], "rw")
        cont.add_environ("TMPDIR", cont.IODEST["tmp"])

        self.log.debug(
            'Mounting volume \'{0}\' from local file system to \'{1}\' in the container'.format(outdir, od))

        # Added and ready for execution
        self.job = cont

        return 0