Exemple #1
0
def main():
    """Print every configured profile with its settings as a table."""
    parse_args(__doc__, options_first=True)
    for profile_name, settings in iteritems(jip.config['profiles']):
        print("Profile:", colorize(profile_name, BLUE))
        print("Description:", settings.get('description', ""))
        # the description is shown above, so keep it out of the table
        table_rows = [
            (key, value)
            for key, value in iteritems(settings)
            if key != 'description'
        ]
        print(render_table(("Name", "Value"), table_rows))
        print("")
Exemple #2
0
 def from_dict(cls, data):
     """Load a profile from a dictionary.

     Plain keys are copied onto the new profile as attributes; the
     special ``jobs`` key holds nested per-job specs which are loaded
     recursively into ``profile.specs``.
     """
     profile = cls()
     # copy all plain parameters onto the new instance
     for key, value in iteritems(data):
         if key != 'jobs':
             setattr(profile, key, value)
     # nested job specs are profiles themselves
     if "jobs" in data:
         for spec_name, spec_data in iteritems(data["jobs"]):
             profile.specs[spec_name] = cls.from_dict(spec_data)
     return profile
Exemple #3
0
def _update(config, other):
    for k, v in iteritems(other):
        if isinstance(v, collections.Mapping):
            r = _update(config.get(k, {}), v)
            config[k] = r
        else:
            config[k] = other[k]
    return config
Exemple #4
0
 def _handle_exit(self, *args):
     """The EXIT handler: terminate every running job process.

     Always returns False so the caller's loop stops.
     """
     self.log.info("Master | EXIT request, shutting down")
     for job_id, worker in iteritems(self.running):
         self.log.warn("Master | Terminating %s", job_id)
         child = worker.process
         child.terminate()
         # wait for the child to actually go away
         child.join()
     return False
Exemple #5
0
 def _load_job_env(self):
     """Export the job's environment into ``os.environ``.

     Also publishes the JIP_ID/JIP_JOB/JIP_THREADS bookkeeping
     variables so child processes can identify the job.
     """
     env = self.env
     if env is not None:
         for key, value in iteritems(env):
             os.environ[key] = str(value)
     os.environ["JIP_ID"] = "" if self.id is None else str(self.id)
     os.environ["JIP_JOB"] = str(self.job_id) if self.job_id else ""
     # default to a single thread when none is configured
     os.environ["JIP_THREADS"] = str(self.threads) if self.threads else "1"
Exemple #6
0
def _update(config, other):
    """Recursively update the given config dict with the other dict"""
    for k, v in iteritems(other):
        if isinstance(v, collections.Mapping):
            r = _update(config.get(k, {}), v)
            config[k] = r
        else:
            config[k] = other[k]
    return config
Exemple #7
0
def main():
    """Execute a stored job given its database id.

    Loads the job from the database given via ``--db``, switches to the
    job's working directory, exports the job environment and runs the
    job. Exits with status 1 on errors and status 0 when the job is not
    in the queued state.
    """
    log.debug("job execution python path: %s", sys.path)
    args = parse_args(__doc__, options_first=True)
    try:
        log.info("Starting job with id %s stored in %s", args['<id>'],
                 args['--db'])
        jip.db.init(path=args['--db'])
        job = jip.db.get(args['<id>'])
        if not job:
            log.error("Requested job with id %s not found!", args['<id>'])
            sys.exit(1)
        if job.state != jip.db.STATE_QUEUED:
            log.warn("Job does not come from queued state! Stoping execution")
            sys.exit(0)
        # for LSF implementation, I could only test on openlava, and
        # that does not seem to support the -cwd option to switch the
        # working directory. To work around this, and be sure about the
        # working directory, we switch here
        if job.working_directory and len(job.working_directory) > 0:
            log.debug("Switching working directory to: %s",
                      job.working_directory)
            os.chdir(job.working_directory)
        # load job environment
        env = job.env
        if env is not None:
            for k, v in iteritems(env):
                log.info("Loading job environment %s:%s", k, v)
                os.environ[k] = str(v)

        # load the tool here to have it cached just in case
        # there is a problem at least on PBS where the tool
        # can not be loaded after the signal (which I still don't understand)
        try:
            tool = job.tool
            log.debug("Loaded tool: %s", tool)
        except Exception:
            # narrowed from a bare except so SystemExit and
            # KeyboardInterrupt still propagate
            log.warn("unable to load tool. Failure cleanup might fail!")

        # check profiling; guard against job.env being None (the env
        # loading above already allows that case)
        profiler = os.getenv(
            "JIP_PROFILER",
            (job.env or {}).get("JIP_PROFILER", None)) is not None
        jip.jobs.run_job(job,
                         profiler=profiler,
                         save=True,
                         submit_embedded=True)
    except Exception as e:
        log.error("Error executing job %s: %s",
                  args['<id>'],
                  str(e),
                  exc_info=True)
        sys.exit(1)
Exemple #8
0
 def __call__(self,
              name=None,
              threads=None,
              nodes=None,
              tasks=None,
              tasks_per_node=None,
              environment=None,
              time=None,
              queue=None,
              priority=None,
              log=None,
              out=None,
              err=None,
              account=None,
              mem=None,
              profile=None,
              prefix=None,
              temp=None,
              extra=None,
              dir=None,
              description=None,
              env=None):
     """Create a clone of this profile, overriding every attribute for
     which an explicit (non-None) value was supplied.
     """
     def pick(override, fallback):
         # use the override only when it was explicitly given
         return override if override is not None else fallback

     clone = self.__class__(
         name=pick(name, self._name),
         threads=pick(threads, self.threads),
         tasks=pick(tasks, self.tasks),
         tasks_per_node=pick(tasks_per_node, self.tasks_per_node),
         environment=pick(environment, self.environment),
         env=pick(env, self.env),
         nodes=pick(nodes, self.nodes),
         profile=pick(profile, self.profile),
         queue=pick(queue, self.queue),
         time=pick(time, self.time),
         priority=pick(priority, self.priority),
         # an explicit log wins, then an explicit err, then our own log
         log=pick(log, pick(err, self.log)),
         out=pick(out, self.out),
         account=pick(account, self.account),
         mem=pick(mem, self.mem),
         prefix=pick(prefix, self.prefix),
         temp=pick(temp, self.temp),
         extra=pick(extra, self.extra),
         working_dir=pick(dir, self.working_dir),
         description=pick(description, self.description),
         _load=False)
     # clone the per-job specs as well
     for spec_name, spec in iteritems(self.specs):
         clone.specs[spec_name] = spec()
     return clone
Exemple #9
0
def load(content, script_class=None, is_pipeline=False):
    """Load a tool or pipeline script from its string content.

    :param content: the script source
    :param script_class: class used to instantiate the script; defaults
                         to ``ScriptTool``
    :param is_pipeline: force pipeline interpretation; also detected
                        from the shebang line
    :returns: the instantiated script class
    :raises Exception: if no blocks are found or a block type appears
                       more than once
    """
    lines = content.split("\n")
    if not is_pipeline:
        # detect pipeline scripts from their shebang line
        if len(lines[0]) > 0:
            if re.match(r'^#!/usr/bin/env.*jip.*(-p|--pipeline).*$', lines[0]):
                is_pipeline = True
    header, content = split_header(lines)
    lineno = len(header) + 1

    blocks = parse_blocks(content, lineno)
    command_block = None
    validate_block = None
    pipeline_block = None
    setup_block = None
    init_block = None
    if sum([len(b) for b in blocks.values()]) == 0:
        raise Exception("No blocks found!")
    # use a distinct name for the value list; the original loop
    # shadowed the `blocks` dict it was iterating
    for block_type, type_blocks in iteritems(blocks):
        if len(type_blocks) > 1:
            raise Exception("Multiple blocks of type %s currently "
                            "not supported" % (block_type))
        if len(type_blocks) == 1:
            if block_type == COMMAND_BLOCK:
                command_block = type_blocks[0]
            elif block_type == VALIDATE_BLOCK:
                validate_block = type_blocks[0]
            elif block_type == PIPELINE_BLOCK:
                pipeline_block = type_blocks[0]
            elif block_type == SETUP_BLOCK:
                setup_block = type_blocks[0]
            elif block_type == INIT_BLOCK:
                init_block = type_blocks[0]

    docstring = _create_docstring(header)

    if script_class is None:
        script_class = ScriptTool
    if is_pipeline:
        # pipeline scripts reuse the command block as a python block
        pipeline_block = command_block
        pipeline_block.interpreter = "python"
        command_block = None
    return script_class(docstring=docstring,
                        setup_block=setup_block,
                        init_block=init_block,
                        command_block=command_block,
                        validation_block=validate_block,
                        pipeline_block=pipeline_block)
Exemple #10
0
 def load_args(self, args):
     """Update this profile from the given dictionary of command line
     arguments. The argument names must match the profile attributes
     """
     for raw_key, value in iteritems(args):
         # normalize '--some-option' to 'some_option'
         attr = raw_key.lstrip("-").replace("-", "_")
         if value and hasattr(self, attr):
             # a value may carry several space separated entries
             for single in value.split(" "):
                 pair = single.split("=")
                 if len(pair) == 1:
                     setattr(self, attr, single)
                 else:
                     # 'name=value' entries address a per-job spec;
                     # find or create a spec for the given key
                     spec_profile = self.specs.get(pair[0], Profile())
                     setattr(spec_profile, attr, pair[1])
                     self.specs[pair[0]] = spec_profile
Exemple #11
0
def render_template(template, **kwargs):
    """Render a template using the given keyword arguments as context

    Global context entries are merged in without overriding explicit
    keyword arguments, and the global context itself is exposed to the
    template as ``_ctx``.

    :param template: the template string
    :type template: string
    :param kwargs: the context
    :returns: the rendered string, or ``template`` unchanged when it is
              None or not a string
    :raises RenderError: if the template contains a syntax error
    """
    if template is None or not isinstance(template, string_types):
        return template
    tmpl = _get_environment().from_string(template)
    ctx = dict(kwargs)
    if global_context is not None:
        for k, v in iteritems(global_context):
            # explicit keyword arguments take precedence
            if k not in ctx:
                ctx[k] = v
    # expose the global context
    ctx['_ctx'] = global_context
    if 'self' in ctx:
        # 'self' would collide with the bound render() call below
        del ctx['self']
    try:
        return tmpl.render(**ctx)
    except TemplateSyntaxError as err:
        raise RenderError(template, err.message, line=err.lineno)
Exemple #12
0
    def apply_to_node(self, node):
        """Apply the matching spec (if any) and then this profile
        to the given node.
        """
        # direct lookup by the node's public, then private, name
        matched = self.specs.get(node.name, None)
        if not matched:
            matched = self.specs.get(node._name, None)
        # collect glob-style matches against the spec names as well
        for pattern, spec in iteritems(self.specs):
            if not fnmatch.fnmatch(node.name, pattern):
                continue
            if matched:
                matched.update(spec)
            else:
                matched = spec()

        if matched:
            node._job.update(matched)
            if node._pipeline_profile:
                node._pipeline_profile.update(matched)

        # apply global profile, don't overwrite
        node._job.update(self, overwrite=False)
        if node._pipeline_profile:
            node._pipeline_profile.update(self, overwrite=False)
Exemple #13
0
def main(argv=None):
    """Print a JSON spec skeleton for the jobs created by a tool.

    Loads the tool, creates its jobs with the default profile, and
    dumps per-job settings (threads, memory, queue, ...) plus any
    environment entries that differ from the current process
    environment.

    :param argv: optional argument list passed to the arg parser
    """
    args = parse_args(__doc__, argv=argv)
    script_file = args["<tool>"]
    script_args = args["<args>"]
    try:
        script = jip.find(script_file)
    except LookupError as e:
        print(str(e), file=sys.stderr)
        sys.exit(1)

    # disable required checks
    jip.options._check_required = False
    profile = jip.profiles.get(name='default')
    jobs = jip.jobs.create_jobs(script, args=script_args, profile=profile)
    specs = {}
    default_env = os.environ
    # bookkeeping variables that should never end up in a job spec
    env_excludes = [
        "JIP_MODULES", "JIP_LOGLEVEL", "JIP_PATH", "JIP_DB_LOGLEVEL"
    ]
    for j in jobs:
        # keep only environment entries that differ from the defaults
        job_env = {}
        for k, v in iteritems(j.env):
            if k not in env_excludes and v and v != default_env.get(k, None):
                job_env[k] = v
        spec = sorted_dict({
            "threads": j.threads,
            "mem": j.max_memory,
            "queue": j.queue,
            "priority": j.priority,
            "time": j.max_time,
            "account": j.account,
            "extra": j.extra,
            "env": job_env
        })
        specs[j.name] = spec

    print(json.dumps({"jobs": specs}, indent=4, sort_keys=False))
Exemple #14
0
def create_dispatcher_graph(job, _nodes=None):
    """Create a dispatcher graph for a given job. If the job does not
    have any pipe targets, a list with a single dispatcher node is returned,
    otherwise the dispatching graph is created from all the pipe target job.

    :param job: the job
    :type: `jip.db.Job`
    :param _nodes: internal accumulator used by the recursion; callers
                   should not pass it
    :returns: list of dispatcher nodes
    :rtype: list of `jip.executils.DispatcherNode` instances
    """
    # collect all jobs that are part
    # of this graph
    if len(job.pipe_to) == 0 and _nodes is None:
        return [DispatcherNode(job)]

    # do not operate on jobs that take pipes as long as this
    # is not a recursive call, in which case the _nodes dict
    # will be initialized
    if len(job.pipe_from) > 0 and _nodes is None:
        return []

    # _initialized marks the recursion start
    _initialized = False
    if _nodes is None:
        _initialized = True
        _nodes = {}

    # check if there is a node for the jobs
    node = _nodes.get(job, None)
    if node is not None:
        # node exists, skip it
        return None
    # search for a node with the same target set so jobs sharing
    # targets end up in the same dispatcher node
    for n in itervalues(_nodes):
        if set(job.pipe_to) == n.targets:
            node = n
            break
    else:
        # create a new node
        node = DispatcherNode()

    _nodes[job] = node
    node.sources.add(job)

    # add the target
    for pipe_to in job.pipe_to:
        node.targets.add(pipe_to)

    # recursive call
    for pipe_to in job.pipe_to:
        create_dispatcher_graph(pipe_to, _nodes)

    if _initialized:
        # I am the first iteration
        # and we create edges between the nodes based on source/target.
        # NOTE: the original iterated iteritems() and re-used `k` for
        # both loops; the keys were never used, so iterate the values
        for source_node in itervalues(_nodes):
            for target in source_node.targets:
                for other in itervalues(_nodes):
                    if target in other.sources:
                        other.depends_on.append(source_node)
                        source_node.children.append(other)
        return _sort_dispatcher_nodes(set(itervalues(_nodes)))
    return None
Exemple #15
0
def run_job(job,
            save=False,
            profiler=False,
            submit_embedded=False,
            closeDB=False):
    """Execute the given job. This method returns immediately in case the
    job has a pipe source. Otherwise the job and all its dispatch jobs are
    executed.

    NOTE that the run method creates a signal handler that sets the given
    job state to failed in case the jobs process is terminated by a signal.

    :param job: the job to run. Note the jobs with pipe sources are ignored
    :type job: `jip.db.Job`
    :param save: if True the jobs state changes are persisted in the database
    :param profiler: if set to True, job profiling is enabled
    :param submit_embedded: if True, embedded pipelines will be submitted and
                            not executed directly
    :param closeDB: NOTE(review): accepted but not referenced anywhere in
                    this body — confirm whether it is still needed
    :returns: True if the job was executed successfully
    :rtype: boolean
    """
    if len(job.pipe_from) > 0:
        return
    # setup signal handling
    _setup_signal_handler(job, save=save)

    # create the dispatcher graph
    dispatcher_nodes = jip.executils.create_dispatcher_graph(job)
    log.info("%s | Dispatch graph: %s", job, dispatcher_nodes)
    # load job environment
    env = job.env
    if env is not None:
        for k, v in iteritems(env):
            log.info("Loading job environment %s:%s", k, v)
            os.environ[k] = str(v)

    # Issue #37
    # make sure working directories exist at submission time
    if not os.path.exists(job.working_directory):
        os.makedirs(job.working_directory)
    for child in job.pipe_to:
        if not os.path.exists(child.working_directory):
            os.makedirs(child.working_directory)

    # Execute the commands
    for dispatcher_node in dispatcher_nodes:
        dispatcher_node.run(profiler=profiler)

    all_jobs = get_group_jobs(job)
    if save:
        # save the update job state
        db.update_job_states(all_jobs)

    success = True

    # Close the DB connection for the execution of the commands,
    # the job object gets the detached state
    session = jip.db.create_session()
    jip.db.commit_session(session)
    session.close()

    # we collect the state of all jobs in the dispatcher first
    # a single failure will cause ALL nodes/jobs in that dispatcher
    # to be marked as failed
    for dispatcher_node in reversed(dispatcher_nodes):
        success &= dispatcher_node.wait()

    # The commands finished their execution, re-attach the job object
    session = jip.db.create_session()
    session.add(job)

    # get the new state and update all jobs
    # NOTE: the loop variable `job` rebinds the outer `job` argument to
    # the last source job; the later on_success block operates on that
    new_state = db.STATE_DONE if success else db.STATE_FAILED
    for dispatcher_node in reversed(dispatcher_nodes):
        for job in dispatcher_node.sources:
            jip.jobs.set_state(job, new_state, update_children=False)

    if save:
        # save the update job state at the end of the run
        db.update_job_states(all_jobs)

    # handle embedded pipelines and callables
    if job.on_success and success:
        for element in job.on_success:
            if isinstance(element, jip.pipelines.Pipeline):
                ## run or submit embedded pipeline
                # Create a base profile for the embedded job
                # that is based on the current jobs profile
                profile = jip.profiles.Profile.from_job(job)
                # glob the inputs
                for n in element.nodes():
                    n._tool.options.glob_inputs()
                # TODO: handle the other paramters (i.e. profile, keep)
                # TODO: catch exception and make the job fail
                jobs = create_jobs(element, profile=profile)
                # add dependency to this job
                for j in jobs:
                    j.dependencies.append(job)
                for exe in create_executions(jobs, save=submit_embedded):
                    if not submit_embedded:
                        # embedded pipeline failures feed into our result
                        success &= run_job(exe.job, save=save)
                    else:
                        submit_job(exe.job)
    return success
Exemple #16
0
def parse_time(time):
    """Parse time string and returns time in minutes.

    The string can be either a number, which is the time in
    minutes, or of the form::

        <int>d<int>h<int>m<int>s

    where any part can be left out, but the order matters.

    Examples:

        30:
            returns 30 minutes
        1h:
            returns 60 minutes
        2h30m:
            return 150 minutes

    In addition, you can use a colon separated format that is either:

        HH:MM

        or

        HH:MM:SS

    :param time: time string
    :type time: string
    :returns: time in minutes
    :rtype: integer
    :raises: ValueError in case the time could not be parsed
    """
    try:
        # just minutes
        return int(time)
    except (ValueError, TypeError):
        # not a plain number; fall through to the format parsers.
        # (narrowed from a bare except so unrelated errors propagate)
        pass
    import re
    from datetime import timedelta
    # check for 00:00:00 format where
    # 00:00 is hh:mm
    # 00:00:00 is hh:mm:ss
    if ':' in time:
        s = time.split(':')
        parts = {
            'hours': int(s[0]),
            'minutes': int(s[1]),
        }
        if len(s) > 2:
            parts['seconds'] = int(s[2])
    else:
        regex = re.compile(r'((?P<days>\d+?)d)?((?P<hours>\d+?)h)'
                           '?((?P<minutes>\d+?)m)?((?P<seconds>\d+)s)?')
        parts = regex.match(time)
        if not parts:
            raise ValueError("Unable to parse time format %s" % time)
        parts = parts.groupdict()
    # drop empty groups and convert the remaining values to integers
    time_params = {name: int(value)
                   for name, value in parts.items() if value}
    delta = timedelta(**time_params)

    # convert to minutes, rounding any leftover seconds up
    seconds = delta.seconds
    hours = seconds // 3600
    minutes = (seconds % 3600) // 60
    if (seconds % 60) > 0:
        minutes += 1
    return (delta.days * 1440) + (60 * hours) + minutes
Exemple #17
0
    def apply(self, job, pipeline=False, overwrite=False):
        """Apply this profile to the given job.

        Only attributes that are unset on the job are filled in; use
        ``overwrite=True`` to delegate to :meth:`apply_overwrite`,
        which replaces existing values as well.

        :param job: the job (or pipeline) to update
        :param pipeline: if True, apply the profile name as the
                         pipeline name instead of the job name
        :param overwrite: if True, overwrite values already set on the job
        """
        log.debug("Profiles | Applying job profile to %s", job)
        if overwrite:
            self.apply_overwrite(job)
            return

        # set the job name or the pipeline name
        # if this is a job or a pipeline
        if not pipeline:
            job.name = self._render_job_name(job)
        elif self.name is not None:
            log.info("Apply pipeline name to job: %s %s", job, self.name)
            job.pipeline = self._render(job, self.name)

        if self.threads and job.threads is None:
            job.threads = int(self.threads)
        if self.nodes is not None and job.nodes is None:
            job.nodes = self.nodes
        if self.tasks is not None and job.tasks is None:
            job.tasks = self.tasks
        # fixed typo: the original checked job.tasts_per_node, which is
        # not the attribute assigned below
        if self.tasks_per_node is not None and job.tasks_per_node is None:
            job.tasks_per_node = self.tasks_per_node
        if self.environment is not None and job.environment is None:
            job.environment = self.environment
        if self.queue is not None and job.queue is None:
            job.queue = self.queue
        if self.priority is not None and job.priority is None:
            job.priority = self.priority
        if self.time is not None and job.max_time is None:
            job.max_time = jip.utils.parse_time(self.time)
        if self.mem is not None:
            # memory contributions accumulate rather than replace
            if job.max_memory is None:
                job.max_memory = 0
            job.max_memory += jip.utils.parse_mem(self.mem)
        if self.log is not None and job.stderr is None:
            job.stderr = self._render(job, self.log)
        if self.out is not None and job.stdout is None:
            job.stdout = self._render(job, self.out)
        if self.account is not None and job.account is None:
            job.account = self.account
        if self.temp is not None and job.temp is None:
            job.temp = self.temp
        if self.extra is not None and job.extra is None:
            job.extra = self.extra
        if self.working_dir is not None and job.working_directory is None:
            job.working_directory = os.path.abspath(self.working_dir)

        # make log files absolute
        if job.stdout and not job.stdout.startswith("/"):
            job.stdout = os.path.join(job.working_directory, job.stdout)
        if job.stderr and not job.stderr.startswith("/"):
            job.stderr = os.path.join(job.working_directory, job.stderr)

        # load environment, rendering values against the current
        # process environment merged with the job's own env
        if self.env:
            current = os.environ.copy()
            if job.env:
                current.update(job.env)
            rendered = {}
            for k, v in iteritems(self.env):
                rendered[k] = render_template(v, **current)
            job.env.update(rendered)

        if hasattr(job, 'pipe_to'):
            for child in job.pipe_to:
                self.apply(child)
Exemple #18
0
    def apply_overwrite(self, job):
        """Apply the profile and overwrite all settings that are set
        in this profile

        :param job: the job to update; applied recursively to any
                    ``pipe_to`` children, and matching specs are applied
                    afterwards
        """
        log.debug("Profiles | Overwriting job profile to %s", job)

        # unlike apply(), every set profile attribute replaces the
        # job's current value unconditionally
        if self.name:
            job.name = self._render_job_name(job)
        if self.threads:
            job.threads = int(self.threads)
        if self.nodes is not None:
            job.nodes = self.nodes
        if self.tasks is not None:
            job.tasks = self.tasks
        if self.tasks_per_node is not None:
            job.tasks_per_node = self.tasks_per_node
        if self.environment is not None:
            job.environment = self.environment
        if self.queue is not None:
            job.queue = self.queue
        if self.priority is not None:
            job.priority = self.priority
        if self.time is not None:
            job.max_time = jip.utils.parse_time(self.time)
        if self.mem is not None:
            job.max_memory = jip.utils.parse_mem(self.mem)
        if self.log is not None:
            job.stderr = self._render(job, self.log)
        if self.out is not None:
            job.stdout = self._render(job, self.out)
        if self.account is not None:
            job.account = self.account
        if self.temp is not None:
            job.temp = self.temp
        if self.extra is not None:
            job.extra = self.extra
        if self.working_dir is not None:
            job.working_directory = os.path.abspath(self.working_dir)

        # make log files absolute
        if job.stdout and not job.stdout.startswith("/"):
            job.stdout = os.path.join(job.working_directory, job.stdout)
        if job.stderr and not job.stderr.startswith("/"):
            job.stderr = os.path.join(job.working_directory, job.stderr)

        # load environment, rendering values against the current
        # process environment merged with the job's own env
        if self.env:
            current = os.environ.copy()
            if job.env:
                current.update(job.env)
            rendered = {}
            for k, v in iteritems(self.env):
                rendered[k] = render_template(v, **current)
            job.env.update(rendered)

        if hasattr(job, 'pipe_to'):
            for child in job.pipe_to:
                self.apply_overwrite(child)
        # check specs whose glob pattern matches the (just rendered) name
        for spec_name, spec in iteritems(self.specs):
            if fnmatch.fnmatch(job.name, spec_name):
                spec.apply_overwrite(job)
Exemple #19
0
def main():
    """Print an overview of every tool JIP can discover: script files,
    module-defined tools, and the combined result.
    """
    args = parse_args(__doc__, options_first=True)

    def _short_help(cls):
        # first line of the tool help, truncated to fit the table
        cls_help = cls.help()
        description = "-"
        if cls_help is not None:
            description = cls_help.split("\n")[0]
        if len(description) > 60:
            description = "%s ..." % description[:46]
        return description

    print("Tools scripts")
    print("-------------")
    print("Please note that there might be more. Here, we search only for")
    print("files with the .jip extension!")
    print("")
    print("Search paths:")
    print("Current directory: %s" % getcwd())
    print("Jip configuration: %s" % jip.config.get("jip_path", ""))
    print("JIP_PATH variable: %s" % getenv("JIP_PATH", ""))
    print("")
    rows = list(iteritems(jip.scanner.scan_files()))
    print(render_table(["Name", "Path"], rows))
    print("")

    print("Tools implemented in Python modules")
    print("-----------------------------------")
    print("The modules must be available in PYTHONPATH and must be specified")
    print("in the jip configuration or in the JIP_MODULES environment")
    print("variable. Please note that pipeline scripts that contain")
    print("python blocks are allowed to load modules that contain tool")
    print("implementation. These tools might not be found by this scan!")
    print("")
    print("Jip configuration: %s" % jip.config.get("jip_modules", ""))
    print("JIP_MODULES variable: %s" % getenv("JIP_MODULES", ""))
    print("")
    jip.scanner.scan_modules()
    rows = [(name, _short_help(cls))
            for name, cls in iteritems(jip.scanner.registry)]
    print(render_table(["Tool", "Description"], rows))

    print("")
    print("All Tools detected")
    print("------------------")
    print("")
    covered = set()
    rows = []
    for name, p in iteritems(jip.scanner.scan()):
        # report each tool only once
        if name in covered:
            continue
        covered.add(name)
        cls = jip.find(name)
        rows.append((cls.name, _short_help(cls)))
    print(render_table(["Tool", "Description"], rows))