def main():
    """Print every configured profile with its settings as a table."""
    parse_args(__doc__, options_first=True)
    for profile_name, settings in iteritems(jip.config['profiles']):
        print("Profile:", colorize(profile_name, BLUE))
        print("Description:", settings.get('description', ""))
        # everything except the description goes into the name/value table
        table_rows = [
            (key, value)
            for key, value in iteritems(settings)
            if key != 'description'
        ]
        print(render_table(("Name", "Value"), table_rows))
        print("")
def from_dict(cls, data):
    """Load a profile from a dictionary.

    All top level entries except ``jobs`` are applied as attributes on a
    fresh profile instance. A ``jobs`` entry is treated as a mapping of
    job names to nested profile dictionaries which are loaded
    recursively into ``profile.specs``.
    """
    profile = cls()
    # apply every plain attribute; nested job specs are handled below
    for key, value in iteritems(data):
        if key == 'jobs':
            continue
        setattr(profile, key, value)
    # recursively load the per-job sub profiles
    if "jobs" in data:
        for job_name, job_spec in iteritems(data["jobs"]):
            profile.specs[job_name] = cls.from_dict(job_spec)
    return profile
def _update(config, other): for k, v in iteritems(other): if isinstance(v, collections.Mapping): r = _update(config.get(k, {}), v) config[k] = r else: config[k] = other[k] return config
def _handle_exit(self, *args):
    """Handle an EXIT request by shutting down every running job.

    Each running child process is terminated and then joined so the
    master shuts down cleanly. Always returns ``False``.
    """
    self.log.info("Master | EXIT request, shutting down")
    for job_id, entry in iteritems(self.running):
        self.log.warn("Master | Terminating %s", job_id)
        process = entry.process
        process.terminate()
        # wait for the child to actually finish before moving on
        process.join()
    return False
def _load_job_env(self):
    """Populate ``os.environ`` with this job's environment.

    Copies the job's ``env`` mapping into the process environment,
    stringifying every value, and exports the JIP bookkeeping
    variables ``JIP_ID``, ``JIP_JOB`` and ``JIP_THREADS``.
    """
    custom_env = self.env
    if custom_env is not None:
        for key, value in iteritems(custom_env):
            os.environ[key] = str(value)
    # export job identifiers; empty string / single thread as fallback
    os.environ["JIP_ID"] = "" if self.id is None else str(self.id)
    os.environ["JIP_JOB"] = str(self.job_id) if self.job_id else ""
    os.environ["JIP_THREADS"] = str(self.threads) if self.threads else "1"
def _update(config, other): """Recursively update the given config dict with the other dict""" for k, v in iteritems(other): if isinstance(v, collections.Mapping): r = _update(config.get(k, {}), v) config[k] = r else: config[k] = other[k] return config
def main():
    """Load a stored job from the database and execute it."""
    log.debug("job execution python path: %s", sys.path)
    args = parse_args(__doc__, options_first=True)
    try:
        log.info("Starting job with id %s stored in %s",
                 args['<id>'], args['--db'])
        jip.db.init(path=args['--db'])
        job = jip.db.get(args['<id>'])
        if not job:
            log.error("Requested job with id %s not found!", args['<id>'])
            sys.exit(1)
        if job.state != jip.db.STATE_QUEUED:
            log.warn("Job does not come from queued state! Stoping execution")
            sys.exit(0)
        # for LSF implementation, I could only test on openlava, and
        # that does not seem to support the -cwd option to switch the
        # working directory. To work around this, and be sure about the
        # working directory, we switch here
        if job.working_directory and len(job.working_directory) > 0:
            log.debug("Switching working directory to: %s",
                      job.working_directory)
            os.chdir(job.working_directory)
        # load job environment
        env = job.env
        if env is not None:
            for k, v in iteritems(env):
                log.info("Loading job environment %s:%s", k, v)
                os.environ[k] = str(v)
        # load the tool here to have it cached just in case
        # there is a problem at least on PBS where the tool
        # can not be loaded after the signal (which I still don't understand)
        try:
            tool = job.tool
            log.debug("Loaded tool: %s", tool)
        except Exception:
            # best effort only: execution continues, but the failure
            # cleanup might not be able to resolve the tool later.
            # Narrowed from a bare ``except:`` so SystemExit and
            # KeyboardInterrupt are no longer swallowed here.
            log.warn("unable to load tool. Failure cleanup might fail!")
        # check profiling; guard against job.env being None (the env
        # loop above explicitly allows it, so .get() must not crash)
        profiler = os.getenv(
            "JIP_PROFILER",
            (job.env or {}).get("JIP_PROFILER", None)
        ) is not None
        jip.jobs.run_job(job, profiler=profiler,
                         save=True, submit_embedded=True)
    except Exception as e:
        log.error("Error executing job %s: %s", args['<id>'], str(e),
                  exc_info=True)
        sys.exit(1)
def __call__(self, name=None, threads=None, nodes=None, tasks=None,
             tasks_per_node=None, environment=None, time=None, queue=None,
             priority=None, log=None, out=None, err=None, account=None,
             mem=None, profile=None, prefix=None, temp=None, extra=None,
             dir=None, description=None, env=None):
    """Create a clone of this profile.

    Every keyword argument that is explicitly passed (i.e. not ``None``)
    overrides the corresponding value of this instance; all other values
    are inherited from this profile. All child specs are cloned as well.
    Note that ``err`` is only used as a fallback for ``log`` and that
    ``dir`` maps to the clone's ``working_dir``.
    """
    clone = self.__class__(
        name=name if name is not None else self._name,
        threads=threads if threads is not None else self.threads,
        tasks=tasks if tasks is not None else self.tasks,
        tasks_per_node=tasks_per_node if tasks_per_node is not None
        else self.tasks_per_node,
        environment=environment if environment is not None
        else self.environment,
        env=env if env is not None else self.env,
        nodes=nodes if nodes is not None else self.nodes,
        profile=profile if profile is not None else self.profile,
        queue=queue if queue is not None else self.queue,
        time=time if time is not None else self.time,
        priority=priority if priority is not None else self.priority,
        # ``err`` acts as an alias fallback for ``log``
        log=log if log is not None else (err if err is not None
                                         else self.log),
        out=out if out is not None else self.out,
        account=account if account is not None else self.account,
        mem=mem if mem is not None else self.mem,
        prefix=prefix if prefix is not None else self.prefix,
        temp=temp if temp is not None else self.temp,
        extra=extra if extra is not None else self.extra,
        # ``dir`` is the caller-facing name for ``working_dir``
        working_dir=dir if dir is not None else self.working_dir,
        description=description if description is not None
        else self.description,
        _load=False)
    # clone the per-job sub profiles as well (calling a profile clones it)
    for name, spec in iteritems(self.specs):
        clone.specs[name] = spec()
    return clone
def load(content, script_class=None, is_pipeline=False):
    """Load a tool from the given script content.

    The content is split into its header and blocks, pipeline scripts
    are detected from a ``#!/usr/bin/env jip -p`` style shebang, and an
    instance of the script class is created from the parsed blocks.

    :param content: the script content
    :param script_class: class used to instantiate the tool, defaults
        to ``ScriptTool``
    :param is_pipeline: force interpretation as a pipeline script
    :returns: a ``script_class`` instance
    :raises Exception: if no blocks are found or more than one block of
        the same type exists
    """
    lines = content.split("\n")
    if not is_pipeline:
        # detect pipeline scripts from the shebang line
        if len(lines[0]) > 0:
            if re.match(r'^#!/usr/bin/env.*jip.*(-p|--pipeline).*$',
                        lines[0]):
                is_pipeline = True
    header, content = split_header(lines)
    lineno = len(header) + 1
    blocks = parse_blocks(content, lineno)
    command_block = None
    validate_block = None
    pipeline_block = None
    setup_block = None
    init_block = None
    if sum([len(b) for b in blocks.values()]) == 0:
        raise Exception("No blocks found!")
    # fix: the loop variable used to rebind ``blocks`` (the dict being
    # iterated); use a distinct name for the per-type block list
    for block_type, type_blocks in iteritems(blocks):
        if len(type_blocks) > 1:
            raise Exception("Multiple blocks of type %s currently "
                            "not supported" % (block_type))
        if len(type_blocks) == 1:
            if block_type == COMMAND_BLOCK:
                command_block = type_blocks[0]
            elif block_type == VALIDATE_BLOCK:
                validate_block = type_blocks[0]
            elif block_type == PIPELINE_BLOCK:
                pipeline_block = type_blocks[0]
            elif block_type == SETUP_BLOCK:
                setup_block = type_blocks[0]
            elif block_type == INIT_BLOCK:
                init_block = type_blocks[0]
    docstring = _create_docstring(header)
    if script_class is None:
        script_class = ScriptTool
    if is_pipeline:
        # pipeline scripts run their command block as a python
        # pipeline definition
        pipeline_block = command_block
        pipeline_block.interpreter = "python"
        command_block = None
    return script_class(docstring=docstring,
                        setup_block=setup_block,
                        init_block=init_block,
                        command_block=command_block,
                        validation_block=validate_block,
                        pipeline_block=pipeline_block)
def load_args(self, args):
    """Update this profile from the given dictionary of command
    line arguments. The argument names must match the profile
    attributes
    """
    for arg_name, value in iteritems(args):
        # normalize "--some-option" into the attribute name "some_option"
        attr = re.sub("^-+", "", arg_name)
        attr = re.sub("-", "_", attr)
        if not value or not hasattr(self, attr):
            continue
        # a value may carry multiple space separated entries; an entry
        # of the form <spec>=<value> targets a named child spec instead
        # of this profile itself
        for entry in value.split(" "):
            parts = entry.split("=")
            if len(parts) == 1:
                setattr(self, attr, entry)
            else:
                # find or create a spec for the given key
                spec = self.specs.get(parts[0], Profile())
                setattr(spec, attr, parts[1])
                self.specs[parts[0]] = spec
def render_template(template, **kwargs):
    """Render a template using the given keyword arguments as context

    :param template: the template string
    :type template: string
    :param kwargs: the context
    """
    # anything that is not a string (including None) passes through
    if template is None or not isinstance(template, string_types):
        return template
    compiled = _get_environment().from_string(template)
    context = dict(kwargs)
    if global_context is not None:
        # merge global values in without overriding explicit ones
        for key, value in iteritems(global_context):
            if key not in context:
                context[key] = value
    # expose the global context
    context['_ctx'] = global_context
    if 'self' in context:
        del context['self']
    try:
        return compiled.render(**context)
    except TemplateSyntaxError as err:
        raise RenderError(template, err.message, line=err.lineno)
def apply_to_node(self, node):
    """Apply matching specs and this profile to the given node.

    A spec is looked up by the node's name (exact match first, then the
    node's internal name, then fnmatch-style patterns) and applied to
    the node's job. The global profile is applied last without
    overwriting existing settings.
    """
    # exact lookup by public name, then by the node's internal name
    matched = self.specs.get(node.name, None)
    if not matched:
        matched = self.specs.get(node._name, None)
    # wildcard lookup: every matching fnmatch pattern contributes
    for pattern, spec in iteritems(self.specs):
        if not fnmatch.fnmatch(node.name, pattern):
            continue
        #if re.match(spec_name, node.name):
        if matched:
            matched.update(spec)
        else:
            # calling the spec clones it, leaving the stored one intact
            matched = spec()
    if matched:
        node._job.update(matched)
        if node._pipeline_profile:
            node._pipeline_profile.update(matched)
    # apply global profile, don't overwrite
    node._job.update(self, overwrite=False)
    if node._pipeline_profile:
        node._pipeline_profile.update(self, overwrite=False)
def main(argv=None):
    """Create the jobs for a tool/pipeline and dump their specs as JSON."""
    args = parse_args(__doc__, argv=argv)
    script_file = args["<tool>"]
    script_args = args["<args>"]
    try:
        script = jip.find(script_file)
    except LookupError as e:
        print(str(e), file=sys.stderr)
        sys.exit(1)
    # disable required checks
    jip.options._check_required = False
    profile = jip.profiles.get(name='default')
    jobs = jip.jobs.create_jobs(script, args=script_args, profile=profile)
    specs = {}
    default_env = os.environ
    # JIP bookkeeping variables never belong in a job spec
    # (fix: local was previously misspelled "env_exckludes")
    env_excludes = [
        "JIP_MODULES", "JIP_LOGLEVEL", "JIP_PATH", "JIP_DB_LOGLEVEL"
    ]
    for j in jobs:
        # only record env entries that differ from the process environment
        job_env = {}
        for k, v in iteritems(j.env):
            if k not in env_excludes and v and v != default_env.get(k, None):
                job_env[k] = v
        spec = sorted_dict({
            "threads": j.threads,
            "mem": j.max_memory,
            "queue": j.queue,
            "priority": j.priority,
            "time": j.max_time,
            "account": j.account,
            "extra": j.extra,
            "env": job_env
        })
        specs[j.name] = spec
    print(json.dumps({"jobs": specs}, indent=4, sort_keys=False))
def create_dispatcher_graph(job, _nodes=None): """Create a dispatcher graph for a given job. If the job does not have any pipe targets, a list with a single dispatcher node is returned, otherwise the dispatching graph is created from all the pipe target job. :param job: the job :type: `jip.db.Job` :returns: list of dispatcher nodes :rtype: list of `jip.executils.DispatcherNode` instances """ # collect all jobs that are part # of this graph if len(job.pipe_to) == 0 and _nodes is None: return [DispatcherNode(job)] # do not operate on jobs that take pipes as long as this # is not a recursive call, in which case the _nodes dict # will be initialized if len(job.pipe_from) > 0 and _nodes is None: return [] # _initialized marks the recursion start _initialized = False if _nodes is None: _initialized = True _nodes = {} # check if there is a node for the jobs node = _nodes.get(job, None) if node is not None: # node exists, skip it return None # search for a new with the same target for n in itervalues(_nodes): if set(job.pipe_to) == n.targets: node = n break else: # create a new node node = DispatcherNode() _nodes[job] = node node.sources.add(job) # add the target for pipe_to in job.pipe_to: node.targets.add(pipe_to) # recursive call for pipe_to in job.pipe_to: create_dispatcher_graph(pipe_to, _nodes) if _initialized: # I am the first iteration # and we create edges between the nodes based on source/target for k, node in iteritems(_nodes): for target in node.targets: for k, other in iteritems(_nodes): if target in other.sources: other.depends_on.append(node) node.children.append(other) return _sort_dispatcher_nodes(set(itervalues(_nodes))) return None
def run_job(job, save=False, profiler=False, submit_embedded=False,
            closeDB=False):
    """Execute the given job. This method returns immediately in case the
    job has a pipe source. Otherwise the job and all its dispatch jobs are
    executed.

    NOTE that the run method creates a signal handler that sets the given
    job state to failed in case the jobs process is terminated by a signal.

    :param job: the job to run. Note the jobs with pipe sources are ignored
    :type job: `jip.db.Job`
    :param save: if True the jobs state changes are persisted in the
                 database
    :param profiler: if set to True, job profiling is enabled
    :param submit_embedded: if True, embedded pipelines will be submitted
                            and not executed directly
    :param closeDB: accepted for backward compatibility; not referenced
                    by this implementation
    :returns: True if the job was executed successfully
    :rtype: boolean
    """
    if len(job.pipe_from) > 0:
        return
    # setup signal handling
    _setup_signal_handler(job, save=save)
    # create the dispatcher graph
    dispatcher_nodes = jip.executils.create_dispatcher_graph(job)
    log.info("%s | Dispatch graph: %s", job, dispatcher_nodes)
    # load job environment
    env = job.env
    if env is not None:
        for k, v in iteritems(env):
            log.info("Loading job environment %s:%s", k, v)
            os.environ[k] = str(v)
    # Issue #37
    # make sure working directories exist at submission time
    if not os.path.exists(job.working_directory):
        os.makedirs(job.working_directory)
    for child in job.pipe_to:
        if not os.path.exists(child.working_directory):
            os.makedirs(child.working_directory)
    # Execute the commands
    for dispatcher_node in dispatcher_nodes:
        dispatcher_node.run(profiler=profiler)
    all_jobs = get_group_jobs(job)
    if save:
        # save the update job state
        db.update_job_states(all_jobs)
    success = True
    # Close the DB connection for the execution of the commands,
    # the job object gets the detached state
    session = jip.db.create_session()
    jip.db.commit_session(session)
    session.close()
    # we collect the state of all jobs in the dispatcher first
    # a single failure will cause ALL nodes/jobs in that dispatcher
    # to be marked as failed
    for dispatcher_node in reversed(dispatcher_nodes):
        success &= dispatcher_node.wait()
    # The commands finished their execution, re-attach the job object
    session = jip.db.create_session()
    session.add(job)
    # get the new state and update all jobs
    # NOTE(review): the inner loop rebinds ``job`` (the parameter);
    # after these loops ``job`` refers to the last dispatcher source
    new_state = db.STATE_DONE if success else db.STATE_FAILED
    for dispatcher_node in reversed(dispatcher_nodes):
        for job in dispatcher_node.sources:
            jip.jobs.set_state(job, new_state, update_children=False)
    if save:
        # save the update job state at the end of the run
        db.update_job_states(all_jobs)
    # handle embedded pipelines and callables
    if job.on_success and success:
        for element in job.on_success:
            if isinstance(element, jip.pipelines.Pipeline):
                ## run or submit embedded pipeline
                # Create a base profile for the embedded job
                # that is based on the current jobs profile
                profile = jip.profiles.Profile.from_job(job)
                # glob the inputs
                for n in element.nodes():
                    n._tool.options.glob_inputs()
                # TODO: handle the other paramters (i.e. profile, keep)
                # TODO: catch exception and make the job fail
                jobs = create_jobs(element, profile=profile)
                # add dependency to this job
                for j in jobs:
                    j.dependencies.append(job)
                for exe in create_executions(jobs, save=submit_embedded):
                    if not submit_embedded:
                        success &= run_job(exe.job, save=save)
                    else:
                        submit_job(exe.job)
    return success
def parse_time(time):
    """Parse time string and returns time in minutes.

    The string can be either a number, which is the time in minutes,
    or of the form::

        <int>d<int>h<int>m<int>s

    where any part can be left out, but the order matters.

    Examples:

        30    -- returns 30 minutes
        1h    -- returns 60 minutes
        2h30m -- returns 150 minutes

    In addition, you can use a colon separated format that is either:
    HH:MM or HH:MM:SS

    :param time: time string
    :type time: string
    :returns: time in minutes
    :rtype: integer
    :raises: ValueError in case the time could not be parsed
    """
    try:
        # plain number: interpreted directly as minutes
        return int(time)
    except (ValueError, TypeError):
        pass
    import re
    from datetime import timedelta
    # check for 00:00:00 format where
    # 00:00 is hh:mm
    # 00:00:00 is hh:mm:ss
    if ':' in time:
        s = time.split(':')
        parts = {
            'hours': int(s[0]),
            'minutes': int(s[1]),
        }
        if len(s) > 2:
            parts['seconds'] = int(s[2])
    else:
        regex = re.compile(r'((?P<days>\d+?)d)?((?P<hours>\d+?)h)'
                           '?((?P<minutes>\d+?)m)?((?P<seconds>\d+)s)?')
        match = regex.match(time)
        if not match:
            raise ValueError("Unable to parse time format %s" % time)
        parts = match.groupdict()
        # fix: every group is optional, so garbage input still yields an
        # (empty) match and previously returned 0 silently; a match with
        # no component at all is a parse error
        if not any(parts.values()):
            raise ValueError("Unable to parse time format %s" % time)
    time_params = {}
    for name, param in parts.items():
        if param:
            time_params[name] = int(param)
    delta = timedelta(**time_params)
    # convert to minutes, rounding leftover seconds up to a full minute
    seconds = delta.seconds
    hours = seconds // 3600
    minutes = (seconds % 3600) // 60
    if (seconds % 60) > 0:
        minutes += 1
    return (delta.days * 1440) + (60 * hours) + minutes
def apply(self, job, pipeline=False, overwrite=False):
    """Apply this profile to the given job.

    Profile settings are only transferred when the job does not already
    define them (``mem`` is accumulated instead). When ``overwrite`` is
    set, the call delegates to ``apply_overwrite``. The profile is
    applied recursively to all jobs the given job pipes to.

    :param job: the job to update
    :param pipeline: if True, apply the profile name as the job's
        pipeline name instead of rendering a job name
    :param overwrite: if True, overwrite values already set on the job
    """
    log.debug("Profiles | Applying job profile to %s", job)
    if overwrite:
        self.apply_overwrite(job)
        return
    # set the job name or the pipeline name
    # if this is a job or a pipeline
    if not pipeline:
        job.name = self._render_job_name(job)
    elif self.name is not None:
        log.info("Apply pipeline name to job: %s %s", job, self.name)
        job.pipeline = self._render(job, self.name)
    if self.threads and job.threads is None:
        job.threads = int(self.threads)
    if self.nodes is not None and job.nodes is None:
        job.nodes = self.nodes
    if self.tasks is not None and job.tasks is None:
        job.tasks = self.tasks
    # fix: this condition previously read ``job.tasts_per_node`` (typo),
    # raising AttributeError whenever the profile set tasks_per_node
    if self.tasks_per_node is not None and job.tasks_per_node is None:
        job.tasks_per_node = self.tasks_per_node
    if self.environment is not None and job.environment is None:
        job.environment = self.environment
    if self.queue is not None and job.queue is None:
        job.queue = self.queue
    if self.priority is not None and job.priority is None:
        job.priority = self.priority
    if self.time is not None and job.max_time is None:
        job.max_time = jip.utils.parse_time(self.time)
    if self.mem is not None:
        # memory is accumulated across profiles, not just defaulted
        if job.max_memory is None:
            job.max_memory = 0
        job.max_memory += jip.utils.parse_mem(self.mem)
    if self.log is not None and job.stderr is None:
        job.stderr = self._render(job, self.log)
    if self.out is not None and job.stdout is None:
        job.stdout = self._render(job, self.out)
    if self.account is not None and job.account is None:
        job.account = self.account
    if self.temp is not None and job.temp is None:
        job.temp = self.temp
    if self.extra is not None and job.extra is None:
        job.extra = self.extra
    if self.working_dir is not None and job.working_directory is None:
        job.working_directory = os.path.abspath(self.working_dir)
    # make log files absolute
    if job.stdout and not job.stdout.startswith("/"):
        job.stdout = os.path.join(job.working_directory, job.stdout)
    if job.stderr and not job.stderr.startswith("/"):
        job.stderr = os.path.join(job.working_directory, job.stderr)
    # load environment
    if self.env:
        # render env values against the process environment merged with
        # the job's own env
        current = os.environ.copy()
        if job.env:
            current.update(job.env)
        rendered = {}
        for k, v in iteritems(self.env):
            rendered[k] = render_template(v, **current)
        job.env.update(rendered)
    if hasattr(job, 'pipe_to'):
        for child in job.pipe_to:
            self.apply(child)
def apply_overwrite(self, job):
    """Apply the profile and overwrite all settings that are set
    in this profile
    """
    log.debug("Profiles | Overwriting job profile to %s", job)
    if self.name:
        job.name = self._render_job_name(job)
    if self.threads:
        job.threads = int(self.threads)
    if self.nodes is not None:
        job.nodes = self.nodes
    if self.tasks is not None:
        job.tasks = self.tasks
    if self.tasks_per_node is not None:
        job.tasks_per_node = self.tasks_per_node
    if self.environment is not None:
        job.environment = self.environment
    if self.queue is not None:
        job.queue = self.queue
    if self.priority is not None:
        job.priority = self.priority
    if self.time is not None:
        # time strings (e.g. "2h30m") are normalized to minutes
        job.max_time = jip.utils.parse_time(self.time)
    if self.mem is not None:
        job.max_memory = jip.utils.parse_mem(self.mem)
    if self.log is not None:
        # ``log`` renders into the job's stderr path
        job.stderr = self._render(job, self.log)
    if self.out is not None:
        job.stdout = self._render(job, self.out)
    if self.account is not None:
        job.account = self.account
    if self.temp is not None:
        job.temp = self.temp
    if self.extra is not None:
        job.extra = self.extra
    if self.working_dir is not None:
        job.working_directory = os.path.abspath(self.working_dir)
    # make log files absolute
    if job.stdout and not job.stdout.startswith("/"):
        job.stdout = os.path.join(job.working_directory, job.stdout)
    if job.stderr and not job.stderr.startswith("/"):
        job.stderr = os.path.join(job.working_directory, job.stderr)
    # load environment
    if self.env:
        # render env values against the process environment merged
        # with the job's own env
        current = os.environ.copy()
        if job.env:
            current.update(job.env)
        rendered = {}
        for k, v in iteritems(self.env):
            rendered[k] = render_template(v, **current)
        job.env.update(rendered)
    if hasattr(job, 'pipe_to'):
        for child in job.pipe_to:
            self.apply_overwrite(child)
    # check specs
    # child specs whose fnmatch pattern matches the job name are
    # applied on top
    for spec_name, spec in iteritems(self.specs):
        if fnmatch.fnmatch(job.name, spec_name):
            spec.apply_overwrite(job)
def main(): args = parse_args(__doc__, options_first=True) print("Tools scripts") print("-------------") print("Please note that there might be more. Here, we search only for") print("files with the .jip extension!") print("") print("Search paths:") print("Current directory: %s" % getcwd()) print("Jip configuration: %s" % jip.config.get("jip_path", "")) print("JIP_PATH variable: %s" % getenv("JIP_PATH", "")) print("") rows = [] for name, path in iteritems(jip.scanner.scan_files()): rows.append((name, path)) print(render_table(["Name", "Path"], rows)) print("") print("Tools implemented in Python modules") print("-----------------------------------") print("The modules must be available in PYTHONPATH and must be specified") print("in the jip configuration or in the JIP_MODULES environment") print("variable. Please note that pipeline scripts that contain") print("python blocks are allowed to load modules that contain tool") print("implementation. These tools might not be found by this scan!") print("") print("Jip configuration: %s" % jip.config.get("jip_modules", "")) print("JIP_MODULES variable: %s" % getenv("JIP_MODULES", "")) print("") rows = [] jip.scanner.scan_modules() for name, cls in iteritems(jip.scanner.registry): cls_help = cls.help() description = "-" if cls_help is not None: description = cls_help.split("\n")[0] if len(description) > 60: description = "%s ..." % description[:46] rows.append((name, description)) print(render_table(["Tool", "Description"], rows)) print("") print("All Tools detected") print("------------------") print("") covered = set([]) rows = [] for name, p in iteritems(jip.scanner.scan()): if name in covered: continue covered.add(name) cls = jip.find(name) cls_help = cls.help() description = "-" if cls_help is not None: description = cls_help.split("\n")[0] if len(description) > 60: description = "%s ..." % description[:46] rows.append((cls.name, description)) print(render_table(["Tool", "Description"], rows))