def __init__(self, dir=None, mkdtemp=None, cleanup=None):
    """
    Base job file factory constructor.

    *dir* is the directory in which job files are created; when *None*, the
    ``job.job_file_dir`` config value is used and a temporary subdirectory is
    enforced. *mkdtemp* controls whether a temporary subdirectory is created
    inside *dir*, and *cleanup* whether the directory should be removed again
    later; both default to the corresponding ``job.*`` config values when
    *None*.
    """
    super(BaseJobFileFactory, self).__init__()

    # get default values from config if None
    if mkdtemp is None:
        mkdtemp = Config.instance().get_expanded("job", "job_file_dir_mkdtemp", type=bool)
    if cleanup is None:
        cleanup = Config.instance().get_expanded("job", "job_file_dir_cleanup", type=bool)

    # store the cleanup flag
    self.cleanup = cleanup

    # when dir is None, a temporary directory is forced
    if not dir:
        mkdtemp = True

    # store the directory, default to the job.job_file_dir config
    self.dir = dir or Config.instance().get_expanded("job", "job_file_dir")

    # create the directory, tolerating a concurrent creation between the check and the
    # makedirs call (fixes a TOCTOU race when multiple factories start simultaneously)
    if not os.path.exists(self.dir):
        try:
            os.makedirs(self.dir)
        except OSError:
            # re-raise unless the directory exists by now (created concurrently)
            if not os.path.isdir(self.dir):
                raise

    # check if it should be extended by a temporary dir
    if mkdtemp:
        self.dir = tempfile.mkdtemp(dir=self.dir)
def __init__(self, path=None, is_tmp=False, **kwargs):
    """
    Local target constructor.

    Either *path* or *is_tmp* must be set. When *path* is missing, a random,
    not-yet-existing path inside the configured ``target.tmp_dir`` is generated;
    *is_tmp* may then also be a string that is appended to the generated path as
    a file extension. Remaining *kwargs* are forwarded to the
    ``FileSystemTarget`` constructor.
    """
    # handle tmp paths manually since luigi uses the env tmp dir
    if not path:
        if not is_tmp:
            raise Exception("either path or is_tmp must be set")
        # get the tmp dir from the config and ensure it exists
        tmp_dir = os.path.realpath(Config.instance().get_expanded("target", "tmp_dir"))
        if not self.fs.exists(tmp_dir):
            # tmp_dir_permission is optional; "perm and int(perm)" passes a falsy
            # value (None / empty string) through unchanged
            perm = Config.instance().get("target", "tmp_dir_permission")
            self.fs.mkdir(tmp_dir, perm=perm and int(perm))
        # create a random path that does not exist yet
        while True:
            path = os.path.join(tmp_dir, "luigi-tmp-%09d" % (random.randint(0, 999999999,)))
            if not self.fs.exists(path):
                break
        # is_tmp might be an extension
        if isinstance(is_tmp, six.string_types):
            if is_tmp[0] != ".":
                is_tmp = "." + is_tmp
            path += is_tmp
    else:
        # normalize the given path: strip the scheme, expand user and env variables
        path = self.fs.abspath(os.path.expandvars(os.path.expanduser(remove_scheme(path))))
    # initialize both bases explicitly since luigi.LocalTarget and FileSystemTarget
    # do not cooperate via super()
    luigi.LocalTarget.__init__(self, path=path, is_tmp=is_tmp)
    FileSystemTarget.__init__(self, self.path, **kwargs)
def __init__(self, section=None, **kwargs):
    """
    Local file system constructor. Options are read from the law config *section*
    (defaulting to ``target.default_local_fs``), extended with the defaults of the
    ``local_fs`` section, and forwarded to the base class.
    """
    self.config_section = None

    cfg = Config.instance()

    # fall back to the configured default section
    section = section or cfg.get_expanded("target", "default_local_fs")

    if isinstance(section, six.string_types):
        if not cfg.has_section(section):
            raise Exception("law config has no section '{}' to read {} options".format(
                section, self.__class__.__name__))

        # extend options of sections other than "local_fs" with its defaults
        if section != "local_fs":
            defaults = dict(cfg.items("local_fs", expand_vars=False, expand_user=False))
            cfg.update({section: defaults}, overwrite_sections=True, overwrite_options=False)

        kwargs = self.parse_config(section, kwargs)
        self.config_section = section

    super(LocalFileSystem, self).__init__(**kwargs)
def setup_logging():
    """
    Initializes the internal law loggers plus every logger listed as a (*name*, *level*)
    pair in the ``"logging"`` config section. Entries outside the ``"law.*"`` namespace
    are accepted as well, which is a convenient way to set up custom loggers. Subsequent
    calls are no-ops.
    """
    global _logging_setup

    # run only once
    if _logging_setup:
        return
    _logging_setup = True

    # attach the stream handler to the law root logger which propagates it to lower
    # level loggers
    logging.getLogger("law").addHandler(create_stream_handler())

    # configure all loggers listed in the config; non-law loggers without an existing
    # tty handler get their own console handler
    cfg = Config.instance()
    for logger_name, logger_level in cfg.items("logging"):
        is_law_logger = logger_name.startswith("law.")
        needs_console = not is_law_logger and not get_tty_handlers(logger_name)
        setup_logger(logger_name, logger_level, add_console_handler=needs_console, clear=False)
def get_sw_dir(sw_dir=None):
    """
    Returns the software directory. When *sw_dir* is *None*, the value of the
    ``core.software_dir`` config option is used. User ("~") and environment variables
    in the path are expanded.
    """
    if sw_dir is None:
        sw_dir = Config.instance().get("core", "software_dir")

    return os.path.expandvars(os.path.expanduser(sw_dir))
def __init__(self, file_name="job.jdl", universe="vanilla", executable=None, arguments=None,
        input_files=None, output_files=None, postfix_output_files=True, log="log.txt",
        stdout="stdout.txt", stderr="stderr.txt", notification="Never", custom_content=None,
        absolute_paths=False, **kwargs):
    """
    HTCondor job file factory constructor. All arguments are stored as instance attributes.
    Missing *dir*, *mkdtemp* and *cleanup* kwargs are filled from the ``job.htcondor_*``
    config options, falling back to the generic ``job.job_file_dir*`` options, before being
    forwarded to the base class.
    """
    # fill missing base kwargs from the config, preferring htcondor-specific options
    cfg = Config.instance()
    option_specs = [
        ("dir", cfg.get_expanded, "htcondor_job_file_dir", "job_file_dir"),
        ("mkdtemp", cfg.get_expanded_boolean, "htcondor_job_file_dir_mkdtemp",
            "job_file_dir_mkdtemp"),
        ("cleanup", cfg.get_expanded_boolean, "htcondor_job_file_dir_cleanup",
            "job_file_dir_cleanup"),
    ]
    for key, getter, option, fallback in option_specs:
        if kwargs.get(key) is None:
            kwargs[key] = getter("job", cfg.find_option("job", option, fallback))

    super(HTCondorJobFileFactory, self).__init__(**kwargs)

    # store all arguments
    self.file_name = file_name
    self.universe = universe
    self.executable = executable
    self.arguments = arguments
    self.input_files = input_files or []
    self.output_files = output_files or []
    self.postfix_output_files = postfix_output_files
    self.log = log
    self.stdout = stdout
    self.stderr = stderr
    self.notification = notification
    self.custom_content = custom_content
    self.absolute_paths = absolute_paths
def notify_mail(title, message, recipient=None, sender=None, smtp_host=None, smtp_port=None,
        **kwargs):
    """
    Sends a notification mail with a *title* and a string *message*. Unset *recipient*,
    *sender*, *smtp_host* and *smtp_port* values are read from the [notifications] config
    section. Returns *False* when recipient or sender cannot be determined, otherwise the
    result of ``send_mail``.
    """
    # fill unset values from the config
    cfg = Config.instance()
    recipient = recipient or cfg.get_expanded("notifications", "mail_recipient")
    sender = sender or cfg.get_expanded("notifications", "mail_sender")
    smtp_host = smtp_host or cfg.get_expanded("notifications", "mail_smtp_host")
    smtp_port = smtp_port or cfg.get_expanded("notifications", "mail_smtp_port")

    # both recipient and sender are required
    if not recipient or not sender:
        logger.warning(
            "cannot send mail notification, recipient ({}) or sender ({}) empty"
            .format(recipient, sender))
        return False

    return send_mail(recipient, sender, title, message, smtp_host=smtp_host, smtp_port=smtp_port)
def notify_mail(title, message, recipient=None, sender=None, smtp_host=None, smtp_port=None,
        **kwargs):
    """
    Sends a notification mail with a *title* and a string *message*. *recipient*, *sender*,
    *smtp_host* and *smtp_port* default to the configuration values in the [notifications]
    section. When *recipient* or *sender* are not set, a warning is issued and *False* is
    returned. Otherwise, the result of :py:meth:`util.send_mail` is returned.
    """
    cfg = Config.instance()

    # read fallbacks from the config for all unset values
    recipient = recipient or cfg.get_expanded("notifications", "mail_recipient")
    sender = sender or cfg.get_expanded("notifications", "mail_sender")
    smtp_host = smtp_host or cfg.get_expanded("notifications", "mail_smtp_host")
    smtp_port = smtp_port or cfg.get_expanded("notifications", "mail_smtp_port")

    if not recipient or not sender:
        logger.warning("cannot send mail notification, recipient ({}) or sender ({}) empty".format(
            recipient, sender))
        return False

    # only forward smtp settings that are actually set
    mail_kwargs = {
        key: value
        for key, value in [("smtp_host", smtp_host), ("smtp_port", smtp_port)]
        if value
    }

    return send_mail(recipient, sender, title, uncolored(message), **mail_kwargs)
def _init_configs(self, section, default_fs_option, default_section, init_kwargs):
    """
    Resolves the config *section* (falling back to the ``target.<default_fs_option>``
    value), reads file system and file interface options from it, merges *init_kwargs*
    on top of both, and returns a (*section*, *fs_config*, *fi_config*) tuple.
    """
    cfg = Config.instance()

    # fall back to the configured default section
    section = section or cfg.get_expanded("target", default_fs_option)

    fs_config, fi_config = {}, {}
    if isinstance(section, six.string_types):
        # when set, the section must exist
        if not cfg.has_section(section):
            raise Exception("law config has no section '{}' to read {} options".format(
                section, self.__class__.__name__))

        # extend options of sections other than the default one with its values
        self._update_section_defaults(default_section, section)

        # parse options for both the file system and the remote file interface
        fs_config = self.parse_config(section)
        fi_config = self.file_interface_cls.parse_config(section)

    # init kwargs take precedence over parsed config values
    fs_config = merge_dicts(fs_config, init_kwargs, deep=True)
    fi_config = merge_dicts(fi_config, init_kwargs, deep=True)

    return section, fs_config, fi_config
def setup_parser(sub_parsers):
    """
    Sets up the command line parser for the *config* subprogram and adds it to *sub_parsers*.
    """
    parser = sub_parsers.add_parser("config", prog="law config", description="Configuration helper"
        " to get, set or remove a value from the law configuration file ({}).".format(
            Config.instance().config_file))

    # positional arguments
    parser.add_argument("name", nargs="?", help="the name of the config in the format"
        " <section>[.<option>]")
    parser.add_argument("value", nargs="?", help="the value to set")

    # boolean flags
    flag_specs = [
        (("--remove", "-r"), "remove the config"),
        (("--expand", "-e"), "expand variables when getting a value"),
        (("--location", "-l"), "print the location of the configuration file and exit"),
    ]
    for flags, help_text in flag_specs:
        parser.add_argument(*flags, action="store_true", help=help_text)
def get_config(name, expand=False):
    """
    Returns the config value that corresponds to *name*, which must have the format
    ``section.option``. When *expand* is *True*, variables in the value are expanded.
    """
    # note: the "config" cli subprogram invokes this as get_config(args.name,
    # expand=args.expand), so the expand parameter is required; it defaults to False to
    # preserve the previous plain-get behavior
    section, option = name.split(".", 1)
    cfg = Config.instance()
    getter = cfg.get_expanded if expand else cfg.get
    return getter(section, option)
def prepare_stageout(self, tmp_dir):
    """
    Prepares the stage-out of task outputs from within the sandbox. Applies the task's
    stage-out mask to the outputs as seen from outside and from within the sandbox,
    creates the stage-out directory inside *tmp_dir*, and returns a ``StageInfo`` object,
    or *None* when nothing needs to be staged out.
    """
    # get the sandbox stage-out mask
    stageout_mask = self.task.sandbox_stageout()
    if not stageout_mask:
        return None

    # determine outputs as seen from outside and within the sandbox
    # (the sandbox view is obtained by temporarily patching os.environ with the task env)
    outputs = self.task.output()
    with patch_object(os, "environ", self.task.env, lock=True):
        sandbox_outputs = self.task.output()

    # apply the mask to both structs
    outputs = mask_struct(stageout_mask, outputs)
    sandbox_outputs = mask_struct(stageout_mask, sandbox_outputs)
    if not outputs:
        return None

    # define the stage-out directory
    cfg = Config.instance()
    section = self.sandbox_inst.get_config_section()
    stageout_dir = tmp_dir.child(cfg.get_expanded(section, "stageout_dir"), type="d")
    stageout_dir.touch()

    # create a lookup for input -> sandbox input
    # (relies on both masked structs flattening to the same order)
    sandbox_targets = dict(zip(flatten(outputs), flatten(sandbox_outputs)))

    return StageInfo(outputs, stageout_dir, sandbox_targets)
def parse_config(cls, section, config=None):
    """
    Reads the law config *section* and returns parsed cache options in a dict, using and
    extending *config* when given. Each option is looked up with a ``cache_`` prefix and
    only added when it is present in the config and not already contained in *config*.
    """
    cfg = Config.instance()

    if config is None:
        config = {}

    # helper to add a config value if it exists, extracted with a config parser method
    def add(option, func):
        cache_option = "cache_" + option
        if option not in config and not cfg.is_missing_or_none(section, cache_option):
            config[option] = func(section, cache_option)

    # parse size values in MB
    def get_size(section, cache_option):
        value = cfg.get_expanded(section, cache_option)
        return parse_bytes(value, input_unit="MB", unit="MB")

    # parse duration values in seconds
    def get_time(section, cache_option):
        value = cfg.get_expanded(section, cache_option)
        return parse_duration(value, input_unit="s", unit="s")

    add("root", cfg.get_expanded)
    # fixed typo: this previously read add("cleaup", ...), so the "cache_cleanup"
    # config option was never picked up
    add("cleanup", cfg.get_expanded_boolean)
    add("max_size", get_size)
    add("file_perm", cfg.get_expanded_int)
    add("dir_perm", cfg.get_expanded_int)
    add("wait_delay", get_time)
    add("max_waits", cfg.get_expanded_int)
    add("global_lock", cfg.get_expanded_boolean)

    return config
def parse_config(cls, section, config=None, overwrite=False):
    """
    Reads the law config *section* and returns parsed GFAL options in a dict, using and
    extending *config* when given. Existing values are only replaced when *overwrite* is
    *True*. Transfer-related options are grouped in a nested ``"transfer_config"`` dict.
    """
    config = super(GFALFileInterface, cls).parse_config(section, config=config,
        overwrite=overwrite)

    cfg = Config.instance()

    # helper to add a config value if it exists, extracted with a config parser method
    # fix: the membership check must inspect the target dict (_config) rather than the
    # top-level config, otherwise existing entries in nested dicts such as
    # transfer_config are clobbered even when overwrite is False
    def add(option, func, postfix="gfal_", _config=config):
        if option not in _config or overwrite:
            _config[option] = func(section, postfix + option)

    # use atomic contexts per operation
    add("atomic_contexts", cfg.get_expanded_boolean)

    # transfer config
    config.setdefault("transfer_config", {})
    transfer_specs = [
        ("timeout", cfg.get_expanded_int),
        ("checksum_check", cfg.get_expanded_boolean),
        ("nbstreams", cfg.get_expanded_int),
        ("overwrite", cfg.get_expanded_boolean),
        ("create_parent", cfg.get_expanded_boolean),
        ("strict_copy", cfg.get_expanded_boolean),
    ]
    for name, func in transfer_specs:
        add(name, func, "gfal_transfer_", config["transfer_config"])

    return config
def __init__(self, section=None, **kwargs):
    """
    WLCG file system constructor. Options are read from the law config *section*
    (defaulting to ``target.default_wlcg_fs``), extended with the defaults of the
    ``wlcg_fs`` section, and forwarded to the remote file system base class. A ``base``
    path is mandatory.
    """
    # default gfal transfer config
    transfer_config = kwargs.setdefault("transfer_config", {})
    transfer_config.setdefault("checksum_check", False)

    # if present, read options from the section in the law config
    cfg = Config.instance()
    section = section or cfg.get_expanded("target", "default_wlcg_fs")

    if isinstance(section, six.string_types):
        if not cfg.has_section(section):
            raise Exception("law config has no section '{}' to read {} options".format(
                section, self.__class__.__name__))

        # extend with the real defaults before parsing
        if section != "wlcg_fs":
            defaults = dict(cfg.items("wlcg_fs", expand_vars=False, expand_user=False))
            cfg.update({section: defaults}, overwrite_sections=True, overwrite_options=False)

        kwargs = self.parse_config(section, kwargs)

    # base path is mandatory
    base = kwargs.pop("base", None)
    if not base:
        raise Exception("{}.base is missing, set either 'section', 'base', or change the "
            "target.default_wlcg_fs option in your law config".format(self.__class__.__name__))

    RemoteFileSystem.__init__(self, base, **kwargs)
def execute(args):
    """
    Executes the *config* subprogram with parsed commandline *args*.
    """
    # just print the file location?
    if args.location:
        print(Config.instance().config_file)
        return

    # every option below requires the name to be set
    if not args.name:
        abort("please give the name of the config in the format <section>[.<option>]")

    # removal and setting are not implemented yet
    if args.remove:
        abort("config removal not yet implemented")
    if args.value:
        abort("config setting not yet implemented")

    # getting
    print(get_config(args.name, expand=args.expand))
def setup_parser(sub_parsers):
    """
    Sets up the command line parser for the *index* subprogram and adds it to *sub_parsers*.
    """
    parser = sub_parsers.add_parser("index", prog="law index", description="Create or update the"
        " (human-readable) law task index file ({}). This is only required for the shell"
        " auto-completion.".format(Config.instance().get("core", "index_file")))

    parser.add_argument("--modules", "-m", nargs="+", help="additional modules to traverse")

    # boolean flags
    flag_specs = [
        (("--no-externals", "-e"), "skip external tasks"),
        (("--remove", "-r"), "remove the index file and exit"),
        (("--location", "-l"), "print the location of the index file and exit"),
        (("--verbose", "-v"), "verbose output"),
    ]
    for flags, help_text in flag_specs:
        parser.add_argument(*flags, action="store_true", help=help_text)
def notify_telegram(title, content, token=None, chat=None, mention_user=None, **kwargs):
    """
    Sends a telegram notification and returns *True* on success. The communication with the
    telegram API might have some delays and is therefore handled by a thread.
    """
    # test import
    import telegram  # noqa: F401

    cfg = Config.instance()

    # get default token and chat
    token = token or cfg.get_expanded("notifications", "telegram_token")
    chat = chat or cfg.get_expanded("notifications", "telegram_chat")

    if not token or not chat:
        logger.warning("cannot send Telegram notification, token ({}) or chat ({}) empty".format(
            token, chat))
        return False

    # append the user to mention to the title, unless explicitly set to an empty string
    if mention_user is None:
        mention_user = cfg.get_expanded("notifications", "telegram_mention_user")
    mention_text = " (@{})".format(mention_user) if mention_user else ""

    # request data for the API call
    request = {"parse_mode": "Markdown"}

    # standard or attachment content?
    if isinstance(content, six.string_types):
        request["text"] = "{}{}\n\n{}".format(title, mention_text, content)
    else:
        # content is a dict, add some formatting
        body = "".join("_{}_: {}\n".format(key, value) for key, value in content.items())
        request["text"] = "{}{}\n\n{}".format(title, mention_text, body)

    # extend by arbitrary kwargs
    request.update(kwargs)

    # threaded, non-blocking API communication
    thread = threading.Thread(target=_notify_telegram, args=(token, chat, request))
    thread.start()

    return True
def __init__(self, file_name="job.jdl", executable=None, arguments=None, input_files=None,
        output_files=None, postfix_output_files=True, output_uri=None, stdout="stdout.txt",
        stderr="stderr.txt", vo=None, custom_content=None, absolute_paths=False, **kwargs):
    """
    GLite job file factory constructor. All arguments are stored as instance attributes.
    Missing *dir*, *mkdtemp* and *cleanup* kwargs are filled from the ``job.glite_*`` config
    options, falling back to the generic ``job.job_file_dir*`` options, before being
    forwarded to the base class.
    """
    # fill missing base kwargs from the config, preferring glite-specific options
    cfg = Config.instance()
    option_specs = [
        ("dir", cfg.get_expanded, "glite_job_file_dir", "job_file_dir"),
        ("mkdtemp", cfg.get_expanded_boolean, "glite_job_file_dir_mkdtemp",
            "job_file_dir_mkdtemp"),
        ("cleanup", cfg.get_expanded_boolean, "glite_job_file_dir_cleanup",
            "job_file_dir_cleanup"),
    ]
    for key, getter, option, fallback in option_specs:
        if kwargs.get(key) is None:
            kwargs[key] = getter("job", cfg.find_option("job", option, fallback))

    super(GLiteJobFileFactory, self).__init__(**kwargs)

    # store all arguments
    self.file_name = file_name
    self.executable = executable
    self.arguments = arguments
    self.input_files = input_files or []
    self.output_files = output_files or []
    self.postfix_output_files = postfix_output_files
    self.output_uri = output_uri
    self.stdout = stdout
    self.stderr = stderr
    self.vo = vo
    self.custom_content = custom_content
    self.absolute_paths = absolute_paths
def setup_logging():
    """
    Sets up the internal logging mechanism, i.e., it creates the :py:attr:`console_handler`,
    sets its formatting, and adds it to the main "law" logger which propagates settings to
    lower level loggers. In addition, all loggers listed as (*name*, *level*) pairs in the
    ``"logging"`` config section are set up, including custom ones outside the ``"law.*"``
    namespace. Repeated calls have no effect.
    """
    global console_handler

    # run only once
    if console_handler:
        return

    # create the console handler and attach it to the law root logger which propagates it
    # to lower level loggers
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(LogFormatter())
    logging.getLogger("law").addHandler(console_handler)

    # configure all loggers listed in the config
    cfg = Config.instance()
    for logger_name, level_name in cfg.items("logging"):
        level_name = level_name.upper()
        # skip unknown level names
        if not hasattr(logging, level_name):
            continue

        # create / get the logger and set the level
        log = logging.getLogger(logger_name)
        log.setLevel(getattr(logging, level_name))

        # add the console handler when not part of the law.* namespace
        if not logger_name.startswith("law."):
            log.addHandler(console_handler)

        log.debug("registered logger with level '{}'".format(level_name))
def parse_config(cls, section, config=None, overwrite=False):
    """
    Reads the law config *section* and returns parsed remote file system options in a dict,
    using and extending *config* when given. Existing values are only replaced when
    *overwrite* is *True*. Cache-related options are parsed into a nested ``"cache_config"``
    dict when present.
    """
    config = super(RemoteFileSystem, cls).parse_config(section, config=config,
        overwrite=overwrite)

    cfg = Config.instance()

    # boolean options: validation for existence after copy, and cache usage;
    # existing values are kept unless overwriting is requested
    for option in ("validate_copy", "use_cache"):
        if option not in config or overwrite:
            config[option] = cfg.get_expanded_boolean(section, option)

    # cache options
    if cfg.options(section, prefix="cache_"):
        RemoteCache.parse_config(section, config.setdefault("cache_config", {}),
            overwrite=overwrite)

    return config
def _get_env(self):
    """
    Builds and returns an ordered dict of environment variables for the sandbox, combining
    default sandboxing variables, variables from the sandbox env config section (whose keys
    may be shell-style patterns matching existing environment variables), and task-level
    additions obtained via ``sandbox_env``.
    """
    env = OrderedDict()

    # default sandboxing variables
    env["LAW_SANDBOX"] = self.key.replace("$", r"\$")
    env["LAW_SANDBOX_SWITCHED"] = "1"

    if self.task:
        env["LAW_SANDBOX_IS_ROOT_TASK"] = "1" if self.task.is_root_task() else ""
        if getattr(self.task, "_worker_id", None):
            env["LAW_SANDBOX_WORKER_ID"] = self.task._worker_id
        if getattr(self.task, "_worker_task", None):
            env["LAW_SANDBOX_WORKER_TASK"] = self.task.live_task_id

    # extend by variables from the config file
    cfg = Config.instance()
    section = self.get_config_section(postfix="env")
    for pattern, value in cfg.items_expanded(section):
        # wildcard keys match any number of existing environment variables
        if "*" in pattern or "?" in pattern:
            matched_names = [key for key in os.environ.keys() if fnmatch(key, pattern)]
        else:
            matched_names = [pattern]
        for var_name in matched_names:
            env[var_name] = value if value is not None else os.getenv(var_name, "")

    # extend by variables defined on task level
    if self.task:
        task_env = self.task.sandbox_env(env)
        if task_env:
            env.update(task_env)

    return env
def __init__(self, file_name="job.job", command=None, executable=None, arguments=None,
        queue=None, cwd=None, input_files=None, output_files=None, postfix_output_files=True,
        manual_stagein=False, manual_stageout=False, job_name=None, stdout="stdout.txt",
        stderr="stderr.txt", shell="bash", emails=False, custom_content=None,
        absolute_paths=False, **kwargs):
    """
    LSF job file factory constructor. All arguments are stored as instance attributes.
    Missing *dir*, *mkdtemp* and *cleanup* kwargs are filled from the ``job.lsf_*`` config
    options, falling back to the generic ``job.job_file_dir*`` options, before being
    forwarded to the base class.
    """
    # fill missing base kwargs from the config, preferring lsf-specific options
    cfg = Config.instance()
    option_specs = [
        ("dir", cfg.get_expanded, "lsf_job_file_dir", "job_file_dir"),
        ("mkdtemp", cfg.get_expanded_boolean, "lsf_job_file_dir_mkdtemp",
            "job_file_dir_mkdtemp"),
        ("cleanup", cfg.get_expanded_boolean, "lsf_job_file_dir_cleanup",
            "job_file_dir_cleanup"),
    ]
    for key, getter, option, fallback in option_specs:
        if kwargs.get(key) is None:
            kwargs[key] = getter("job", cfg.find_option("job", option, fallback))

    super(LSFJobFileFactory, self).__init__(**kwargs)

    # store all arguments
    self.file_name = file_name
    self.command = command
    self.executable = executable
    self.arguments = arguments
    self.queue = queue
    self.cwd = cwd
    self.input_files = DeprecatedInputFiles(input_files or {})
    self.output_files = output_files or []
    self.postfix_output_files = postfix_output_files
    self.manual_stagein = manual_stagein
    self.manual_stageout = manual_stageout
    self.job_name = job_name
    self.stdout = stdout
    self.stderr = stderr
    self.shell = shell
    self.emails = emails
    self.custom_content = custom_content
    self.absolute_paths = absolute_paths
def get_config_section(self, postfix=None):
    """
    Returns the law config section for this sandbox. The base section is
    ``"<sandbox_type>_sandbox"``, optionally extended by *postfix*. When a more specific
    section that also includes the sandbox name exists in the config, it is preferred.
    """
    parts = [self.sandbox_type + "_sandbox"]
    if postfix:
        parts.append(postfix)
    section = "_".join(parts)

    # prefer the name-specific section when it exists
    image_section = "_".join([section, self.name])
    return image_section if Config.instance().has_section(image_section) else section
def get_config_volumes(self, section, default_section):
    """
    Reads volume mappings from the law config *section*, falling back to *default_section*
    when the former does not exist. Keys are expanded (user and environment variables);
    presumably they denote host directories mapped to sandbox directories — confirm against
    the consuming sandbox implementation.
    """
    cfg = Config.instance()
    src_section = section if cfg.has_section(section) else default_section
    return {
        os.path.expandvars(os.path.expanduser(host_dir)): sandbox_dir
        for host_dir, sandbox_dir in cfg.items(src_section)
    }
def get_prefixed_config(self, section, option, **kwargs):
    """
    Returns the config value defined by *section* and *option*, which is prefixed by the
    :py:attr:`workflow_type`. When the prefixed option is not found, the plain *option* is
    used instead. All *kwargs* are forwarded to :py:meth:`Config.get_expanded`.
    """
    cfg = Config.instance()

    # the plain option serves as the default for the prefixed lookup
    fallback = cfg.get_expanded(section, option, **kwargs)

    return cfg.get_expanded(section, "{}_{}".format(self.workflow_type, option),
        default=fallback, **kwargs)
def get_prefixed_config(self, section, option, **kwargs):
    """
    Returns the config value referred to by *section* and *option* where the option is
    first looked up with a :py:attr:`workflow_type` prefix
    (``"<workflow_type>_<option>"``). When the prefixed option is missing, the value of
    the plain *option* is returned instead. All *kwargs* are forwarded to
    :py:meth:`Config.get_expanded`.
    """
    cfg = Config.instance()
    # the plain option acts as the default for the prefixed lookup
    default = cfg.get_expanded(section, option, **kwargs)
    return cfg.get_expanded(section, "{}_{}".format(self.workflow_type, option), default=default, **kwargs)
def _bash_cmd(self):
    """
    Returns the bash command as a list, adding the login flag when enabled in the sandbox
    config section.
    """
    cfg = Config.instance()
    section = self.get_config_section()
    use_login_shell = cfg.get_expanded_boolean(section, "login")

    return ["bash", "-l"] if use_login_shell else ["bash"]
def parse_config(cls, section, config=None):
    """
    Reads the law config *section* and returns parsed remote file system options in a
    dict, using and extending *config* when given. Options already contained in *config*
    are not replaced. Cache-related options are parsed into a nested ``"cache_config"``
    dict when present.
    """
    config = super(RemoteFileSystem, cls).parse_config(section, config=config)

    cfg = Config.instance()

    # helper to add a config value if it exists, extracted with a config parser method
    def add(option, func):
        if option not in config:
            config[option] = func(section, option)

    def get_expanded_list(section, option):
        # get config value, run brace expansion taking into account csv splitting
        value = cfg.get_expanded(section, option)
        return value and [
            v.strip() for v in brace_expand(value.strip(), split_csv=True)
        ]

    def get_time(section, option):
        # parse a duration value into seconds
        value = cfg.get_expanded(section, option)
        return parse_duration(value, input_unit="s", unit="s")

    # base path(s)
    add("base", get_expanded_list)

    # base path(s) per operation, read from all "base_<operation>" options;
    # option[5:] strips the "base_" prefix to obtain the operation name
    # (the lambda closes over the options list computed here)
    options = cfg.options(section, prefix="base_")
    add(
        "bases", lambda *_: {
            option[5:]: get_expanded_list(section, option)
            for option in options
            if not cfg.is_missing_or_none(section, option)
        })

    # atomic contexts
    add("atomic_contexts", cfg.get_expanded_boolean)

    # number of retries
    add("retries", cfg.get_expanded_int)

    # delay between retries
    add("retry_delay", get_time)

    # random base selection
    add("random_base", cfg.get_expanded_boolean)

    # validation after copy
    add("validate_copy", cfg.get_expanded_boolean)

    # cache options
    if cfg.options(section, prefix="cache_"):
        RemoteCache.parse_config(section, config.setdefault("cache_config", {}))

    return config
def sandbox_user(self):
    """
    Returns a (uid, gid) tuple for the sandbox. Defaults to the ids of the current process
    and can be overridden per sandbox via the ``uid`` and ``gid`` options in the sandbox
    config section.
    """
    uid = os.getuid()
    gid = os.getgid()

    # no sandbox instance -> nothing to look up in the config
    if not self.sandbox_inst:
        return uid, gid

    cfg = Config.instance()
    section = self.sandbox_inst.get_config_section()
    return (
        cfg.get_expanded_int(section, "uid", default=uid),
        cfg.get_expanded_int(section, "gid", default=gid),
    )