def req_params(cls, inst, _exclude=None, _prefer_cli=None, **kwargs):
    """
    Returns the parameters that *inst* can hand over to *cls*, starting from the result of
    ``luigi.util.common_params(inst, cls)``. Names matching *_exclude* or one of the exclude sets
    declared on *cls* (``exclude_params_req``, ``exclude_params_req_get``) and on *inst*
    (``exclude_params_req``, ``exclude_params_req_set``) are dropped, then *kwargs* are merged in.
    Finally, names listed in *_prefer_cli* are removed again when a command line value prefixed
    with the task family of *cls* exists for them.
    """
    params = luigi.util.common_params(inst, cls)

    # collect all exclusion patterns in a private set
    excluded = set(make_list(_exclude)) if _exclude is not None else set()
    excluded.update(
        cls.exclude_params_req,
        cls.exclude_params_req_get,
        inst.exclude_params_req,
        inst.exclude_params_req_set,
    )

    # drop every parameter that matches one of the exclusion patterns
    for name in [n for n in params.keys() if multi_match(n, excluded, any)]:
        params.pop(name)

    # explicitly passed values are merged after the exclusion step
    params.update(kwargs)

    if not _prefer_cli:
        return params

    # names of class-level arguments that were given on the command line
    prefix = cls.get_task_family() + "_"
    cls_args = []
    if luigi.cmdline_parser.CmdlineParser.get_instance():
        cls_args = [
            key[len(prefix):]
            for key in global_cmdline_values().keys()
            if key.startswith(prefix)
        ]

    # parameters that are preferably taken from the command line are not forwarded
    for name in make_list(_prefer_cli):
        if name in params and name in cls_args:
            params.pop(name)

    return params
def _initialize_sandbox(self, force=False):
    """
    Determines the effective sandbox of this task and, when the task is not sandboxed itself,
    creates the sandbox instance and the sandbox proxy. The work is done only once unless *force*
    is *True*. An exception is raised when no sandbox could be determined and
    ``allow_empty_sandbox`` is not set.
    """
    # run only once unless explicitly forced
    if self._sandbox_initialized and not force:
        return
    self._sandbox_initialized = True

    if _sandbox_switched:
        # already inside a sandbox: the task is placed into it, there is no nesting
        self._effective_sandbox = _current_sandbox[0]
    elif not isinstance(self.__class__.sandbox, luigi.Parameter):
        # hard-coded sandbox, take it as it is
        self._effective_sandbox = self.sandbox
    elif multi_match(self.sandbox, self.valid_sandboxes, mode=any):
        # sandbox set via parameter and accepted by the valid sandboxes
        self._effective_sandbox = self.sandbox
    else:
        # sandbox set via parameter but not accepted, ask for the fallback
        self._effective_sandbox = self.fallback_sandbox(self.sandbox)

    # the sandbox must be set at this point unless it is explicitly allowed to be empty
    is_empty = self._effective_sandbox in (None, NO_STR)
    if is_empty and not self.allow_empty_sandbox:
        raise Exception("task {!r} requires the sandbox parameter to be set".format(self))
    if is_empty:
        self._effective_sandbox = NO_STR

    # a task that already runs sandboxed needs neither a sandbox instance nor a proxy
    if self.is_sandboxed():
        return

    self._sandbox_inst = Sandbox.new(self._effective_sandbox, self)
    self._sandbox_proxy = SandboxProxy(task=self)
    logger.debug("created sandbox proxy instance of type '{}'".format(
        self._effective_sandbox))
def req_params(cls, inst, _exclude=None, **kwargs):
    """
    Returns the parameters that *inst* can hand over to *cls*, starting from the result of
    ``luigi.util.common_params(inst, cls)``. *_exclude* can be a single pattern or a list, tuple,
    set or frozenset of patterns. Names matching it, or one of the exclude sets declared on *cls*
    (``exclude_params_req``, ``exclude_params_req_receive``) and on *inst*
    (``exclude_params_req``, ``exclude_params_req_transfer``), are dropped. *kwargs* are merged
    into the result afterwards and are therefore never excluded.

    The object passed as *_exclude* is never modified.
    """
    # common/intersection params
    params = luigi.util.common_params(inst, cls)

    # determine parameters to exclude, always working on a private copy so that a set passed by
    # the caller is not extended by the class-level excludes added below
    if _exclude is None:
        exclude = set()
    elif isinstance(_exclude, (list, tuple, set, frozenset)):
        exclude = set(_exclude)
    else:
        exclude = {_exclude}

    # also use this class' req and req_receive sets
    # and the req and req_transfer sets of the instance's class
    exclude.update(cls.exclude_params_req, cls.exclude_params_req_receive)
    exclude.update(inst.exclude_params_req, inst.exclude_params_req_transfer)

    # remove excluded parameters, iterating over a snapshot of the keys since entries are deleted
    for name in list(params):
        if multi_match(name, exclude, any):
            del params[name]

    # add kwargs
    params.update(kwargs)

    return params
def deregister(cls, task_cls=None):
    """
    Removes a task class from the luigi task register. *task_cls* can be a task class, a task
    family string or pattern, or *None*, in which case *this* class is used. The return value is
    *True* when at least one registered class was removed, and *False* otherwise.
    """
    # the comparison is always based on task families
    if isinstance(task_cls, six.string_types):
        task_family = task_cls
    else:
        task_family = (cls if task_cls is None else task_cls).get_task_family()

    removed_any = False

    # walk through the register, only advancing the index when nothing was removed at the
    # current position
    pos = 0
    while pos < len(Register._reg):
        registered_cls = Register._reg[pos]
        if not multi_match(registered_cls.get_task_family(), task_family, mode=any):
            pos += 1
            continue

        Register._reg.pop(pos)
        removed_any = True
        logger.debug("removed task class {} from register".format(registered_cls))

    return removed_any
def __init__(self, *args, **kwargs):
    """
    Initializes the task, determines its effective sandbox and, when the task is not sandboxed,
    creates the sandbox instance and the sandbox proxy. An exception is raised when a fallback
    sandbox is needed but cannot be determined.
    """
    super(SandboxTask, self).__init__(*args, **kwargs)

    if _sandbox_switched:
        # already inside a sandbox, so that one is used
        self.effective_sandbox = _current_sandbox[0]
    elif self.force_sandbox or multi_match(self.sandbox, self.valid_sandboxes, any):
        # the requested sandbox is either forced or accepted by the valid sandboxes
        self.effective_sandbox = self.sandbox
    else:
        # the requested sandbox cannot be used, a fallback is required
        self.effective_sandbox = self.fallback_sandbox(self.sandbox)
        if self.effective_sandbox is None:
            raise Exception("cannot determine fallback sandbox for {} in task {}".format(
                self.sandbox, self))

    if self.sandboxed:
        # a sandboxed task has neither a sandbox instance nor a proxy
        self.sandbox_inst = None
        self.sandbox_proxy = None
    else:
        self.sandbox_inst = Sandbox.new(self.effective_sandbox, self)
        self.sandbox_proxy = SandboxProxy(task=self)
        logger.debug("created sandbox proxy instance of type '{}'".format(
            self.effective_sandbox))
def get_task_params(cls):
    """
    Returns a list with the names of all attributes of *cls* that are ``luigi.Parameter``
    instances and that do not match the optional ``exclude_params_index`` attribute of *cls*.
    Underscores in the names are replaced by dashes.
    """
    names = []
    for attr in dir(cls):
        # only parameter objects are of interest
        if not isinstance(getattr(cls, attr), luigi.Parameter):
            continue

        # skip parameters that the class excludes from the index
        excluded = getattr(cls, "exclude_params_index", set())
        if multi_match(attr, excluded, any):
            continue

        names.append(attr.replace("_", "-"))

    return names
def before_call():
    """
    Validates the decorated call: *task* must be a SandboxTask, it must be sandboxed and, when
    the "sandbox" option is set, its effective sandbox must match that option. A TypeError is
    raised in the first case, a plain Exception in the other two. Returns *None* on success.
    """
    if not isinstance(task, SandboxTask):
        msg = ("require_sandbox can only be used to decorate methods of tasks that "
            "inherit from SandboxTask, got '{!r}'")
        raise TypeError(msg.format(task))

    if not task.is_sandboxed():
        msg = "the invocation of method {} requires task {!r} to be sandboxed"
        raise Exception(msg.format(fn.__name__, task))

    # the sandbox itself is only checked when a specific one is requested
    required = opts["sandbox"]
    if not required:
        return None
    if multi_match(task.effective_sandbox, make_list(required)):
        return None

    msg = ("the invocation of method {} requires the sandbox of task {!r} to "
        "match '{}'")
    raise Exception(msg.format(fn.__name__, task, required))
def _repr_params(self, all_params=False):
    """
    Returns a list of 2-tuples (name, serialized value) for all significant parameters of this
    task. Unless *all_params* is *True*, parameters whose names match ``exclude_params_repr`` are
    left out.
    """
    params = self.get_params()
    values = self.get_param_values(params, [], self.param_kwargs)
    objs = dict(params)

    def is_shown(name):
        # insignificant parameters are never shown
        if not objs[name].significant:
            return False
        return all_params or not multi_match(name, self.exclude_params_repr)

    return [
        (name, objs[name].serialize(value))
        for name, value in values
        if is_shown(name)
    ]
def _repr_params(self, all_params=False):
    """
    Returns an ordered mapping "name -> value" of all significant parameters of this task. Unless
    *all_params* is *True*, parameters whose names match ``exclude_params_repr`` or
    ``interactive_params`` are left out.
    """
    # patterns of parameters to hide, empty when everything is requested
    hidden = set()
    if not all_params:
        hidden = hidden | self.exclude_params_repr | set(self.interactive_params)

    return OrderedDict(
        (name, getattr(self, name))
        for name, param in self.get_params()
        if param.significant and not multi_match(name, hidden)
    )
def cli_args(self, exclude=None, replace=None):
    """
    Returns an ordered mapping of command line options ("--param-name") to the stringified,
    serialized values of the parameters of this task. Parameters matching *exclude* are skipped,
    and values found in the *replace* mapping take precedence over the current attribute values.
    """
    skipped = set(make_list(exclude)) if exclude is not None else set()
    overrides = {} if replace is None else replace

    args = OrderedDict()
    for name, param in self.get_params():
        if not multi_match(name, skipped, any):
            raw = overrides.get(name, getattr(self, name))
            serialized = param.serialize(raw)
            args["--" + name.replace("_", "-")] = str(serialized)

    return args
def cli_args(self, exclude=None, replace=None):
    """
    Returns a flat list of command line arguments ("--param-name", value, ...) built from the
    parameters of this task. Parameters matching *exclude* are skipped, and values found in the
    *replace* mapping take precedence over the current attribute values.
    """
    skipped = set(make_list(exclude)) if exclude is not None else set()
    overrides = {} if replace is None else replace

    args = []
    for name, param in self.get_params():
        if not multi_match(name, skipped, any):
            raw = overrides.get(name, getattr(self, name))
            serialized = param.serialize(raw)
            option = "--{}".format(name.replace("_", "-"))
            # TODO: why does quote_cmd([val]) fail while str(val) doesn't
            args.append(option)
            args.append(str(serialized))

    return args
def _repr_params(self, all_params=False):
    """
    Returns a list of 2-tuples (name, serialized value) for all significant parameters of this
    task. Unless *all_params* is *True*, parameters whose names match ``exclude_params_repr``,
    the result of ``inst_exclude_params_repr()`` or ``interactive_params`` are left out.
    """
    params = self.get_params()

    # patterns of parameters to hide, empty when everything is requested
    hidden = set()
    if not all_params:
        hidden = (
            hidden |
            self.exclude_params_repr |
            self.inst_exclude_params_repr() |
            set(self.interactive_params)
        )

    return [
        (name, param.serialize(getattr(self, name)))
        for name, param in params
        if param.significant and not multi_match(name, hidden)
    ]
def dbline(cls, default_namespace=None):
    """
    Returns a string of the form "module:family:params" for *cls*. The parameter part always
    starts with "workers", "local-scheduler" and "help", followed by all ``luigi.Parameter``
    attributes of *cls* (underscores replaced by dashes) that do not match the optional
    ``exclude_params_db`` attribute. When *default_namespace* is given, the leading
    ``len(default_namespace) + 1`` characters are cut from the task family.
    """
    names = ["workers", "local-scheduler", "help"]
    for attr in dir(cls):
        # only parameter objects are of interest
        if not isinstance(getattr(cls, attr), luigi.Parameter):
            continue

        # skip parameters that the class excludes from the db
        excluded = getattr(cls, "exclude_params_db", set())
        if multi_match(attr, excluded, any):
            continue

        names.append(attr.replace("_", "-"))

    # truncate the task family when a default namespace is set
    family = cls.task_family
    if default_namespace is not None:
        family = family[(len(default_namespace) + 1):]

    return ":".join([cls.__module__, family, " ".join(names)])
def cli_args(self, exclude=None, replace=None):
    """
    Returns a flat list of command line arguments ("--param-name", quoted value, ...) built from
    the parameters of this task. *exclude* can be a single pattern or a sequence or set of
    patterns. Parameters matching it are skipped, and interactive parameters are always skipped.
    Values found in the *replace* mapping take precedence over the current attribute values.

    The object passed as *exclude* is never modified.
    """
    # normalize into a fresh set so that neither a caller's set is extended in place below,
    # nor a list or tuple fails on the in-place union
    exclude = set() if exclude is None else set(make_list(exclude))
    if replace is None:
        replace = {}

    # always exclude interactive parameters
    exclude |= set(self.interactive_params)

    args = []
    for name, param in self.get_params():
        if multi_match(name, exclude, any):
            continue
        raw = replace.get(name, getattr(self, name))
        val = param.serialize(raw)
        arg = "--{}".format(name.replace("_", "-"))
        args.extend([arg, quote_cmd([val])])

    return args
def cli_args(self, exclude=None, replace=None):
    """
    Returns a flat list of command line arguments built from the parameters of this task.
    Parameters matching *exclude* are skipped, and values found in the *replace* mapping take
    precedence over the current attribute values. Boolean parameters contribute only their flag,
    and only when the value is truthy. Integer and float parameters contribute the plain
    stringified value, and all others contribute the value wrapped in double quotes.
    """
    skipped = set() if exclude is None else exclude
    overrides = {} if replace is None else replace

    args = []
    for name, param in self.get_params():
        if multi_match(name, skipped, any):
            continue

        raw = overrides.get(name, getattr(self, name))
        val = param.serialize(raw)
        option = "--{}".format(name.replace("_", "-"))

        # boolean parameters are pure flags without a value
        if isinstance(param, luigi.BoolParameter):
            if raw:
                args.append(option)
            continue

        if isinstance(param, (luigi.IntParameter, luigi.FloatParameter)):
            args += [option, str(val)]
        else:
            args += [option, "\"{}\"".format(val)]

    return args
def replace_console_handlers(loggers=("luigi", "luigi.*", "luigi-*", "law", "law.*"), level=None,
        force_add=False, check_fn=None):
    """
    Removes all tty stream handlers (i.e. those logging to *stdout* or *stderr*) from certain
    *loggers* and adds a ``rich.logging.RichHandler`` with a specified *level*. *loggers* can
    either be logger instances or names. In the latter case, the names are used as patterns to
    identify matching loggers. Unless *force_add* is *True*, no new handler is added when no tty
    stream handler was previously registered.

    *check_fn* can be a function with two arguments, a logger instance and a handler instance,
    that should return *True* if that handler should be removed. When *None*, the decision is
    delegated to ``is_tty_handler``.

    When *level* is *None*, it defaults to the log level of the first removed handler. In case no
    default level can be determined, *INFO* is used. Once determined, the same level is used for
    all subsequently added handlers.

    The removed handlers are returned in a list of 2-tuples (*logger*, *removed_handlers*).
    """
    from rich import logging as rich_logging

    # prepare the return value
    ret = []

    # default check_fn
    if check_fn is None:
        check_fn = lambda logger, handler: is_tty_handler(handler)

    loggers = make_list(loggers)
    # iterate over a snapshot as the logger dict might change when check_fn creates loggers
    for name, logger in list(logging.root.manager.loggerDict.items()):
        # the logger dict also contains placeholder objects for names without a real logger,
        # which have neither handlers nor an addHandler method
        if not isinstance(logger, logging.Logger):
            continue

        # check if the logger is selected
        for l in loggers:
            if logger == l:
                break
            elif isinstance(l, six.string_types) and multi_match(name, l):
                break
        else:
            # when this point is reached, the logger was not selected
            continue

        removed_handlers = []
        # removeHandler mutates logger.handlers, so loop over a copy to not skip any handler
        for handler in list(getattr(logger, "handlers", [])):
            if check_fn(logger, handler):
                # get the level
                if level is None:
                    level = getattr(handler, "level", None)
                # remove it
                logger.removeHandler(handler)
                removed_handlers.append(handler)

        # when at least one handler was found and removed, or force_add is True, add a rich handler
        if removed_handlers or force_add:
            # make sure the level is set
            if level is None:
                level = logging.INFO

            # add the rich handler
            logger.addHandler(rich_logging.RichHandler(level))

        # add the removed handlers to the returned list
        if removed_handlers:
            ret.append((logger, removed_handlers))

    return ret
def scheduler_on_host(self):
    """
    Returns the result of matching the ``scheduler_host`` setting of the luigi core config
    against the addresses "0.0.0.0", "127.0.0.1" and "localhost".
    """
    local_hosts = ["0.0.0.0", "127.0.0.1", "localhost"]
    scheduler_host = luigi.interface.core().scheduler_host
    return multi_match(scheduler_host, local_hosts)
def replace_console_handlers(loggers=("luigi", "luigi.*", "luigi-*", "law", "law.*"), level=None,
        force_add=False, check_fn=None, **kwargs):
    """
    Removes all tty stream handlers (i.e. those logging to *stdout* or *stderr*) from certain
    *loggers* and adds a new ``rich.logging.RichHandler`` instance with a specified *level* and
    all *kwargs* passed as additional options to its constructor. *loggers* can either be logger
    instances or names. In the latter case, the names are used as patterns to identify matching
    loggers. Unless *force_add* is *True*, no new handler is added when no tty stream handler was
    previously registered.

    *check_fn* can be a function with two arguments, a logger instance and a handler instance,
    that should return *True* if that handler should be removed. When *None*, the decision is
    delegated to ``is_tty_handler``.

    When *level* is *None*, it defaults to the log level of the first removed handler. In case no
    default level can be determined, *INFO* is used. Once determined, the same level is used for
    all subsequently added handlers.

    Whenever a rich handler is added while the law config enables ``colored_str`` or
    ``colored_repr`` for tasks or targets, a warning is emitted through the affected logger.

    The removed handlers are returned in a list of 2-tuples (*logger*, *removed_handlers*).
    """
    from rich import logging as rich_logging

    # prepare the return value
    ret = []

    # default check_fn
    if check_fn is None:
        check_fn = lambda logger, handler: is_tty_handler(handler)

    loggers = make_list(loggers)
    # iterate over a snapshot as the logger dict might change when check_fn creates loggers
    for name, logger in list(logging.root.manager.loggerDict.items()):
        # the logger dict also contains placeholder objects for names without a real logger,
        # which have neither handlers nor an addHandler method
        if not isinstance(logger, logging.Logger):
            continue

        # check if the logger is selected
        for l in loggers:
            if logger == l:
                break
            elif isinstance(l, six.string_types) and multi_match(name, l):
                break
        else:
            # when this point is reached, the logger was not selected
            continue

        removed_handlers = []
        # removeHandler mutates logger.handlers, so loop over a copy to not skip any handler
        for handler in list(getattr(logger, "handlers", [])):
            if check_fn(logger, handler):
                # get the level
                if level is None:
                    level = getattr(handler, "level", None)
                # remove it
                logger.removeHandler(handler)
                removed_handlers.append(handler)

        # when at least one handler was found and removed, or force_add is True, add a rich handler
        if removed_handlers or force_add:
            # make sure the level is set
            if level is None:
                level = logging.INFO

            # add the rich handler
            logger.addHandler(rich_logging.RichHandler(level, **kwargs))

            # emit warning for colored_* configs
            cfg = Config.instance()
            opts = [(s, o) for s in ["task", "target"] for o in ["colored_str", "colored_repr"]]
            if any(cfg.get_expanded_boolean(*opt) for opt in opts):
                logger.warning_once(
                    "interfering_colors_in_rich_handler",
                    "law is currently configured to colorize string representations of tasks and "
                    "targets which might lead to malformed logs of the RichHandler; to avoid this, "
                    "consider updating your law configuration file ({}) to:\n"
                    "[task]\ncolored_repr: False\ncolored_str: False\n\n"
                    "[target]\ncolored_repr: False\ncolored_str: False".format(
                        cfg.config_file),
                )

        # add the removed handlers to the returned list
        if removed_handlers:
            ret.append((logger, removed_handlers))

    return ret