def test_messenger_indentation_with(capfd):
    """
    Check the `Messenger.indentation()` context manager.

    The indentation must change only inside the `with` block
    (either replaced via `indent` or increased via `add_indent`)
    and must be back at the original value afterwards.
    """
    printer = Messenger(verbose=True, indent=2, msg_fn=print, end="")

    def expected(num_spaces):
        # Build the expectation from an explicit space count, so the
        # amount of indentation being tested is visible in the code
        return " " * num_spaces + "some string"

    # Temporarily change indentation to 6 spaces
    with printer.indentation(indent=6):
        printer("some string")
    out, err = capfd.readouterr()
    assert out == expected(6)

    # Back to original indentation
    printer("some string")
    out, err = capfd.readouterr()
    assert out == expected(2)

    # Temporarily add 3 spaces to original indentation
    with printer.indentation(add_indent=3):
        printer("some string")
    # Gives 5 spaces of indentation
    out, err = capfd.readouterr()
    assert out == expected(5)

    # Back to original indentation
    printer("some string")
    out, err = capfd.readouterr()
    assert out == expected(2)
def test_messenger_print(capfd):
    """
    Check `Messenger` with `print` as the messaging function.

    Covers the default indentation, a per-call `indent` override
    and disabling output with a per-call `verbose=False`.
    """
    printer = Messenger(verbose=True, indent=2, msg_fn=print)

    parts = (
        "Ma name is not John",
        "Ma name is James",
        "I shall not repeat this",
    )
    # The arguments are expected to end up space-separated on a single
    # line, terminated by the newline that `print` adds by default
    joined = "Ma name is not John Ma name is James I shall not repeat this\n"

    # Default indentation (2 spaces)
    printer(*parts)
    out, err = capfd.readouterr()
    assert out == " " * 2 + joined

    # Override indentation (4 spaces)
    printer(*parts, indent=4)
    out, err = capfd.readouterr()
    assert out == " " * 4 + joined

    # Disable verbosity
    printer(*parts, verbose=False)
    out, err = capfd.readouterr()
    assert out == ""
def print_nan_stats(
    x: Union[np.ndarray, pd.DataFrame],
    message: str,
    messenger: Optional[Callable] = Messenger(
        verbose=True, indent=0, msg_fn=print),
    indent: Optional[int] = None,
) -> None:
    """
    Message the number and percentage of NaNs in an array.

    Parameters
    ----------
    x : `numpy.ndarray` or `pandas.DataFrame`
        The array / data frame whose NaNs are counted
        (counting is delegated to `nan_stats()`).
    message : str
        Text placed in front of the statistics.
        The messaged text is:
            `message + ": " + num NaNs + " (" + percentage + "%)"`
        with the indentation handled by the messenger.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function
        (e.g. `print` or `log.info`) and the indentation
        when `indent` is `None`.
    indent : int or None
        Indentation of the message, forwarded to the messenger call.
        When `None`, the indentation is determined by `messenger`.
    """
    # Always work with a Messenger instance, also when `None` was passed
    send = check_messenger(messenger)

    nan_count, nan_percentage = nan_stats(x)
    stats_line = f"{message}: {nan_count} ({nan_percentage}%)"
    send(stats_line, indent=indent)
def test_messenger_logger(capfd, caplog):
    """
    Check `Messenger` with a `logging` method as the messaging function.

    Covers the default indentation, a per-call `indent` override,
    multiple positional arguments and disabling output with a
    per-call `verbose=False`.
    """
    logging.basicConfig()
    _logger = logging.getLogger("Mr.Logger")
    _logger.setLevel(logging.INFO)
    logger = Messenger(verbose=True, indent=2, msg_fn=_logger.info)

    def get_last_log_message():
        # Most recent record captured by the `caplog` fixture
        return caplog.records[-1].getMessage()

    # Default indentation (2 spaces)
    logger("Ma name is not John")
    assert get_last_log_message() == " " * 2 + "Ma name is not John"

    # Override indentation (4 spaces)
    logger("Ma name is not John", indent=4)
    assert get_last_log_message() == " " * 4 + "Ma name is not John"

    # Multiple args
    logger("Ma name is not John", "Ma name is James", indent=4)
    assert get_last_log_message() == (
        " " * 4 + "Ma name is not John Ma name is James"
    )

    # Disable verbosity
    # Log records do not go through stdout, so checking stdout alone
    # cannot detect an unwanted log call; compare the number of
    # captured records before and after the call as well
    num_records_before = len(caplog.records)
    logger("Ma name is not John", verbose=False)
    assert len(caplog.records) == num_records_before
    out, err = capfd.readouterr()
    assert out == ""
def mk_dir(
    path: Union[str, pathlib.Path],
    arg_name: Union[str, None] = "",
    raise_on_exists: bool = False,
    messenger: Optional[Callable] = Messenger(
        verbose=True, indent=0, msg_fn=print),
):
    """
    Create a directory (including missing parents) unless the path exists.

    When the path already exists, the function either raises
    (`raise_on_exists=True`) or returns without doing anything.

    Parameters
    ----------
    path : str or `pathlib.Path`
        Path to the directory to create.
    arg_name : str or None
        Name of the path argument/variable. Used as prefix in the
        message about the directory creation and in error messages
        (after being prepared by `_prep_arg_name()`).
    raise_on_exists : bool
        Whether to raise a `FileExistsError` when the path already exists.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.

    Raises
    ------
    FileExistsError
        When `raise_on_exists` is `True` and the path exists, either
        at the initial check or when the creation is attempted.
    """
    dir_path = pathlib.Path(path)
    already_exists = dir_path.exists()

    # Prefix for messages and errors
    label = _prep_arg_name(arg_name)

    # Always work with a Messenger instance, also when `None` was passed
    send = check_messenger(messenger)

    # Existing path: fail when asked to, otherwise there is nothing to do
    if already_exists and raise_on_exists:
        raise FileExistsError(
            f"{label}directory already exists: {dir_path.resolve()}")
    if already_exists:
        return

    # Tell the user that a new directory is about to be created
    send(f"{label}directory does not exist and will be created: "
         f"{dir_path.resolve()}")

    try:
        dir_path.mkdir(parents=True, exist_ok=not raise_on_exists)
    except FileExistsError:
        # The path most likely appeared between the existence check
        # above and the creation attempt
        if not raise_on_exists:
            return
        raise FileExistsError(
            f"{label}directory already exists: {dir_path.resolve()}")
def rm_dir(
    path: Union[str, pathlib.Path],
    arg_name: Union[str, None] = "",
    raise_missing: bool = False,
    raise_not_dir: bool = True,
    shutil_ignore_errors: bool = False,
    shutil_onerror: Optional[Callable] = None,
    messenger: Optional[Callable] = Messenger(
        verbose=True, indent=0, msg_fn=print),
):
    """
    Remove a directory and its contents with `shutil.rmtree()`.

    Nothing is removed when the path does not exist or
    is not a directory.

    Parameters
    ----------
    path : str or `pathlib.Path`
        Path to the directory to remove.
    arg_name : str or None
        Name of the path argument/variable. Used as prefix in the
        message about the directory removal and in error messages
        (after being prepared by `_prep_arg_name()`).
    raise_missing : bool
        Whether to raise a `RuntimeError` when the path does not exist.
    raise_not_dir : bool
        Whether to raise a `RuntimeError` when the path exists
        but is not a directory.
    shutil_ignore_errors : bool
        Passed to the `ignore_errors` argument in `shutil.rmtree()`.
    shutil_onerror : callable or None
        Passed to the `onerror` argument in `shutil.rmtree()`.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.

    Raises
    ------
    RuntimeError
        When the path is missing and `raise_missing` is `True`, or when
        the path is not a directory and `raise_not_dir` is `True`.
    """
    target = pathlib.Path(path)
    target_exists = target.exists()

    # Prefix for messages and errors
    label = _prep_arg_name(arg_name)

    # Always work with a Messenger instance, also when `None` was passed
    send = check_messenger(messenger)

    # Missing path: fail when asked to, otherwise there is nothing to do
    if not target_exists:
        if raise_missing:
            raise RuntimeError(f"{label}path did not exist: {target}")
        return

    if raise_not_dir and not target.is_dir():
        raise RuntimeError(f"{label}path was not a directory: {target}")

    if target.is_dir():
        # Tell the user that the directory is about to be removed
        send(f"{label}directory will be removed: "
             f"{target.resolve()}")
        shutil.rmtree(
            target,
            ignore_errors=shutil_ignore_errors,
            onerror=shutil_onerror,
        )
def test_check_messenger():
    """
    Check `check_messenger()` for the three kinds of input:
    a `Messenger`, a subclass instance and `None`.
    """
    # A Messenger instance is returned as-is
    original = Messenger(verbose=True, indent=2, msg_fn=print)
    assert check_messenger(original) is original

    class SubMessenger(Messenger):
        def __init__(self) -> None:
            super().__init__(verbose=True, msg_fn=print, indent=0)

    # Instances of Messenger subclasses are returned as-is too
    from_subclass = SubMessenger()
    assert check_messenger(from_subclass) is from_subclass

    # `None` should give a Messenger with `verbose=False`
    assert isinstance(check_messenger(None), Messenger)
    silent = check_messenger(None)
    assert not silent.verbose
def rm_dir(
    self,
    name: str,
    rm_paths: bool = True,
    raise_on_fail: bool = True,
    messenger: Optional[Callable] = Messenger(
        verbose=True, indent=0, msg_fn=print),
) -> None:
    """
    Remove the directory stored under a path name from disk.

    Parameters
    ----------
    name : str
        Name of the path to the directory to remove from disk.
    rm_paths : bool
        Whether to also remove the paths within the removed directory,
        and the path to the directory itself, from this object
        (done via `.rm_paths_in_dir()`).
        NOTE: For files that need to exist (e.g. those in the
        `in_files` collection), keeping a path after its file was
        removed makes downstream checks of the paths
        (see `.check_paths()`) fail. Those checks are called
        as part of some of the methods.
    raise_on_fail : bool
        Whether to raise an error when the path does not exist
        or is not a directory.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.

    Raises
    ------
    ValueError
        When the path stored under `name` is `None`.
    """
    dir_path = self[name]
    if dir_path is None:
        raise ValueError(f"Path object for `{name}` was `None`.")

    # `raise_on_fail` controls both failure modes of the removal
    removal_kwargs = dict(
        path=dir_path,
        arg_name=f'{name} path',
        raise_missing=raise_on_fail,
        raise_not_dir=raise_on_fail,
        messenger=messenger,
    )
    remove_dir(**removal_kwargs)

    if not rm_paths:
        return
    self.rm_paths_in_dir(dir_path=dir_path, rm_dir=True)
def rm_tmp_dirs(
    self,
    rm_paths: bool = True,
    raise_on_fail: bool = True,
    messenger: Optional[Callable] = Messenger(
        verbose=True, indent=0, msg_fn=print),
) -> None:
    """
    Remove all temporary directories from disk.

    Each path name in the `tmp_dirs` collection is passed to `.rm_dir()`.

    Parameters
    ----------
    rm_paths : bool
        Whether to remove all paths that are within the removed
        directories and the paths to the directories themselves.
        Forwarded to `.rm_dir()`, which performs the path removal.
    raise_on_fail : bool
        Whether to raise an error when the path does not exist.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.
    """
    # TODO In case they are nested, we should check their existence
    # before deleting some of the directories, as that might
    # delete the existing ones
    # (I.e. find the top-level tmp dirs and remove those, and don't
    # try to remove those contained in them)

    # Snapshot the names first: removing paths may change the
    # collection while we are looping over it
    tmp_dir_names = list(self.get_collection(name="tmp_dirs").keys())

    # Delete each directory in `tmp_dirs`
    # `.rm_dir()` looks up the actual path for the name and, when
    # `rm_paths` is enabled, removes the paths within that directory
    for name in tmp_dir_names:
        self.rm_dir(
            name=name,
            rm_paths=rm_paths,
            raise_on_fail=raise_on_fail,
            messenger=messenger,
        )
def test_messenger_kwargs(capfd):
    """
    Check that keyword arguments reach the messaging function.

    Kwargs given at initialization are used for every call, kwargs
    given in a call apply to that call only.
    """
    # `end` is a kwarg
    # Normally `end='\n'` for `print()`
    printer = Messenger(verbose=True, indent=2, msg_fn=print, end="")

    # The messenger is configured with 2 spaces of indentation
    indented = " " * 2 + "some string"

    # Print with the kwargs set during initialization
    printer("some string")
    out, err = capfd.readouterr()
    assert out == indented

    # Print with call-specific kwargs
    printer("some string", end=" - Dudley")
    out, err = capfd.readouterr()
    assert out == indented + " - Dudley"

    # Check that defaults did not change
    printer("some string")
    out, err = capfd.readouterr()
    assert out == indented
def mk_output_dir(
    self,
    name: str,
    messenger: Optional[Callable] = Messenger(
        verbose=True, indent=0, msg_fn=print),
):
    """
    Create the non-existing output directory for a given path.

    The directory that is created is the parent of the path,
    i.e. for filepaths, the directory the file is located in.

    Parameters
    ----------
    name : str
        Name of the path to create the output directory for.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.
    """
    parent_dir = pathlib.Path(self.get_path(name=name)).parent
    mk_dir(path=parent_dir, arg_name=name, messenger=messenger)
def drop(data,
         value='NaN',
         thresh=0,
         direction='>',
         axis=0,
         include=None,
         exclude=None,
         copy=True,
         messenger: Optional[Callable] = Messenger(
             verbose=True, indent=0, msg_fn=print)):
    """
    Drop rows or columns from a pandas DataFrame based on their values.

    Rows / columns are dropped when a specific value, or any value,
    is represented too much, too little, etc.

    The arguments read as a command:
        'Drop [axis] if [value] appears [direction] than [thresh]
         percent of the time.'
    E.g.:
        'Drop columns if 0 appears more than 90 percent of the time.'
        'Drop rows if *any* value appears exactly 77 percent of the time.'

    Which rows / columns match is decided by `_find_exceeders()`.

    Parameters
    ----------
    data : pd.DataFrame
        The data frame to drop rows / columns from.
    value : str / int / float
        The value to match.
        Regular value, 'any', 'NaN', 'inf'.
        Cannot be `None`.
    thresh : float
        Threshold. Percentage between 0-1.
    direction : str
        Operator sign for the comparison.
        '>', '<', '>=', '<=', '=='.
    axis : int
        0 for columns, 1 for rows.
    include : list of strings
        Names of columns / indices of rows to search within.
        `None` means ALL are included unless otherwise
        specified, see *exclude*.
    exclude : list of strings
        Names of columns / indices of rows NOT to search within.
        `None` means no columns / rows are excluded unless
        otherwise specified, see *include*.
    copy : bool
        Whether to work on a copy of `data`.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.

    Returns
    -------
    pd.DataFrame
        The data frame without the dropped rows / columns.

    Raises
    ------
    ValueError
        When `value` is `None`, when `axis` is not 0 or 1, or when
        both `include` and `exclude` are specified.

    Examples
    --------
    Uncomment code to run.

    Remove all rows with any NaNs.
    # drop(data, value = 'NaN', axis = 1, thresh = 0,
    #      direction = '>')

    Remove all columns with only 1 unique value.
    I.e. the same value in 100% of the rows.
    # drop(data, value = 'any', axis = 0, thresh = 1,
    #      direction = '==')

    Remove all columns that have less than 30% NaNs.
    # drop(data, value = 'NaN', axis = 0, thresh = 0.3,
    #      direction = '<')
    """
    # Always work with a Messenger instance, also when `None` was passed
    send = check_messenger(messenger)

    if value is None:
        raise ValueError('value cannot be None.')
    if axis not in (0, 1):
        raise ValueError("`axis` must be 0 or 1")

    # Optionally work on a copy of the data frame
    frame = data.copy() if copy else data

    if include is not None and exclude is not None:
        raise ValueError("Either include or exclude must be None.")

    if axis == 0:
        # Columns
        # An exclusion list is turned into the equivalent inclusion list
        if exclude is not None:
            include = [
                name for name in frame.columns if name not in exclude
            ]

        # Only search within the included columns (when specified)
        if include is None:
            searched = frame
        else:
            searched = frame.filter(items=include)

        exceeders = _find_exceeders(
            searched, value, thresh, direction, axis=axis)

        send(f'Dropped {len(exceeders)} columns.')
        return frame.drop(exceeders, axis=1)

    # Rows (`axis` is 1 here, as it passed the validation above)
    exceeders = _find_exceeders(frame, value, thresh, direction, axis=axis)

    # Restrict the found rows to the included / non-excluded ones
    # TODO use sets instead
    if exclude is not None:
        exceeders = [idx for idx in exceeders if idx not in exclude]
    elif include is not None:
        exceeders = [idx for idx in exceeders if idx in include]

    send(f'Dropped {len(exceeders)} rows.')
    return frame.drop(frame.index[exceeders], axis=0)
def mk_output_dirs(
    self,
    collection: Optional[str] = None,
    messenger: Optional[Callable] = Messenger(
        verbose=True, indent=0, msg_fn=print),
):
    """
    Create non-existing output directories.

    For filepaths, it creates the directory the file is located in.

    Parameters
    ----------
    collection : str or None
        Name of collection to create output directories for.
        One of: ('out_dirs', 'out_files', 'tmp_dirs', 'tmp_files')
        When `None`, directories are created for all four collections.
    messenger : `utipy.Messenger` or None
        A `utipy.Messenger` instance used to print/log/... information.
        When `None`, no printing/logging is performed.
        The messenger determines the messaging function (e.g. `print`)
        and potential indentation.

    Raises
    ------
    ValueError
        When `collection` is not one of the output path collections,
        or when a selected collection is `None`.
    """
    # Collections in the order they are processed, along with whether
    # their paths point to files (in which case the directory to
    # create is the parent of the path)
    output_collections = (
        ("out_dirs", False),
        ("out_files", True),
        ("tmp_dirs", False),
        ("tmp_files", True),
    )

    if collection is not None and collection not in [
        coll_name for coll_name, _ in output_collections
    ]:
        raise ValueError(
            f"`collection` must be one of the output path collections but was {collection}."
        )

    # Create output directories if they don't exist
    for coll_name, paths_are_files in output_collections:
        # When a collection was specified, only process that one
        if collection is not None and collection != coll_name:
            continue

        paths = self.get_collection(coll_name)
        if paths is None:
            raise ValueError(f"`{coll_name}` collection was `None`.")

        for path_name, path in paths.items():
            # For files, get the directory the file should be placed in
            dir_path = pathlib.Path(path).parent if paths_are_files else path
            mk_dir(path=dir_path, arg_name=path_name, messenger=messenger)