class Cursor(attrib.Container):
    """Cursor Class.

    Args:
        index: List of row IDs, that are traversed by the cursor. By default
            the attribute '_index' of the parent object is used.
        getter: Callable, which is called with a row ID and returns the
            corresponding row.
        predicate: Optional callable, which is called with a row. Rows for
            which it returns a false value are skipped by the cursor.
        mapper: Optional callable, which is applied to every matching row
            before it is returned.
        batchsize: Default number of rows, fetched by :meth:`.fetch`. If
            omitted (or zero), the default of :py:attr:`.batchsize` is kept.
        mode: Named string identifier for the cursor :py:attr:`.mode`. By
            default an indexed cursor without a scrolling flag is created.
            Note: After initializing the cursor, its mode can not be changed
            anymore.
        parent: Optional parent container, which is passed to the
            constructor of the base class.

    """

    #
    # Protected Class Variables
    #

    _default_mode: ClassVar[int] = CUR_MODE_INDEXED

    #
    # Public Attributes
    #

    mode: property = attrib.Virtual(fget='_get_mode')
    mode.__doc__ = """
    The read-only string attribute *cursor mode* specifies the space separated
    *scrolling type* and the *operation mode* of the cursor. Supported
    scrolling types are:

    :forward-only: The default scrolling type of cursors is called a
        forward-only cursor and can move only forward through the result set.
        A forward-only cursor does not support scrolling but only fetching
        rows from the start to the end of the result set.
    :scrollable: A scrollable cursor is commonly used in screen-based
        interactive applications, like spreadsheets, in which users are
        allowed to scroll back and forth through the result set. However,
        applications should use scrollable cursors only when forward-only
        cursors will not do the job, as scrollable cursors are generally more
        expensive, than forward-only cursors.
    :random: Random cursors move randomly through the result set. In
        difference to a randomly sorted cursor, the rows are not unique and
        the number of fetched rows is not limited to the size of the result
        set. If the method :meth:`.fetch` is called with a zero value for
        size, a CursorModeError is raised.

    Supported operation modes are:

    :dynamic: A **dynamic cursor** is built on-the-fly and therefore comprises
        any changes made to the rows in the result set during it's traversal,
        including new appended rows and the order of it's traversal. This
        behaviour is regardless of whether the changes occur from inside the
        cursor or by other users from outside the cursor. Dynamic cursors are
        threadsafe but do not support counting filtered rows or sorting rows.
    :indexed: Indexed cursors (aka Keyset-driven cursors) are built
        on-the-fly with respect to an initial copy of the table index and
        therefore comprise changes made to the rows in the result set during
        it's traversal, but not new appended rows nor changes within their
        order. Keyset driven cursors are threadsafe but do not support sorting
        rows or counting filtered rows.
    :static: Static cursors are buffered and built during it's creation time
        and therfore always display the result set as it was when the cursor
        was first opened. Static cursors are not threadsafe but support
        counting the rows with respect to a given filter and sorting the rows.
    """

    batchsize: property = attrib.MetaData(classinfo=int, default=1)
    """
    The read-writable integer attribute *batchsize* specifies the default
    number of rows which is to be fetched by the method :meth:`.fetch`. It
    defaults to 1, meaning to fetch a single row at a time. Whether and which
    batchsize to use depends on the application and should be considered with
    care. The batchsize can also be adapted during the lifetime of the cursor,
    which allows dynamic performance optimization.
    """

    rowcount: property = attrib.Virtual(fget='_get_rowcount')
    """
    The read-only integer attribute *rowcount* specifies the current number of
    rows within the cursor.
    """

    #
    # Protected Attributes
    #

    _mode: property = attrib.MetaData(classinfo=int, default=_default_mode)
    _index: property = attrib.MetaData(classinfo=list, inherit=True)
    _getter: property = attrib.Temporary(classinfo=CallableClasses)
    _filter: property = attrib.Temporary(classinfo=CallableClasses)
    _mapper: property = attrib.Temporary(classinfo=CallableClasses)
    _buffer: property = attrib.Temporary(classinfo=list, default=[])

    #
    # Events
    #

    def __init__(
            self, index: OptIntList = None, getter: OptCallable = None,
            predicate: OptCallable = None, mapper: OptCallable = None,
            batchsize: OptInt = None, mode: OptStr = None,
            parent: Optional[attrib.Container] = None) -> None:
        """Initialize Cursor."""
        super().__init__(parent=parent) # Parent is set by container
        if index is not None:
            self._index = index
        self._getter = getter
        self._filter = predicate
        self._mapper = mapper
        if mode:
            self._set_mode(mode)
        if batchsize:
            self.batchsize = batchsize
        # The index has to be copied before the buffer is built from it
        if self._mode & CUR_MODE_INDEXED:
            self._create_index()
        if self._mode & CUR_MODE_BUFFERED:
            self._create_buffer()
        self.reset() # Initialize iterator

    def __iter__(self) -> Iterator:
        """Reset the cursor and return it as its own iterator."""
        self.reset()
        return self

    def __next__(self) -> RowLike:
        """Return the next row of the cursor."""
        return self.next()

    def __len__(self) -> int:
        """Return the value of the attribute :py:attr:`.rowcount`."""
        return self.rowcount

    #
    # Public Methods
    #

    def reset(self) -> None:
        """Reset cursor position before the first record."""
        mode = self._mode
        if mode & CUR_MODE_BUFFERED: # Iterate over fixed result set
            self._iter_buffer = iter(self._buffer)
        elif mode & CUR_MODE_INDEXED: # Iterate over fixed index
            self._iter_index = iter(self._index)
        else: # TODO: handle case for dynamic cursors by self._iter_table
            self._iter_index = iter(self._index)

    def next(self) -> RowLike:
        """Return next row that matches the given filter.

        Raises:
            StopIteration: If the cursor has no further rows.

        """
        mode = self._mode
        if mode & CUR_MODE_BUFFERED:
            return self._get_next_from_buffer()
        if mode & CUR_MODE_INDEXED:
            return self._get_next_from_fixed_index()
        # TODO: For dynamic cursors implement _get_next_from_dynamic_index()
        return self._get_next_from_fixed_index()

    def fetch(self, size: OptInt = None) -> RowLikeList:
        """Fetch rows from the result set.

        Args:
            size: Integer value, which represents the number of rows, which is
                fetched from the result set. For the given size 0 all
                remaining rows from the result set are fetched. By default the
                number of rows is given by the cursors batchsize.

        Returns:
            List of the fetched rows. The list contains fewer rows than
            requested, if the cursor is exhausted beforehand.

        Raises:
            CursorModeError: If a random cursor is requested to fetch all
                rows, i.e. if the size is not positive.

        """
        if size is None:
            size = self.batchsize
        if self._mode & CUR_MODE_RANDOM and size <= 0:
            raise CursorModeError(self.mode, 'fetching all rows')
        finished = False
        results: RowLikeList = []
        while not finished:
            try:
                results.append(self.next())
            except StopIteration:
                finished = True
            else:
                # A non-positive size never satisfies this condition, such
                # that rows are fetched until the cursor is exhausted
                finished = 0 < size <= len(results)
        return results

    #
    # Protected Methods
    #

    def _get_next_from_fixed_index(self) -> RowLike:
        """Return the next matching row, addressed by the index.

        Raises:
            StopIteration: If the index is exhausted or, for random cursors,
                if the index is empty.

        """
        is_random = self._mode & CUR_MODE_RANDOM
        if is_random and not self._index:
            # Nothing to draw from: Behave like an exhausted cursor instead of
            # failing with an unrelated error of the random module
            raise StopIteration
        matches = False
        while not matches:
            if is_random:
                # Draw a row ID from the index. The position within the index
                # is not a row ID, unless the index equals 0, 1, ..., n - 1
                row_id = random.choice(self._index)
            else:
                row_id = next(self._iter_index)
            row = self._getter(row_id)
            if self._filter:
                matches = self._filter(row)
            else:
                matches = True
        if self._mapper:
            return self._mapper(row)
        return row

    def _get_next_from_buffer(self) -> RowLike:
        """Return the next row from the buffered result set.

        Raises:
            StopIteration: If the buffer is exhausted or, for random cursors,
                if the buffer is empty.

        """
        if self._mode & CUR_MODE_RANDOM:
            if not self._buffer:
                raise StopIteration
            return random.choice(self._buffer)
        return next(self._iter_buffer)

    def _get_mode(self) -> str:
        """Return the space separated names of the cursor mode flags."""
        mode = self._mode
        tokens = []

        # Add name of traversal mode
        if mode & CUR_MODE_RANDOM:
            tokens.append('random')
        elif mode & CUR_MODE_SCROLLABLE:
            tokens.append('scrollable')

        # Add name of operation mode
        if mode & CUR_MODE_BUFFERED:
            tokens.append('static')
        elif mode & CUR_MODE_INDEXED:
            tokens.append('indexed')
        else:
            tokens.append('dynamic')

        return ' '.join(tokens)

    def _set_mode(self, name: str) -> None:
        """Set the cursor mode flags from a cursor mode name.

        Args:
            name: String, which is searched (case insensitive) for the names
                'random' or 'scrollable' and 'static' or 'indexed'. If no
                operation mode is found, no operation mode flag is set.

        """
        mode = 0
        name = name.strip(' ').lower()

        # Set traversal mode flags
        if 'random' in name:
            mode |= CUR_MODE_RANDOM
        elif 'scrollable' in name:
            mode |= CUR_MODE_SCROLLABLE

        # Set operation mode flags
        if 'static' in name:
            mode |= CUR_MODE_BUFFERED | CUR_MODE_INDEXED
        elif 'indexed' in name:
            mode |= CUR_MODE_INDEXED

        self._mode = mode

    def _get_rowcount(self) -> int:
        """Return the number of rows within the cursor.

        Raises:
            CursorModeError: If the cursor is a random cursor or if it is an
                unbuffered cursor with a filter.

        """
        mode = self._mode
        if mode & CUR_MODE_RANDOM:
            raise CursorModeError(self.mode, 'counting rows')
        if mode & CUR_MODE_BUFFERED:
            return len(self._buffer)
        if self._filter:
            raise CursorModeError(self.mode, 'counting filtered rows')
        return len(self._index)

    def _create_index(self) -> None:
        """Replace the index by a copy of itself."""
        self._index = self._index.copy()

    def _create_buffer(self) -> None:
        """Fill the buffer with all rows of the result set."""
        # Create a helper cursor in default mode, which shares the index,
        # getter, filter and mapper of this cursor
        cur = self.__class__(
            index=self._index, getter=self._getter,
            predicate=self._filter, mapper=self._mapper)
        self._buffer = cur.fetch(0) # Fetch all from result set
class Cursor(attrib.Container, ABC):
    """Database Cursor.

    These objects represent a database cursor, which is used to manage the
    context of a fetch operation. Cursors created from the same connection are
    not isolated, i.e., any changes done to the database by a cursor are
    immediately visible by the other cursors. Cursors created from different
    connections can or can not be isolated, depending on how the transaction
    support is implemented (see also the connection's .rollback() and .commit()
    methods).

    """

    #
    # Cursor attributes
    #

    arraysize: property = attrib.MetaData(classinfo=int, default=1)
    arraysize.__doc__ = """
    This read/write attribute specifies the number of rows to fetch at a time
    with `fetchmany`. It defaults to 1 meaning to fetch a single row at a time.
    Implementations must observe this value with respect to the `fetchmany`
    method, but are free to interact with the database a single row at a time.
    It may also be used in the implementation of `executemany`.
    """

    description: property = attrib.Virtual(fget='_get_description')
    description.__doc__ = """
    Sequence of 7-item sequences containing information about one result
    column: name, type_code, display_size, internal_size, precision, scale,
    null_ok

    The first two items (name and type_code) are mandatory, the other five are
    optional and are set to None if no meaningful values can be provided. This
    attribute will be None for operations that do not return rows or if the
    cursor has not had an operation invoked via the .execute*() method yet.
    """

    @abstractmethod
    def _get_description(self) -> list:
        """Return the value of the attribute `description`."""
        pass

    rowcount: property = attrib.Virtual(fget='_get_rowcount')
    # The documentation belongs to 'rowcount'. Assigning it to 'description'
    # would overwrite the documentation of the attribute 'description'.
    rowcount.__doc__ = """
    This read-only attribute specifies the number of rows that the last
    execute*() produced (for DQL statements like SELECT) or affected (for DML
    statements like UPDATE or INSERT). The attribute is -1 in case no
    .execute*() has been performed on the cursor or the rowcount of the last
    operation is cannot be determined by the interface.
    """

    @abstractmethod
    def _get_rowcount(self) -> int:
        """Return the value of the attribute `rowcount`."""
        pass

    #
    # Cursor Methods
    #

    @abstractmethod
    def callproc(self, procname: str, *args: Any, **kwds: Any) -> Any:
        """Call stored database procedure.

        Call a stored database procedure with the given name. The sequence of
        parameters must contain one entry for each argument, that the procedure
        expects. The result of the call is returned as modified copy of the
        input sequence. Input parameters are left untouched, output and
        input/output parameters replaced with possibly new values.

        The procedure may also provide a result set as output. This must then
        be made available through the standard .fetch*() methods.

        If the database does not support the functionality required by the
        method, the interface should throw an exception in case the method is
        used.

        """
        pass

    @abstractmethod
    def close(self) -> None:
        """Close the cursor now (rather than whenever __del__ is called).

        The cursor will be unusable from this point forward; an Error (or
        subclass) exception will be raised if any operation is attempted with
        the cursor.

        """
        pass

    @abstractmethod
    def execute(self, operation: str, *args: Any) -> Any:
        """Prepare and execute a database operation (query or command).

        Parameters may be provided as sequence or mapping and will be bound to
        variables in the operation. Variables are specified in a
        database-specific notation, which is identified by the module global
        `paramstyle`.

        A reference to the operation will be retained by the cursor. If the
        same operation object is passed in again, then the cursor can optimize
        its behavior. This is most effective for algorithms where the same
        operation is used, but different parameters are bound to it.

        For maximum efficiency when reusing an operation, it is best to use the
        `setinputsizes` method to specify the parameter types and sizes ahead
        of time. It is legal for a parameter to not match the predefined
        information; the implementation should compensate, possibly with a loss
        of efficiency.

        The parameters may also be specified as list of tuples to e.g. insert
        multiple rows in a single operation, but this kind of usage is
        deprecated: .executemany() should be used instead.

        """
        pass

    @abstractmethod
    def executemany(self, operation: str, seq_of_parameters: list) -> Any:
        """Prepare and execute database operation for multiple parameters.

        Prepare a database operation (query or command) and then execute it
        against all parameter sequences or mappings found in the sequence
        *seq_of_parameters*.

        Modules are free to implement this method using multiple calls to the
        `execute` method or by using array operations to have the database
        process the sequence as a whole in one call.

        Use of this method for an operation which produces one or more result
        sets constitutes undefined behavior, and the implementation is
        permitted (but not required) to raise an exception when it detects that
        a result set has been created by an invocation of the operation.

        """
        pass

    @abstractmethod
    def fetchone(self) -> OptList:
        """Fetch the next row of a query result.

        Fetch the next row of a query result set, returning a single sequence,
        or None when no more data is available.

        An Error (or subclass) exception is raised if the previous call to
        `execute` did not produce any result set or no call was issued yet.

        """
        pass

    @abstractmethod
    def fetchmany(self, size: OptInt = None) -> list:
        """Fetch the next set of rows of a query result.

        Fetch the next set of rows of a query result, returning a sequence of
        sequences (e.g. a list of tuples). An empty sequence is returned when
        no more rows are available.

        The number of rows to fetch per call is specified by the parameter. If
        it is not given, the cursor's arraysize determines the number of rows
        to be fetched. The method should try to fetch as many rows as indicated
        by the size parameter. If this is not possible due to the specified
        number of rows not being available, fewer rows may be returned.

        An Error (or subclass) exception is raised if the previous call to
        `execute` did not produce any result set or no call was issued yet.

        Note there are performance considerations involved with the size
        parameter. For optimal performance, it is usually best to use the
        `arraysize` attribute. If the size parameter is used, then it is best
        for it to retain the same value from one `fetchmany` call to the next.

        """
        pass

    @abstractmethod
    def fetchall(self) -> list:
        """Fetch all remaining rows of a query result.

        Fetch all remaining rows of a query result, returning them as a
        sequence of sequences (e.g. a list of tuples). Note that the cursor's
        arraysize attribute can affect the performance of this operation.

        An Error (or subclass) exception is raised if the previous call to
        `execute` did not produce any result set or no call was issued yet.

        """
        pass

    @abstractmethod
    def nextset(self) -> OptBool:
        """Skip cursor to the next available set (if supported).

        This method will make the cursor skip to the next available set,
        discarding any remaining rows from the current set.

        If there are no more sets, the method returns None. Otherwise, it
        returns a true value and subsequent calls to the `fetch*` methods will
        return rows from the next result set.

        An Error (or subclass) exception is raised if the previous call to an
        `execute*` method did not produce any result set or no call was issued
        yet.

        If the database does not support the functionality required by the
        method, the interface should throw an exception in case the method is
        used.

        """
        pass

    @abstractmethod
    def setinputsizes(self, sizes: list) -> None:
        """Set input sizes for database operations (query or command).

        This can be used before a call to `execute*` to predefine memory areas
        for the operation's parameters.

        *sizes* is specified as a sequence with one item for each input
        parameter. The item should be a type object that corresponds to the
        input that will be used, or it should be an integer specifying the
        maximum length of a string parameter. If the item is None, then no
        predefined memory area will be reserved for that column (this is useful
        to avoid predefined areas for large inputs).

        This method would be used before the `execute*` method is invoked.

        Implementations are free to have this method do nothing and users are
        free to not use it.

        """
        pass

    @abstractmethod
    def setoutputsize(self, size: int, column: OptInt = None) -> None:
        """Set a column buffer size for fetches of large columns.

        The column is specified as an index into the result sequence. Not
        specifying the column will set the default size for all large columns
        in the cursor.

        This method would be used before an `execute*` method is invoked.

        Implementations are free to have this method do nothing and users are
        free to not use it.

        """
        pass
class Session(attrib.Container):
    """Session.

    A session binds a workspace file to a session configuration and forwards
    the file operations to the current workspace.

    Args:
        workspace: Optional string or :term:`path-like object`, that
            identifies the workspace file, which is loaded on startup. If it
            is not given and the configuration value 'restore_on_startup' is
            set, the configuration value 'path' is used instead.
        basedir: Optional base directory of the workspace file, see
            :meth:`.load`.
        pwd: Bytes representing password of workspace file.

    """

    #
    # Private Class Variables
    #

    _config_file_path: ClassVar[str] = '%user_config_dir%/nemoa.ini'
    _config_file_struct: ClassVar[SecDict] = {
        'session': {
            'path': Path,
            'restore_on_startup': bool,
            'autosave_on_exit': bool}}
    _default_config: ClassVar[StrDict] = {
        'path': None,
        'restore_on_startup': False,
        'autosave_on_exit': False}
    _default_paths: ClassVar[StrList] = [
        '%user_data_dir%', '%site_data_dir%', '%package_data_dir%']

    #
    # Public Attributes and Attribute Groups
    #

    dc: attrib.Group = attrib.create_group(attrib.DCGroup, remote=True)

    config: property = attrib.MetaData(classinfo=dict)
    config.__doc__ = """Session configuration."""

    paths: property = attrib.MetaData(classinfo=list)
    paths.__doc__ = """Search paths for workspaces."""

    files: property = attrib.Virtual(fget='_get_files')
    files.__doc__ = """Files within the current workspace."""

    folders: property = attrib.Virtual(fget='_get_folders')
    folders.__doc__ = """Folders within the current workspace."""

    path: property = attrib.Virtual(fget='_get_path')
    path.__doc__ = """Filepath of the current workspace."""

    logger: property = attrib.Temporary(classinfo=log.Logger)
    logger.__doc__ = """Logger instance."""

    #
    # Protected Attributes
    #

    _ws: property = attrib.Content(classinfo=wsfile.WsFile)

    #
    # Events
    #

    def __init__(
            self, workspace: OptPathLike = None, basedir: OptPathLike = None,
            pwd: OptBytes = None) -> None:
        """Initialize instance variables and load workspace from file."""
        super().__init__()

        # Initialize instance variables with default values
        self.config = self._default_config.copy()
        self._ws = wsfile.WsFile()
        self.paths = [env.expand(path) for path in self._default_paths]
        self.logger = log.get_instance()

        # Bind session to workspace
        self.parent = self._ws

        # Load session configuration from file
        if env.is_file(self._config_file_path):
            self._load_config()

        # Load workspace from file. An explicitly given workspace takes
        # precedence over the workspace path of the configuration
        filepath: OptPath = None
        if workspace and isinstance(workspace, (Path, str)):
            filepath = Path(workspace)
        elif self.config.get('restore_on_startup'):
            cfg_path = self.config.get('path')
            if isinstance(cfg_path, (Path, str)):
                filepath = Path(cfg_path)
        if isinstance(filepath, Path):
            self.load(workspace=filepath, basedir=basedir, pwd=pwd)

    def __enter__(self) -> 'Session':
        """Enter with statement."""
        return self

    def __exit__(self, cls: ExcType, obj: Exc, tb: Traceback) -> None:
        """Exit with statement.

        The session configuration is saved, even if closing the workspace
        raises an exception.

        """
        try:
            self.close() # Close Workspace
        finally:
            self._save_config() # Save config

    def __del__(self) -> None:
        """Run destructor for instance."""

    #
    # Public Methods
    #

    def load(
            self, workspace: OptPathLike = None, basedir: OptPathLike = None,
            pwd: OptBytes = None) -> None:
        """Load Workspace from file.

        Args:
            workspace: String or :term:`path-like object`, that identifies the
                workspace file. If no basedir is given, a path to an existing
                file is used directly and any other path is searched within
                the directories, given by the attribute :py:attr:`.paths`. If
                the workspace is omitted, a workspace without a filepath is
                created.
            basedir: Optional base directory. If given, the workspace path is
                taken relative to this directory and the search paths are
                ignored.
            pwd: Bytes representing password of workspace file.

        Raises:
            FileNotFoundError: If no basedir is given and the workspace could
                neither be located as a file nor within the search paths.

        """
        path = self._locate_path(workspace=workspace, basedir=basedir)
        self._ws = wsfile.WsFile(filepath=path, pwd=pwd)
        self.parent = self._ws # Rebind session to the new workspace

    def save(self) -> None:
        """Save Workspace to current file."""
        self._ws.save()

    def saveas(self, filepath: PathLike) -> None:
        """Save the workspace to a file.

        Args:
            filepath: String or :term:`path-like object`, that represents the
                name of a workspace file.

        """
        self._ws.saveas(filepath)

    def close(self) -> None:
        """Close current session.

        If the configuration value 'autosave_on_exit' is set and the workspace
        reports changes, the workspace is saved before it is closed.

        """
        if self.config.get('autosave_on_exit') and self._ws.changed:
            self.save()
        if hasattr(self._ws, 'close'):
            self._ws.close()

    def get_file_accessor(self, path: PathLike) -> FileAccessorBase:
        """Get file accessor to workspace member.

        Args:
            path: String or :term:`path-like object`, that represents a
                workspace member.

        Returns:
            :class:`File accessor <nemoa.types.FileAccessorBase>` to workspace
            member.

        """
        return self._ws.get_file_accessor(path)

    def open(
            self, filepath: PathLike, workspace: OptPathLike = None,
            basedir: OptPathLike = None, pwd: OptBytes = None, mode: str = '',
            encoding: OptStr = None, is_dir: bool = False) -> FileLike:
        """Open file within current or given workspace.

        Args:
            filepath: String or :term:`path-like object`, that represents a
                workspace member. In reading mode the path has to point to a
                valid workspace file, or a FileNotFoundError is raised. In
                writing mode the path by default is treated as a file path. New
                directories can be written by setting the argument is_dir to
                True.
            workspace: Optional string or :term:`path-like object`, that
                identifies a workspace file, which is used instead of the
                current workspace. It is located as described in
                :meth:`.load`.
            basedir: Optional base directory of the given workspace file. The
                argument is only used, if a workspace is given.
            pwd: Bytes representing password of the given workspace file. The
                argument is only used, if a workspace is given.
            mode: String, which characters specify the mode in which the file
                is to be opened. The default mode is reading in text mode.
                Supported characters are:
                'r': Reading mode (default)
                'w': Writing mode
                'b': Binary mode
                't': Text mode (default)
            encoding: In binary mode encoding has not effect. In text mode
                encoding specifies the name of the encoding, which in reading
                and writing mode respectively is used to decode the stream’s
                bytes into strings, and to encode strings into bytes. By default
                the preferred encoding of the operating system is used.
            is_dir: Boolean value which determines, if the path is to be
                treated as a directory or not. This information is required for
                writing directories to the workspace. The default behaviour is
                not to treat paths as directories.

        Returns:
            Context manager for :term:`file object` in reading or writing mode.

        """
        if workspace:
            path = self._locate_path(workspace=workspace, basedir=basedir)
            ws = wsfile.WsFile(filepath=path, pwd=pwd)
            return ws.open(
                filepath, mode=mode, encoding=encoding, is_dir=is_dir)
        return self._ws.open(
            filepath, mode=mode, encoding=encoding, is_dir=is_dir)

    def append(self, source: PathLike, target: OptPathLike = None) -> bool:
        """Append file to the current workspace.

        Args:
            source: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the system. If the file does
                not exist, a FileNotFoundError is raised. If the filepath points
                to a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a valid
                directory in the directory structure of the workspace. By
                default the root directory is used. If the directory does not
                exist, a FileNotFoundError is raised. If the target directory
                already contains a file, which name equals the filename of the
                source, a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file has been appended.

        """
        return self._ws.append(source, target=target)

    def unlink(self, filepath: PathLike, ignore_missing: bool = True) -> bool:
        """Remove file from the current workspace.

        Args:
            filepath: String or :term:`path-like object`, that points to a file
                in the directory structure of the workspace. If the filepath
                points to a directory, an IsADirectoryError is raised. For the
                case, that the file does not exist, the argument ignore_missing
                determines, if a FileNotFoundError is raised.
            ignore_missing: Boolean value which determines, if FileNotFoundError
                is raised, if the target file does not exist. The default
                behaviour, is to ignore missing files.

        Returns:
            Boolean value, which is True if the given file was removed.

        """
        return self._ws.unlink(filepath, ignore_missing=ignore_missing)

    def mkdir(self, dirpath: PathLike, ignore_exists: bool = False) -> bool:
        """Create a new directory in current workspace.

        Args:
            dirpath: String or :term:`path-like object`, that represents a valid
                directory name in the directory structure of the workspace. If
                the directory already exists, the argument ignore_exists
                determines, if a FileExistsError is raised.
            ignore_exists: Boolean value which determines, if FileExistsError is
                raised, if the target directory already exists. The default
                behaviour is to raise an error, if the file already exists.

        Returns:
            Boolean value, which is True if the given directory was created.

        """
        return self._ws.mkdir(dirpath, ignore_exists=ignore_exists)

    def rmdir(
            self, dirpath: PathLike, recursive: bool = False,
            ignore_missing: bool = False) -> bool:
        """Remove directory from current workspace.

        Args:
            dirpath: String or :term:`path-like object`, that points to a
                directory in the directory structure of the workspace. If the
                directory does not exist, the argument ignore_missing
                determines, if a FileNotFoundError is raised.
            recursive: Boolean value which determines, if directories are
                removed recursively. If recursive is False, then only empty
                directories can be removed. If recursive, however, is True, then
                all files and subdirectories are also removed. By default
                recursive is False.
            ignore_missing: Boolean value which determines, if FileNotFoundError
                is raised, if the target directory does not exist. The default
                behaviour, is to raise an error if the directory is missing.

        Returns:
            Boolean value, which is True if the given directory was removed.

        """
        return self._ws.rmdir(
            dirpath, recursive=recursive, ignore_missing=ignore_missing)

    def search(self, pattern: OptStr = None) -> StrList:
        """Search for files in the current workspace.

        Args:
            pattern: Search pattern that contains Unix shell-style wildcards:
                '*': Matches arbitrary strings
                '?': Matches single characters
                [seq]: Matches any character in seq
                [!seq]: Matches any character not in seq
                By default a list of all files and directories is returned.

        Returns:
            List of files and directories in the directory structure of the
            workspace, that match the search pattern.

        """
        return self._ws.search(pattern)

    def copy(self, source: PathLike, target: PathLike) -> bool:
        """Copy file within current workspace.

        Args:
            source: String or :term:`path-like object`, that points to a file in
                the directory structure of the workspace. If the file does not
                exist, a FileNotFoundError is raised. If the filepath points to
                a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a new
                filename or an existing directory in the directory structure of
                the workspace. If the target is a directory the target file
                consists of the directory and the basename of the source file.
                If the target file already exists a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file was copied.

        """
        return self._ws.copy(source, target)

    def move(self, source: PathLike, target: PathLike) -> bool:
        """Move file within current workspace.

        Args:
            source: String or :term:`path-like object`, that points to a file in
                the directory structure of the workspace. If the file does not
                exist, a FileNotFoundError is raised. If the filepath points to
                a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a new
                filename or an existing directory in the directory structure of
                the workspace. If the target is a directory the target file
                consists of the directory and the basename of the source file.
                If the target file already exists a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file has been moved.

        """
        return self._ws.move(source, target)

    def read_text(self, filepath: PathLike, encoding: OptStr = None) -> str:
        """Read text from file in current workspace.

        Args:
            filepath: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the workspace. If the file
                does not exist a FileNotFoundError is raised.
            encoding: Specifies the name of the encoding, which is used to
                decode the stream’s bytes into strings. By default the preferred
                encoding of the operating system is used.

        Returns:
            Contents of the given filepath encoded as string.

        """
        return self._ws.read_text(filepath, encoding=encoding)

    def read_bytes(self, filepath: PathLike) -> bytes:
        """Read bytes from file in current workspace.

        Args:
            filepath: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the workspace. If the file
                does not exist a FileNotFoundError is raised.

        Returns:
            Contents of the given filepath as bytes.

        """
        return self._ws.read_bytes(filepath)

    def write_text(
            self, text: str, filepath: PathLike,
            encoding: OptStr = None) -> int:
        """Write text to file.

        Args:
            text: String, which has to be written to the given file.
            filepath: String or :term:`path-like object`, that represents a
                valid filename in the directory structure of the workspace.
            encoding: Specifies the name of the encoding, which is used to
                encode strings into bytes. By default the preferred encoding of
                the operating system is used.

        Returns:
            Number of characters, that are written to the file.

        """
        return self._ws.write_text(text, filepath, encoding=encoding)

    def write_bytes(self, data: BytesLike, filepath: PathLike) -> int:
        """Write bytes to file.

        Args:
            data: Bytes, which are to be written to the given file.
            filepath: String or :term:`path-like object`, that represents a
                valid filename in the directory structure of the workspace.

        Returns:
            Number of bytes, that are written to the file.

        """
        return self._ws.write_bytes(data, filepath)

    def log(self, level: StrOrInt, msg: str, *args: Any, **kwds: Any) -> None:
        """Log event.

        Args:
            level: Integer value or string, which describes the severity of the
                event. Ordered by ascending severity, the allowed level names
                are: 'DEBUG', 'INFO', 'WARNING', 'ERROR' and 'CRITICAL'. The
                respectively corresponding level numbers are 10, 20, 30, 40 and
                50.
            msg: Message ``format string``_, which may contain literal text or
                replacement fields delimited by braces. Each replacement field
                contains either the numeric index of a positional argument,
                given by *args, or the name of a keyword argument, given by the
                keyword *extra*.
            *args: Arguments, which can be used by the message format string.
            **kwds: Additional Keywords, used by the function `Logger.log()`_.

        """
        self.logger.log(level, msg, *args, **kwds)

    #
    # Private Methods
    #

    def _load_config(self) -> None:
        """Update the session configuration from the configuration file."""
        config = inifile.load(self._config_file_path, self._config_file_struct)
        if 'session' in config and isinstance(config['session'], dict):
            for key, val in config['session'].items():
                self.config[key] = val

    def _save_config(self) -> None:
        """Save the session configuration to the configuration file."""
        config = {'session': self.config}
        inifile.save(config, self._config_file_path)

    def _get_path(self) -> OptPath:
        """Return the path of the current workspace."""
        return self._ws.path

    def _get_files(self) -> StrList:
        """Return the result of an unrestricted workspace search."""
        return self._ws.search()

    def _get_folders(self) -> StrList:
        """Return the folders of the current workspace."""
        return self._ws.folders

    def _locate_path(
            self, workspace: OptPathLike = None,
            basedir: OptPathLike = None) -> OptPath:
        """Locate the path of a workspace file.

        Args:
            workspace: String or :term:`path-like object`, that identifies the
                workspace file.
            basedir: Optional base directory of the workspace file.

        Returns:
            None, if no workspace is given. If a basedir is given, the
            workspace path joined to the basedir, without checking that it
            exists. Otherwise the expanded workspace path, if it points to a
            file, or the first file with the given name within the search
            paths.

        Raises:
            FileNotFoundError: If no basedir is given and the workspace could
                neither be located as a file nor within the search paths.

        """
        if not workspace:
            return None
        if not basedir:
            # If workspace is a fully qualified file path in the directory
            # structure of the system, ignore the 'paths' list
            if env.is_file(workspace):
                return env.expand(workspace)
            # Use the 'paths' list to find a workspace
            for path in self.paths:
                candidate = Path(path, workspace)
                if candidate.is_file():
                    return candidate
            raise FileNotFoundError(f"file {workspace} does not exist")
        return Path(basedir, workspace)
class CSVFile(attrib.Container):
    """CSV-File Class.

    Args:
        file: String or :term:`path-like object`, which points to a readable
            CSV-file in the directory structure of the system, or a
            :term:`file object` in reading mode.
        mode: Accepted for compatibility; not evaluated by the constructor.
        comment: Optional comment string. If given, it is returned by the
            attribute :attr:`comment` instead of the initial '#' lines of
            the CSV-file.
        delim: String containing CSV-delimiter. By default the CSV-delimiter
            is detected from the CSV-file.
        csvformat: Optional CSV-Header format. If given, it is returned by
            the attribute :attr:`format` instead of the detected format.
        labels: List of column labels in CSV-file. By default the list of
            column labels is taken from the first content line in the
            CSV-file.
        usecols: Indices of the columns which are to be imported from the
            file. Accepted for compatibility; not evaluated by the
            constructor.
        namecol: Column ID of column, which contains the row annotation. By
            default the first column is used for annotation.

    """

    #
    # Class Variables
    #

    _delim_candidates: ClassVar[StrList] = [',', '\t', ';', ' ', ':']
    """
    Optional list of strings containing delimiter candidates to search for.
    Default: [',', '\t', ';', ' ', ':']
    """

    _delim_mincount: ClassVar[int] = 3
    """
    Minimum number of lines used to detect CSV delimiter. Thereby only non
    comment and non empty lines are used.
    """

    _delim_maxcount: ClassVar[int] = 100
    """
    Maximum number of lines used to detect CSV delimiter. Thereby only non
    comment and non empty lines are used.
    """

    #
    # Public Attributes
    #

    comment: property = attrib.Virtual(fget='_get_comment')
    comment.__doc__ = """
    String containing the initial '#' lines of the CSV-file or an empty
    string, if no initial comment lines could be detected.
    """

    delim: property = attrib.Virtual(fget='_get_delim')
    delim.__doc__ = """
    Delimiter string of the CSV-file or None, if the delimiter could not be
    detected.
    """

    format: property = attrib.Virtual(fget='_get_format')
    format.__doc__ = """
    CSV-Header format. The following formats are supported:
        0: :RFC:`4180`:
            The column header equals the size of the rows.
        1: `R-Table`:
            The column header has a size that is reduced by one, compared to
            the rows. This smaller number of entries follows by the
            convention, that in R the CSV export of tables adds an extra
            column with row names as the first column. The column name of
            this column is omitted within the header.
    """

    colnames: property = attrib.Virtual(fget='_get_colnames')
    colnames.__doc__ = """
    List of strings containing column names from first non comment, non empty
    line of CSV-file.
    """

    fields: property = attrib.Virtual(fget='_get_fields')
    # The documentation belongs to 'fields'; assigning it to 'colnames' would
    # overwrite the docstring of the attribute above.
    fields.__doc__ = """
    List of pairs containing the column names and the estimated or given
    column types of the CSV-file.
    """

    rownames: property = attrib.Virtual(fget='_get_rownames')
    rownames.__doc__ = """
    List of strings containing row names from column with id given by namecol
    or None, if namecol is not given.
    """

    namecol: property = attrib.Virtual(fget='_get_namecol')
    namecol.__doc__ = """
    Index of the column of a CSV-file that contains the row names. The value
    None is used for CSV-files that do not contain row names.
    """

    #
    # Protected Attributes
    #

    _file: property = attrib.Content(classinfo=TextFileClasses)
    _comment: property = attrib.MetaData(classinfo=str, default=None)
    _delim: property = attrib.MetaData(classinfo=str, default=None)
    # '_format' is kept for backward compatibility; the class itself stores
    # the manually given header format in '_csvformat'.
    _format: property = attrib.MetaData(classinfo=str, default=None)
    _csvformat: property = attrib.MetaData(classinfo=int, default=None)
    _colnames: property = attrib.MetaData(classinfo=list, default=None)
    _rownames: property = attrib.MetaData(classinfo=list, default=None)
    _namecol: property = attrib.MetaData(classinfo=int, default=None)

    #
    # Events
    #

    def __init__(
            self, file: FileRef, mode: str = '', comment: OptStr = None,
            delim: OptStr = None, csvformat: OptInt = None,
            labels: OptStrList = None, usecols: OptIntTuple = None,
            namecol: OptInt = None) -> None:
        """Initialize instance attributes."""
        super().__init__()
        self._file = file
        self._comment = comment
        self._delim = delim
        self._csvformat = csvformat
        self._colnames = labels
        self._namecol = namecol

    #
    # Public Methods
    #

    def select(self, columns: OptStrTuple = None) -> OptNpArray:
        """Load numpy ndarray from CSV-file.

        Args:
            columns: List of column labels in CSV-file. By default the list
                of column labels is taken from the first content line in the
                CSV-file.

        Returns:
            :class:`numpy.ndarray` containing data from CSV-file, or None if
            the data could not be imported.

        """
        # Check type of 'columns'
        check.has_opt_type("'columns'", columns, tuple)

        # Get column names and formats
        usecols = self._get_usecols(columns)
        colnames = self._get_colnames()
        names = tuple(colnames[colid] for colid in usecols)
        lblcol = self._get_namecol()
        if lblcol is None:
            # No annotation column: all selected columns are numeric
            formats = tuple(['<f8'] * len(usecols))
        elif lblcol not in usecols:
            # Prepend the annotation column to the selected columns
            formats = tuple(['<U12'] + ['<f8'] * len(usecols))
            names = ('label', ) + names
            usecols = (lblcol, ) + usecols
        else:
            # Move the annotation column to the first position
            lbllbl = colnames[lblcol]
            formats = tuple(['<U12'] + ['<f8'] * (len(usecols) - 1))
            names = tuple(['label'] + [l for l in names if l != lbllbl])
            usecols = tuple([lblcol] + [c for c in usecols if c != lblcol])

        # Import data from CSV-file as numpy array
        with textfile.openx(self._file, mode='r') as fh:
            return np.loadtxt(
                fh, skiprows=self._get_skiprows(),
                delimiter=self._get_delim(), usecols=usecols,
                dtype={'names': names, 'formats': formats})

    @contextmanager
    def open(self, mode: str = '', columns: OptStrTuple = None) -> IterCSVIOBase:
        """Open CSV-file in reading or writing mode.

        Args:
            mode: String, which characters specify the mode in which the file
                is to be opened. The default mode is reading mode. Supported
                characters are:
                'r': Reading mode (default)
                'w': Writing mode
            columns: Optional tuple of column names, which is passed to the
                reader in reading mode. By default all columns are used. In
                writing mode this argument is not evaluated.

        Yields:
            :term:`File object`, that supports the given mode.

        Raises:
            ValueError: If 'mode' contains both characters 'r' and 'w'.

        """
        # Open file handler
        fh: CSVIOBase
        if 'w' in mode:
            if 'r' in mode:
                raise ValueError(
                    "'mode' is not allowed to contain characters 'r' AND 'w'")
            fh = self._open_write()
        else:
            fh = self._open_read(columns)
        try:
            yield fh
        finally:
            fh.close()

    def read(self) -> List[tuple]:
        """Read all rows of the CSV-file and return them as a list."""
        with self.open(mode='r') as fp:
            return list(fp)

    def write(self, rows: List[Iterable]) -> None:
        """Write the given rows to the CSV-file."""
        with self.open(mode='w') as fp:
            for row in rows:
                fp.write_row(row)

    #
    # Protected Methods
    #

    def _get_comment(self) -> str:
        # Return comment if set manually
        if self._comment is not None:
            return self._comment
        return textfile.get_comment(self._file)

    def _get_delim(self) -> OptStr:
        # Return delimiter if set manually
        if self._delim is not None:
            return self._delim

        # Initialize CSV-Sniffer with default values
        sniffer = csv.Sniffer()
        sniffer.preferred = self._delim_candidates
        delim: OptStr = None

        # Detect delimiter
        with textfile.openx(self._file, mode='r') as fd:
            size, probe = 0, ''
            for line in fd:
                # Check termination criteria: never probe more than the
                # maximum number of content lines
                if size >= self._delim_maxcount:
                    break
                # Check exclusion criteria
                strip = line.strip()
                if not strip or strip.startswith('#'):
                    continue
                # Increase probe size
                probe += line
                size += 1
                # Start sniffing as soon as the minimum number of content
                # lines has been collected
                if size < self._delim_mincount:
                    continue
                # Try to detect delimiter from probe using csv.Sniffer
                try:
                    dialect = sniffer.sniff(probe)
                except csv.Error:
                    continue
                delim = dialect.delimiter
                break

        return delim

    def _get_format(self) -> OptInt:
        # Return value if set manually
        if self._csvformat is not None:
            return self._csvformat

        # Get first and second content lines (non comment, non empty) of
        # CSV-file
        lines = textfile.get_content(self._file, lines=2)
        if len(lines) != 2:
            return None

        # Without a delimiter the header format can not be determined
        # (str.count() does not accept None)
        delim = self.delim
        if delim is None:
            return None

        # Determine column label format
        header_count = lines[0].count(delim)
        row_count = lines[1].count(delim)
        if header_count == row_count:
            return CSV_FORMAT_STANDARD
        if header_count == row_count - 1:
            return CSV_FORMAT_RTABLE
        return None

    def _get_colnames(self) -> StrList:
        # Return value if set manually
        if self._colnames is not None:
            return self._colnames

        # Get first content line (non comment, non empty) of CSV-file
        line = textfile.get_content(self._file, lines=1)[0]

        # Get column names from first content line
        names = [col.strip('\"\'\n\r\t ') for col in line.split(self.delim)]

        # Format column labels
        if self.format == CSV_FORMAT_STANDARD:
            return names
        if self.format == CSV_FORMAT_RTABLE:
            # R-tables omit the header entry of the row name column
            return [''] + names

        # File references given as strings do not provide a 'name' attribute
        name = getattr(self._file, 'name', self._file)
        raise BadCSVFile(f"file {name} is not valid")

    def _get_fields(self) -> Fields:
        colnames = self.colnames
        delim = self.delim
        # The header line and the first two data rows are required
        lines = textfile.get_content(self._file, lines=3)
        if len(lines) != 3:
            return []
        row1 = lines[1].split(delim)
        row2 = lines[2].split(delim)
        fields = []
        for colname, str1, str2 in zip(colnames, row1, row2):
            # A column type is only accepted, if both data rows agree on it.
            # Otherwise the column is treated as string.
            type1 = literal.estimate(str1)
            if type1:
                type2 = literal.estimate(str2)
                if type2 == type1:
                    fields.append((colname, type1))
                    continue
            fields.append((colname, str))
        return fields

    def _get_rownames(self) -> OptList:
        # Get id of the column, that contains the row names
        lblcol = self._get_namecol()
        if lblcol is None:
            return None
        lbllbl = self.colnames[lblcol]

        # Import CSV-file to NumPy ndarray
        with textfile.openx(self._file, mode='r') as fh:
            rownames = np.loadtxt(
                fh, skiprows=self._get_skiprows(),
                delimiter=self._get_delim(), usecols=(lblcol, ),
                dtype={'names': (lbllbl, ), 'formats': ('<U12', )})
        return [name[0] for name in rownames.flat]

    def _get_skiprows(self) -> int:
        # Count how many 'comment' and 'blank' rows are to be skipped. The
        # counter starts with 1 to also skip the first content line.
        skiprows = 1
        with textfile.openx(self._file, mode='r') as fd:
            for line in fd:
                strip = line.strip()
                if not strip or strip.startswith('#'):
                    skiprows += 1
                    continue
                break
        return skiprows

    def _get_namecol(self) -> OptInt:
        # Return value if set manually
        if self._namecol is not None:
            return self._namecol

        # In R-tables the first column is always used for record names
        if self.format == CSV_FORMAT_RTABLE:
            return 0

        # Get first and second content lines (non comment, non empty) of
        # CSV-file
        lines = textfile.get_content(self._file, lines=2)
        if len(lines) != 2:
            return None

        # Determine annotation column id from first value in the second line,
        # which can not be converted to a float
        values = [col.strip('\"\' \n') for col in lines[1].split(self.delim)]
        for cid, val in enumerate(values):
            try:
                float(val)
            except ValueError:
                return cid
        return None

    def _get_usecols(self, columns: OptStrTuple = None) -> IntTuple:
        # Get column labels
        colnames = self._get_colnames()
        if not columns:
            return tuple(range(len(colnames)))

        # Check if columns exist
        check.is_subset("'columns'", set(columns), 'colnames', set(colnames))
        return tuple(colnames.index(col) for col in columns)

    def _get_fmt_params(self) -> StrDict:
        return {'delimiter': self.delim}

    def _open_read(self, columns: OptStrTuple = None) -> CSVReader:
        usecols = self._get_usecols(columns)
        skiprows = self._get_skiprows()
        fields = self.fields
        usefields = [fields[colid] for colid in usecols]
        fmt = self._get_fmt_params()
        return CSVReader(
            self._file, skiprows=skiprows, usecols=usecols,
            fields=usefields, **fmt)

    def _open_write(self, columns: OptStrTuple = None) -> CSVWriter:
        # NOTE: 'columns' is accepted for symmetry with _open_read(), but is
        # not evaluated in writing mode
        fmt = self._get_fmt_params()
        return CSVWriter(
            self._file, header=self.colnames, comment=self.comment, **fmt)
class WsFile(attrib.Container): """Workspace File. Workspace files are Zip-Archives, that contain a INI-formatted configuration file 'workspace.ini' in the archives root, and arbitrary resource files within subfolders. Args: filepath: String or :term:`path-like object`, that points to a valid workspace file or None. If the filepath points to a valid workspace file, then the class instance is initialized with a memory copy of the file. If the given file, however, does not exist, isn't a valid ZipFile, or does not contain a workspace configuration, respectively one of the errors FileNotFoundError, BadZipFile or BadWsFile is raised. The default behaviour, if the filepath is None, is to create an empty workspace in the memory, that uses the default folders layout. In this case the attribute maintainer is initialized with the current username. pwd: Bytes representing password of workspace file. """ # # Protected Class Variables # _config_file: ClassVar[Path] = Path('workspace.ini') _default_config: ClassVar[ConfigDict] = { 'dc': { 'creator': env.get_username(), 'date': datetime.datetime.now()}} _default_dir_layout: ClassVar[StrList] = [ 'dataset', 'network', 'system', 'model', 'script'] _default_encoding = env.get_encoding() # # Public Attributes and Attribute Groups # dc: attrib.Group = attrib.create_group(attrib.DCGroup) startup: property = attrib.MetaData(classinfo=Path, category='hooks') startup.__doc__ = """ The startup script is a path, that points to a python script inside the workspace, which is executed after loading the workspace. 
""" path: property = attrib.Virtual(fget='_get_path') path.__doc__ = """Filepath of the workspace.""" name: property = attrib.Virtual(fget='_get_name') name.__doc__ = """Filename of the workspace without file extension.""" files: property = attrib.Virtual(fget='search') files.__doc__ = """List of all files within the workspace.""" folders: property = attrib.Virtual(fget='_get_folders') folders.__doc__ = """List of all folders within the workspace.""" changed: property = attrib.Virtual(fget='_get_changed') changed.__doc__ = """Tells whether the workspace file has been changed.""" # # Protected Attributes # _file: property = attrib.Content(classinfo=ZipFile) _buffer: property = attrib.Content(classinfo=BytesIOBaseClass) _path: property = attrib.Temporary(classinfo=Path) _pwd: property = attrib.Temporary(classinfo=bytes) _changed: property = attrib.Temporary(classinfo=bool, default=False) # # Events # def __init__( self, filepath: OptPathLike = None, pwd: OptBytes = None, parent: Optional[attrib.Container] = None) -> None: """Load Workspace from file.""" super().__init__() if filepath: self.load(filepath, pwd=pwd) else: self._create_new() def __enter__(self) -> 'WsFile': """Enter with statement.""" return self def __exit__(self, cls: ExcType, obj: Exc, tb: Traceback) -> None: """Close workspace file and buffer.""" self.close() # # Public Methods # def load(self, filepath: PathLike, pwd: OptBytes = None) -> None: """Load Workspace from file. Args: filepath: String or :term:`path-like object`, that points to a valid workspace file. If the filepath points to a valid workspace file, then the class instance is initialized with a memory copy of the file. If the given file, however, does not exist, isn't a valid ZipFile, or does not contain a workspace configuration, respectively one of the errors FileNotFoundError, BadZipFile or BadWsFile is raised. pwd: Bytes representing password of workspace file. 
""" # Initialize instance Variables, Buffer and buffered ZipFile self._changed = False self._path = env.expand(filepath) self._pwd = pwd self._buffer = BytesIO() self._file = ZipFile(self._buffer, mode='w') # Copy contents from ZipFile to buffered ZipFile with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) try: with ZipFile(self.path, mode='r') as fh: for zinfo in fh.infolist(): data = fh.read(zinfo, pwd=pwd) # TODO ([email protected]): The zipfile standard # module currently does not support encryption in write # mode of new ZipFiles. See: # https://docs.python.org/3/library/zipfile.html # When support is provided, the below line for writing # files shall be replaced by: # self._file.writestr(zinfo, data, pwd=pwd) self._file.writestr(zinfo, data) except FileNotFoundError as err: raise FileNotFoundError( f"file '{self.path}' does not exist") from err except BadZipFile as err: raise BadZipFile( f"file '{self.path}' is not a valid ZIP file") from err # Try to open and load workspace configuration from buffer structure = { 'dc': self._get_attr_types(group='dc'), 'hooks': self._get_attr_types(category='hooks')} try: with self.open(self._config_file) as file: cfg = inifile.load(file, structure=structure) except KeyError as err: raise BadWsFile( f"workspace '{self.path}' is not valid: " f"file '{self._config_file}' could not be loaded") from err # Link configuration self._set_attr_values(cfg.get('dc', {}), group='dc') # type: ignore def save(self) -> None: """Save the workspace to it's filepath.""" if isinstance(self.path, Path): self.saveas(self.path) else: raise FileNotGivenError( "use saveas() to save the workspace to a file") def saveas(self, filepath: PathLike) -> None: """Save the workspace to a file. Args: filepath: String or :term:`path-like object`, that represents the name of a workspace file. 
""" path = env.expand(filepath) # Update datetime self.date = datetime.datetime.now() # Update 'workspace.ini' with self.open(self._config_file, mode='w') as file: inifile.save({ 'dc': self._get_attr_values(group='dc'), 'hooks': self._get_attr_values(category='hooks')}, file) # Remove duplicates from workspace self._remove_duplicates() # Mark plattform, which created the files as Windows # to avoid inference of wrong Unix permissions for zinfo in self._file.infolist(): zinfo.create_system = 0 # Close ZipArchive (to allow to read the buffer) self._file.close() # Read buffer and write workspace file if not isinstance(self._buffer, BytesIO): raise TypeError("buffer has not been initialized") with open(path, 'wb') as file: file.write(self._buffer.getvalue()) # Close buffer self._buffer.close() # Reload saved workpace from file self.load(path, pwd=self._pwd) def get_file_accessor(self, path: PathLike) -> FileAccessorBase: """Get file accessor to workspace member. Args: path: String or :term:`path-like object`, that represents a workspace member. In reading mode the path has to point to a valid workspace file, or a FileNotFoundError is raised. In writing mode the path by default is treated as a file path. New directories can be written by setting the argument is_dir to True. Returns: :class:`File accessor <nemoa.types.FileAccessorBase>` to workspace member. """ def wrap_open(path: PathLike) -> AnyFunc: def wrapped_open( obj: FileAccessorBase, *args: Any, **kwds: Any) -> FileLike: return self.open(path, *args, **kwds) return wrapped_open return type( # pylint: disable=E0110 'FileAccessor', (FileAccessorBase,), { 'name': str(path), 'open': wrap_open(path)})() def open( self, path: PathLike, mode: str = 'r', encoding: OptStr = None, is_dir: bool = False) -> FileLike: """Open file within the workspace. Args: path: String or :term:`path-like object`, that represents a workspace member. 
In reading mode the path has to point to a valid workspace file, or a FileNotFoundError is raised. In writing mode the path by default is treated as a file path. New directories can be written by setting the argument is_dir to True. mode: String, which characters specify the mode in which the file is to be opened. The default mode is reading in text mode. Suported characters are: 'r': Reading mode (default) 'w': Writing mode 'b': Binary mode 't': Text mode (default) encoding: In binary mode encoding has not effect. In text mode encoding specifies the name of the encoding, which in reading and writing mode respectively is used to decode the stream’s bytes into strings, and to encode strings into bytes. By default the preferred encoding of the operating system is used. is_dir: Boolean value which determines, if the path is to be treated as a directory or not. This information is required for writing directories to the workspace. The default behaviour is not to treat paths as directories. Returns: :term:`File object` in reading or writing mode. Examples: >>> with self.open('workspace.ini') as file: >>> print(file.read()) """ # Open file handler to workspace member if 'w' in mode: if 'r' in mode: raise ValueError( "'mode' is not allowed to contain the " "characters 'r' AND 'w'") file = self._open_write(path, is_dir=is_dir) else: file = self._open_read(path) # Wrap binary files to text files if required if 'b' in mode: if 't' in mode: raise ValueError( "'mode' is not allowed to contain the " "characters 'b' AND 't'") return file return TextIOWrapper( file, encoding=encoding or self._default_encoding, write_through=True) def close(self) -> None: """Close current workspace and buffer.""" if hasattr(self._file, 'close'): self._file.close() if hasattr(self._buffer, 'close'): self._buffer.close() def copy(self, source: PathLike, target: PathLike) -> bool: """Copy file within workspace. 
Args: source: String or :term:`path-like object`, that points to a file in the directory structure of the workspace. If the file does not exist, a FileNotFoundError is raised. If the filepath points to a directory, an IsADirectoryError is raised. target: String or :term:`path-like object`, that points to a new filename or an existing directory in the directory structure of the workspace. If the target is a directory the target file consists of the directory and the basename of the source file. If the target file already exists a FileExistsError is raised. Returns: Boolean value which is True if the file was copied. """ # Check if source file exists and is not a directory src_file = PurePath(source).as_posix() src_infos = self._locate(source) if not src_infos: raise FileNotFoundError( f"workspace file '{src_file}' does not exist") src_info = src_infos[-1] if getattr(src_info, 'is_dir')(): raise IsADirectoryError( f"'{src_file}/' is a directory not a file") # If target is a directory get name of target file from # source filename tgt_file = PurePath(target).as_posix() if tgt_file == '.': tgt_file = Path(src_file).name else: tgt_infos = self._locate(target) if tgt_infos: if getattr(tgt_infos[-1], 'is_dir')(): tgt_path = PurePath(tgt_file, Path(src_file).name) tgt_file = tgt_path.as_posix() # Check if target file already exists if self._locate(tgt_file): raise FileExistsError( f"workspace file '{tgt_file}' already exist.") # Read binary data from source file data = self._file.read(src_info, pwd=self._pwd) # Create ZipInfo for target file from source file info tgt_time = getattr(src_info, 'date_time') tgt_info = ZipInfo(filename=tgt_file, date_time=tgt_time) # type: ignore # Write binary data to target file # TODO ([email protected]): The zipfile standard module currently # does not support encryption in write mode. 
See: # https://docs.python.org/3/library/zipfile.html # When support is provided, the below line shall be replaced by: # self._file.writestr(tgt_info, data, pwd=self._pwd) self._file.writestr(tgt_info, data) self._changed = True # Check if new file exists return bool(self._locate(tgt_file)) def move(self, source: PathLike, target: PathLike) -> bool: """Move file within workspace. Args: source: String or :term:`path-like object`, that points to a file in the directory structure of the workspace. If the file does not exist, a FileNotFoundError is raised. If the filepath points to a directory, an IsADirectoryError is raised. target: String or :term:`path-like object`, that points to a new filename or an existing directory in the directory structure of the workspace. If the target is a directory the target file consists of the directory and the basename of the source file. If the target file already exists a FileExistsError is raised. Returns: Boolean value which is True if the file has been moved. """ # Copy source file to target file or directory # and on success remove source file return self.copy(source, target) and self.unlink(source) def append(self, source: PathLike, target: OptPathLike = None) -> bool: """Append file to the workspace. Args: source: String or :term:`path-like object`, that points to a valid file in the directory structure if the system. If the file does not exist, a FileNotFoundError is raised. If the filepath points to a directory, a IsADirectoryError is raised. target: String or :term:`path-like object`, that points to a valid directory in the directory structure of the workspace. By default the root directory is used. If the directory does not exist, a FileNotFoundError is raised. If the target directory already contains a file, which name equals the filename of the source, a FileExistsError is raised. Returns: Boolean value which is True if the file has been appended. 
""" # Check source file src_file = env.expand(source) if not src_file.exists(): raise FileNotFoundError(f"file '{src_file}' does not exist") if src_file.is_dir(): raise IsADirectoryError(f"'{src_file}' is a directory not a file") # Check target directory if target: tgt_dir = PurePath(target).as_posix() + '/' if not self._locate(tgt_dir): raise FileNotFoundError( f"workspace directory '{tgt_dir}' does not exist") else: tgt_dir = '.' tgt_file = Path(tgt_dir, src_file.name) if self._locate(tgt_file): raise FileExistsError( f"workspace directory '{tgt_dir}' already contains a file " f"with name '{src_file.name}'") # Create ZipInfo entry from source file filename = PurePath(tgt_file).as_posix() date_time = time.localtime(src_file.stat().st_mtime)[:6] zinfo = ZipInfo(filename=filename, date_time=date_time) # type: ignore # Copy file to archive with src_file.open('rb') as src: data = src.read() # TODO ([email protected]): The zipfile standard module currently # does not support encryption in write mode. See: # https://docs.python.org/3/library/zipfile.html # When support is provided, the below line shall be replaced by: # self._file.writestr(zinfo, data, pwd=pwd) self._file.writestr(zinfo, data) return True def read_text(self, filepath: PathLike, encoding: OptStr = None) -> str: """Read text from file. Args: filepath: String or :term:`path-like object`, that points to a valid file in the directory structure of the workspace. If the file does not exist a FileNotFoundError is raised. encoding: Specifies the name of the encoding, which is used to decode the stream’s bytes into strings. By default the preferred encoding of the operating system is used. Returns: Contents of the given filepath encoded as string. """ with self.open(filepath, mode='r', encoding=encoding) as file: text = file.read() if not isinstance(text, str): return '' return text def read_bytes(self, filepath: PathLike) -> bytes: """Read bytes from file. 
Args: filepath: String or :term:`path-like object`, that points to a valid file in the dirctory structure of the workspace. If the file does not exist a FileNotFoundError is raised. Returns: Contents of the given filepath as bytes. """ with self.open(filepath, mode='rb') as file: blob = file.read() if not isinstance(blob, bytes): return b'' return blob def write_text( self, text: str, filepath: PathLike, encoding: OptStr = None) -> int: """Write text to file. Args: text: String, which has to be written to the given file. filepath: String or :term:`path-like object`, that represents a valid filename in the dirctory structure of the workspace. encoding: Specifies the name of the encoding, which is used to encode strings into bytes. By default the preferred encoding of the operating system is used. Returns: Number of characters, that are written to the file. """ with self.open(filepath, mode='w', encoding=encoding) as file: if isinstance(file, TextIOBaseClass): return file.write(text) return 0 def write_bytes(self, blob: BytesLike, filepath: PathLike) -> int: """Write bytes to file. Args: blob: Bytes, which are to be written to the given file. filepath: String or :term:`path-like object`, that represents a valid filename in the dirctory structure of the workspace. Returns: Number of bytes, that are written to the file. """ with self.open(filepath, mode='wb') as file: if isinstance(file, BytesIOBaseClass): return file.write(blob) return 0 def unlink(self, filepath: PathLike, ignore_missing: bool = True) -> bool: """Remove file from workspace. Args: filepath: String or :term:`path-like object`, that points to a file in the directory structure of the workspace. If the filepath points to a directory, an IsADirectoryError is raised. For the case, that the file does not exist, the argument ignore_missing determines, if a FileNotFoundError is raised. ignore_missing: Boolean value which determines, if FileNotFoundError is raised, if the target file does not exist. 
The default behaviour, is to ignore missing files. Returns: Boolean value, which is True if the given file was removed. """ matches = self._locate(filepath) if not matches: if ignore_missing: return True filename = PurePath(filepath).as_posix() raise FileNotFoundError(f"file '{filename}' does not exist") if getattr(matches[-1], 'is_dir')(): dirname = PurePath(filepath).as_posix() + '/' raise IsADirectoryError(f"'{dirname}' is a directory not a file") return self._remove_members(matches) def mkdir(self, dirpath: PathLike, ignore_exists: bool = False) -> bool: """Create a new directory at the given path. Args: dirpath: String or :term:`path-like object`, that represents a valid directory name in the directory structure of the workspace. If the directory already exists, the argument ignore_exists determines, if a FileExistsError is raised. ignore_exists: Boolean value which determines, if FileExistsError is raised, if the target directory already exists. The default behaviour is to raise an error, if the file already exists. Returns: Boolean value, which is True if the given directory was created. """ matches = self._locate(dirpath) if not matches: with self.open(dirpath, mode='w', is_dir=True): pass elif not ignore_exists: dirname = PurePath(dirpath).as_posix() + '/' raise FileExistsError(f"directory '{dirname}' already exists") return True def rmdir( self, dirpath: PathLike, recursive: bool = False, ignore_missing: bool = False) -> bool: """Remove directory from workspace. Args: dirpath: String or :term:`path-like object`, that points to a directory in the directory structure of the workspace. If the directory does not exist, the argument ignore_missing determines, if a FileNotFoundError is raised. ignore_missing: Boolean value which determines, if FileNotFoundError is raised, if the target directory does not exist. The default behaviour, is to raise an error if the directory is missing. 
recursive: Boolean value which determines, if directories are removed recursively. If recursive is False, then only empty directories can be removed. If recursive, however, is True, then all files and subdirectories are alse removed. By default recursive is False. Returns: Boolean value, which is True if the given directory was removed. """ matches = self._locate(dirpath) dirname = PurePath(dirpath).as_posix() + '/' if not matches: if ignore_missing: return True raise FileNotFoundError(f"directory '{dirname}' does not exist") files = self.search(dirname + '*') if not files: return self._remove_members(matches) if not recursive: raise DirNotEmptyError(f"directory '{dirname}' is not empty") allmatches = matches for file in files: allmatches += self._locate(file) return self._remove_members(allmatches) def search(self, pattern: OptStr = None) -> StrList: """Search for files in the workspace. Args: pattern: Search pattern that contains Unix shell-style wildcards: '*': Matches arbitrary strings '?': Matches single characters [seq]: Matches any character in seq [!seq]: Matches any character not in seq By default a list of all files and directories is returned. Returns: List of files and directories in the directory structure of the workspace, that match the search pattern. 
""" # Get list of normalized unique paths of workspace members paths: PathLikeList = [] for zinfo in self._file.infolist(): path = PurePath(zinfo.filename).as_posix() if getattr(zinfo, 'is_dir')(): path += '/' if path not in paths: paths.append(path) # Match path list with given pattern if pattern: paths = env.match_paths(paths, pattern) # Sort paths return sorted([str(path) for path in paths]) # # Protected Methods # def _create_new(self) -> None: # Initialize instance Variables, Buffer and buffered ZipFile self._set_attr_values(self._default_config['dc'], group='dc') self._path = None self._changed = False self._pwd = None self._buffer = BytesIO() self._file = ZipFile(self._buffer, mode='w') # Create folders for folder in self._default_dir_layout: self.mkdir(folder) def _open_read(self, path: PathLike) -> BytesIOLike: # Locate workspace member by it's path # and open file handler for reading the file matches = self._locate(path) if not matches: fname = PurePath(path).as_posix() raise FileNotFoundError( f"workspace member with filename '{fname}' does not exist") # Select latest version of file zinfo = matches[-1] return self._file.open(zinfo, pwd=self._pwd, mode='r') def _open_write(self, path: PathLike, is_dir: bool = False) -> BytesIOLike: # Determine workspace member name from path # and get ZipInfo with local time as date_time filename = PurePath(path).as_posix() if is_dir: filename += '/' zinfo = ZipInfo( # type: ignore filename=filename, date_time=time.localtime()[:6]) # Catch Warning for duplicate files with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) # TODO ([email protected]): The zipfile standard # module currently does not support encryption in write # mode of new ZipFiles. 
def _remove_members(self, zinfos: ZipInfoList) -> bool:
    """Remove members from the workspace by rebuilding the archive.

    Members cannot be deleted from the archive in place here. Instead all
    members that are to be kept are copied into a fresh in-memory archive,
    which then replaces the current buffer and file.

    Args:
        zinfos: List of ZipInfo entries to remove. Entries are identified by
            the pair (filename, date_time). For every entry in the list at
            most one archive member with the same identifier is removed, so
            that members sharing an identifier are only removed as often as
            they are requested.

    Returns:
        True, if the members were removed or the given list is empty.

    Raises:
        FileNotFoundError: If at least one requested entry has no matching
            member in the archive. The workspace is not rebuilt in this case.

    """
    # Local import keeps the block self-contained
    from collections import Counter

    # Return True if list of members is empty
    if not zinfos:
        return True

    # Count the requested identifiers as a multiset. This keeps the
    # 'remove one member per requested entry' semantics, while each lookup
    # is constant time instead of a scan through a list.
    pending = Counter(
        (zinfo.filename, zinfo.date_time) for zinfo in zinfos)
    kept = []
    for zinfo in self._file.infolist():
        zid = (zinfo.filename, zinfo.date_time)
        if pending[zid] > 0:
            pending[zid] -= 1
        else:
            kept.append(zinfo)

    # If any entry on the list could not be found raise an error
    names = [zid[0] for zid in pending.elements()]
    if names:
        raise FileNotFoundError(
            f"could not locate workspace members: {names}")

    # Create new ZipArchive in Memory
    new_buffer = BytesIO()
    new_file = ZipFile(new_buffer, mode='w')

    # Copy all kept workspace members from current to new workspace
    for zinfo in kept:
        data = self._file.read(zinfo, pwd=self._pwd)
        new_file.writestr(zinfo, data)

    # Close current workspace and buffer and link new workspace and buffer
    self._file.close()
    self._buffer.close()
    self._buffer = new_buffer
    self._file = new_file
    self._changed = True

    return True