Example #1
0
class Cursor(attrib.Container):
    """Cursor Class.

    Args:
        index: List of row IDs, that are traversed by the cursor. By default the
            attribute '_index' of the parent object is used.
        getter: Callable, which is called with a row ID and returns the
            corresponding row.
        predicate: Optional callable, which is called with a row. If given,
            only rows for which it returns a true value are returned by the
            cursor.
        mapper: Optional callable, which is applied to every matching row
            before the row is returned by the cursor.
        batchsize: Optional default number of rows, which is fetched by the
            method :meth:`.fetch`. A zero value is ignored.
        mode: Named string identifier for the cursor :py:attr:`.mode`. The
            default cursor mode is 'forward-only indexed'. Note: After
            initializing the cursor, its mode can not be changed anymore.
        parent: Optional parent container, which is passed to the base class.

    """

    #
    # Protected Class Variables
    #

    _default_mode: ClassVar[int] = CUR_MODE_INDEXED

    #
    # Public Attributes
    #

    mode: property = attrib.Virtual(fget='_get_mode')
    mode.__doc__ = """
    The read-only string attribute *cursor mode* specifies the space separated
    *scrolling type* and the *operation mode* of the cursor. For forward-only
    cursors the scrolling type is omitted within the attribute value. Supported
    scrolling types are:

    :forward-only: The default scrolling type of cursors is called a
        forward-only cursor and can move only forward through the result set. A
        forward-only cursor does not support scrolling but only fetching rows
        from the start to the end of the result set.
    :scrollable: A scrollable cursor is commonly used in screen-based
        interactive applications, like spreadsheets, in which users are allowed
        to scroll back and forth through the result set. However, applications
        should use scrollable cursors only when forward-only cursors will not do
        the job, as scrollable cursors are generally more expensive, than
        forward-only cursors.
    :random: Random cursors move randomly through the result set. In difference
        to a randomly sorted cursor, the rows are not unique and the number of
        fetched rows is not limited to the size of the result set. If the method
        :meth:`.fetch` is called with a zero value for size, a
        CursorModeError is raised.

    Supported operation modes are:

    :dynamic: A **dynamic cursor** is built on-the-fly and therefore comprises
        any changes made to the rows in the result set during its traversal,
        including new appended rows and the order of its traversal. This
        behaviour is regardless of whether the changes occur from inside the
        cursor or by other users from outside the cursor. Dynamic cursors are
        threadsafe but do not support counting filtered rows or sorting rows.
    :indexed: Indexed cursors (aka Keyset-driven cursors) are built on-the-fly
        with respect to an initial copy of the table index and therefore
        comprise changes made to the rows in the result set during its
        traversal, but not new appended rows nor changes within their order.
        Keyset driven cursors are threadsafe but do not support sorting rows or
        counting filtered rows.
    :static: Static cursors are buffered and built during their creation time
        and therefore always display the result set as it was when the cursor
        was first opened. Static cursors are not threadsafe but support counting
        the rows with respect to a given filter and sorting the rows.

    """

    batchsize: property = attrib.MetaData(classinfo=int, default=1)
    batchsize.__doc__ = """
    The read-writable integer attribute *batchsize* specifies the default number
    of rows which is to be fetched by the method :meth:`.fetch`. It defaults
    to 1, meaning to fetch a single row at a time. Whether and which batchsize
    to use depends on the application and should be considered with care. The
    batchsize can also be adapted during the lifetime of the cursor, which
    allows dynamic performance optimization.
    """

    rowcount: property = attrib.Virtual(fget='_get_rowcount')
    rowcount.__doc__ = """
    The read-only integer attribute *rowcount* specifies the current number of
    rows within the cursor.
    """

    #
    # Protected Attributes
    #

    _mode: property = attrib.MetaData(classinfo=int, default=_default_mode)
    _index: property = attrib.MetaData(classinfo=list, inherit=True)
    _getter: property = attrib.Temporary(classinfo=CallableClasses)
    _filter: property = attrib.Temporary(classinfo=CallableClasses)
    _mapper: property = attrib.Temporary(classinfo=CallableClasses)
    _buffer: property = attrib.Temporary(classinfo=list, default=[])

    #
    # Events
    #

    def __init__(self,
                 index: OptIntList = None,
                 getter: OptCallable = None,
                 predicate: OptCallable = None,
                 mapper: OptCallable = None,
                 batchsize: OptInt = None,
                 mode: OptStr = None,
                 parent: Optional[attrib.Container] = None) -> None:
        """Initialize Cursor."""
        super().__init__(parent=parent)  # Parent is set by container
        if index is not None:
            self._index = index
        self._getter = getter
        self._filter = predicate
        self._mapper = mapper
        if mode:
            self._set_mode(mode)
        if batchsize:
            self.batchsize = batchsize
        # The index has to be copied before the buffer is created, since the
        # buffer is built by traversing the index
        if self._mode & CUR_MODE_INDEXED:
            self._create_index()
        if self._mode & CUR_MODE_BUFFERED:
            self._create_buffer()
        self.reset()  # Initialize iterator

    def __iter__(self) -> Iterator:
        """Reset the cursor position and return the cursor as iterator."""
        self.reset()
        return self

    def __next__(self) -> RowLike:
        """Return next row (see :meth:`.next`)."""
        return self.next()

    def __len__(self) -> int:
        """Return the value of the attribute :attr:`.rowcount`."""
        return self.rowcount

    #
    # Public Methods
    #

    def reset(self) -> None:
        """Reset cursor position before the first record."""
        if self._mode & CUR_MODE_BUFFERED:  # Iterate over fixed result set
            self._iter_buffer = iter(self._buffer)
            return
        # Indexed cursors iterate over the fixed index.
        # TODO: handle case for dynamic cursors by self._iter_table. Until
        # then dynamic cursors iterate over the index as well.
        self._iter_index = iter(self._index)

    def next(self) -> RowLike:
        """Return next row that matches the given filter.

        Raises:
            StopIteration: If no further row is available.

        """
        if self._mode & CUR_MODE_BUFFERED:
            return self._get_next_from_buffer()
        # TODO: For dynamic cursors implement _get_next_from_dynamic_index().
        # Until then dynamic cursors are treated like indexed cursors.
        return self._get_next_from_fixed_index()

    def fetch(self, size: OptInt = None) -> RowLikeList:
        """Fetch rows from the result set.

        Args:
            size: Integer value, which represents the number of rows, which is
                fetched from the result set. For the given size 0 all remaining
                rows from the result set are fetched. By default the number of
                rows is given by the cursors batchsize.

        Returns:
            List of fetched rows. The list is shorter than the requested size
            if the result set is exhausted.

        Raises:
            CursorModeError: If all rows are requested from a random cursor.

        """
        if size is None:
            size = self.batchsize
        if self._mode & CUR_MODE_RANDOM and size <= 0:
            raise CursorModeError(self.mode, 'fetching all rows')
        results: RowLikeList = []
        # A non-positive size means: fetch until the result set is exhausted
        while size <= 0 or len(results) < size:
            try:
                results.append(self.next())
            except StopIteration:
                break
        return results

    #
    # Protected Methods
    #

    def _get_next_from_fixed_index(self) -> RowLike:
        """Return the next matching row with respect to the cursor index.

        Row IDs are taken from the index (sequentially, or randomly drawn for
        random cursors) until a row is found that passes the filter. If a
        mapper is given, it is applied to the row before it is returned.

        Raises:
            StopIteration: If the index is exhausted, or if the index of a
                random cursor is empty.

        """
        is_random = self._mode & CUR_MODE_RANDOM
        while True:
            if is_random:
                index = self._index
                if not index:  # Nothing to draw from
                    raise StopIteration
                # Draw a row ID from the index. The position within the index
                # is not a valid row ID as soon as the index has gaps.
                row_id = random.choice(index)
            else:
                row_id = next(self._iter_index)
            row = self._getter(row_id)
            # NOTE: A random cursor does not terminate if no row of the index
            # passes the filter
            if not self._filter or self._filter(row):
                break
        if self._mapper:
            return self._mapper(row)
        return row

    def _get_next_from_buffer(self) -> RowLike:
        """Return the next row from the buffered result set.

        Raises:
            StopIteration: If the buffer is exhausted, or if the buffer of a
                random cursor is empty.

        """
        if self._mode & CUR_MODE_RANDOM:
            if not self._buffer:  # Nothing to draw from
                raise StopIteration
            return random.choice(self._buffer)
        return next(self._iter_buffer)

    def _get_mode(self) -> str:
        """Return the cursor mode as space separated string."""
        mode = self._mode
        tokens = []
        # Add name of traversal mode (no token is added for forward-only)
        if mode & CUR_MODE_RANDOM:
            tokens.append('random')
        elif mode & CUR_MODE_SCROLLABLE:
            tokens.append('scrollable')
        # Add name of operation mode
        if mode & CUR_MODE_BUFFERED:
            tokens.append('static')
        elif mode & CUR_MODE_INDEXED:
            tokens.append('indexed')
        else:
            tokens.append('dynamic')
        return ' '.join(tokens)

    def _set_mode(self, name: str) -> None:
        """Set the cursor mode flags from a named string identifier.

        The name is searched for the substrings 'random', 'scrollable',
        'static' and 'indexed'. If no scrolling type is found, the cursor is
        forward-only, if no operation mode is found, the cursor is dynamic.

        """
        mode = 0
        name = name.strip(' ').lower()

        # Set traversal mode flags
        if 'random' in name:
            mode |= CUR_MODE_RANDOM
        elif 'scrollable' in name:
            mode |= CUR_MODE_SCROLLABLE

        # Set operation mode flags
        if 'static' in name:
            mode |= CUR_MODE_BUFFERED | CUR_MODE_INDEXED
        elif 'indexed' in name:
            mode |= CUR_MODE_INDEXED
        self._mode = mode

    def _get_rowcount(self) -> int:
        """Return the number of rows within the cursor.

        Raises:
            CursorModeError: If the cursor is a random cursor, or if the cursor
                is not buffered and uses a filter.

        """
        mode = self._mode
        if mode & CUR_MODE_RANDOM:
            raise CursorModeError(self.mode, 'counting rows')
        if mode & CUR_MODE_BUFFERED:
            return len(self._buffer)
        if self._filter:
            raise CursorModeError(self.mode, 'counting filtered rows')
        return len(self._index)

    def _create_index(self) -> None:
        """Replace the index by a copy of itself."""
        self._index = self._index.copy()

    def _create_buffer(self) -> None:
        """Fill the buffer with all rows of the result set."""
        # The helper cursor is created without an explicit mode and therefore
        # uses the default mode of the class
        cur = self.__class__(
            index=self._index,
            getter=self._getter,
            predicate=self._filter,
            mapper=self._mapper)
        self._buffer = cur.fetch(0)  # Fetch all from result set
Example #2
0
class Table(attrib.Container):
    """Table Class.

    A table keeps its committed rows in a storage list, pending changes in a
    diff list of the same length and the IDs of the currently visible rows in
    an index list.

    Args:
        columns: Optional column descriptors. If given, they are used to
            create the record class of the table. Each descriptor is either a
            column name or a tuple of the form (name, type) or
            (name, type, field), where field is a dataclass field or a
            dictionary of keyword arguments for :func:`dataclasses.field`.

    """

    #
    # Public Attributes
    #

    fields: property = attrib.Virtual(fget='_get_fields')
    colnames: property = attrib.Virtual(fget='_get_colnames')

    #
    # Protected Attributes
    #

    _store: property = attrib.Content(classinfo=list, default=[])
    _diff: property = attrib.Temporary(classinfo=list, default=[])
    _index: property = attrib.Temporary(classinfo=list, default=[])
    _iter_index: property = attrib.Temporary()
    _Record: property = attrib.Temporary(classinfo=type)

    #
    # Events
    #

    def __init__(self, columns: OptFieldLike = None) -> None:
        """Initialize Table and optionally create the table header."""
        super().__init__()
        if columns:
            self._create_header(columns)

    def __iter__(self) -> Iterator:
        """Restart the iteration over the table index."""
        self._iter_index = iter(self._index)
        return self

    def __next__(self) -> Record:
        """Return the next non-empty row with respect to the table index."""
        while True:
            row = self.get_row(next(self._iter_index))
            if row:
                return row

    def __len__(self) -> int:
        """Return the number of row IDs within the table index."""
        return len(self._index)

    #
    # Public Methods
    #

    def commit(self) -> None:
        """Apply changes to table."""
        # Delete / Update rows in storage table. Only entries of the storage
        # table are replaced, its size does not change within the loop.
        for rowid in range(len(self._store)):
            row = self.get_row(rowid)
            if not row:
                continue
            state = row.state
            if state & ROW_STATE_DELETE:
                self._store[rowid] = None
                try:
                    self._index.remove(rowid)
                except ValueError:
                    pass  # Row ID has already been removed from the index
            elif state & (ROW_STATE_CREATE | ROW_STATE_UPDATE):
                self._store[rowid] = self._diff[rowid]
                self._store[rowid].state = 0

        # Flush diff table
        self._diff = [None] * len(self._store)

    def rollback(self) -> None:
        """Revoke changes from table."""
        # Remove newly created rows from index and reset states of already
        # existing rows
        for rowid in range(len(self._store)):
            row = self.get_row(rowid)
            if not row:
                continue
            state = row.state
            if state & ROW_STATE_CREATE:
                try:
                    self._index.remove(rowid)
                except ValueError:
                    pass  # Row ID is not contained within the index
            else:
                self._store[rowid].state = 0

        # Flush diff table
        self._diff = [None] * len(self._store)

    def get_cursor(self,
                   predicate: OptCallable = None,
                   mapper: OptCallable = None,
                   mode: OptStr = None) -> Cursor:
        """Create a cursor on the table.

        Args:
            predicate: Optional callable, which is passed to the cursor as row
                filter.
            mapper: Optional callable, which is passed to the cursor as row
                mapper.
            mode: Optional named string identifier of the cursor mode.

        Returns:
            New cursor, which uses :meth:`.get_row` as getter and the table as
            parent.

        """
        return Cursor(getter=self.get_row,
                      predicate=predicate,
                      mapper=mapper,
                      mode=mode,
                      parent=self)

    def get_row(self, rowid: int) -> OptRow:
        """Return the row with the given row ID.

        The entry of the diff table is preferred. If it is empty, the entry of
        the storage table is returned, which may be None.

        """
        return self._diff[rowid] or self._store[rowid]

    def get_rows(self,
                 predicate: OptCallable = None,
                 mode: OptStr = None) -> Cursor:
        """Create a cursor on the rows that satisfy the given predicate."""
        return self.get_cursor(predicate=predicate, mode=mode)

    def append_row(self, *args: Any, **kwds: Any) -> None:
        """Create a new row from the given arguments and append it.

        The new row is appended to the diff table, while the storage table
        receives an empty placeholder, such that both keep the same size.

        """
        row = self._create_row(*args, **kwds)
        self._store.append(None)
        self._diff.append(row)
        self._append_row_id(row.id)

    def delete_row(self, rowid: int) -> None:
        """Delete the row with the given row ID.

        Raises:
            RowLookupError: If no row is found for the given row ID.

        """
        row = self.get_row(rowid)
        if not row:
            raise RowLookupError(rowid)
        row.delete()

    def delete_rows(self, predicate: OptCallable = None) -> None:
        """Delete all rows that satisfy the given predicate."""
        for row in self.get_rows(predicate):
            row.delete()

    def update_row(self, rowid: int, **kwds: Any) -> None:
        """Update the row with the given row ID by the given keywords.

        Raises:
            RowLookupError: If no row is found for the given row ID.

        """
        row = self.get_row(rowid)
        if not row:
            raise RowLookupError(rowid)
        row.update(**kwds)

    def update_rows(self, predicate: OptCallable = None, **kwds: Any) -> None:
        """Update all rows that satisfy the given predicate."""
        for row in self.get_rows(predicate):
            row.update(**kwds)

    def select(self,
               columns: OptStrTuple = None,
               predicate: OptCallable = None,
               fmt: type = tuple,
               mode: OptStr = None) -> RowLikeList:
        """Select columns of the rows that satisfy the given predicate.

        Args:
            columns: Optional tuple of column names. By default all columns of
                the table are selected.
            predicate: Optional callable, which is used as row filter.
            fmt: Format of the returned rows, given as `tuple` or `dict`.
            mode: Optional named string identifier of the cursor mode.

        Returns:
            Cursor on the table, which maps the matching rows to the given
            format.

        """
        if not columns:
            mapper = self._get_mapper(self.colnames, fmt=fmt)
        else:
            check.is_subset("'columns'", set(columns), "table column names",
                            set(self.colnames))
            mapper = self._get_mapper(columns, fmt=fmt)
        return self.get_cursor(  # type: ignore
            predicate=predicate, mapper=mapper, mode=mode)

    def pack(self) -> None:
        """Remove empty records from storage table and rebuild table index."""
        # Commit pending changes
        self.commit()

        # Remove empty records. An explicit identity check is used, since
        # None.__ne__ returns NotImplemented for any object other than None
        # and the truth value of NotImplemented is deprecated.
        self._store = [row for row in self._store if row is not None]

        # Rebuild table index
        self._index = list(range(len(self._store)))
        for rowid in self._index:
            self._store[rowid].id = rowid

        # Rebuild diff table
        self._diff = [None] * len(self._store)

    #
    # Protected Methods
    #

    def _get_mapper(self, columns: StrTuple, fmt: type = tuple) -> Callable:
        """Return a function that maps a row to the given format.

        Args:
            columns: Tuple of column names, which are read from the row.
            fmt: Format of the mapped rows, given as `tuple` or `dict`.

        Raises:
            TableError: If the format is neither `tuple` nor `dict`.

        """
        def mapper_tuple(row: Record) -> tuple:
            return tuple(getattr(row, col) for col in columns)

        def mapper_dict(row: Record) -> dict:
            return {col: getattr(row, col) for col in columns}

        if fmt is tuple:
            return mapper_tuple
        if fmt is dict:
            return mapper_dict
        raise TableError("'fmt' requires to be tuple or dict")

    def _get_fields(self) -> FieldTuple:
        """Return the dataclass fields of the record class."""
        return dataclasses.fields(self._Record)

    def _get_colnames(self) -> StrTuple:
        """Return the names of the dataclass fields of the record class."""
        return tuple(field.name for field in self.fields)

    def _create_row_id(self) -> int:
        """Return the next row ID, which is the size of the storage table."""
        return len(self._store)

    def _append_row_id(self, rowid: int) -> None:
        """Append the given row ID to the table index."""
        self._index.append(rowid)

    def _remove_row_id(self, rowid: int) -> None:
        """Remove the given row ID from the table index."""
        self._index.remove(rowid)

    def _update_row_diff(self, rowid: int, **kwds: Any) -> None:
        """Store an updated copy of a row within the diff table.

        Raises:
            RowLookupError: If no row is found for the given row ID.

        """
        row = self.get_row(rowid)
        if not row:
            raise RowLookupError(rowid)
        upd = dataclasses.replace(row, **kwds)
        # Row ID and state are set explicitly on the copy
        upd.id = rowid
        upd.state = row.state
        self._diff[rowid] = upd

    def _remove_row_diff(self, rowid: int) -> None:
        """Clear the entry of the diff table for the given row ID."""
        self._diff[rowid] = None

    def _create_row(self, *args: Any, **kwds: Any) -> Record:
        """Create a new instance of the record class."""
        return self._Record(*args, **kwds)  # pylint: disable=E0110

    def _create_header(self, columns: FieldLike) -> None:
        """Create the record class and reset the table.

        Args:
            columns: Column descriptors. Each descriptor is either a column
                name or a tuple of the form (name, type) or
                (name, type, field), where field is a dataclass field or a
                dictionary of keyword arguments for :func:`dataclasses.field`.

        """
        # Check types of fieldlike column descriptors and convert them to field
        # descriptors, that are accepted by dataclasses.make_dataclass()
        fields: list = []
        for each in columns:
            if isinstance(each, str):
                fields.append(each)
                continue
            check.has_type(f"field {each}", each, tuple)
            check.has_size(f"field {each}", each, min_size=2, max_size=3)
            check.has_type("first arg", each[0], str)
            check.has_type("second arg", each[1], type)
            if len(each) == 2:
                fields.append(each)
                continue
            check.has_type("third arg", each[2], (Field, dict))
            if isinstance(each[2], Field):
                fields.append(each)
                continue
            # Convert dictionary of keyword arguments to dataclass field
            field = dataclasses.field(**each[2])
            fields.append(each[:2] + (field, ))

        # Create record namespace with table hooks
        namespace = {
            '_create_row_id': self._create_row_id,
            '_delete_hook': self._remove_row_id,
            '_restore_hook': self._append_row_id,
            '_update_hook': self._update_row_diff,
            '_revoke_hook': self._remove_row_diff
        }

        # Create Record dataclass and constructor
        self._Record = dataclasses.make_dataclass('Row',
                                                  fields,
                                                  bases=(Record, ),
                                                  namespace=namespace)

        # Create slots
        # NOTE(review): '__slots__' is only evaluated when a class is created.
        # Assigning it afterwards does not change the layout of the instances.
        self._Record.__slots__ = ['id', 'state'] + [
            field.name for field in dataclasses.fields(self._Record)
        ]

        # Reset store, diff and index
        self._store = []
        self._diff = []
        self._index = []
Example #3
0
class Cursor(attrib.Container, ABC):
    """Database Cursor.

    These objects represent a database cursor, which is used to manage the
    context of a fetch operation. Cursors created from the same connection are
    not isolated, i.e., any changes done to the database by a cursor are
    immediately visible by the other cursors. Cursors created from different
    connections can or can not be isolated, depending on how the transaction
    support is implemented (see also the connection's .rollback() and .commit()
    methods).

    """

    #
    # Cursor attributes
    #

    arraysize: property = attrib.MetaData(classinfo=int, default=1)
    arraysize.__doc__ = """
    This read/write attribute specifies the number of rows to fetch at a time
    with `fetchmany`. It defaults to 1 meaning to fetch a single row at a time.
    Implementations must observe this value with respect to the `fetchmany`
    method, but are free to interact with the database a single row at a time.
    It may also be used in the implementation of `executemany`.
    """

    description: property = attrib.Virtual(fget='_get_description')
    description.__doc__ = """
    Sequence of 7-item sequences containing information about one result column:
    name, type_code, display_size, internal_size, precision, scale, null_ok
    The first two items (name and type_code) are mandatory, the other five are
    optional and are set to None if no meaningful values can be provided.
    This attribute will be None for operations that do not return rows or if the
    cursor has not had an operation invoked via the .execute*() method yet.
    """

    @abstractmethod
    def _get_description(self) -> list:
        """Return the value of the attribute `description`."""

    rowcount: property = attrib.Virtual(fget='_get_rowcount')
    # The documentation belongs to 'rowcount'. Assigning it to 'description'
    # would overwrite the documentation of that attribute.
    rowcount.__doc__ = """
    This read-only attribute specifies the number of rows that the last
    execute*() produced (for DQL statements like SELECT) or affected (for DML
    statements like UPDATE or INSERT). The attribute is -1 in case no
    .execute*() has been performed on the cursor or the rowcount of the last
    operation cannot be determined by the interface.
    """

    @abstractmethod
    def _get_rowcount(self) -> int:
        """Return the value of the attribute `rowcount`."""

    #
    # Cursor Methods
    #

    @abstractmethod
    def callproc(self, procname: str, *args: Any, **kwds: Any) -> Any:
        """Call stored database procedure.

        Call a stored database procedure with the given name. The sequence of
        parameters must contain one entry for each argument, that the procedure
        expects. The result of the call is returned as modified copy of the
        input sequence. Input parameters are left untouched, output and
        input/output parameters replaced with possibly new values.

        The procedure may also provide a result set as output. This must then be
        made available through the standard .fetch*() methods.

        If the database does not support the functionality required by the
        method, the interface should throw an exception in case the method is
        used.
        """

    @abstractmethod
    def close(self) -> None:
        """Close the cursor now (rather than whenever __del__ is called).

        The cursor will be unusable from this point forward; an Error (or
        subclass) exception will be raised if any operation is attempted with
        the cursor.
        """

    @abstractmethod
    def execute(self, operation: str, *args: Any) -> Any:
        """Prepare and execute a database operation (query or command).

        Parameters may be provided as sequence or mapping and will be bound to
        variables in the operation. Variables are specified in a
        database-specific notation, which is identified by the module global
        `paramstyle`.

        A reference to the operation will be retained by the cursor. If the same
        operation object is passed in again, then the cursor can optimize its
        behavior. This is most effective for algorithms where the same operation
        is used, but different parameters are bound to it.

        For maximum efficiency when reusing an operation, it is best to use the
        `setinputsizes` method to specify the parameter types and sizes ahead
        of time. It is legal for a parameter to not match the predefined
        information; the implementation should compensate, possibly with a loss
        of efficiency.

        The parameters may also be specified as list of tuples to e.g. insert
        multiple rows in a single operation, but this kind of usage is
        deprecated: .executemany() should be used instead.
        """

    @abstractmethod
    def executemany(self, operation: str, seq_of_parameters: list) -> Any:
        """Prepare and execute database operation for multiple parameters.

        Prepare a database operation (query or command) and then execute it
        against all parameter sequences or mappings found in the sequence
        *seq_of_parameters*.

        Modules are free to implement this method using multiple calls to the
        `execute` method or by using array operations to have the database
        process the sequence as a whole in one call.

        Use of this method for an operation which produces one or more result
        sets constitutes undefined behavior, and the implementation is permitted
        (but not required) to raise an exception when it detects that a result
        set has been created by an invocation of the operation.
        """

    @abstractmethod
    def fetchone(self) -> OptList:
        """Fetch the next row of a query result.

        Fetch the next row of a query result set, returning a single sequence,
        or None when no more data is available.

        An Error (or subclass) exception is raised if the previous call to
        `execute` did not produce any result set or no call was issued yet.
        """

    @abstractmethod
    def fetchmany(self, size: OptInt = None) -> list:
        """Fetch the next set of rows of a query result.

        Fetch the next set of rows of a query result, returning a sequence of
        sequences (e.g. a list of tuples). An empty sequence is returned when no
        more rows are available.

        The number of rows to fetch per call is specified by the parameter. If
        it is not given, the cursor's arraysize determines the number of rows to
        be fetched. The method should try to fetch as many rows as indicated by
        the size parameter. If this is not possible due to the specified number
        of rows not being available, fewer rows may be returned.

        An Error (or subclass) exception is raised if the previous call to
        `execute` did not produce any result set or no call was issued yet.

        Note there are performance considerations involved with the size
        parameter. For optimal performance, it is usually best to use the
        `arraysize` attribute. If the size parameter is used, then it is best
        for it to retain the same value from one `fetchmany` call to the next.
        """

    @abstractmethod
    def fetchall(self) -> list:
        """Fetch all remaining rows of a query result.

        Fetch all remaining rows of a query result, returning them as a sequence
        of sequences (e.g. a list of tuples). Note that the cursor's arraysize
        attribute can affect the performance of this operation.

        An Error (or subclass) exception is raised if the previous call to
        `execute` did not produce any result set or no call was issued yet.
        """

    @abstractmethod
    def nextset(self) -> OptBool:
        """Skip cursor to the next available set (if supported).

        This method will make the cursor skip to the next available set,
        discarding any remaining rows from the current set. If there are no more
        sets, the method returns None. Otherwise, it returns a true value and
        subsequent calls to the `fetch*` methods will return rows from the next
        result set.

        An Error (or subclass) exception is raised if the previous call to an
        `execute*` method did not produce any result set or no call was issued
        yet.

        If the database does not support the functionality required by the
        method, the interface should throw an exception in case the method is
        used.
        """

    @abstractmethod
    def setinputsizes(self, sizes: list) -> None:
        """Set input sizes for database operations (query or command).

        This can be used before a call to `execute*` to predefine memory areas
        for the operation's parameters. *sizes* is specified as a sequence with
        one item for each input parameter. The item should be a type object that
        corresponds to the input that will be used, or it should be an integer
        specifying the maximum length of a string parameter. If the item is
        None, then no predefined memory area will be reserved for that column
        (this is useful to avoid predefined areas for large inputs).

        This method would be used before the `execute*` method is invoked.
        Implementations are free to have this method do nothing and users are
        free to not use it.
        """

    @abstractmethod
    def setoutputsize(self, size: int, column: OptInt = None) -> None:
        """Set a column buffer size for fetches of large columns.

        The column is specified as an index into the result sequence. Not
        specifying the column will set the default size for all large columns in
        the cursor.

        This method would be used before an `execute*` method is invoked.
        Implementations are free to have this method do nothing and users are
        free to not use it.
        """
Example #4
0
class Logger(attrib.Container):
    """Logger class.

    Args:
        name: String identifier of Logger, given as a period-separated
            hierarchical value like 'foo.bar.baz'. The name of a Logger also
            identifies respective parents and children by the name hierarchy,
            which equals the Python package hierarchy.
        file: String or :term:`path-like object` that identifies a valid
            filename in the directory structure of the operating system. If they
            do not exist, the parent directories of the file are created. If no
            file is given, a default logfile within the applications
            *user-log-dir* is created. If the logfile can not be created a
            temporary logfile in the systems *temp* folder is created as a
            fallback.
        level: Integer value or string, which describes the minimum required
            severity of events, to be logged. Ordered by ascending severity, the
            allowed level names are: 'DEBUG', 'INFO', 'WARNING', 'ERROR' and
            'CRITICAL'. The respectively corresponding level numbers are 10, 20,
            30, 40 and 50. The default level is 'INFO'.

    """

    #
    # Protected Class Variables
    #

    _level_names: ClassVar[StrList] = [
        'NOTSET', 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
    _default_name: ClassVar[str] = env.get_var('name') or __name__
    _default_file: ClassVar[Path] = Path(
        env.get_dir('user_log_dir'), _default_name + '.log')
    _default_level: ClassVar[StrOrInt] = logging.INFO

    #
    # Public Attributes
    #

    logger: property = attrib.Virtual(
        fget='_get_logger', fset='_set_logger', classinfo=logging.Logger)

    name: property = attrib.Virtual(
        fget='_get_name', fset='_set_name', classinfo=str)
    name.__doc__ = """
    String identifier of Logger, given as a period-separated hierarchical value
    like 'foo.bar.baz'. The name of a Logger also identifies respective parents
    and children by the name hierarchy, which equals the Python package
    hierarchy.
    """

    file: property = attrib.Virtual(
        fget='_get_file', fset='_set_file', classinfo=(str, Path))
    file.__doc__ = """
    String or :term:`path-like object` that identifies a valid filename in the
    directory structure of the operating system. If they do not exist, the
    parent directories of the file are created. If no file is given, a default
    logfile within the applications *user-log-dir* is created. If the logfile
    can not be created a temporary logfile in the systems *temp* folder is
    created as a fallback.
    """

    level: property = attrib.Virtual(
        fget='_get_level', fset='_set_level', classinfo=(str, int))
    level.__doc__ = """
    Integer value or string, which describes the minimum required severity of
    events, to be logged. Ordered by ascending severity, the allowed level names
    are: 'DEBUG', 'INFO', 'WARNING', 'ERROR' and 'CRITICAL'. The respectively
    corresponding level numbers are 10, 20, 30, 40 and 50. The default level is
    'INFO'.
    """

    #
    # Protected Attributes
    #

    _logger: property = attrib.Temporary(classinfo=logging.Logger)

    #
    # Events
    #

    def __init__(self,
            name: str = _default_name,
            file: PathLike = _default_file,
            level: StrOrInt = _default_level) -> None:
        """Initialize instance."""
        # Initialize Attribute Container
        super().__init__()

        # Start logging
        self._start_logging(name=name, file=file, level=level)

    def __del__(self) -> None:
        """Run destructor for instance."""
        self._stop_logging()

    def __str__(self) -> str:
        """Represent instance as string."""
        return str(self.logger)

    #
    # Public Methods
    #

    def log(self, level: StrOrInt, msg: str, *args: Any, **kwds: Any) -> None:
        """Log event.

        Args:
            level: Integer value or string, which describes the severity of the
                event. In the order of ascending severity, the accepted level
                names are: 'DEBUG', 'INFO', 'WARNING', 'ERROR' and 'CRITICAL'.
                The respectively corresponding level numbers are 10, 20, 30, 40
                and 50.
            msg: Message :ref:`format string <formatstrings>`, containing
                literal text or braces delimited replacement fields. Each
                replacement field contains either the numeric index of a
                positional argument, given by *args, or the name of a keyword
                argument, given by the keyword *extra*.
            *args: Arguments, which can be used by the message format string.
            **kwds: Additional Keywords, used by :meth:`logging.Logger.log`.

        Raises:
            ValueError: If the level is given as a string, that is not a valid
                level name.

        """
        if isinstance(level, str):
            level = self._get_level_number(level)
        self.logger.log(level, msg, *args, **kwds)

    #
    # Protected Methods
    #

    def _start_logging(
            self, name: str = _default_name, file: PathLike = _default_file,
            level: StrOrInt = _default_level) -> bool:
        """Bind a logger with the given name, level and logfile.

        Returns:
            True if a file handler for an existing logfile could be attached.
            Otherwise logging is stopped again and False is returned.

        """
        logger = logging.getLogger(name)  # Create new logger instance
        self._set_logger(logger)  # Bind new logger instance to this object
        self._set_level(level)  # Set log level
        self._set_file(file)  # Add file handler for logfile

        # If no file handler could be attached, the attribute 'file' is None.
        # This case has to be treated as an error as well, instead of raising
        # an AttributeError by calling 'is_file' on None.
        logfile = self.file
        if logfile is None or not logfile.is_file():
            self._stop_logging()
            return False
        return True

    def _stop_logging(self) -> None:
        """Close the handlers of the bound logger and unbind the logger."""
        # Access the protected attribute directly: The public attribute
        # 'logger' would automatically (re-)start logging, if no logger is
        # bound, and thereby create a logfile only to close it again.
        logger = self._logger
        if logger:
            for handler in logger.handlers:  # Close file handlers
                with contextlib.suppress(AttributeError):
                    handler.close()
        self._logger = None

    def _get_logger(self, auto_start: bool = True) -> logging.Logger:
        """Return the bound logger.

        Args:
            auto_start: If no logger is bound, this flag determines, if logging
                is started with the default arguments or if a NotExistsError
                is raised.

        """
        if not self._logger:
            if auto_start:
                self._start_logging()
            else:
                raise NotExistsError("logging has not been started")
        return self._logger

    def _set_logger(
            self, logger: logging.Logger, auto_stop: bool = True) -> None:
        """Bind the given logger.

        Args:
            logger: Logger instance to bind.
            auto_stop: If a logger is already bound, this flag determines, if
                the running logging is stopped beforehand or if an ExistsError
                is raised.

        """
        if self._logger:
            if auto_stop:
                self._stop_logging()
            else:
                raise ExistsError("logging has already been started")
        self._logger = logger

    def _get_name(self) -> str:
        """Return the name of the bound logger."""
        return self.logger.name

    def _set_name(self, name: str) -> None:
        """Set the name of the bound logger."""
        self.logger.name = name

    def _get_file(self) -> OptPath:
        """Return the path of the first handler that is bound to a file.

        Returns:
            Path given by the attribute 'baseFilename' of the first handler
            that provides this attribute, or None if there is no such handler.

        """
        for handler in self.logger.handlers:
            # Handlers without a base filename are skipped
            with contextlib.suppress(AttributeError):
                return Path(handler.baseFilename)
        return None

    def _set_file(self, filepath: PathLike = _default_file) -> None:
        """Replace the handlers of the logger by a rotating file handler.

        If no valid logfile can be located, a warning is issued and the
        handlers of the logger are left unchanged.

        """
        # Imported locally, since the submodule 'logging.handlers' is not
        # loaded by importing the package 'logging'
        from logging.handlers import TimedRotatingFileHandler

        # Locate valid logfile
        logfile = self._locate_logfile(filepath)
        if not isinstance(logfile, Path):
            warnings.warn("could not set logfile")
            return None

        # Close and remove all previous file handlers. The handlers are
        # collected beforehand, since the handler list is modified by removing
        if self.logger.hasHandlers():
            remove = [h for h in self.logger.handlers if hasattr(h, 'close')]
            for handler in remove:
                handler.close()
                self.logger.removeHandler(handler)

        # Add file handler for logfile
        handler = TimedRotatingFileHandler(
            str(logfile), when="d", interval=1, backupCount=5)
        formatter = logging.Formatter(
            fmt="%(asctime)s %(levelname)s %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S")
        handler.setFormatter(formatter)
        self.logger.addHandler(handler)
        return None

    def _get_level(self, as_name: bool = True) -> StrOrInt:
        """Return the effective level of the logger as name or as number."""
        level = self.logger.getEffectiveLevel()
        if not as_name:
            return level
        return self._get_level_name(level)

    def _get_level_name(self, level: int) -> str:
        """Return the level name for a level number.

        The level number is clipped to the range 0 to 50 and rounded down to
        the next multiple of 10, before it is mapped to a name.

        """
        names = self._level_names
        return names[int(max(min(level, 50), 0) / 10)]

    def _get_level_number(self, name: str) -> int:
        """Return the level number for a (case insensitive) level name.

        Raises:
            ValueError: If the name is not a valid level name.

        """
        name = name.upper()
        names = self._level_names
        if name not in names:
            allowed = ', '.join(names[1:])
            raise ValueError(
                f"{name} is not a valid level name, "
                f"allowed values are: {allowed}")
        return names.index(name) * 10

    def _set_level(self, level: StrOrInt) -> None:
        """Set the level of the logger from a level name or a level number."""
        if isinstance(level, str):
            level = level.upper()
        self.logger.setLevel(level)

    def _locate_logfile(
            self, filepath: PathLike = _default_file) -> OptPath:
        """Locate a logfile that can be touched.

        Returns:
            The expanded filepath, if it can be touched. Otherwise the path of
            a temporary logfile, if that one can be touched, or else None.

        """
        # Get valid logfile from filepath
        if isinstance(filepath, (str, Path)):
            logfile = env.expand(filepath)
            if env.touch(logfile):
                return logfile

        # Get temporary logfile
        logfile = Path(tempfile.NamedTemporaryFile().name + '.log')
        if env.touch(logfile):
            warnings.warn(
                f"logfile '{filepath}' is not valid: "
                f"using temporary logfile '{logfile}'")
            return logfile
        return None
Example #5
0
class CSVFile(attrib.Container):
    """CSV-File Class.

    Args:
        file: String or :term:`path-like object`, which points to a readable
            CSV-file in the directory structure of the system, or a :term:`file
            object` in reading mode.
        mode: Currently not evaluated by the constructor.
        comment: Optional string containing the comment of the CSV-file. By
            default the comment is read from the CSV-file.
        delim: String containing CSV-delimiter. By default the CSV-delimiter is
            detected from the CSV-file.
        csvformat: Optional CSV-Header format. By default the format is
            detected from the first two content lines of the CSV-file.
        labels: List of column labels in CSV-file. By default the list of column
            labels is taken from the first content line in the CSV-file.
        usecols: Currently not evaluated by the constructor.
        namecol: Column ID of column, which contains the row annotation.
            By default the column is detected from the CSV-file.

    """

    #
    # Class Variables
    #

    _delim_candidates: ClassVar[StrList] = [',', '\t', ';', ' ', ':']
    """
    Optional list of strings containing delimiter candidates to search for.
    Default: [',', '\t', ';', ' ', ':']
    """

    _delim_mincount: ClassVar[int] = 3
    """
    Minimum number of lines used to detect CSV delimiter. Thereby only non
    comment and non empty lines are used.
    """

    _delim_maxcount: ClassVar[int] = 100
    """
    Maximum number of lines used to detect CSV delimiter. Thereby only non
    comment and non empty lines are used.
    """

    #
    # Public Attributes
    #

    comment: property = attrib.Virtual(fget='_get_comment')
    comment.__doc__ = """
    String containing the initial '#' lines of the CSV-file or an empty string,
    if no initial comment lines could be detected.
    """

    delim: property = attrib.Virtual(fget='_get_delim')
    delim.__doc__ = """
    Delimiter string of the CSV-file or None, if the delimiter could not be
    detected.
    """

    format: property = attrib.Virtual(fget='_get_format')
    format.__doc__ = """
    CSV-Header format. The following formats are supported:
        0: :RFC:`4180`:
            The column header equals the size of the rows.
        1: `R-Table`:
            The column header has a size that is reduced by one, compared to the
            rows. This smaller number of entries follows by the convention, that
            in R the CSV export of tables adds an extra column with row names
            as the first column. The column name of this column is omitted
            within the header.
    """

    colnames: property = attrib.Virtual(fget='_get_colnames')
    colnames.__doc__ = """
    List of strings containing column names from first non comment, non empty
    line of CSV-file.
    """

    fields: property = attrib.Virtual(fget='_get_fields')
    fields.__doc__ = """
    List of pairs containing the column names and the estimated or given column
    types of the CSV-file.
    """

    rownames: property = attrib.Virtual(fget='_get_rownames')
    rownames.__doc__ = """
    List of strings containing row names from column with id given by namecol or
    None, if namecol is not given.
    """

    namecol: property = attrib.Virtual(fget='_get_namecol')
    namecol.__doc__ = """
    Index of the column of a CSV-file that contains the row names. The value
    None is used for CSV-files that do not contain row names.
    """

    #
    # Protected Attributes
    #

    _file: property = attrib.Content(classinfo=TextFileClasses)
    _comment: property = attrib.MetaData(classinfo=str, default=None)
    _delim: property = attrib.MetaData(classinfo=str, default=None)
    # NOTE(review): '_format' is declared here with classinfo 'str', but the
    # class only reads and writes the undeclared attribute '_csvformat', which
    # receives an optional integer. Presumably both are meant to be the same
    # attribute -- confirm against the semantics of 'attrib.MetaData'.
    _format: property = attrib.MetaData(classinfo=str, default=None)
    _colnames: property = attrib.MetaData(classinfo=list, default=None)
    _rownames: property = attrib.MetaData(classinfo=list, default=None)
    _namecol: property = attrib.MetaData(classinfo=int, default=None)

    #
    # Events
    #

    def __init__(self,
                 file: FileRef,
                 mode: str = '',
                 comment: OptStr = None,
                 delim: OptStr = None,
                 csvformat: OptInt = None,
                 labels: OptStrList = None,
                 usecols: OptIntTuple = None,
                 namecol: OptInt = None) -> None:
        """Initialize instance attributes."""
        super().__init__()
        self._file = file
        self._comment = comment
        self._delim = delim
        self._csvformat = csvformat
        self._colnames = labels
        self._namecol = namecol

    #
    # Public Methods
    #

    def select(self, columns: OptStrTuple = None) -> OptNpArray:
        """Load numpy ndarray from CSV-file.

        Args:
            columns: Optional tuple of column names, which are to be loaded
                from the CSV-file. By default all columns are loaded.

        Returns:
            :class:`numpy.ndarray` containing data from CSV-file, or None if
            the data could not be imported.

        """
        # Check type of 'columns'
        check.has_opt_type("'columns'", columns, tuple)

        # Get column names and formats. If a column with row names exists, it
        # is imported as the first column with the name 'label'
        usecols = self._get_usecols(columns)
        colnames = self._get_colnames()
        names = tuple(colnames[colid] for colid in usecols)
        lblcol = self._get_namecol()
        if lblcol is None:
            formats = tuple(['<f8'] * len(usecols))
        elif lblcol not in usecols:
            formats = tuple(['<U12'] + ['<f8'] * len(usecols))
            names = ('label', ) + names
            usecols = (lblcol, ) + usecols
        else:
            lbllbl = colnames[lblcol]
            formats = tuple(['<U12'] + ['<f8'] * (len(usecols) - 1))
            names = tuple(['label'] + [l for l in names if l != lbllbl])
            usecols = tuple([lblcol] + [c for c in usecols if c != lblcol])

        # Import data from CSV-file as numpy array
        with textfile.openx(self._file, mode='r') as fh:
            return np.loadtxt(fh,
                              skiprows=self._get_skiprows(),
                              delimiter=self._get_delim(),
                              usecols=usecols,
                              dtype={
                                  'names': names,
                                  'formats': formats
                              })

    @contextmanager
    def open(self,
             mode: str = '',
             columns: OptStrTuple = None) -> IterCSVIOBase:
        """Open CSV-file in reading or writing mode.

        Args:
            mode: String, which characters specify the mode in which the file is
                to be opened. The default mode is reading mode. Supported
                characters are:
                'r': Reading mode (default)
                'w': Writing mode
            columns: Optional tuple of column names, which are to be read. The
                argument is only used in reading mode. By default all columns
                are read.

        Yields:
            :term:`File object`, that supports the given mode.

        Raises:
            ValueError: If the mode contains both characters 'r' and 'w'.

        """
        # Open file handler
        fh: CSVIOBase
        if 'w' in mode:
            if 'r' in mode:
                raise ValueError(
                    "'mode' is not allowed to contain characters 'r' AND 'w'")
            fh = self._open_write()
        else:
            fh = self._open_read(columns)

        try:
            yield fh
        finally:
            fh.close()

    def read(self) -> List[tuple]:
        """Read all rows of the CSV-file into a list."""
        with self.open(mode='r') as fp:
            content = list(fp)
        return content

    def write(self, rows: List[Iterable]) -> None:
        """Write the given rows to the CSV-file."""
        with self.open(mode='w') as fp:
            for row in rows:
                fp.write_row(row)

    #
    # Protected Methods
    #

    def _get_comment(self) -> str:
        # Return comment if set manually
        if self._comment is not None:
            return self._comment
        return textfile.get_comment(self._file)

    def _get_delim(self) -> OptStr:
        # Return delimiter if set manually
        if self._delim is not None:
            return self._delim

        # Initialize CSV-Sniffer with default values
        sniffer = csv.Sniffer()
        sniffer.preferred = self._delim_candidates
        delim: OptStr = None

        # Detect delimiter
        with textfile.openx(self._file, mode='r') as fd:
            size, probe = 0, ''
            for line in fd:
                # Check termination criteria: Do not use more than the maximum
                # number of content lines
                if size >= self._delim_maxcount:
                    break
                # Check exclusion criteria
                strip = line.strip()
                if not strip or strip.startswith('#'):
                    continue
                # Increase probe size
                probe += line
                size += 1
                # Start detection, as soon as the probe comprises the minimum
                # number of content lines
                if size < self._delim_mincount:
                    continue
                # Try to detect delimiter from probe using csv.Sniffer
                try:
                    dialect = sniffer.sniff(probe)
                except csv.Error:
                    continue
                delim = dialect.delimiter
                break

        return delim

    def _get_format(self) -> OptInt:
        # Return value if set manually
        if self._csvformat is not None:
            return self._csvformat

        # Get first and second content lines (non comment, non empty) of
        # CSV-file
        lines = textfile.get_content(self._file, lines=2)
        if len(lines) != 2:
            return None

        # Without a delimiter the format can not be determined
        delim = self.delim
        if delim is None:
            return None

        # Determine column label format by comparing the number of delimiters
        # within the header and within the first row
        header_count = lines[0].count(delim)
        row_count = lines[1].count(delim)
        if header_count == row_count:
            return CSV_FORMAT_STANDARD
        if header_count == row_count - 1:
            return CSV_FORMAT_RTABLE
        return None

    def _get_colnames(self) -> StrList:
        # Return value if set manually
        if self._colnames is not None:
            return self._colnames

        # The file reference may be a file object, which provides the
        # attribute 'name', or a string or path, which does not
        filename = getattr(self._file, 'name', self._file)

        # Get first content line (non comment, non empty) of CSV-file
        lines = textfile.get_content(self._file, lines=1)
        if not lines:
            raise BadCSVFile(f"file {filename} is not valid")
        line = lines[0]

        # Get column names from first content line
        names = [col.strip('\"\'\n\r\t ') for col in line.split(self.delim)]

        # Format column labels
        csvformat = self.format
        if csvformat == CSV_FORMAT_STANDARD:
            return names
        if csvformat == CSV_FORMAT_RTABLE:
            return [''] + names
        raise BadCSVFile(f"file {filename} is not valid")

    def _get_fields(self) -> Fields:
        colnames = self.colnames
        delim = self.delim
        lines = textfile.get_content(self._file, lines=3)
        if len(lines) != 3:
            return []
        row1 = lines[1].split(delim)
        row2 = lines[2].split(delim)
        fields = []
        for colname, str1, str2 in zip(colnames, row1, row2):
            # An estimated type is only accepted, if the values of the first
            # two rows agree in it. Otherwise the column is treated as string
            type1 = literal.estimate(str1)
            if type1:
                type2 = literal.estimate(str2)
                if type2 == type1:
                    fields.append((colname, type1))
                    continue
            fields.append((colname, str))
        return fields

    def _get_rownames(self) -> OptList:
        # Get column, that contains the row names
        lblcol = self._get_namecol()
        if lblcol is None:
            return None
        lbllbl = self.colnames[lblcol]

        # Import CSV-file to NumPy ndarray
        with textfile.openx(self._file, mode='r') as fh:
            rownames = np.loadtxt(fh,
                                  skiprows=self._get_skiprows(),
                                  delimiter=self._get_delim(),
                                  usecols=(lblcol, ),
                                  dtype={
                                      'names': (lbllbl, ),
                                      'formats': ('<U12', )
                                  })
        return [name[0] for name in rownames.flat]

    def _get_skiprows(self) -> int:
        # Count how many 'comment' and 'blank' rows are to be skipped. The
        # initial value of 1 accounts for the header line
        skiprows = 1
        with textfile.openx(self._file, mode='r') as fd:
            for line in fd:
                strip = line.strip()
                if not strip or strip.startswith('#'):
                    skiprows += 1
                    continue
                break
        return skiprows

    def _get_namecol(self) -> OptInt:
        # Return value if set manually
        if self._namecol is not None:
            return self._namecol

        # In R-tables the first column is always used for record names
        if self.format == CSV_FORMAT_RTABLE:
            return 0

        # Get first and second content lines (non comment, non empty) of
        # CSV-file
        lines = textfile.get_content(self._file, lines=2)
        if len(lines) != 2:
            return None

        # Determine annotation column id from first value in the second line,
        # which can not be converted to a float
        values = [col.strip('\"\' \n') for col in lines[1].split(self.delim)]
        for cid, val in enumerate(values):
            try:
                float(val)
            except ValueError:
                return cid
        return None

    def _get_usecols(self, columns: OptStrTuple = None) -> IntTuple:
        # Get column labels
        colnames = self._get_colnames()
        if not columns:
            return tuple(range(len(colnames)))
        # Check if columns exist
        check.is_subset("'columns'", set(columns), 'colnames', set(colnames))
        return tuple(colnames.index(col) for col in columns)

    def _get_fmt_params(self) -> StrDict:
        return {'delimiter': self.delim}

    def _open_read(self, columns: OptStrTuple = None) -> CSVReader:
        usecols = self._get_usecols(columns)
        skiprows = self._get_skiprows()
        fields = self.fields
        usefields = [fields[colid] for colid in usecols]
        fmt = self._get_fmt_params()
        return CSVReader(self._file,
                         skiprows=skiprows,
                         usecols=usecols,
                         fields=usefields,
                         **fmt)

    def _open_write(self, columns: OptStrTuple = None) -> CSVWriter:
        fmt = self._get_fmt_params()
        return CSVWriter(self._file,
                         header=self.colnames,
                         comment=self.comment,
                         **fmt)
Example #6
0
class Session(attrib.Container):
    """Session."""

    #
    # Private Class Variables
    #

    # Path of the session configuration file
    _config_file_path: ClassVar[str] = '%user_config_dir%/nemoa.ini'
    # Sections, keys and value types of the session configuration file
    _config_file_struct: ClassVar[SecDict] = {
        'session': {
            'path': Path,
            'restore_on_startup': bool,
            'autosave_on_exit': bool
        }
    }
    # Configuration, that is used as the initial session configuration
    _default_config: ClassVar[StrDict] = {
        'path': None,
        'restore_on_startup': False,
        'autosave_on_exit': False
    }
    # Unexpanded default search paths for workspaces
    _default_paths: ClassVar[StrList] = [
        '%user_data_dir%', '%site_data_dir%', '%package_data_dir%'
    ]

    #
    # Public Attributes and Attribute Groups
    #

    dc: attrib.Group = attrib.create_group(attrib.DCGroup, remote=True)

    config: property = attrib.MetaData(classinfo=dict)
    config.__doc__ = """Session configuration."""

    paths: property = attrib.MetaData(classinfo=list)
    paths.__doc__ = """Search paths for workspaces."""

    files: property = attrib.Virtual(fget='_get_files')
    files.__doc__ = """Files within the current workspace."""

    folders: property = attrib.Virtual(fget='_get_folders')
    folders.__doc__ = """Folders within the current workspace."""

    path: property = attrib.Virtual(fget='_get_path')
    path.__doc__ = """Filepath of the current workspace."""

    logger: property = attrib.Temporary(classinfo=log.Logger)
    logger.__doc__ = """Logger instance."""

    #
    # Protected Attributes
    #

    _ws: property = attrib.Content(classinfo=wsfile.WsFile)

    #
    # Events
    #

    def __init__(self,
                 workspace: OptPathLike = None,
                 basedir: OptPathLike = None,
                 pwd: OptBytes = None) -> None:
        """Set up the default state, read the config and load a workspace.

        Args:
            workspace: Optional string or path of the workspace to load. If no
                workspace is given, the path from the session configuration is
                used, provided that 'restore_on_startup' is configured.
            basedir: Passed to :meth:`load`, if a workspace is loaded.
            pwd: Passed to :meth:`load`, if a workspace is loaded.

        """
        super().__init__()

        # Default state: configuration, empty workspace, search paths, logger
        self.config = self._default_config.copy()
        self._ws = wsfile.WsFile()
        self.paths = [env.expand(entry) for entry in self._default_paths]
        self.logger = log.get_instance()

        # The workspace is the parent of the session
        self.parent = self._ws

        # Read the session configuration, if the configuration file exists
        if env.is_file(self._config_file_path):
            self._load_config()

        # An explicitly given workspace takes precedence over the workspace
        # from the session configuration
        candidate = None
        if workspace and isinstance(workspace, (Path, str)):
            candidate = workspace
        elif self.config.get('restore_on_startup'):
            candidate = self.config.get('path')

        # Load the workspace, if a usable file reference has been found
        if isinstance(candidate, (Path, str)):
            self.load(workspace=Path(candidate), basedir=basedir, pwd=pwd)

    def __enter__(self) -> 'Session':
        """Return the session itself on entering a with statement."""
        return self

    def __exit__(self, cls: ExcType, obj: Exc, tb: Traceback) -> None:
        """Close the workspace and save the config on leaving a with block."""
        # The workspace is closed first, the configuration is saved afterwards
        self.close()
        self._save_config()

    def __del__(self) -> None:
        """Run the destructor of the instance, which takes no action."""

    #
    # Public Methods
    #

    def load(self,
             workspace: OptPathLike = None,
             basedir: OptPathLike = None,
             pwd: OptBytes = None) -> None:
        """Load a workspace from a file and bind the session to it.

        Args:
            workspace: Optional workspace reference, which together with
                basedir is used to locate the path of the workspace file.
            basedir: Optional base directory, which together with workspace is
                used to locate the path of the workspace file.
            pwd: Bytes representing password of workspace file.

        """
        # Resolve the path of the workspace file
        located = self._locate_path(workspace=workspace, basedir=basedir)

        # Replace the current workspace and use the new one as parent
        self._ws = wsfile.WsFile(filepath=located, pwd=pwd)
        self.parent = self._ws

    def save(self) -> None:
        """Save the current workspace to its current file."""
        workspace = self._ws
        workspace.save()

    def saveas(self, filepath: PathLike) -> None:
        """Save the current workspace to the given file.

        Args:
            filepath: String or :term:`path-like object`, that represents the
                name of the workspace file to write.

        """
        workspace = self._ws
        workspace.saveas(filepath)

    def close(self) -> None:
        """Close the current session.

        If 'autosave_on_exit' is configured and the workspace has been
        changed, the workspace is saved before it is closed.

        """
        autosave = self.config.get('autosave_on_exit')
        if autosave and self._ws.changed:
            self.save()

        # Only close workspaces, that provide a method for closing
        if hasattr(self._ws, 'close'):
            self._ws.close()

    def get_file_accessor(self, path: PathLike) -> FileAccessorBase:
        """Create a file accessor for a member of the current workspace.

        Args:
            path: String or :term:`path-like object`, that represents a
                workspace member. For reading, the path has to point to a
                valid workspace file, or a FileNotFoundError is raised. For
                writing, the path is by default treated as a file path. New
                directories can be written by setting the argument is_dir to
                True.

        Returns:
            :class:`File accessor <nemoa.types.FileAccessorBase>` to the
            workspace member.

        """
        accessor = self._ws.get_file_accessor(path)
        return accessor

    def open(self,
             filepath: PathLike,
             workspace: OptPathLike = None,
             basedir: OptPathLike = None,
             pwd: OptBytes = None,
             mode: str = '',
             encoding: OptStr = None,
             is_dir: bool = False) -> FileLike:
        """Open a file within the current or within a given workspace.

        Args:
            filepath: String or :term:`path-like object`, that represents a
                workspace member. In reading mode the path has to point to a
                valid workspace file, or a FileNotFoundError is raised. In
                writing mode the path by default is treated as a file path. New
                directories can be written by setting the argument is_dir to
                True.
            workspace: Optional workspace reference. If it is given, the file
                is opened within this workspace, instead of the current
                workspace of the session.
            basedir: Optional base directory, which together with workspace is
                used to locate the path of the workspace file.
            pwd: Optional password, which is used to open the given workspace.
            mode: String, which characters specify the mode in which the file is
                to be opened. The default mode is reading in text mode.
                Supported characters are:
                'r': Reading mode (default)
                'w': Writing mode
                'b': Binary mode
                't': Text mode (default)
            encoding: In binary mode the encoding has no effect. In text mode
                it specifies the name of the encoding, which in reading and
                writing mode respectively is used to decode the bytes of the
                stream into strings, and to encode strings into bytes. By
                default the preferred encoding of the operating system is used.
            is_dir: Boolean value which determines, if the path is to be treated
                as a directory or not. This information is required for writing
                directories to the workspace. The default behaviour is not to
                treat paths as directories.

        Returns:
            Context manager for :term:`file object` in reading or writing mode.

        """
        # Select the workspace, that contains the file
        if workspace:
            located = self._locate_path(workspace=workspace, basedir=basedir)
            container = wsfile.WsFile(filepath=located, pwd=pwd)
        else:
            container = self._ws

        return container.open(
            filepath, mode=mode, encoding=encoding, is_dir=is_dir)

    def append(self, source: PathLike, target: OptPathLike = None) -> bool:
        """Add a file from the file system to the current workspace.

        Args:
            source: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the system. If the file does
                not exist, a FileNotFoundError is raised. If the filepath points
                to a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a valid
                directory in the directory structure of the workspace. By
                default the root directory is used. If the directory does not
                exist, a FileNotFoundError is raised. If the target directory
                already contains a file, which name equals the filename of the
                source, a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file has been appended.

        """
        appended = self._ws.append(source, target=target)
        return appended

    def unlink(self, filepath: PathLike, ignore_missing: bool = True) -> bool:
        """Delete a file from the current workspace.

        Args:
            filepath: String or :term:`path-like object`, that points to a file
                in the directory structure of the workspace. If the filepath
                points to a directory, an IsADirectoryError is raised. For the
                case, that the file does not exist, the argument ignore_missing
                determines, if a FileNotFoundError is raised.
            ignore_missing: Boolean value which determines, if FileNotFoundError
                is raised, if the target file does not exist. The default
                behaviour is to ignore missing files.

        Returns:
            Boolean value, which is True if the given file was removed.

        """
        removed = self._ws.unlink(filepath, ignore_missing=ignore_missing)
        return removed

    def mkdir(self, dirpath: PathLike, ignore_exists: bool = False) -> bool:
        """Create a directory within the current workspace.

        The call is forwarded to the current workspace object of the session.

        Args:
            dirpath: String or :term:`path-like object`, that represents a valid
                directory name in the directory structure of the workspace. If
                the directory already exists, the argument ignore_exists
                decides, whether a FileExistsError is raised.
            ignore_exists: Boolean value which determines, if a FileExistsError
                is raised for an already existing target directory. By default
                an error is raised.

        Returns:
            Boolean value, which is True if the given directory was created.

        """
        workspace = self._ws
        return workspace.mkdir(dirpath, ignore_exists=ignore_exists)

    def rmdir(self,
              dirpath: PathLike,
              recursive: bool = False,
              ignore_missing: bool = False) -> bool:
        """Delete a directory from the current workspace.

        The call is forwarded to the current workspace object of the session.

        Args:
            dirpath: String or :term:`path-like object`, that points to a
                directory in the directory structure of the workspace. If the
                directory does not exist, the argument ignore_missing decides,
                whether a FileNotFoundError is raised.
            recursive: Boolean value which determines, if directories are
                removed recursively. If recursive is False, only empty
                directories can be removed. If recursive is True, all contained
                files and subdirectories are also removed. By default recursive
                is False.
            ignore_missing: Boolean value which determines, if a
                FileNotFoundError is raised for a missing target directory. By
                default an error is raised.

        Returns:
            Boolean value, which is True if the given directory was removed.

        """
        workspace = self._ws
        return workspace.rmdir(
            dirpath, recursive=recursive, ignore_missing=ignore_missing)

    def search(self, pattern: OptStr = None) -> StrList:
        """List files of the current workspace, that match a pattern.

        The call is forwarded to the current workspace object of the session.

        Args:
            pattern: Search pattern that contains Unix shell-style wildcards:
                '*': Matches arbitrary strings
                '?': Matches single characters
                [seq]: Matches any character in seq
                [!seq]: Matches any character not in seq
                By default a list of all files and directories is returned.

        Returns:
            List of files and directories in the directory structure of the
            workspace, that match the search pattern.

        """
        workspace = self._ws
        return workspace.search(pattern)

    def copy(self, source: PathLike, target: PathLike) -> bool:
        """Duplicate a file within the current workspace.

        The call is forwarded to the current workspace object of the session.

        Args:
            source: String or :term:`path-like object`, that points to a file in
                the directory structure of the workspace. If the file does not
                exist, a FileNotFoundError is raised. If the filepath points to
                a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a new
                filename or an existing directory in the directory structure of
                the workspace. If the target is a directory, the target file
                consists of the directory and the basename of the source file.
                If the target file already exists, a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file was copied.

        """
        workspace = self._ws
        return workspace.copy(source, target)

    def move(self, source: PathLike, target: PathLike) -> bool:
        """Relocate a file within the current workspace.

        The call is forwarded to the current workspace object of the session.

        Args:
            source: String or :term:`path-like object`, that points to a file in
                the directory structure of the workspace. If the file does not
                exist, a FileNotFoundError is raised. If the filepath points to
                a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a new
                filename or an existing directory in the directory structure of
                the workspace. If the target is a directory, the target file
                consists of the directory and the basename of the source file.
                If the target file already exists, a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file has been moved.

        """
        workspace = self._ws
        return workspace.move(source, target)

    def read_text(self, filepath: PathLike, encoding: OptStr = None) -> str:
        """Return the text content of a file in the current workspace.

        The call is forwarded to the current workspace object of the session.

        Args:
            filepath: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the workspace. If the file
                does not exist, a FileNotFoundError is raised.
            encoding: Specifies the name of the encoding, which is used to
                decode the stream's bytes into strings. By default the preferred
                encoding of the operating system is used.

        Returns:
            Contents of the given filepath decoded as string.

        """
        workspace = self._ws
        return workspace.read_text(filepath, encoding=encoding)

    def read_bytes(self, filepath: PathLike) -> bytes:
        """Return the binary content of a file in the current workspace.

        The call is forwarded to the current workspace object of the session.

        Args:
            filepath: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the workspace. If the file
                does not exist, a FileNotFoundError is raised.

        Returns:
            Contents of the given filepath as bytes.

        """
        workspace = self._ws
        return workspace.read_bytes(filepath)

    def write_text(self,
                   text: str,
                   filepath: PathLike,
                   encoding: OptStr = None) -> int:
        """Write text to a file in the current workspace.

        The call is forwarded to the current workspace object of the session.

        Args:
            text: String, which has to be written to the given file.
            filepath: String or :term:`path-like object`, that represents a
                valid filename in the directory structure of the workspace.
            encoding: Specifies the name of the encoding, which is used to
                encode strings into bytes. By default the preferred encoding of
                the operating system is used.

        Returns:
            Number of characters, that are written to the file.

        """
        workspace = self._ws
        return workspace.write_text(text, filepath, encoding=encoding)

    def write_bytes(self, data: BytesLike, filepath: PathLike) -> int:
        """Write bytes to a file in the current workspace.

        The call is forwarded to the current workspace object of the session.

        Args:
            data: Bytes, which are to be written to the given file.
            filepath: String or :term:`path-like object`, that represents a
                valid filename in the directory structure of the workspace.

        Returns:
            Number of bytes, that are written to the file.

        """
        workspace = self._ws
        return workspace.write_bytes(data, filepath)

    def log(self, level: StrOrInt, msg: str, *args: Any, **kwds: Any) -> None:
        """Pass an event to the logger of the session.

        Args:
            level: Integer value or string, which describes the severity of the
                event. Ordered by ascending severity, the allowed level names
                are: 'DEBUG', 'INFO', 'WARNING', 'ERROR' and 'CRITICAL'. The
                respectively corresponding level numbers are 10, 20, 30, 40 and
                50.
            msg: Message `format string`_, which may contain literal text or
                replacement fields delimited by braces. Each replacement field
                contains either the numeric index of a positional argument,
                given by *args, or the name of a keyword argument, given by the
                keyword *extra*.
            *args: Arguments, which can be used by the message format string.
            **kwds: Additional keywords, which are passed through unchanged to
                the method ``log`` of the session logger.

        """
        logger = self.logger
        logger.log(level, msg, *args, **kwds)

    #
    # Private Methods
    #

    def _load_config(self) -> None:
        """Copy the 'session' section of the configuration file into config.

        The file given by ``_config_file_path`` is parsed with the structure
        ``_config_file_struct``. If the result has no 'session' entry, or the
        entry is not a dictionary, the current configuration is left untouched.

        """
        loaded = inifile.load(self._config_file_path, self._config_file_struct)
        if 'session' not in loaded:
            return
        section = loaded['session']
        if not isinstance(section, dict):
            return
        for key, val in section.items():
            self.config[key] = val

    def _save_config(self) -> None:
        """Write config as section 'session' to the configuration file."""
        inifile.save({'session': self.config}, self._config_file_path)

    def _get_path(self) -> OptPath:
        return self._ws.path

    def _get_files(self) -> StrList:
        return self._ws.search()

    def _get_folders(self) -> StrList:
        return self._ws.folders

    def _locate_path(self,
                     workspace: OptPathLike = None,
                     basedir: OptPathLike = None) -> OptPath:
        if not workspace:
            return None
        if not basedir:
            # If workspace is a fully qualified file path in the directory
            # structure of the system, ignore the 'paths' list
            if env.is_file(workspace):
                return env.expand(workspace)
            # Use the 'paths' list to find a workspace
            for path in self.paths:
                candidate = Path(path, workspace)
                if candidate.is_file():
                    return candidate
            raise FileNotFoundError(f"file {workspace} does not exist")
        return Path(basedir, workspace)
Example #7
0
class WsFile(attrib.Container):
    """Workspace File.

    Workspace files are Zip-Archives, that contain a INI-formatted
    configuration file 'workspace.ini' in the archives root, and arbitrary
    resource files within subfolders.

    Args:
        filepath: String or :term:`path-like object`, that points to a valid
            workspace file or None. If the filepath points to a valid workspace
            file, then the class instance is initialized with a memory copy of
            the file. If the given file, however, does not exist, isn't a valid
            ZipFile, or does not contain a workspace configuration, respectively
            one of the errors FileNotFoundError, BadZipFile or BadWsFile is
            raised. The default behaviour, if the filepath is None, is to create
            an empty workspace in the memory, that uses the default folders
            layout. In this case the attribute maintainer is initialized with
            the current username.
        pwd: Bytes representing password of workspace file.

    """

    #
    # Protected Class Variables
    #

    # Name of the configuration file, which load() reads from and saveas()
    # writes to the workspace archive
    _config_file: ClassVar[Path] = Path('workspace.ini')
    # NOTE(review): this dictionary is built once, when the class body is
    # executed. Hence 'creator' and 'date' reflect the time of class creation
    # and not the time of instance creation - confirm that this is intended.
    _default_config: ClassVar[ConfigDict] = {
        'dc': {
            'creator': env.get_username(),
            'date': datetime.datetime.now()}}
    # Presumably the folders, which are created for new workspaces; verify
    # against _create_new(), which is not part of this section
    _default_dir_layout: ClassVar[StrList] = [
        'dataset', 'network', 'system', 'model', 'script']
    # Fallback encoding, which open() uses in text mode, if none is given
    _default_encoding = env.get_encoding()

    #
    # Public Attributes and Attribute Groups
    #

    # Attribute group 'dc': load() and saveas() exchange its values with the
    # section 'dc' of the workspace configuration file
    dc: attrib.Group = attrib.create_group(attrib.DCGroup)

    # Member of the category 'hooks', which saveas() writes to the section
    # 'hooks' of the workspace configuration file
    startup: property = attrib.MetaData(classinfo=Path, category='hooks')
    startup.__doc__ = """
    The startup script is a path, that points to a python script inside the
    workspace, which is executed after loading the workspace.
    """

    path: property = attrib.Virtual(fget='_get_path')
    path.__doc__ = """Filepath of the workspace."""

    name: property = attrib.Virtual(fget='_get_name')
    name.__doc__ = """Filename of the workspace without file extension."""

    # The public method search() doubles as getter of this attribute
    files: property = attrib.Virtual(fget='search')
    files.__doc__ = """List of all files within the workspace."""

    folders: property = attrib.Virtual(fget='_get_folders')
    folders.__doc__ = """List of all folders within the workspace."""

    changed: property = attrib.Virtual(fget='_get_changed')
    changed.__doc__ = """Tells whether the workspace file has been changed."""

    #
    # Protected Attributes
    #

    # ZipFile, which load() opens in writing mode on top of '_buffer'
    _file: property = attrib.Content(classinfo=ZipFile)
    # In-memory buffer (BytesIO), that backs '_file'
    _buffer: property = attrib.Content(classinfo=BytesIOBaseClass)
    # Expanded path of the workspace file, set by load()
    _path: property = attrib.Temporary(classinfo=Path)
    # Password as passed to load(); used for reading archive members
    _pwd: property = attrib.Temporary(classinfo=bytes)
    # Reset to False by load() and set to True by copy()
    _changed: property = attrib.Temporary(classinfo=bool, default=False)

    #
    # Events
    #

    def __init__(
            self, filepath: OptPathLike = None, pwd: OptBytes = None,
            parent: Optional[attrib.Container] = None) -> None:
        """Initialize the workspace from a file or as a new workspace.

        Args:
            filepath: String or :term:`path-like object`, that points to a
                workspace file. If given, the workspace is loaded from this
                file by :meth:`load`. Otherwise a new workspace is created.
            pwd: Bytes representing password of workspace file. The password
                is only used, if a filepath is given.
            parent: Optional parent container. The argument is accepted, but
                not evaluated by this method.

        """
        super().__init__()
        if not filepath:
            self._create_new()
            return
        self.load(filepath, pwd=pwd)

    def __enter__(self) -> 'WsFile':
        """Enter with statement."""
        return self

    def __exit__(self, cls: ExcType, obj: Exc, tb: Traceback) -> None:
        """Close workspace file and buffer."""
        self.close()

    #
    # Public Methods
    #

    def load(self, filepath: PathLike, pwd: OptBytes = None) -> None:
        """Load Workspace from file.

        The archive is copied member by member into a fresh in-memory ZipFile.
        Afterwards the workspace configuration is read from that copy and the
        values of the section 'dc' are applied to the attribute group 'dc'.

        Args:
            filepath: String or :term:`path-like object`, that points to a valid
                workspace file. If the filepath points to a valid workspace
                file, then the class instance is initialized with a memory copy
                of the file. If the given file, however, does not exist, isn't a
                valid ZipFile, or does not contain a workspace configuration,
                respectively one of the errors FileNotFoundError, BadZipFile or
                BadWsFile is raised.
            pwd: Bytes representing password of workspace file.

        Raises:
            FileNotFoundError: If the given file does not exist.
            BadZipFile: If the given file is not a valid ZIP file.
            BadWsFile: If opening or parsing the workspace configuration raises
                a KeyError.

        """
        # Initialize instance Variables, Buffer and buffered ZipFile
        # NOTE(review): a previously assigned buffer and ZipFile are replaced
        # here without being closed - confirm that callers close them before
        self._changed = False
        self._path = env.expand(filepath)
        self._pwd = pwd
        self._buffer = BytesIO()
        self._file = ZipFile(self._buffer, mode='w')

        # Copy contents from ZipFile to buffered ZipFile
        # UserWarnings are silenced during the copy, presumably to hide the
        # warnings of the zipfile module about duplicate member names
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            try:
                with ZipFile(self.path, mode='r') as fh:
                    for zinfo in fh.infolist():
                        data = fh.read(zinfo, pwd=pwd)
                        # TODO ([email protected]): The zipfile standard
                        # module currently does not support encryption in write
                        # mode of new ZipFiles. See:
                        # https://docs.python.org/3/library/zipfile.html
                        # When support is provided, the below line for writing
                        # files shall be replaced by:
                        # self._file.writestr(zinfo, data, pwd=pwd)
                        self._file.writestr(zinfo, data)
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    f"file '{self.path}' does not exist") from err
            except BadZipFile as err:
                raise BadZipFile(
                    f"file '{self.path}' is not a valid ZIP file") from err

        # Try to open and load workspace configuration from buffer
        structure = {
            'dc': self._get_attr_types(group='dc'),
            'hooks': self._get_attr_types(category='hooks')}
        # NOTE(review): only a KeyError is translated to BadWsFile; confirm
        # which exception the reading helper raises for a missing member
        try:
            with self.open(self._config_file) as file:
                cfg = inifile.load(file, structure=structure)
        except KeyError as err:
            raise BadWsFile(
                f"workspace '{self.path}' is not valid: "
                f"file '{self._config_file}' could not be loaded") from err

        # Link configuration
        # NOTE(review): the section 'hooks' is part of the parsed structure and
        # is written by saveas(), but only the section 'dc' is applied here -
        # confirm whether the hooks have to be restored as well
        self._set_attr_values(cfg.get('dc', {}), group='dc') # type: ignore

    def save(self) -> None:
        """Save the workspace to its filepath.

        Raises:
            FileNotGivenError: If the attribute path is not a Path, in which
                case :meth:`saveas` has to be used.

        """
        target = self.path
        if not isinstance(target, Path):
            raise FileNotGivenError(
                "use saveas() to save the workspace to a file")
        self.saveas(target)

    def saveas(self, filepath: PathLike) -> None:
        """Save the workspace to a file.

        The workspace configuration is rewritten, the buffered archive is
        closed and written to the given file, and finally the workspace is
        reloaded from that file.

        Args:
            filepath: String or :term:`path-like object`, that represents the
                name of a workspace file.

        Raises:
            TypeError: If the buffer is not an instance of BytesIO.

        """
        path = env.expand(filepath)

        # Update datetime
        # NOTE(review): the value is assigned to 'self.date', while the section
        # 'dc' is written from the attribute group 'dc' - confirm that this
        # assignment reaches the group
        self.date = datetime.datetime.now()

        # Update 'workspace.ini'
        with self.open(self._config_file, mode='w') as file:
            inifile.save({
                'dc': self._get_attr_values(group='dc'),
                'hooks': self._get_attr_values(category='hooks')}, file)

        # Remove duplicates from workspace
        self._remove_duplicates()

        # Mark platform, which created the files as Windows
        # to avoid inference of wrong Unix permissions
        for zinfo in self._file.infolist():
            zinfo.create_system = 0

        # Close ZipArchive (to allow to read the buffer)
        self._file.close()

        # Read buffer and write workspace file
        # NOTE(review): at this point the archive is already closed, so an
        # error raised below leaves the workspace without a writable archive
        if not isinstance(self._buffer, BytesIO):
            raise TypeError("buffer has not been initialized")
        with open(path, 'wb') as file:
            file.write(self._buffer.getvalue())

        # Close buffer
        self._buffer.close()

        # Reload saved workspace from file
        self.load(path, pwd=self._pwd)

    def get_file_accessor(self, path: PathLike) -> FileAccessorBase:
        """Create a file accessor for a workspace member.

        The accessor is an instance of a class, that is derived on the fly from
        FileAccessorBase. Its attribute ``name`` is the string representation
        of the given path and its method ``open`` forwards all arguments to
        :meth:`open` of this workspace, using the given path.

        Args:
            path: String or :term:`path-like object`, that represents a
                workspace member. The path is not checked here, but only when
                the accessor is opened.

        Returns:
            :class:`File accessor <nemoa.types.FileAccessorBase>` to workspace
            member.

        """
        def wrapped_open(
                obj: FileAccessorBase, *args: Any, **kwds: Any) -> FileLike:
            # 'obj' is the accessor instance itself and is not required, since
            # the member is opened through the enclosing workspace
            return self.open(path, *args, **kwds)

        namespace = {'name': str(path), 'open': wrapped_open}
        accessor_class = type('FileAccessor', (FileAccessorBase,), namespace)
        return accessor_class() # pylint: disable=E0110

    def open(
            self, path: PathLike, mode: str = 'r', encoding: OptStr = None,
            is_dir: bool = False) -> FileLike:
        """Open file within the workspace.

        Args:
            path: String or :term:`path-like object`, that represents a
                workspace member. In reading mode the path has to point to a
                valid workspace file, or a FileNotFoundError is raised. In
                writing mode the path by default is treated as a file path. New
                directories can be written by setting the argument is_dir to
                True.
            mode: String, which characters specify the mode in which the file is
                to be opened. The default mode is reading in text mode.
                Supported characters are:
                'r': Reading mode (default)
                'w': Writing mode
                'b': Binary mode
                't': Text mode (default)
            encoding: In binary mode encoding has no effect. In text mode
                encoding specifies the name of the encoding, which in reading
                and writing mode respectively is used to decode the stream's
                bytes into strings, and to encode strings into bytes. By default
                the preferred encoding of the operating system is used.
            is_dir: Boolean value which determines, if the path is to be treated
                as a directory or not. This information is required for writing
                directories to the workspace. The default behaviour is not to
                treat paths as directories.

        Returns:
            :term:`File object` in reading or writing mode.

        Raises:
            ValueError: If mode contains both of the characters 'r' and 'w', or
                both of the characters 'b' and 't'.

        Examples:
            >>> with self.open('workspace.ini') as file:
            >>>     print(file.read())

        """
        # Validate the complete mode string before any handle is opened, such
        # that an invalid mode can not leave a dangling member handle behind
        writing = 'w' in mode
        binary = 'b' in mode
        if writing and 'r' in mode:
            raise ValueError(
                "'mode' is not allowed to contain the "
                "characters 'r' AND 'w'")
        if binary and 't' in mode:
            raise ValueError(
                "'mode' is not allowed to contain the "
                "characters 'b' AND 't'")

        # Open file handler to workspace member
        if writing:
            file = self._open_write(path, is_dir=is_dir)
        else:
            file = self._open_read(path)

        # Wrap binary files to text files if required
        if binary:
            return file
        return TextIOWrapper(
            file, encoding=encoding or self._default_encoding,
            write_through=True)

    def close(self) -> None:
        """Close the buffered archive and afterwards its buffer.

        Objects without a method ``close`` (e.g. attributes, which have not
        been initialized yet) are skipped.

        """
        for resource in (self._file, self._buffer):
            if hasattr(resource, 'close'):
                resource.close()

    def copy(self, source: PathLike, target: PathLike) -> bool:
        """Duplicate a file within the workspace.

        Args:
            source: String or :term:`path-like object`, that points to a file in
                the directory structure of the workspace. If the file does not
                exist, a FileNotFoundError is raised. If the filepath points to
                a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a new
                filename or an existing directory in the directory structure of
                the workspace. If the target is a directory, the target file
                consists of the directory and the basename of the source file.
                If the target file already exists, a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file was copied.

        """
        # The source has to be an existing member, which is not a directory.
        # If several entries match, the last one is used.
        src_name = PurePath(source).as_posix()
        src_matches = self._locate(source)
        if not src_matches:
            raise FileNotFoundError(
                f"workspace file '{src_name}' does not exist")
        src_info = src_matches[-1]
        if src_info.is_dir():
            raise IsADirectoryError(
                f"'{src_name}/' is a directory not a file")

        # Resolve the name of the target member: The root directory and
        # existing directories are completed by the basename of the source
        basename = Path(src_name).name
        tgt_name = PurePath(target).as_posix()
        if tgt_name == '.':
            tgt_name = basename
        else:
            tgt_matches = self._locate(target)
            if tgt_matches and tgt_matches[-1].is_dir():
                tgt_name = PurePath(tgt_name, basename).as_posix()

        # Existing members are never overwritten
        if self._locate(tgt_name):
            raise FileExistsError(
                f"workspace file '{tgt_name}' already exist.")

        # Read the binary data of the source and write it to a new member,
        # that inherits the timestamp of the source
        # TODO: The zipfile standard module currently does not support
        # encryption in write mode. See:
        # https://docs.python.org/3/library/zipfile.html
        # When support is provided, the data shall be written by:
        # self._file.writestr(tgt_info, payload, pwd=self._pwd)
        payload = self._file.read(src_info, pwd=self._pwd)
        tgt_info = ZipInfo( # type: ignore
            filename=tgt_name, date_time=src_info.date_time)
        self._file.writestr(tgt_info, payload)
        self._changed = True

        # Report whether the new member can be located
        return bool(self._locate(tgt_name))

    def move(self, source: PathLike, target: PathLike) -> bool:
        """Relocate a file within the workspace.

        The file is first copied by :meth:`copy`. Only if the copy succeeded,
        the source is removed by :meth:`unlink`.

        Args:
            source: String or :term:`path-like object`, that points to a file in
                the directory structure of the workspace. If the file does not
                exist, a FileNotFoundError is raised. If the filepath points to
                a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a new
                filename or an existing directory in the directory structure of
                the workspace. If the target is a directory, the target file
                consists of the directory and the basename of the source file.
                If the target file already exists, a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file has been moved.

        """
        copied = self.copy(source, target)
        if not copied:
            return copied
        return self.unlink(source)

    def append(self, source: PathLike, target: OptPathLike = None) -> bool:
        """Append file to the workspace.

        Args:
            source: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the system. If the file does
                not exist, a FileNotFoundError is raised. If the filepath points
                to a directory, an IsADirectoryError is raised.
            target: String or :term:`path-like object`, that points to a valid
                directory in the directory structure of the workspace. By
                default the root directory is used. If the directory does not
                exist, a FileNotFoundError is raised. If the target directory
                already contains a file, whose name equals the filename of the
                source, a FileExistsError is raised.

        Returns:
            Boolean value which is True if the file has been appended.

        Raises:
            FileNotFoundError: If the source file or the target directory does
                not exist.
            IsADirectoryError: If the source is a directory.
            FileExistsError: If the target directory already contains a member
                with the name of the source file.

        """
        # Check source file
        src_file = env.expand(source)
        if not src_file.exists():
            raise FileNotFoundError(f"file '{src_file}' does not exist")
        if src_file.is_dir():
            raise IsADirectoryError(f"'{src_file}' is a directory not a file")

        # Check target directory
        if target:
            tgt_dir = PurePath(target).as_posix() + '/'
            if not self._locate(tgt_dir):
                raise FileNotFoundError(
                    f"workspace directory '{tgt_dir}' does not exist")
        else:
            tgt_dir = '.'
        tgt_file = Path(tgt_dir, src_file.name)
        if self._locate(tgt_file):
            raise FileExistsError(
                f"workspace directory '{tgt_dir}' already contains a file "
                f"with name '{src_file.name}'")

        # Create ZipInfo entry from source file, which keeps the modification
        # time of the source
        filename = PurePath(tgt_file).as_posix()
        date_time = time.localtime(src_file.stat().st_mtime)[:6]
        zinfo = ZipInfo(filename=filename, date_time=date_time) # type: ignore

        # Copy file to archive
        with src_file.open('rb') as src:
            data = src.read()
        # TODO ([email protected]): The zipfile standard module currently
        # does not support encryption in write mode. See:
        # https://docs.python.org/3/library/zipfile.html
        # When support is provided, the below line shall be replaced by:
        # self._file.writestr(zinfo, data, pwd=pwd)
        self._file.writestr(zinfo, data)

        # The archive has been modified, so flag the workspace as changed,
        # consistent with copy()
        self._changed = True

        return True

    def read_text(self, filepath: PathLike, encoding: OptStr = None) -> str:
        """Return the text content of a workspace file.

        Args:
            filepath: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the workspace. If the file
                does not exist, a FileNotFoundError is raised.
            encoding: Specifies the name of the encoding, which is used to
                decode the stream's bytes into strings. By default the preferred
                encoding of the operating system is used.

        Returns:
            Contents of the given filepath decoded as string. If the opened
            file does not deliver a string, an empty string is returned.

        """
        with self.open(filepath, mode='r', encoding=encoding) as file:
            content = file.read()
        return content if isinstance(content, str) else ''

    def read_bytes(self, filepath: PathLike) -> bytes:
        """Return the binary content of a workspace file.

        Args:
            filepath: String or :term:`path-like object`, that points to a valid
                file in the directory structure of the workspace. If the file
                does not exist, a FileNotFoundError is raised.

        Returns:
            Contents of the given filepath as bytes. If the opened file does
            not deliver bytes, an empty bytes object is returned.

        """
        with self.open(filepath, mode='rb') as file:
            content = file.read()
        return content if isinstance(content, bytes) else b''

    def write_text(
            self, text: str, filepath: PathLike,
            encoding: OptStr = None) -> int:
        """Write text to a workspace file.

        Args:
            text: String, which has to be written to the given file.
            filepath: String or :term:`path-like object`, that represents a
                valid filename in the directory structure of the workspace.
            encoding: Specifies the name of the encoding, which is used to
                encode strings into bytes. By default the preferred encoding of
                the operating system is used.

        Returns:
            Number of characters, that are written to the file. If the opened
            file is not a text file, nothing is written and 0 is returned.

        """
        written = 0
        with self.open(filepath, mode='w', encoding=encoding) as file:
            if isinstance(file, TextIOBaseClass):
                written = file.write(text)
        return written

    def write_bytes(self, blob: BytesLike, filepath: PathLike) -> int:
        """Write bytes to a workspace file.

        Args:
            blob: Bytes, which are to be written to the given file.
            filepath: String or :term:`path-like object`, that represents a
                valid filename in the directory structure of the workspace.

        Returns:
            Number of bytes, that are written to the file. If the opened file
            is not a binary file, nothing is written and 0 is returned.

        """
        written = 0
        with self.open(filepath, mode='wb') as file:
            if isinstance(file, BytesIOBaseClass):
                written = file.write(blob)
        return written

    def unlink(self, filepath: PathLike, ignore_missing: bool = True) -> bool:
        """Delete a file from the workspace.

        Args:
            filepath: String or :term:`path-like object`, that points to a file
                in the directory structure of the workspace. If the filepath
                points to a directory, an IsADirectoryError is raised. If the
                file does not exist, the argument ignore_missing decides,
                whether a FileNotFoundError is raised.
            ignore_missing: Boolean value which determines, if a
                FileNotFoundError is raised for a missing target file. By
                default missing files are ignored.

        Returns:
            Result of the removal of the located members. For a missing file,
            which is ignored, True is returned as well.

        """
        members = self._locate(filepath)
        if members:
            # Directories are refused; the last located entry decides
            if members[-1].is_dir():
                dirname = PurePath(filepath).as_posix() + '/'
                raise IsADirectoryError(
                    f"'{dirname}' is a directory not a file")
            return self._remove_members(members)

        # Nothing has been located
        if not ignore_missing:
            filename = PurePath(filepath).as_posix()
            raise FileNotFoundError(f"file '{filename}' does not exist")
        return True

    def mkdir(self, dirpath: PathLike, ignore_exists: bool = False) -> bool:
        """Create a new directory within the workspace.

        Args:
            dirpath: String or :term:`path-like object`, that represents a valid
                directory name in the directory structure of the workspace.
            ignore_exists: Boolean value which determines, if an already
                existing target is silently accepted (True) or reported by
                raising a FileExistsError (False). By default an error is
                raised.

        Returns:
            Boolean value, which is True if the directory was created, or if
            the path already exists and ignore_exists is True.

        Raises:
            FileExistsError: If the path already exists and ignore_exists is
                False.

        """
        if self._locate(dirpath):
            # The path is already occupied by a workspace member
            if ignore_exists:
                return True
            dirname = PurePath(dirpath).as_posix() + '/'
            raise FileExistsError(f"directory '{dirname}' already exists")

        # Opening the path as a directory in write mode and closing it right
        # away, appends an empty directory entry to the workspace
        with self.open(dirpath, mode='w', is_dir=True):
            pass
        return True

    def rmdir(
            self, dirpath: PathLike, recursive: bool = False,
            ignore_missing: bool = False) -> bool:
        """Remove directory from workspace.

        Args:
            dirpath: String or :term:`path-like object`, that points to a
                directory in the directory structure of the workspace. If the
                directory does not exist, the argument ignore_missing
                determines, if a FileNotFoundError is raised.
            recursive: Boolean value which determines, if directories are
                removed recursively. If recursive is False, then only empty
                directories can be removed. If recursive, however, is True, then
                all files and subdirectories are also removed. By default
                recursive is False.
            ignore_missing: Boolean value which determines, if FileNotFoundError
                is raised, if the target directory does not exist. The default
                behaviour, is to raise an error if the directory is missing.

        Returns:
            Boolean value, which is True if the given directory was removed, or
            if the directory does not exist and ignore_missing is True.

        Raises:
            FileNotFoundError: If the directory does not exist and
                ignore_missing is False.
            NotADirectoryError: If the given path points to a file.
            DirNotEmptyError: If the directory is not empty and recursive is
                False.

        """
        matches = self._locate(dirpath)
        dirname = PurePath(dirpath).as_posix() + '/'
        if not matches:
            if ignore_missing:
                return True
            raise FileNotFoundError(f"directory '{dirname}' does not exist")

        # Refuse to remove regular files: unlink() rejects directories, so
        # rmdir() symmetrically rejects files instead of silently deleting them
        if not getattr(matches[-1], 'is_dir')():
            filename = PurePath(dirpath).as_posix()
            raise NotADirectoryError(
                f"'{filename}' is a file not a directory")

        # Empty directories only require the removal of their own entries
        files = self.search(dirname + '*')
        if not files:
            return self._remove_members(matches)
        if not recursive:
            raise DirNotEmptyError(f"directory '{dirname}' is not empty")

        # Collect the entries of the directory and of everything beneath it in
        # a separate list, such that the located matches are not mutated
        allmatches = list(matches)
        for file in files:
            allmatches.extend(self._locate(file))
        return self._remove_members(allmatches)

    def search(self, pattern: OptStr = None) -> StrList:
        """Search for files in the workspace.

        Args:
            pattern: Search pattern that contains Unix shell-style wildcards:
                '*': Matches arbitrary strings
                '?': Matches single characters
                [seq]: Matches any character in seq
                [!seq]: Matches any character not in seq
                By default a list of all files and directories is returned.

        Returns:
            Sorted list of files and directories in the directory structure of
            the workspace, that match the search pattern. Directories are
            marked by a trailing slash.

        """
        # Collect the normalized paths of all workspace members. The keys of a
        # dictionary are used to drop duplicates (multiple versions of a file)
        # in constant time per member, while keeping the order of the archive.
        unique: dict = {}
        for zinfo in self._file.infolist():
            path = PurePath(zinfo.filename).as_posix()
            if getattr(zinfo, 'is_dir')():
                path += '/'
            unique.setdefault(path, None)
        paths: PathLikeList = list(unique)

        # Match path list with given pattern
        if pattern:
            paths = env.match_paths(paths, pattern)

        # Sort paths
        return sorted(str(path) for path in paths)

    #
    # Protected Methods
    #

    def _create_new(self) -> None:
        # Reset the attribute group 'dc' to the configured default values
        defaults = self._default_config['dc']
        self._set_attr_values(defaults, group='dc')

        # Reset path, change flag and password of the workspace
        self._path = None
        self._changed = False
        self._pwd = None

        # Back the workspace by an empty ZipFile, which is kept in memory
        buffer = BytesIO()
        self._buffer = buffer
        self._file = ZipFile(buffer, mode='w')

        # Populate the workspace with the default directory layout
        for folder in self._default_dir_layout:
            self.mkdir(folder)

    def _open_read(self, path: PathLike) -> BytesIOLike:
        # Locate all versions of the workspace member, that is addressed by
        # the given path
        versions = self._locate(path)
        if versions:
            # The located versions are sorted by their timestamps, such that
            # the last entry is the latest version of the file
            latest = versions[-1]
            return self._file.open(latest, pwd=self._pwd, mode='r')

        # Without any located version there is nothing to read
        fname = PurePath(path).as_posix()
        raise FileNotFoundError(
            f"workspace member with filename '{fname}' does not exist")

    def _open_write(self, path: PathLike, is_dir: bool = False) -> BytesIOLike:
        # Derive the name of the workspace member from the given path, where
        # directories are marked by a trailing slash
        member = PurePath(path).as_posix() + ('/' if is_dir else '')

        # Create the ZipInfo of the member with the local time as date_time
        timestamp = time.localtime()[:6]
        zinfo = ZipInfo(filename=member, date_time=timestamp)  # type: ignore

        # Writing a further version of an existing member causes a warning
        # about duplicate files, which is suppressed
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            # TODO ([email protected]): The zipfile standard
            # module currently does not support encryption in write
            # mode of new ZipFiles. See:
            # https://docs.python.org/3/library/zipfile.html
            # When support is provided, the below line for writing
            # files shall be replaced by:
            # file = self._file.open(zinfo, mode='w', pwd=self._pwd)
            handle = self._file.open(zinfo, mode='w')

        # Flag the workspace as modified and pass the file handler
        self._changed = True
        return handle

    def _locate(self, path: PathLike, sort: bool = True) -> ZipInfoList:
        # Locate all workspace members (including multiple versions of the
        # same file), whose filenames are path-wise equal to the given path.
        # The given path is converted only once and not per archive member.
        target = Path(path)
        matches = [
            zinfo for zinfo in self._file.infolist()
            if Path(zinfo.filename) == target]
        if sort:
            # Sort matches by datetime, such that the latest version is the
            # last entry. The sorting is stable, hence versions with identical
            # timestamps keep their order within the archive.
            matches.sort(key=lambda zinfo: zinfo.date_time)
        return matches

    def _get_name(self) -> OptStr:
        # The name is the stem of the workspace path. If the path does not
        # provide a stem (e.g. if no path is set), None is returned.
        path = self._path
        return getattr(path, 'stem', None)

    def _get_path(self) -> OptPath:
        # Getter for the currently stored path of the workspace
        current_path = self._path
        return current_path

    def _get_changed(self) -> bool:
        # Getter for the flag, that is set whenever members are written to or
        # removed from the workspace
        has_changed = self._changed
        return has_changed

    def _get_folders(self) -> StrList:
        # Collect the normalized names of all directory entries within the
        # archive, each marked by a trailing slash
        folders = [
            PurePath(member.filename).as_posix() + '/'
            for member in self._file.infolist()
            if getattr(member, 'is_dir')()]
        folders.sort()
        return folders

    def _remove_members(self, zinfos: ZipInfoList) -> bool:
        # An empty list of members requires no modification of the workspace
        if not zinfos:
            return True

        # Members are identified by filename and timestamp, since the archive
        # may contain several versions of the same file. Every archive entry
        # consumes at most one pending identifier, all others are kept.
        pending = [(item.filename, item.date_time) for item in zinfos]
        keep = []
        for member in self._file.infolist():
            key = (member.filename, member.date_time)
            if key not in pending:
                keep.append(member)
                continue
            pending.remove(key)

        # Identifiers that are still pending do not exist within the archive
        if pending:
            names = [key[0] for key in pending]
            raise FileNotFoundError(
                f"could not locate workspace members: {names}")

        # Copy all members, that are kept, into a new in-memory ZipFile
        fresh_buffer = BytesIO()
        fresh_file = ZipFile(fresh_buffer, mode='w')
        for member in keep:
            content = self._file.read(member, pwd=self._pwd)
            fresh_file.writestr(member, content)

        # Release the current archive and its buffer and switch the workspace
        # to the new archive
        self._file.close()
        self._buffer.close()
        self._buffer, self._file = fresh_buffer, fresh_file
        self._changed = True

        return True

    def _remove_duplicates(self) -> bool:
        # For every file collect all versions except for the latest one, which
        # is the last entry of the located versions, sorted by timestamp
        outdated: ZipInfoList = []
        for filename in self.files:
            versions = self._locate(filename, sort=True)
            outdated.extend(versions[:-1])

        # Drop the outdated versions from the workspace
        return self._remove_members(outdated)