예제 #1
0
def initialize_database(path: util.PathLike,
                        schema: SchemaLike,
                        files: bool = False) -> None:
    """
    Set up a bare database directory at *path*.

    The directory at *path* is created if it is missing, the schema
    is written into it, and any existing files defined by the schema
    are deleted.

    .. warning::

       If *path* points to an existing directory, all relation files
       defined by the schema will be overwritten or deleted.

    Args:
        path: the path to the destination database directory
        schema: the destination database schema; a `str` or `Path`
            value is read as a schema file first
        files: if `True`, create an empty file for every relation in
            *schema*
    """
    db_dir = Path(path).expanduser()
    schema_is_path = isinstance(schema, (str, Path))
    if schema_is_path:
        schema = read_schema(schema)

    # only the final path component is created; parents must exist
    db_dir.mkdir(exist_ok=True)
    write_schema(db_dir, schema)
    _cleanup_files(db_dir, set(schema))

    if not files:
        return
    for relation_name in schema:
        (db_dir / relation_name).touch()
예제 #2
0
def is_database_directory(path: util.PathLike) -> bool:
    """
    Tell whether *path* is a valid TSDB database directory.

    The test is deliberately shallow: *path* must be a directory and
    it must contain a schema file. The contents of the schema file
    are not inspected, so an invalid schema still passes.
    """
    candidate = Path(path).expanduser()
    if not candidate.is_dir():
        return False
    schema_file = candidate / SCHEMA_FILENAME
    return schema_file.is_file()
예제 #3
0
def write_schema(path: util.PathLike, schema: Schema) -> None:
    """
    Write the serialized form of *schema* to the relations file.

    When *path* is an existing directory the schema goes to the
    relations file inside it; otherwise *path* itself is taken as the
    file to write. A trailing newline is appended to the serialized
    schema.
    """
    target = Path(path).expanduser()
    if target.is_dir():
        target = target / SCHEMA_FILENAME
    serialized = _format_schema(schema)
    target.write_text(serialized + '\n')
예제 #4
0
def read_schema(path: util.PathLike) -> Schema:
    """
    Build a schema from the schema file located via *path*.

    A directory *path* is resolved to the relations file inside it,
    while a file *path* is used as the schema file directly. If the
    resulting path is not an existing file a :exc:`TSDBSchemaError`
    is raised.
    """
    path = Path(path).expanduser()
    if path.is_dir():
        path = path / SCHEMA_FILENAME

    if path.is_file():
        contents = path.read_text()
        return _parse_schema(contents)

    raise TSDBSchemaError(f'no valid schema file at {path!s}')
예제 #5
0
    def __init__(self,
                 path: util.PathLike = None,
                 schema: tsdb.SchemaLike = None,
                 encoding: str = 'utf-8') -> None:
        """
        Open or create the test suite directory at *path*.

        Args:
            path: the test suite directory; if `None`, a temporary
                directory is created and used instead
            schema: the schema (or a path to a schema file) to write
                when the directory has no relations file yet
            encoding: character encoding passed on to the parent
                initializer
        Raises:
            ITSDBError: if the directory has no relations file and
                *schema* is `None`
        """
        if path is not None:
            path = Path(path).expanduser()
            path.mkdir(exist_ok=True)  # can fail if path is a file
        else:
            # Virtual test suites live in a temporary directory; the
            # handle is kept on the instance
            self._tempdir = tempfile.TemporaryDirectory()
            path = Path(self._tempdir.name)

        # A relations file must exist before the parent initializer runs
        relations_file = path / tsdb.SCHEMA_FILENAME
        if not relations_file.is_file():
            if schema is None:
                raise ITSDBError(
                    '*schema* argument is required for new test suites')
            if isinstance(schema, (str, Path)):
                schema = tsdb.read_schema(schema)
            tsdb.write_schema(path, schema)

        super().__init__(path, autocast=False, encoding=encoding)
        self._data: Dict[str, Table] = {}
예제 #6
0
def write_database(db: Database,
                   path: util.PathLike,
                   names: Iterable[str] = None,
                   schema: SchemaLike = None,
                   gzip: bool = False,
                   encoding: str = 'utf-8') -> None:
    """
    Write TSDB database *db* to *path*.

    If *path* is an existing file (not a directory), a
    :class:`TSDBError` is raised. If *path* is an existing directory,
    the files for all relations in the destination schema will be
    cleared.  Every relation name in *names* must exist in the
    destination schema. If *schema* is given (even if it is the same
    as for *db*), every record will be remade (using
    :func:`make_record`) using the schema, and columns may be dropped
    or `None` values inserted as necessary, but no more sophisticated
    changes will be made.

    .. warning::

       If *path* points to an existing directory, all relation files
       defined by the schema will be overwritten or deleted.

    Args:
        db: Database containing data to write
        path: the path to the destination database directory
        names: iterable of names of relations to write (one-shot
            iterators such as generators are accepted); if `None` use
            all relations in the destination schema
        schema: the destination database schema, or a path to a
            schema file; if `None` use the schema of *db*
        gzip: if `True`, compress all non-empty files; if `False`, do
            not compress
        encoding: character encoding for the database files
    Raises:
        TSDBError: if *path* is an existing file
    """
    path = Path(path).expanduser()
    if path.is_file():
        raise TSDBError(f'not a directory: {path!s}')
    # records are only remade when the caller explicitly gave a schema
    remake_records = schema is not None
    if schema is None:
        schema = db.schema
    elif isinstance(schema, (str, Path)):
        schema = read_schema(schema)

    # *names* is consumed twice (the write loop and the final cleanup),
    # so materialize it; otherwise a one-shot iterator would be empty by
    # the time of the cleanup and the files just written would be
    # treated as leftovers
    names = list(schema) if names is None else list(names)

    # Prepare destination directory
    path.mkdir(exist_ok=True)
    write_schema(path, schema)

    for name in names:
        fields = schema[name]
        # relations missing from (or unreadable in) *db* are written empty
        relation: Iterable[Record] = []
        if name in db.schema:
            try:
                relation = db[name]
            except (TSDBError, KeyError):
                pass
            if remake_records:
                relation = _remake_records(relation, db.schema[name], fields)
        write(path,
              name,
              relation,
              fields,
              append=False,
              gzip=gzip,
              encoding=encoding)

    # only delete other files at the end in case db.path == path
    _cleanup_files(path, set(schema).difference(names))
예제 #7
0
def write(dir: util.PathLike,
          name: str,
          records: Iterable[Record],
          fields: Fields,
          append: bool = False,
          gzip: bool = False,
          encoding: str = 'utf-8') -> None:
    """
    Write *records* to the file for relation *name* under *dir*.

    A naive way to write a relation file looks like this:

    >>> with open(os.path.join(db.path, 'item'), 'w') as fh:
    ...     print('\\n'.join(map(tsdb.join, db['item'])), file=fh)

    Compared to that approach, this function also:

    * Picks the destination path based on the *gzip* parameter and
      on which files already exist

    * Writes either plain text or compressed data, as appropriate

    * Appends to or overwrites the file, as requested

    * Formats fields using the schema information

    * Stages the data in a temporary file and copies it over when
      done, which prevents accidental data loss when overwriting a
      file that is being read

    * Removes the alternative (compressed or plain text) file so the
      two cannot get out of sync (e.g., any existing `item` is
      deleted when `item.gz` is written)

    Appending is not supported together with *gzip* or when a
    gzipped file already exists; a :exc:`NotImplementedError` is
    raised in that case. This may be allowed in the future, but since
    appending to a gzipped file (in general) compresses poorly, it is
    better to append to plain text and compress when done.

    Args:
        dir: path to the database directory
        name: name of the relation to write
        records: iterable of records to write
        fields: iterable of :class:`Field` objects
        append: if `True`, append to rather than overwrite the file
        gzip: if `True` and the file is not empty, compress the file
            with `gzip`; if `False`, do not compress
        encoding: character encoding of the file; `None` is treated
            as `'utf-8'`
    Raises:
        TSDBError: if *dir* is not an existing directory
        NotImplementedError: if appending to a gzipped file is
            requested

    Example:
        >>> tsdb.write('my-profile',
        ...            'item',
        ...            item_records,
        ...            schema['item'])
    """
    dir = Path(dir).expanduser()

    if encoding is None:
        encoding = 'utf-8'

    if not dir.is_dir():
        raise TSDBError(f'invalid test suite directory: {dir}')

    plain_path, gzipped_path, gzipped_exists = _get_paths(dir, name)
    if append and (gzip or gzipped_exists):
        raise NotImplementedError('cannot append to a gzipped file')

    open_mode = 'ab' if append else 'wb'

    staging = tempfile.NamedTemporaryFile(mode='w+b',
                                          suffix='.tmp',
                                          prefix=name,
                                          dir=dir)
    with staging as staged:

        for record in records:
            line = join(record, fields) + '\n'
            staged.write(line.encode(encoding))

        # an empty relation is never compressed, even if requested
        compress = gzip and staged.tell() != 0

        # rewind so the staged bytes can be copied to the destination
        staged.seek(0)
        if compress:
            dest, other = gzipped_path, plain_path
            sink = gzopen(dest, open_mode)
        else:
            dest, other = plain_path, gzipped_path
            sink = dest.open(mode=open_mode)
        with sink as f_out:
            shutil.copyfileobj(staged, f_out)

    # drop the stale counterpart (gz or non-gz) file if there is one
    if other.is_file():
        other.unlink()