def initialize_database(path: util.PathLike,
                        schema: SchemaLike,
                        files: bool = False) -> None:
    """
    Initialize a bare database directory at *path*.

    Initialization creates the directory at *path* if it does not
    exist, writes the schema, and deletes any existing files defined
    by the schema.

    .. warning::

       If *path* points to an existing directory, all relation files
       defined by the schema will be overwritten or deleted.

    Args:
        path: the path to the destination database directory
        schema: the destination database schema
        files: if `True`, create an empty file for every relation in
            *schema*
    """
    db_dir = Path(path).expanduser()

    # A schema given as a path is loaded before the directory is touched
    if isinstance(schema, (str, Path)):
        schema = read_schema(schema)

    db_dir.mkdir(exist_ok=True)
    write_schema(db_dir, schema)
    # Start from a clean slate: every relation named by the schema is
    # handed to the cleanup helper
    _cleanup_files(db_dir, set(schema))

    if not files:
        return
    for relation_name in schema:
        (db_dir / relation_name).touch()
def is_database_directory(path: util.PathLike) -> bool:
    """
    Return `True` if *path* is a valid TSDB database directory.

    A valid database directory is a directory that contains a schema
    file. The test is deliberately shallow: only the existence of the
    schema file is checked, not whether its contents are well-formed.
    """
    candidate = Path(path).expanduser()
    if not candidate.is_dir():
        return False
    return (candidate / SCHEMA_FILENAME).is_file()
def write_schema(path: util.PathLike, schema: Schema) -> None:
    """
    Serialize *schema* and write it to the relations file at *path*.

    When *path* is an existing directory the schema goes to the
    `relations` file inside it; otherwise *path* itself is taken to be
    the file to write.
    """
    target = Path(path).expanduser()
    if target.is_dir():
        target = target / SCHEMA_FILENAME
    # The serialized schema is always terminated by a newline
    serialized = _format_schema(schema) + '\n'
    target.write_text(serialized)
def read_schema(path: util.PathLike) -> Schema:
    """
    Instantiate schema dict from a schema file given by *path*.

    A directory *path* is resolved to the relations file beneath it,
    while a file *path* is read directly as the schema file.

    Raises:
        TSDBSchemaError: if the resolved path is not an existing file
    """
    schema_path = Path(path).expanduser()
    if schema_path.is_dir():
        schema_path = schema_path / SCHEMA_FILENAME
    if schema_path.is_file():
        return _parse_schema(schema_path.read_text())
    raise TSDBSchemaError(f'no valid schema file at {schema_path!s}')
def __init__(self,
             path: util.PathLike = None,
             schema: tsdb.SchemaLike = None,
             encoding: str = 'utf-8') -> None:
    """
    Open the test suite at *path*, creating it if necessary.

    Args:
        path: the test suite directory; if `None`, a temporary
            directory is created and used instead
        schema: the schema (or a path to one) to write when the
            directory has no relations file yet
        encoding: character encoding passed on to the parent class

    Raises:
        ITSDBError: if the directory lacks a relations file and no
            *schema* was given
    """
    if path is not None:
        path = Path(path).expanduser()
        path.mkdir(exist_ok=True)  # can fail if path is a file
    else:
        # Virtual test suites live in a temporary directory; the
        # TemporaryDirectory object is kept on the instance
        self._tempdir = tempfile.TemporaryDirectory()
        path = Path(self._tempdir.name)

    # A test suite directory must contain a relations file; create one
    # from *schema* when it is missing
    relations_path = path.joinpath(tsdb.SCHEMA_FILENAME)
    if not relations_path.is_file():
        if schema is None:
            raise ITSDBError(
                '*schema* argument is required for new test suites')
        if isinstance(schema, (str, Path)):
            schema = tsdb.read_schema(schema)
        tsdb.write_schema(path, schema)

    super().__init__(path, autocast=False, encoding=encoding)
    self._data: Dict[str, Table] = {}
def write_database(db: Database,
                   path: util.PathLike,
                   names: Iterable[str] = None,
                   schema: SchemaLike = None,
                   gzip: bool = False,
                   encoding: str = 'utf-8') -> None:
    """
    Write TSDB database *db* to *path*.

    If *path* is an existing file (not a directory), a
    :class:`TSDBError` is raised. If *path* is an existing directory,
    the files for all relations in the destination schema will be
    cleared. Every relation name in *names* must exist in the
    destination schema. If *schema* is given (even if it is the same
    as for *db*), every record will be remade (using
    :func:`make_record`) using the schema, and columns may be dropped
    or `None` values inserted as necessary, but no more sophisticated
    changes will be made.

    .. warning::

       If *path* points to an existing directory, all relation files
       defined by the schema will be overwritten or deleted.

    Args:
        db: Database containing data to write
        path: the path to the destination database directory
        names: iterable of names of relations to write; if `None` use
            all relations in the destination schema. Any iterable is
            accepted, including one-shot iterators such as generators
        schema: the destination database schema; if `None` use the
            schema of *db*
        gzip: if `True`, compress all non-empty files; if `False`, do
            not compress
        encoding: character encoding for the database files

    Raises:
        TSDBError: if *path* is an existing file
    """
    path = Path(path).expanduser()
    if path.is_file():
        raise TSDBError(f'not a directory: {path!s}')

    # Records are only remade when the caller explicitly gave a schema
    remake_records = schema is not None
    if schema is None:
        schema = db.schema
    elif isinstance(schema, (str, Path)):
        schema = read_schema(schema)

    if names is None:
        names = list(schema)
    else:
        # *names* is iterated twice below (the write loop and the final
        # cleanup). A one-shot iterator would be empty the second time,
        # making the cleanup target every relation in the schema,
        # including the ones just written, so materialize it up front.
        names = list(names)

    # Prepare destination directory
    path.mkdir(exist_ok=True)
    write_schema(path, schema)

    for name in names:
        fields = schema[name]
        # Relations missing from *db* are written out empty
        relation: Iterable[Record] = []
        if name in db.schema:
            try:
                relation = db[name]
            except (TSDBError, KeyError):
                pass
            if remake_records:
                relation = _remake_records(relation, db.schema[name], fields)
        write(path,
              name,
              relation,
              fields,
              append=False,
              gzip=gzip,
              encoding=encoding)

    # only delete other files at the end in case db.path == path
    _cleanup_files(path, set(schema).difference(names))
def write(dir: util.PathLike,
          name: str,
          records: Iterable[Record],
          fields: Fields,
          append: bool = False,
          gzip: bool = False,
          encoding: str = 'utf-8') -> None:
    """
    Write *records* to relation *name* in the database at *dir*.

    The simplest way to write data to a file would be something like
    the following:

    >>> with open(os.path.join(db.path, 'item'), 'w') as fh:
    ...     print('\\n'.join(map(tsdb.join, db['item'])), file=fh)

    This function improves on that method by doing the following:

    * Determining the path from the *gzip* parameter and existing files
    * Writing plain text or compressed data, as appropriate
    * Appending or overwriting data, as requested
    * Using the schema information to format fields
    * Writing to a temporary file then copying when done; this
      prevents accidental data loss when overwriting a file that is
      being read
    * Deleting any alternative (compressed or plain text) file to
      avoid having inconsistent files (e.g., delete any existing
      `item` when writing `item.gz`)

    Note that *append* cannot be used with *gzip* or with an existing
    gzipped file and in such a case a :exc:`NotImplementedError` will
    be raised. This may be allowed in the future, but as appending to
    a gzipped file (in general) results in inefficient compression, it
    is better to append to plain text and compress when done.

    Args:
        dir: path to the database directory
        name: name of the relation to write
        records: iterable of records to write
        fields: iterable of :class:`Field` objects
        append: if `True`, append to rather than overwrite the file
        gzip: if `True` and the file is not empty, compress the file
            with `gzip`; if `False`, do not compress
        encoding: character encoding of the file

    Raises:
        TSDBError: if *dir* is not an existing directory
        NotImplementedError: if *append* is combined with *gzip* or
            with an existing gzipped file

    Example:
        >>> tsdb.write('my-profile',
        ...            'item',
        ...            item_records,
        ...            schema['item'])
    """
    dir = Path(dir).expanduser()

    if encoding is None:
        encoding = 'utf-8'

    if not dir.is_dir():
        raise TSDBError(f'invalid test suite directory: {dir}')

    tx_path, gz_path, use_gz = _get_paths(dir, name)
    if append and (gzip or use_gz):
        raise NotImplementedError('cannot append to a gzipped file')

    mode = 'ab' if append else 'wb'

    # Everything is staged in a temporary file inside *dir* first; it is
    # removed automatically when the `with` block exits
    with tempfile.NamedTemporaryFile(mode='w+b',
                                     suffix='.tmp',
                                     prefix=name,
                                     dir=dir) as f_tmp:
        f_tmp.writelines(
            (join(record, fields) + '\n').encode(encoding)
            for record in records
        )

        # only gzip non-empty files
        compress = gzip and f_tmp.tell() != 0
        if compress:
            dest, other = gz_path, tx_path
        else:
            dest, other = tx_path, gz_path

        # now copy the temp file to the destination
        f_tmp.seek(0)
        sink = gzopen(dest, mode) if compress else dest.open(mode=mode)
        with sink as f_out:
            shutil.copyfileobj(f_tmp, f_out)

    # clean up other (gz or non-gz) file if it exists
    if other.is_file():
        other.unlink()