class _SortedResults:
    """_SortedResults(iterable, column, desc) -> _SortedResults

    Adapter that replays a header-plus-rows iterable with the rows sorted
    on a single column. Adapters can be stacked through order_by().
    """

    __slots__ = slots("iter column direction")

    def __init__(self, iterable, column, desc):
        """Remember the row source, the sort column and the direction."""
        self.__iter, self.__column, self.__direction = iterable, column, desc

    def __iter__(self):
        """Yield the header first, then every row in the requested order."""
        header, *body = tuple(self.__iter)
        # The sort key is looked up by the column's position in the header.
        position = header.index(self.__column)
        yield header
        yield from sorted(body, key=ROW[position], reverse=self.__direction)

    def order_by(self, column, desc=False):
        """Wrap these results in another sorting adapter for *column*."""
        adapter_type = type(self)
        return adapter_type(self, column, desc)

    def table(self):
        """Build a table object out of the sorted results."""
        return Table.from_iter(self)
class _View:
    """_View(database, query, *name_changes) -> _View

    Saved query that is re-evaluated every time ``value`` is read.
    """

    __slots__ = slots("database query name_changes")

    def __init__(self, database, query, *name_changes):
        """Store the query, its database argument and the column renames."""
        self.__database = database
        self.__query = query
        self.__name_changes = name_changes

    def __getstate__(self):
        """Return the picklable state; the query is reduced to its code."""
        query_code = self.__query.__code__
        return self.__database, query_code, self.__name_changes

    def __setstate__(self, state):
        """Restore the view, rebuilding the query function from its code."""
        database, query_code, renames = state
        self.__database = database
        # The function is recreated with sys.modules as its globals mapping
        # and without any defaults or closure cells.
        self.__query = types.LambdaType(query_code, sys.modules, "", (), ())
        self.__name_changes = renames

    @property
    def value(self):
        """Run the query and return its result as a (renamed) table."""
        result = self.__query(self.__database)
        if not isinstance(result, Table):
            result = Table.from_iter(result)
        for pair in self.__name_changes:
            old, new = pair
            result.alter_name(old, new)
        return result
class _Lock:
    """_Lock(immediate=False, silent=False) -> _Lock

    Thin wrapper around a ``_thread`` lock that can be used as a context
    manager and that can optionally ignore releases of an unlocked lock.

    immediate -- acquire the lock as part of construction.
    silent    -- when true, releasing an unlocked lock is ignored; when
                 false (the default) the underlying error propagates.
    """

    __slots__ = slots("lock verbose")

    def __init__(self, immediate=False, silent=False):
        """Initializes _Lock instance with internal mechanism."""
        self.__lock = _thread.allocate_lock()
        # The flag used to be stored without negation, which made the
        # default swallow release errors and silent=True raise them.
        self.__verbose = not silent
        if immediate:
            self.acquire()

    def acquire(self, wait=True):
        """Acquires lock with an optional wait; returns the lock's result."""
        return self.__lock.acquire(wait)

    def release(self, exc_type=None, exc_value=None, traceback=None):
        """Releases the lock.

        The three optional arguments exist so this method can double as
        __exit__; they are ignored. Raises _thread.error when the lock is
        not held, unless the instance was created with silent=True.
        """
        try:
            self.__lock.release()
        except _thread.error:
            if self.__verbose:
                raise

    __enter__ = acquire
    __exit__ = release

    @property
    def locked(self):
        """Returns whether or not lock is currently locked."""
        return self.__lock.locked()
# NOTE(review): the base class expression is evaluated when this statement
# runs, so NotLike has to be defined before this point -- confirm the order.
class Like(NotLike):
    """Like(column, pattern, flags=IGNORECASE, advanced=False) -> Like

    Row test that is true when the column matches the pattern.
    """

    __slots__ = slots()

    def __call__(self, row):
        """Return the negation of what the NotLike test reports for *row*."""
        not_like = super().__call__(row)
        return not not_like
class _Where:
    """_Where(mode, condition) -> _Where

    Row test built from ``column=value`` pairs combined with "and" / "or".
    """

    __slots__ = slots("call rows")

    def __init__(self, mode, condition):
        """Pick the combining function for *mode* and keep the conditions."""
        combiners = {"and": all, "or": any}
        self.__call = combiners[mode]
        self.__rows = condition

    def __call__(self, row):
        """Compare each named column of *row* and combine the results."""
        combine = self.__call
        return combine(
            row[name] == expected for name, expected in self.__rows.items()
        )
class NotLike:
    """NotLike(column, pattern, flags=IGNORECASE, advanced=False) -> NotLike

    Row test that is true when the column does NOT match the pattern.
    Unless *advanced* is set, the pattern is anchored with "^" and "$".
    """

    __slots__ = slots("column method")

    def __init__(self, column, pattern, flags=re.IGNORECASE, advanced=False):
        """Compile the pattern and remember which column it applies to."""
        self.__column = column
        expression = pattern if advanced else "^" + pattern + "$"
        self.__method = re.compile(expression, flags).search

    def __call__(self, row):
        """Return True when the pattern is not found in the row's column."""
        found = self.__method(row[self.__column])
        return found is None
class RowAdapter:
    """RowAdapter(row, column_map=None) -> RowAdapter

    Gives attribute and item access to the cells of a row. With a column
    map, names are translated to row keys through the map; without one the
    row is expected to be keyed by column name already. A name that is only
    a dotted prefix of known columns yields a nested RowAdapter.
    """

    __slots__ = slots("row map")

    def __init__(self, row, column_map=None):
        """Store the row and the optional name-to-key mapping."""
        self.__row = row
        self.__map = column_map

    def __getattr__(self, column):
        """Return the cell for *column* or an adapter for its sub-columns."""
        mapping = self.__map
        if mapping is None:
            return self.__unmapped(column)
        if column in mapping:
            return self.__row[mapping[column]]
        # Not a full name: collect every column that lives under "column.".
        prefix = column + "."
        narrowed = {
            name[len(prefix):]: mapping[name]
            for name in mapping
            if name.startswith(prefix)
        }
        assert narrowed, "Name did not match any known column: " + repr(prefix)
        return type(self)(self.__row, narrowed)

    __getitem__ = __getattr__

    def __unmapped(self, column):
        """Resolve *column* against a row that is keyed by column name."""
        source = self.__row
        if column in source:
            return source[column]
        prefix = column + "."
        nested = {
            name[len(prefix):]: source[name]
            for name in source
            if name.startswith(prefix)
        }
        assert nested, "Name did not match any known column: " + repr(prefix)
        return type(self)(nested)
class Table:
    """Table(*columns) -> Table

    In-memory table. Each column is given as a ``(name, data_type)`` pair.
    Rows are stored as dictionaries keyed by column index inside a mapping
    keyed by an increasing row id.
    """

    @classmethod
    def from_iter(cls, iterator):
        """Generates a table from a column / rows iterator.

        The first item is the tuple of column names; column types are
        taken from the first data row, so at least one row is required.
        """
        title, test_row, *rows = iterator
        table = cls(*zip(title, map(type, test_row)))
        table.insert(*test_row)
        for row in rows:
            table.insert(*row)
        return table

    __slots__ = slots("columns data_area row_index")

    def __init__(self, *columns):
        """Initializes Table with columns and row storage area."""
        self.__columns = _Columns(columns)
        self.__data_area = {}
        self.__row_index = 1

    def __len__(self):
        """Returns the number of rows in the table."""
        return len(self.__data_area)

    def __repr__(self):
        """Creates a complete representation of the table.

        The result is a grid of repr() strings with a leading ROW_ID
        column, a header row and a separator row.
        """
        layout = list(self.__columns)
        names = [name for index, name, data_type in layout]
        indexes = [index for index, name, data_type in layout]
        buffer = [list(map(repr, ["ROW_ID"] + names))]
        for row in sorted(self.__data_area):
            cells = self.__data_area[row]
            buffer.append(
                list(map(repr, [row] + [cells[index] for index in indexes]))
            )
        # Every grid column is as wide as its longest cell.
        width = [max(map(len, column)) for column in zip(*buffer)]
        lines = [
            " | ".join(value.ljust(size) for value, size in zip(row, width))
            for row in buffer
        ]
        lines.insert(1, "-+-".join("-" * size for size in width))
        return "\n".join(lines)

    def __str__(self):
        """Returns a plain text rendering with upper-cased column titles."""
        names, *rows = self
        columns = {name: [] for name in names}
        for row in rows:
            for key, value in zip(names, row):
                columns[key].append(value)
        lengths = tuple(
            max(len(str(value)) for value in columns[key] + [key])
            for key in names
        )

        def render(value, size):
            # Widths are measured on str(value); objects whose __format__
            # rejects a width (None, classes, ...) are shown through str()
            # instead of raising TypeError.
            spec = str(size)
            try:
                return format(value, spec)
            except TypeError:
                return format(str(value), spec)

        lines = [
            " ".join(map(render, map(str.upper, names), lengths)),
            " ".join(map("-".__mul__, lengths)),
        ]
        for row in zip(*map(columns.__getitem__, names)):
            lines.append(" ".join(map(render, row, lengths)))
        return "\n".join(lines)

    def __iter__(self):
        """Returns an iterator over the table's columns."""
        return self(*self.columns)

    def __call__(self, *columns):
        """Returns an iterator over the specified columns.

        The tuple of requested names is yielded first, followed by one
        tuple per row in row id order.
        """
        indexes = tuple(self.__columns[name][1] for name in columns)
        yield columns
        for row in sorted(self.__data_area):
            yield tuple(self.__data_area[row][index] for index in indexes)

    def __eq__(self, other):
        """Compares the str() renderings of both operands."""
        return str(self) == str(other)

    def first(self, column=None):
        """Returns the row with the lowest row id, or one cell of it."""
        return self.__get_location(min, column)

    def last(self, column=None):
        """Returns the row with the highest row id, or one cell of it."""
        return self.__get_location(max, column)

    def print(self, end="\n\n", file=None):
        """Provides a convenient way of printing representation of the table."""
        print(repr(self), end=end, file=sys.stdout if file is None else file)

    def top(self, amount):
        """Iterates over the top rows specified by amount.

        -1 selects every row and a value in [0, 1) is taken as a fraction
        of the row count. The header is yielded before the rows.
        """
        if amount == -1:
            amount = len(self.__data_area)
        elif 0 <= amount < 1:
            amount = round(amount * len(self.__data_area))
        assert isinstance(amount, int), "Amount was not understood!"
        # One extra item accounts for the header produced by iteration.
        for row, count in zip(self, range(amount + 1)):
            yield row

    def insert(self, *values, **columns):
        """Inserts provided data into a new row of the database.

        Use either positional values (one per column) or keyword
        arguments (missing columns get ``data_type()``), never both.
        """
        if values:
            assert len(values) == len(self.__columns), "Bad number of columns!"
            assert not columns, "Mixed syntax is not accepted!"
            row = self.__insert_across(values)
        else:
            assert columns, "There is nothing to insert!"
            row = self.__insert_select(columns)
        self.__data_area[self.__row_index] = row
        self.__row_index += 1

    def alter_add(self, name, data_type):
        """Adds a column to the table and populates it.

        Cells are filled with ``data_type()``. If that call raises
        TypeError before any cell was filled, ``data_type`` itself is
        stored in every row instead.
        """
        index = self.__columns.add(name, data_type)
        started = False
        try:
            for row in self.__data_area.values():
                row[index] = data_type()
                started = True
        except TypeError:
            if started:
                raise
            for row in self.__data_area.values():
                row[index] = data_type

    def alter_drop(self, name):
        """Removes a column from the table and frees memory."""
        index = self.__columns.drop(name)
        for row in self.__data_area.values():
            del row[index]

    def alter_column(self, name, data_type):
        """Changes the data-type of a column and refreshes it."""
        index = self.__columns.alter(name, data_type)
        for row in self.__data_area.values():
            row[index] = data_type()

    def alter_name(self, old, new):
        """Renames a column without altering the rows."""
        self.__columns.rename(old, new)

    def as_(self, *pairs):
        """Changes the name of multiple columns at a time; returns self."""
        for old, new in pairs:
            self.alter_name(old, new)
        return self

    def copy(self):
        """Copies a table while sharing cell instances."""
        clone = type(self)()
        clone.__columns = self.__columns.copy()
        clone.__data_area = {}
        for key, value in self.__data_area.items():
            clone.__data_area[key] = value.copy()
        clone.__row_index = self.__row_index
        return clone

    def select(self, *column_names):
        """Select columns and process them with any given functions.

        Strings name columns to keep; tuples are ``(function, column)``
        pairs. Without arguments the table itself is returned. A plain
        column selection shares the row storage with this table.
        """
        if not column_names:
            return self
        columns, functions = [], []
        for item in column_names:
            if isinstance(item, str):
                columns.append(item)
            elif isinstance(item, tuple):
                functions.append(item)
            else:
                raise TypeError(type(item))
        original = {name for index, name, data_type in self.__columns}
        excess = original - set(columns)
        if functions:
            return self.__select_with_function(excess, functions)
        view = type(self)()
        view.__columns = self.__columns.copy()
        view.__data_area = self.__data_area
        view.__row_index = self.__row_index
        for column in excess:
            view.__columns.drop(column)
        return view

    def distinct(self):
        """Return copy of table having only distinct rows.

        Rows are compared through the pickle of their visible cells; the
        first occurrence in storage order is kept.
        """
        result = type(self)()
        result.__columns = self.__columns
        result.__data_area = self.__data_area.copy()
        result.__row_index = self.__row_index
        valid_indexs = set()
        distinct_rows = set()
        for row in result.__data_area:
            array = pickle.dumps(
                tuple(
                    result.__data_area[row][index]
                    for index, name, data_type in self.__columns
                )
            )
            if array not in distinct_rows:
                valid_indexs.add(row)
                distinct_rows.add(array)
        for row in tuple(result.__data_area):
            if row not in valid_indexs:
                del result.__data_area[row]
        return result

    def update(self, **assignments):
        """Changes all present rows with given assignments."""
        assign = []
        for name, value in assignments.items():
            data_type, index = self.__columns[name]
            assert isinstance(
                value, data_type
            ), "Wrong datatype: {} ({!r}, {!r})".format(name, value, data_type)
            assign.append((index, value))
        for row in self.__data_area.values():
            for index, value in assign:
                row[index] = value

    def where(self, test="and", **kw):
        """Select rows that fit criteria given by the test.

        Pass a callable as *test*, or keyword conditions combined by the
        mode named in *test* ("and" / "or").
        """
        test = self.__process_test(test, kw)
        result = type(self)()
        result.__columns = self.__columns
        result.__data_area = self.__data_area.copy()
        result.__row_index = self.__row_index
        self.__remove(result.__data_area, False, test)
        return result

    def delete(self, test="and", **kw):
        """Delete rows that fit criteria given by the test; returns self."""
        test = self.__process_test(test, kw)
        self.__remove(self.__data_area, True, test)
        return self

    def truncate(self):
        """Deletes all of the rows in the table; returns self."""
        self.__data_area.clear()
        return self

    def order_by(self, column, desc=False):
        """Returns a sorted result of the table."""
        return _SortedResults(self, column, desc)

    def into(self, table):
        """Inserts external table into this table by column name.

        Every row of this table is inserted into *table* using this
        table's column names as keywords.
        """
        self_iter = iter(self)
        self_colu = next(self_iter)
        for row in self_iter:
            table.insert(**dict(zip(self_colu, row)))

    def left_join(self, table, name, test):
        """Returns result of a left join on the given table using test."""
        return left_join(self, (table, name), test)

    def sum_(self, column):
        """Adds up all of the cells in a particular column of the table."""
        data_type, index = self.__columns[column]
        total = data_type()
        for row in self.__data_area:
            total += self.__data_area[row][index]
        return total

    def avg(self, column):
        """Averages the cells in the given column; 0 for an empty table."""
        size = len(self.__data_area)
        return self.sum_(column) / size if size else size

    def max_(self, column):
        """Finds the largest cell value from the column in the table."""
        index = self.__columns[column][1]
        return max(map(ROW[index], self.__data_area.values()))

    def min_(self, column):
        """Finds the smallest cell value from the column in the table."""
        index = self.__columns[column][1]
        return min(map(ROW[index], self.__data_area.values()))

    def count(self, column=None):
        """Counts the total number of 'non-null' cells in the given column.

        A cell is 'null' when it equals ``data_type()``. Without a column
        the number of rows is returned.
        """
        if column is None:
            return len(self.__data_area)
        data_type, index = self.__columns[column]
        null, total = data_type(), 0
        for row in self.__data_area.values():
            if row[index] != null:
                total += 1
        return total

    def group_by(self, *columns):
        """Creates new tables from this table on matching columns.

        Each argument is a column name or a callable taking a RowAdapter.
        A callable with a non-None ``name`` attribute also contributes
        its result as an extra column of the grouped tables.
        """
        column_map = {name: index for index, name, data_type in self.__columns}
        index_list = tuple(sorted(column_map.values()))
        schema = list(self.schema)
        tables = {}
        first = True
        for row_dict in self.__data_area.values():
            interest = []
            row = list(row_dict[index] for index in index_list)
            for name in columns:
                if isinstance(name, str):
                    interest.append(row_dict[column_map[name]])
                else:
                    interest.append(name(RowAdapter(row_dict, column_map)))
                    name = name.name
                    if name is not None:
                        data = interest[-1]
                        row.append(data)
                        # The schema is only extended while processing
                        # the first row.
                        if first:
                            signature = name, type(data)
                            if signature not in schema:
                                schema.append(signature)
            first = False
            key = tuple(interest)
            if key not in tables:
                tables[key] = type(self)(*schema)
            tables[key].insert(*row)
        return tables.values()

    def __get_location(self, function, column):
        """Returns a row or cell based on function and column.

        *function* picks the row id (min or max); with column None all
        stored cells of that row are returned ordered by column index.
        """
        row = self.__data_area[function(self.__data_area)]
        if column is None:
            return tuple(row[index] for index in sorted(row))
        return row[self.__columns[column][1]]

    def __insert_across(self, values):
        """Inserts values into new row while checking data types."""
        row = {}
        for value, (index, name, data_type) in zip(values, self.__columns):
            assert isinstance(
                value, data_type
            ), "Wrong datatype: {} ({!r}, {!r})".format(name, value, data_type)
            row[index] = value
        return row

    def __insert_select(self, values):
        """Inserts values into new row and fills in blank cells."""
        row = {}
        for name, value in values.items():
            data_type, index = self.__columns[name]
            assert isinstance(
                value, data_type
            ), "Wrong datatype: {} ({!r}, {!r})".format(name, value, data_type)
            row[index] = value
        for index, name, data_type in self.__columns:
            if index not in row:
                row[index] = data_type()
        return row

    def __remove(self, data_area, delete, test):
        """Removes rows from data area according to criteria.

        A row is removed when the truth value of its test result equals
        *delete*.
        """
        column_map = {name: index for index, name, data_type in self.__columns}
        for row in tuple(data_area):
            value = test(RowAdapter(data_area[row], column_map))
            assert not isinstance(value, RowAdapter), "Test improperly formed!"
            if bool(value) == delete:
                del data_area[row]

    def __select_with_function(self, excess, functions):
        """Creates virtual rows formed by calling functions on columns.

        For an existing column a new column named ``function(column)`` is
        computed per row; otherwise the function is called without
        arguments and a deep copy of its result fills a new column.
        """
        table = self.copy()
        for code, data in functions:
            if data in table.__columns:
                data_name = "{}({})".format(code.__name__, data)
                # The new column's type comes from the first row's result.
                data_type = type(code(next(rows(table(data)))[0]))
                table.alter_add(data_name, data_type)
                dest = table.__columns[data_name][1]
                sour = table.__columns[data][1]
                for row in table.__data_area.values():
                    row[dest] = code(row[sour])
            else:
                sour = code()
                table.alter_add(data, type(sour))
                dest = table.__columns[data][1]
                for row in table.__data_area.values():
                    row[dest] = copy.deepcopy(sour)
        for column in excess:
            table.alter_drop(column)
        return table

    @staticmethod
    def __process_test(test, kw):
        """Ensures that test has been properly formed as necessary."""
        if kw:
            test = _Where(test, kw)
        else:
            assert callable(test), "Test must be callable!"
        return test

    @property
    def columns(self):
        """Returns a tuple of column names ordered by column index."""
        columns = sorted(self.__columns, key=lambda info: info[0])
        return tuple(map(lambda info: info[1], columns))

    @property
    def schema(self):
        """Returns table's schema that can be used to create another table."""
        return tuple((name, self.__columns[name][0]) for name in self.columns)
class _Columns:
    """_Columns(columns) -> _Columns

    Registry that maps each column name to ``(data_type, index)`` and
    hands out a fresh index for every added column.
    """

    __slots__ = slots("column_index column_names")

    def __init__(self, columns):
        """Register every ``(name, data_type)`` pair in the given order."""
        self.__column_index = 1
        self.__column_names = UniqueDict()
        for pair in columns:
            name, data_type = pair
            self.add(name, data_type)

    def __contains__(self, name):
        """Tell whether a column with this name is registered."""
        return name in self.__column_names

    def __len__(self):
        """Return how many columns are registered."""
        return len(self.__column_names)

    def __iter__(self):
        """Yield ``(index, name, data_type)`` triples in sorted order."""
        yield from sorted(
            (index, name, data_type)
            for name, (data_type, index) in self.__column_names.items()
        )

    def __getitem__(self, name):
        """Return the ``(data_type, index)`` entry stored for *name*."""
        return self.__column_names[name]

    def __getstate__(self):
        """Return the state needed to pickle the registry."""
        return self.__column_index, self.__column_names

    def __setstate__(self, state):
        """Restore the registry from its pickled state."""
        self.__column_index, self.__column_names = state

    def copy(self):
        """Return a registry with the same counter and a copied mapping."""
        clone = type(self)([])
        clone.__column_index = self.__column_index
        clone.__column_names = self.__column_names.copy()
        return clone

    def add(self, name, data_type):
        """Register a column and return the index assigned to it."""
        assigned = self.__column_index
        self.__column_names[name] = data_type, assigned
        self.__column_index = assigned + 1
        return assigned

    def drop(self, name):
        """Forget the named column and return the index it occupied."""
        entry = self.__column_names[name]
        del self.__column_names[name]
        return entry[1]

    def alter(self, name, data_type):
        """Replace the data type of the named column; return its index."""
        entry = self.__column_names[name]
        position = entry[1]
        self.__column_names.replace(name, (data_type, position))
        return position

    def rename(self, old, new):
        """Move the entry stored under *old* to the name *new*."""
        entry = self.__column_names[old]
        self.__column_names[new] = entry
        del self.__column_names[old]
class Database:
    """Database() -> Database

    Named collection of tables and views that can be saved to and loaded
    from a bz2-compressed pickle file.
    """

    __slots__ = slots("path data type view")

    @classmethod
    def load(cls, path):
        """Loads database from path and tests identity.

        NOTE: the file is unpickled, which can execute arbitrary code;
        only load files that come from a trusted source.
        """
        with open(path, "rb") as file:
            obj = pickle.loads(bz2.decompress(file.read()))
        assert isinstance(obj, cls), "Could not load a database object!"
        obj.__path = path
        return obj

    def __init__(self):
        """Initializes database object void of tables or views."""
        self.__path = None
        self.__setstate__(
            Table(("name", str), ("type", type), ("data", (Table, _View)))
        )

    def __repr__(self):
        """Returns the representation of the database."""
        return repr(self.__view.value)

    def __iter__(self):
        """Iterates over the tables and views stored in the database."""
        for row in rows(self.__data("name")):
            yield self[row[0]]

    def __getattr__(self, name):
        """Allows getting table or view via attribute lookup or index notation.

        Views are evaluated and their current value is returned.
        """
        # The first item of the iteration is the header, so a single
        # match produces exactly two items.
        t = tuple(self.__data.where(ROW.name == name)("data"))
        assert len(t) < 3, "Name is ambiguous!"
        assert len(t) > 1, "Object was not found!"
        data = t[1][0]
        if isinstance(data, _View):
            return data.value
        return data

    __getitem__ = __getattr__

    def __getstate__(self):
        """Provides support for pickling and saving the database."""
        return self.__data

    def __setstate__(self, state):
        """Helps with unpickling and adding needed instance variables.

        Besides storing the catalogue table, this builds the type lookup
        table and the view used by __repr__ and print().
        """
        self.__data = state
        self.__type = Table(("type", type), ("name", str))
        self.__type.insert(Table, "table")
        self.__type.insert(_View, "view")
        # The join has to start from the catalogue table: it supplies the
        # "name" and "data" columns that are selected below.
        self.__view = _View(
            None,
            lambda _: self.__data.left_join(
                self.__type, "Types", ROW.type == ROW.Types.type
            ).select(
                "name",
                "Types.name",
                (
                    lambda obj: float(
                        len(obj) if isinstance(obj, Table) else "nan"
                    ),
                    "data",
                ),
            ),
            ("Types.name", "type"),
            ("<lambda>(data)", "size"),
        )

    def save(self, path=None):
        """Saves the database to path or most recently known path."""
        if path is None:
            assert self.__path is not None, "Path must be provided!"
            path = self.__path
        with open(path, "wb") as file:
            file.write(bz2.compress(pickle.dumps(self)))
        self.__path = path

    def create(self, name, schema_or_table_or_query, *name_changes):
        """Creates either a table or view for use in the database.

        A tuple or list is used as a table schema, a Table is stored as
        is, and anything else is wrapped in a view together with the
        optional ``(old, new)`` column renames. Returns the new object.
        """
        assert not self.__data.where(
            ROW.name == name
        ), "Name is already used and may not be overloaded!"
        if isinstance(schema_or_table_or_query, (tuple, list)):
            assert not name_changes, "Name changes not allowed with schema!"
            data = Table(*schema_or_table_or_query)
        elif isinstance(schema_or_table_or_query, Table):
            assert not name_changes, "Name changes not allowed with table!"
            data = schema_or_table_or_query
        else:
            data = _View(self, schema_or_table_or_query, *name_changes)
        self.__data.insert(name=name, type=type(data), data=data)
        return data

    def drop(self, name):
        """Deletes a table or view from the database."""
        self.__data.delete(ROW.name == name)

    def print(self, end="\n\n", file=None):
        """Provides a simple way of showing a representation of the database."""
        self.__view.value.print(end, file)

    def create_or_replace(self, name, schema_or_table_or_query, *name_changes):
        """Drops table or view before creating one with the same name.

        Returns the newly created object, like create() does.
        """
        self.drop(name)
        return self.create(name, schema_or_table_or_query, *name_changes)

    def inner_join(self, table_a, table_b, test):
        """Inner joins tables and views by name using test."""
        return inner_join(
            test, **{table_a: self[table_a], table_b: self[table_b]}
        )

    def full_join(self, table_a, table_b, test):
        """Full joins tables and views by name using test."""
        return full_join(
            test, **{table_a: self[table_a], table_b: self[table_b]}
        )
class TransactionalDatabase(Database):
    """TransactionalDatabase() -> TransactionalDatabase

    Database whose tables can be locked for a transaction: a reserve copy
    is taken when the transaction begins and is either discarded (commit)
    or restored (rollback) when it ends.
    """

    @classmethod
    def upgrade(cls, db_old):
        """Upgrades the base version of a database into the child version.

        The new database takes over the old one's state and is saved to
        the old one's path; the old object is then re-initialised.
        """
        assert isinstance(
            db_old, cls.__base__
        ), "Can only upgrade Database objects!"
        db_new = cls()
        db_new.__setstate__(db_old.__getstate__())
        db_new.save(db_old._Database__path)
        db_old.__init__()
        return db_new

    __slots__ = slots("lock locked view")

    def __repr__(self):
        """Returns an updated representation of the database."""
        return repr(self.__view.value)

    def __setstate__(self, state):
        """Sets up remaining attributes and prepares for transactions."""
        super().__setstate__(state)
        self.__add_transaction_support()

    def __getstate__(self):
        """Reduces internal table to required columns and returns copy."""
        self.__del_transaction_support()
        try:
            data = self.__data.copy()
        finally:
            # Put the dropped columns back even when the copy fails.
            self.__extend_data()
        return data

    def __getattr__(self, name):
        """Allows contents to be accessed only if not in transaction."""
        table = self.__data.where(name=name)
        assert len(table) < 2, "Name is abmiguous!"
        assert len(table) > 0, "Object was not found!"
        assert not table.first("lock").locked, "A transaction is in place!"
        if table.first("type") is _View:
            return table.first("data").value
        return table.first("data")

    __getitem__ = __getattr__

    def begin_transaction(self, table, wait=False):
        """Locks and copies table while optionally waiting for unlock.

        Returns the table object. Without *wait*, AssertionError is
        raised when the table is already locked in a transaction.
        """
        table = self.__data.where(name=table)
        assert table.first("type") is not _View, "Views are not supported!"
        lock = table.first("lock")
        if wait:
            lock.acquire()
            with self.__lock:  # Protects Critical Section
                data = table.first("data")
                table.update(copy=copy.deepcopy(data))
        else:
            with self.__lock:
                # The acquire must not live inside an assert statement:
                # asserts are removed under -O and the table would then
                # never be locked.
                if not lock.acquire(False):
                    raise AssertionError("Table is locked in a transaction!")
                data = table.first("data")
                table.update(copy=copy.deepcopy(data))
        return data

    def commit_transaction(self, table):
        """Deletes reserve copy and unlocks the table."""
        self.__close_transaction(table, self.__commit)

    def rollback_transaction(self, table):
        """Restores table with copy, removes copy, and unlocks the table."""
        self.__close_transaction(table, self.__rollback)

    def __add_transaction_support(self):
        """Add attributes so database can support transactions."""
        self.__lock = _thread.allocate_lock()
        self.__extend_data()
        self.__locked = _View(
            None,
            lambda _: self.__data.select(
                "name", (lambda lock: lock.locked, "lock")
            ).as_(("<lambda>(lock)", "locked")),
        )
        # The join starts from the inherited summary view, which supplies
        # the "name", "type" and "size" columns selected below.
        self.__view = _View(
            None,
            lambda _: self._Database__view.value.left_join(
                self.__locked.value, "Lock", ROW.name == ROW.Lock.name
            ).select("name", "type", "size", "Lock.locked"),
            ("Lock.locked", "locked"),
        )

    def __extend_data(self):
        """Adds columns to internal table as necessary."""
        if ("type", type) not in self.__data.schema:
            self.__data.alter_add("type", type)
            for name, data in rows(self.__data("name", "data")):
                self.__data.where(name=name).update(type=type(data))
        self.__data.alter_add("lock", _Lock)
        self.__data.alter_add("copy", object)

    def __del_transaction_support(self):
        """Ensures no pending transactions and removes unsaved columns."""
        assert not self.__locked.value.where(
            locked=True
        ), "You must commit all transactions before pickling!"
        self.__data.alter_drop("type")
        self.__data.alter_drop("lock")
        self.__data.alter_drop("copy")

    def __close_transaction(self, table, action):
        """Finishes taking care of a transaction's end.

        Raises ValueError when the table is not in a transaction.
        """
        table = self.__data.where(name=table)
        assert table.first("type") is not _View, "Views are not supported!"
        lock = table.first("lock")
        # Begin Critical Section
        with self.__lock:
            # Checked explicitly because the lock's release() may ignore
            # being called on an unlocked lock.
            if not lock.locked:
                raise ValueError("Table was not in a transaction!")
            try:
                lock.release()
            except _thread.error:
                raise ValueError("Table was not in a transaction!")
            action(table)
        # End Critical Section

    @staticmethod
    def __commit(table):
        """Deletes the reserve copy of a table."""
        table.update(copy=object())

    @staticmethod
    def __rollback(table):
        """Restores table from copy and deletes the copy."""
        table.update(data=table.first("copy"), copy=object())

    @property
    def __data(self):
        """Aliases internal table from Database class."""
        return self._Database__data