async def update_user_index(db: aiosqlite.Connection, user: str, index: int, insert: bool = False) -> None:
    """Persist a user's index in the "user_status" table and commit.

    Args:
        db: open aiosqlite connection.
        user: user id (first column of "user_status").
        index: value stored in the quoted "index" column.
        insert: when True, create a new row for ``user``; otherwise
            update the existing row.
    """
    # The original wrapped each statement in `async with ...: pass`,
    # an empty context manager whose only effect was closing a cursor
    # that the connection closes anyway; plain awaits are the DML idiom.
    if insert:
        await db.execute(
            '''INSERT INTO "user_status" VALUES (?, ?)''',
            (user, index),
        )
    else:
        await db.execute(
            '''UPDATE "user_status" SET "index" = ? WHERE "user_id" = ?''',
            (index, user),
        )
    await db.commit()
async def get_enabled(db: aiosqlite.Connection) -> bool:
    """Return the most recently recorded vim_enabled flag.

    Defaults to True when the table has no rows yet.
    """
    query = 'SELECT enabled FROM vim_enabled ORDER BY ROWID DESC LIMIT 1'
    async with db.execute(query) as cursor:
        row = await cursor.fetchone()
    if row is None:
        # Nothing recorded yet: the feature starts enabled.
        return True
    return bool(row[0])
async def _initialize_db(self, db: Connection) -> None:
    """Apply the bundled SQL schema if the database is still empty."""
    async with db.execute("SELECT COUNT(*) FROM sqlite_master") as cursor:
        row = await cursor.fetchone()
    assert row is not None
    if row[0] > 0:
        # Schema objects already exist; nothing to initialize.
        return
    async with aiofiles.open(db_schema_path()) as schema_file:
        schema_sql = await schema_file.read()
    await db.executescript(schema_sql)
async def get_message(conn: aiosqlite.Connection, message_id: int) -> Optional[dict]:
    """Fetch one message row by id as a prepared dict, or None if absent."""
    async with conn.execute('SELECT * FROM message WHERE id = ?', (message_id, )) as cur:
        raw = await cur.fetchone()
    if not raw:
        return None
    message = dict(raw)
    _prepare_message_row_inplace(message)
    return message
async def test_get_post(client: Client, db: aiosqlite.Connection) -> None:
    """Creating a post persists title/text and stamps owner/editor."""
    created = await client.create("test title", "test text")
    query = "SELECT title, text, owner, editor FROM posts WHERE id = ?"
    async with db.execute(query, [created.id]) as cursor:
        row = await cursor.fetchone()
    assert row["title"] == "test title"
    assert row["text"] == "test text"
    assert row["owner"] == "test_user"
    assert row["editor"] == "test_user"
async def __check_permittee_permission(db: aiosqlite.Connection, permission: PermissionId, permittee: PermitteeId) -> bool:
    """Return True when a CommandPermission row grants this permission to this permittee."""
    sql = ''' SELECT * FROM CommandPermission WHERE PermissionId=? AND PermitteeId=? '''
    async with db.execute(sql, (permission.id, permittee.id)) as cursor:
        row = await cursor.fetchone()
    return row is not None
async def get_messages(conn: aiosqlite.Connection, offset: int = 0, limit: int = 30) -> List[dict]:
    """Return up to `limit` messages, newest first, skipping `offset` rows."""
    sql = 'SELECT * FROM message ORDER BY created_at DESC LIMIT ? OFFSET ?'
    async with conn.execute(sql, (limit, offset)) as cur:
        rows = await cur.fetchall()
    messages = [dict(row) for row in rows]
    for message in messages:
        _prepare_message_row_inplace(message)
    return messages
async def get_time_left(db: aiosqlite.Connection) -> int:
    """Return the remaining enabled time in whole seconds.

    Reads the most recent deadline (an ISO-format timestamp) from
    vim_time_left.  Returns 0 when the feature is disabled, no deadline
    was ever recorded, or the deadline has already passed.
    """
    if not await get_enabled(db):
        return 0
    query = 'SELECT timestamp FROM vim_time_left ORDER BY ROWID DESC LIMIT 1'
    async with db.execute(query) as cursor:
        ret = await cursor.fetchone()
    if ret is None:
        return 0
    deadline = datetime.datetime.fromisoformat(ret[0])
    # Sample "now" once so the comparison and the subtraction agree
    # (the original called now() twice, racing against the clock).
    now = datetime.datetime.now()
    if deadline < now:
        return 0
    # Bug fix: timedelta.seconds discards whole days (it is only the
    # 0..86399 seconds component); total_seconds() keeps the full span.
    return int((deadline - now).total_seconds())
async def _get_message_part_types(conn: aiosqlite.Connection, message_id: int, types: List[str]) -> sqlite3.Row:
    """Return the first non-attachment part of `message_id` whose type is in `types`.

    Returns None when the message has no matching part (despite the
    declared Row return type).
    """
    sql = """ SELECT * FROM message_part WHERE message_id = ? AND type IN ({0}) AND is_attachment = 0 LIMIT 1 """.format(','.join('?' * len(types)))
    # Bug fix: the original built the parameters as `(message_id, ) + types`,
    # which concatenates a tuple with a list and raises TypeError.
    async with conn.execute(sql, (message_id, *types)) as cur:
        data = await cur.fetchone()
    return data
async def test_get_post(client: _TestClient, db: aiosqlite.Connection) -> None:
    """GET /api/{id} returns the stored post wrapped in an ok envelope."""
    insert_sql = "INSERT INTO posts (title, text, owner, editor) VALUES (?, ?, ?, ?)"
    async with db.execute(insert_sql, ["title", "text", "user", "user"]) as cursor:
        post_id = cursor.lastrowid
    await db.commit()
    resp = await client.get(f"/api/{post_id}")
    assert resp.status == 200
    payload = await resp.json()
    expected = {
        "data": {
            "editor": "user",
            "id": "1",
            "owner": "user",
            "text": "text",
            "title": "title",
        },
        "status": "ok",
    }
    assert payload == expected
async def _get_user_vim_bits(db: aiosqlite.Connection, ) -> Counter[str]:
    """Total bits cheered per user, as a Counter keyed by username."""
    query = 'SELECT user, SUM(bits) FROM vim_bits GROUP BY user'
    async with db.execute(query) as cursor:
        pairs = await cursor.fetchall()
    # Rows are (user, total) pairs; dict() keys them for the Counter.
    return collections.Counter(dict(pairs))
async def disabled_seconds(db: aiosqlite.Connection) -> int:
    """Total disabled time, in seconds, accumulated over all vim_bits_disabled rows."""
    async with db.execute('SELECT bits FROM vim_bits_disabled') as cursor:
        rows = await cursor.fetchall()
    total = 0
    for (bits,) in rows:
        total += _bits_to_seconds(bits)
    return total
async def msg_count(db: aiosqlite.Connection, msg: str) -> int:
    """Count how many times `msg` has been recorded in the motd table."""
    await ensure_motd_table_exists(db)
    query = 'SELECT COUNT(1) FROM motd WHERE msg = ?'
    async with db.execute(query, (msg,)) as cursor:
        row = await cursor.fetchone()
    (count,) = row
    return count
class Database:
    """Async persistence layer for users, sources, publications and merges.

    Wraps a single sqlite connection (opened in autocommit mode; explicit
    transactions are presumably managed by the ``@_transaction`` decorator,
    which injects ``cursor`` — confirm against its definition) and exposes
    the high-level operations used by the rest of the application.
    """

    # Setup
    def __init__(self, path):
        # isolation_level=None puts sqlite3 in autocommit mode; transaction
        # boundaries are handled elsewhere (see @_transaction).
        self._db = Connection(lambda: sqlite3.connect(path, isolation_level=None))

    async def __aenter__(self):
        """Connect, enforce foreign keys, and create/upgrade the schema as needed."""
        # The wrapped Connection must be awaited once to actually connect.
        await self._db
        await self._db.execute("PRAGMA foreign_keys = ON;")
        try:
            tup = await self._select_one(Version)
        except sqlite3.OperationalError:
            # Version table missing: fresh database, build the schema.
            await self._create_tables()
        else:
            if tup.version != DB_VERSION:
                await self._upgrade_tables()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self._db.close()

    @_transaction
    async def _create_tables(self, cursor=None):
        """Create every table of the schema and stamp the current DB_VERSION."""
        await cursor.execute(
            """CREATE TABLE Version ( version INTEGER, PRIMARY KEY(version) ) WITHOUT ROWID"""
        )
        await cursor.execute(
            """CREATE TABLE User ( username TEXT, password TEXT NOT NULL, salt TEXT NOT NULL, token TEXT, PRIMARY KEY(username) ) WITHOUT ROWID"""
        )
        await cursor.execute(
            """CREATE TABLE Source ( owner TEXT, key TEXT, values_json TEXT, task_json TEXT, due INTEGER NOT NULL, FOREIGN KEY(owner) REFERENCES User(username) ON DELETE CASCADE, PRIMARY KEY(owner, key) ) WITHOUT ROWID"""
        )
        await cursor.execute(
            """CREATE TABLE Author ( owner TEXT, source TEXT, path TEXT, full_name TEXT NOT NULL, id TEXT, first_name TEXT, last_name TEXT, extra_json TEXT, FOREIGN KEY(owner) REFERENCES User(username) ON DELETE CASCADE, FOREIGN KEY(owner, source) REFERENCES Source(owner, key), PRIMARY KEY(owner, source, path) ) WITHOUT ROWID"""
        )
        await cursor.execute(
            """CREATE TABLE Publication ( owner TEXT, source TEXT, path TEXT, by_self INTEGER NOT NULL, name TEXT NOT NULL, id TEXT, year INTEGER, ref TEXT, extra_json TEXT, FOREIGN KEY(owner) REFERENCES User(username) ON DELETE CASCADE, FOREIGN KEY(owner, source) REFERENCES Source(owner, key), PRIMARY KEY(owner, source, path) ) WITHOUT ROWID"""
        )
        await cursor.execute(
            """CREATE TABLE PublicationAuthors ( owner TEXT, source TEXT, pub_path TEXT, author_path TEXT, FOREIGN KEY(owner) REFERENCES User(username) ON DELETE CASCADE, FOREIGN KEY(owner, source) REFERENCES Source(owner, key), FOREIGN KEY(owner, source, pub_path) REFERENCES Publication(owner, source, path), FOREIGN KEY(owner, source, author_path) REFERENCES Author(owner, source, path), PRIMARY KEY(owner, source, pub_path, author_path) ) WITHOUT ROWID"""
        )
        await cursor.execute(
            """CREATE TABLE Cites ( owner TEXT, source TEXT, pub_path TEXT, cited_by TEXT, FOREIGN KEY(owner) REFERENCES User(username) ON DELETE CASCADE, FOREIGN KEY(owner, source) REFERENCES Source(owner, key), FOREIGN KEY(owner, source, pub_path) REFERENCES Publication(owner, source, path), FOREIGN KEY(owner, source, cited_by) REFERENCES Publication(owner, source, path), PRIMARY KEY(owner, source, pub_path, cited_by) ) WITHOUT ROWID"""
        )
        await cursor.execute(
            """CREATE TABLE Merge ( owner TEXT, source_a TEXT, source_b TEXT, pub_a TEXT, pub_b TEXT, similarity REAL NOT NULL, FOREIGN KEY(owner) REFERENCES User(username) ON DELETE CASCADE, FOREIGN KEY(owner, source_a) REFERENCES Source(owner, key), FOREIGN KEY(owner, source_b) REFERENCES Source(owner, key), FOREIGN KEY(owner, source_a, pub_a) REFERENCES Publication(owner, source, path), FOREIGN KEY(owner, source_b, pub_b) REFERENCES Publication(owner, source, path), PRIMARY KEY(owner, source_a, source_b, pub_a, pub_b) ) WITHOUT ROWID"""
        )
        await cursor.execute("INSERT INTO Version VALUES (?)", (DB_VERSION,))

    @_transaction
    async def _upgrade_tables(self, cursor=None):
        """Placeholder for schema migrations; none are defined yet."""
        pass

    # Convenience
    def _select(self, table: type, query: str = "", *args) -> Select:
        """Build a Select over `table` with an optional WHERE/ORDER clause."""
        return Select(self._db, table, query, args)

    async def _select_one(self, table: type, query: str = "", *args):
        """Run a select and return the first row (or whatever Select.one yields)."""
        async with Select(self._db, table, query, args) as select:
            return await select.one()

    async def _select_all(self, table: type, query: str = "", *args):
        """Run a select and return all rows."""
        async with Select(self._db, table, query, args) as select:
            return await select.all()

    @_transaction
    async def _insert(self, *tuples, cursor=None):
        """INSERT each named tuple into the table matching its class name."""
        for tup in tuples:
            fields = ",".join("?" * len(tup))
            # NOTE(review): executes on self._db rather than the injected
            # `cursor` (unlike _execute) — confirm this still runs inside
            # the @_transaction transaction.
            await self._db.execute(
                f"INSERT INTO {tup.__class__.__name__} VALUES ({fields})", tup
            )

    @_transaction
    async def _insert_or_replace(self, *tuples, cursor=None):
        """INSERT OR REPLACE each named tuple into its class-named table."""
        for tup in tuples:
            fields = ",".join("?" * len(tup))
            await self._db.execute(
                f"INSERT OR REPLACE INTO {tup.__class__.__name__} VALUES ({fields})",
                tup,
            )

    @_transaction
    async def _execute(self, query, *args, cursor=None):
        """Run a write statement and return the affected row count."""
        await cursor.execute(query, args)
        return cursor.rowcount

    # Public methods
    async def register_user(
        self, *, username: str, password: str, salt: str, token: str
    ):
        """Create a new User row."""
        await self._insert(
            User(username=username, password=password, salt=salt, token=token,)
        )

    async def login_user(self, *, username, token):
        """Store the session token for `username`."""
        await self._execute(
            "UPDATE User SET token = ? WHERE username = ?", token, username
        )

    async def logout_user(self, *, username):
        """Clear the session token for `username`."""
        await self._execute("UPDATE User SET token = null WHERE username = ?", username)

    async def delete_user(self, *, username):
        """Delete the user; returns True when a row was actually removed."""
        rowcount = await self._execute("DELETE FROM User WHERE username = ?", username)
        return rowcount != 0

    async def update_user_password(self, *, username, password, salt):
        """Replace the stored password hash and salt for `username`."""
        await self._execute(
            "UPDATE User SET password = ?, salt = ? WHERE username = ?",
            password,
            salt,
            username,
        )

    async def get_user_password(self, *, username: str):
        """Return (password, salt) for `username`, or None when unknown."""
        user = await self._select_one(User, "WHERE username = ?", username)
        return (user.password, user.salt) if user else None

    async def get_username(self, *, token: str):
        """Resolve a session token to a username, or None."""
        user = await self._select_one(User, "WHERE token = ?", token)
        return user.username if user else None

    async def has_user(self, *, username: str):
        """True when a User row exists for `username`."""
        user = await self._select_one(User, "WHERE username = ?", username)
        return user is not None

    async def next_source_task(self):
        """Return the source whose crawl is due soonest (smallest `due`)."""
        return await self._select_one(Source, "ORDER BY due ASC LIMIT 1")

    async def get_source_values(self, username):
        """Map each source key of `username` to its decoded values_json."""
        result = {}
        async with self._select(Source, "WHERE owner = ?", username) as select:
            async for source in select:
                result[source.key] = json.loads(source.values_json)
        return result

    @_transaction
    async def update_source_values(self, username, sources, *, cursor=None):
        """Upsert per-source value dicts and reset their `due` to 0."""
        for source, fields in sources.items():
            values_json = json.dumps(fields)
            rowcount = await self._execute(
                "UPDATE Source SET values_json = ?, due = 0 WHERE owner = ? AND key = ?",
                values_json,
                username,
                source,
                cursor=cursor,
            )
            if rowcount == 0:
                # No existing row to update: insert a fresh one.
                await self._insert(
                    Source(
                        owner=username,
                        key=source,
                        values_json=values_json,
                        task_json=None,
                        due=0,
                    ),
                    cursor=cursor,
                )

    @_transaction
    async def save_crawler_step(self, source, step, *, cursor=None):
        """Persist one crawler step: authors, publications, links, cites, and the next task state."""
        # Use `_insert_or_replace` under the premise that sources may omit
        # information entirely, but not provide less information about what
        # is known (so replacing old data won't produce any loss).
        await self._insert_or_replace(
            *(
                Author(
                    owner=source.owner,
                    source=source.key,
                    path=author.unique_path_name(),
                    full_name=author.full_name,
                    id=author.id,
                    first_name=author.first_name,
                    last_name=author.last_name,
                    extra_json=json.dumps(author.extra),
                )
                for author in step.authors
            ),
            cursor=cursor,
        )
        await self._insert_or_replace(
            *(
                Publication(
                    owner=source.owner,
                    source=source.key,
                    path=pub.unique_path_name(),
                    by_self=by_self,
                    name=pub.name,
                    id=pub.id,
                    year=pub.year,
                    ref=pub.ref,
                    extra_json=json.dumps(pub.extra),
                )
                for pub, by_self in _adapt_step_publications(step)
            ),
            cursor=cursor,
        )
        # Link each publication to its authors.
        for pub, _ in _adapt_step_publications(step):
            await self._insert_or_replace(
                *(
                    PublicationAuthors(
                        owner=source.owner,
                        source=source.key,
                        pub_path=pub.unique_path_name(),
                        author_path=author_path,
                    )
                    for author_path in pub.authors
                ),
                cursor=cursor,
            )
        for cites_pub_id, citations in step.citations.items():
            # TODO bad (maybe the step should have a method to get all the tuples to insert?)
            pub_path = StepPublication(name="", id=cites_pub_id).unique_path_name()
            await self._insert_or_replace(
                *(
                    Cites(
                        owner=source.owner,
                        source=source.key,
                        pub_path=pub_path,
                        cited_by=cit.unique_path_name(),
                    )
                    for cit in citations
                ),
                cursor=cursor,
            )
        # Record where the crawler should resume and when it is next due.
        await self._execute(
            "UPDATE Source SET task_json = ?, due = ? WHERE owner = ? AND key = ?",
            step.stage_as_json(),
            step.due(),
            source.owner,
            source.key,
            cursor=cursor,
        )

    async def get_usernames(self):
        """Return every registered username."""
        usernames = []
        async with self._select(User) as select:
            async for user in select:
                usernames.append(user.username)
        return usernames

    async def get_source_publications(self, username, source):
        """Return all Publication rows of `username` for one source key."""
        return await self._select_all(
            Publication, "WHERE owner = ? AND source = ?", username, source
        )

    @_transaction
    async def save_merges(self, username, merges, *, cursor=None):
        """Replace all Merge rows of `username` with the given similarity pairs."""
        await self._execute(
            "DELETE FROM Merge WHERE owner = ?", username, cursor=cursor
        )
        await self._insert(
            *(
                Merge(
                    owner=username,
                    source_a=m.source_a,
                    source_b=m.source_b,
                    pub_a=m.pub_a,
                    pub_b=m.pub_b,
                    similarity=m.similarity,
                )
                for m in merges
            ),
            cursor=cursor,
        )

    async def get_publications(self, username):
        """Aggregate the user's own publications across sources, merging duplicates and counting cites."""
        publications = {}
        async with self._db.execute(
            """ SELECT p.source, p.path, p.by_self, p.name, p.year, p.ref, a.full_name, c.cited_by FROM Publication AS p JOIN PublicationAuthors AS pa ON ( p.owner = pa.owner AND p.source = pa.source AND p.path = pa.pub_path ) JOIN Author AS a ON ( p.owner = a.owner AND p.source = a.source AND pa.author_path = a.path ) LEFT JOIN Cites AS c ON ( p.owner = c.owner AND p.source = c.source AND p.path = c.pub_path ) WHERE p.owner = ? """,
            (username,),
        ) as cursor:
            # One row per (publication, author, citing-pub) combination;
            # fold them into one dict entry per publication path.
            async for (
                source,
                pub_path,
                by_self,
                pub_name,
                year,
                ref,
                author_name,
                cit_path,
            ) in cursor:
                pubs = publications.setdefault(source, {})
                if pub_path in pubs:
                    pubs[pub_path]["author_names"].add(author_name)
                    pubs[pub_path]["cites"].add(cit_path)
                else:
                    pubs[pub_path] = {
                        "ref": ref,
                        "name": pub_name,
                        "author_names": {author_name},
                        "cites": {cit_path},
                        "year": year,
                        "by_self": by_self != 0,
                    }
        # Separate queries make this process a bit less tedious
        merges = MergeCheck(await self._select_all(Merge, "WHERE owner = ?", username))
        used = set()
        result = []
        for source, pubs in publications.items():
            for path, pub in pubs.items():
                if not pub["by_self"] or (source, path) in used:
                    continue
                # Base publication value
                used.add((source, path))
                value = {
                    "sources": [{"key": source, "ref": pub["ref"]}],
                    "name": pub["name"],
                    "authors": [{"full_name": a} for a in pub["author_names"]],
                    "cites": 0,
                    "year": pub["year"],
                }
                # Count all the cites from the main publication at once, and mark all of them
                # plus the related ones as used.
                cites = len(pub["cites"])
                used_cites = {(source, c) for c in pub["cites"]}
                for cit_path in pub["cites"]:
                    for rel_cite_source, rel_cite_path in merges.get_related(
                        source, cit_path
                    ):
                        used_cites.add((rel_cite_source, rel_cite_path))
                # Merge publication value
                for rel_source, rel_path in merges.get_related(source, path):
                    rel_pub = publications[rel_source][rel_path]
                    if not rel_pub["by_self"] or (rel_source, rel_path) in used:
                        continue  # might come from a citation
                    used.add((rel_source, rel_path))
                    value["sources"].append({"key": rel_source, "ref": rel_pub["ref"]})
                    # Sometimes the year differs (or may be missing). Because there is no single
                    # source of truth, show the year on a best-effort by not showing missing.
                    # Similar can happen with authors (they should be merged).
                    if value["year"] is None:
                        value["year"] = rel_pub["year"]
                    # Update cite count only once per cite (related ones won't count)
                    for cit_path in rel_pub["cites"]:
                        if (rel_source, cit_path) in used_cites:
                            continue
                        cites += 1
                        used_cites.add((rel_source, cit_path))
                        for rel_cite_source, rel_cite_path in merges.get_related(
                            rel_source, cit_path
                        ):
                            used_cites.add((rel_cite_source, rel_cite_path))
                value["cites"] = cites
                result.append(value)
        return result

    async def _export_table_as_csv(self, table, owner, fields):
        """Serialize the owner's rows of `table` to CSV text; `fields` is a space-separated column list."""
        fields = fields.split()
        buffer = io.StringIO(newline="")
        writer = csv.writer(buffer)
        writer.writerow(fields)
        async with self._select(table, "WHERE owner = ?", owner) as select:
            async for row in select:
                writer.writerow(getattr(row, field) for field in fields)
        buffer.flush()
        return buffer.getvalue()

    async def export_data_as_zip(self, username):
        """Bundle every table owned by `username` into an in-memory zip of CSVs."""
        buffer = io.BytesIO()
        with zipfile.ZipFile(buffer, "w") as zf:
            zf.writestr(
                "sources.csv",
                await self._export_table_as_csv(Source, username, "key values_json"),
            )
            zf.writestr(
                "authors.csv",
                await self._export_table_as_csv(
                    Author,
                    username,
                    "source path full_name id first_name last_name extra_json",
                ),
            )
            zf.writestr(
                "publications.csv",
                await self._export_table_as_csv(
                    Publication, username, "source path by_self name id year ref extra_json",
                ),
            )
            zf.writestr(
                "publication-authors.csv",
                await self._export_table_as_csv(
                    PublicationAuthors, username, "source pub_path author_path"
                ),
            )
            zf.writestr(
                "cites.csv",
                await self._export_table_as_csv(
                    Cites, username, "source pub_path cited_by"
                ),
            )
            zf.writestr(
                "merges.csv",
                await self._export_table_as_csv(
                    Merge, username, "source_a source_b pub_a pub_b similarity"
                ),
            )
        return buffer.getvalue()
async def select_sentences(db: Connection) -> list:
    """Return up to 100 random rows from `sentences` matching the RE() filter.

    RE is presumably an application-registered SQLite function — confirm
    it is installed on this connection before calling.

    Bug fix: the return annotation previously said ``None`` even though
    the function returns the fetched rows.
    """
    async with db.execute(
        r'SELECT * FROM sentences WHERE RE(sentence) ORDER BY RANDOM() LIMIT 100'
    ) as c:
        results = await c.fetchall()
    return results
async def get_messages_count(conn: aiosqlite.Connection) -> int:
    """Return the total number of rows in the message table."""
    async with conn.execute('SELECT count(1) FROM message') as cur:
        row = await cur.fetchone()
    (count,) = row
    return count