def test_recreate(tmpdir, use_path, file_exists):
    """Opening with recreate=True must discard any pre-existing tables."""
    db_file = str(tmpdir / "data.db")
    if use_path:
        db_file = pathlib.Path(db_file)
    if file_exists:
        # Seed the file first so recreate has something to throw away.
        Database(db_file)["t1"].insert({"foo": "bar"})
        assert Database(db_file).table_names() == ["t1"]
    Database(db_file, recreate=True)["t2"].insert({"foo": "bar"})
    assert Database(db_file).table_names() == ["t2"]
def __init__(self, db_name: str = ""):
    """Open the backing database and bind one handle per application table.

    A file-backed database is used when *db_name* is given, otherwise an
    in-memory one.
    """
    self._db = Database(db_name) if db_name else Database(memory=True)
    self.initialize()
    # Bind an attribute for each table the bot works with.
    for table_name in (
        "member",
        "player",
        "division",
        "rss_feed",
        "channel",
        "role_mapping",
        "battleorder",
    ):
        setattr(self, table_name, self._db.table(table_name))
def test_insert_simple(tmpdir):
    """A plain CLI `insert` creates the table with no indexes."""
    json_path = str(tmpdir / "dog.json")
    db_path = str(tmpdir / "dogs.db")
    open(json_path, "w").write(json.dumps({"name": "Cleo", "age": 4}))
    result = CliRunner().invoke(cli.cli, ["insert", db_path, "dogs", json_path])
    assert result.exit_code == 0
    rows = Database(db_path).execute_returning_dicts("select * from dogs")
    assert rows == [{"age": 4, "name": "Cleo"}]
    db = Database(db_path)
    assert db.table_names() == ["dogs"]
    assert db["dogs"].indexes == []
def test_insert_with_primary_key(db_path, tmpdir):
    """`insert --pk id` records the column as the table's primary key."""
    json_path = str(tmpdir / "dog.json")
    record = {"id": 1, "name": "Cleo", "age": 4}
    open(json_path, "w").write(json.dumps(record))
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert result.exit_code == 0
    rows = Database(db_path).execute_returning_dicts("select * from dogs")
    assert rows == [{"id": 1, "age": 4, "name": "Cleo"}]
    db = Database(db_path)
    assert db["dogs"].pks == ["id"]
def test_enable_wal():
    """The `enable-wal` command flips journal_mode to wal for every database."""
    runner = CliRunner()
    db_names = ["test.db", "test2.db"]
    with runner.isolated_filesystem():
        # Fresh SQLite files start out in journal_mode=delete.
        for name in db_names:
            db = Database(name)
            db["t"].create({"pk": int}, pk="pk")
            assert db.journal_mode == "delete"
        result = runner.invoke(cli.cli, ["enable-wal"] + db_names)
        assert result.exit_code == 0
        for name in db_names:
            assert Database(name).journal_mode == "wal"
def test_attach(tmpdir):
    """attach() makes a second database file queryable under an alias."""
    foo_path = str(tmpdir / "foo.db")
    bar_path = str(tmpdir / "bar.db")
    db = Database(foo_path)
    with db.conn:
        db["foo"].insert({"id": 1, "text": "foo"})
    db2 = Database(bar_path)
    with db2.conn:
        db2["bar"].insert({"id": 1, "text": "bar"})
    db.attach("bar", bar_path)
    combined = db.execute(
        "select * from foo union all select * from bar.bar"
    ).fetchall()
    assert combined == [(1, "foo"), (1, "bar")]
def test_enable_fts_with_triggers(db_path):
    """--create-triggers keeps the FTS index in sync with later inserts."""
    Database(db_path)["Gosh"].insert_all([{"c1": "baz"}])
    result = CliRunner().invoke(
        cli.cli,
        ["enable-fts", db_path, "Gosh", "c1", "--fts4", "--create-triggers"],
    )
    assert result.exit_code == 0

    def search(q):
        return Database(db_path).execute(
            "select c1 from Gosh_fts where c1 match ?", [q]
        ).fetchall()

    assert search("baz") == [("baz",)]
    # A row inserted after enable-fts must be picked up by the triggers.
    Database(db_path)["Gosh"].insert_all([{"c1": "martha"}])
    assert search("martha") == [("martha",)]
def test_enable_fts_replace(kwargs):
    """enable_fts(replace=True) rebuilds the FTS table with a new configuration."""
    db = Database(memory=True)
    db["books"].insert(
        {
            "id": 1,
            "title": "Habits of Australian Marsupials",
            "author": "Marlee Hawkins",
        },
        pk="id",
    )
    db["books"].enable_fts(["title", "author"])
    # Baseline configuration: FTS5, both columns, no triggers, no tokenizer.
    assert not db["books"].triggers
    assert db["books_fts"].columns_dict.keys() == {"title", "author"}
    assert "FTS5" in db["books_fts"].schema
    assert "porter" not in db["books_fts"].schema
    # Now modify the FTS configuration
    changes_columns = "columns" in kwargs
    kwargs.setdefault("columns", ["title", "author"])
    db["books"].enable_fts(replace=True, **kwargs)
    # Check that the new configuration is correct
    if changes_columns:
        assert db["books_fts"].columns_dict.keys() == {"title"}
    if "create_triggers" in kwargs:
        assert db["books"].triggers
    if "fts_version" in kwargs:
        assert "FTS4" in db["books_fts"].schema
    if "tokenize" in kwargs:
        assert "porter" in db["books_fts"].schema
def test_tracer():
    """A tracer passed to the Database constructor sees every SQL statement."""
    # Collects every (sql, params) pair the Database hands to the tracer.
    collected = []
    db = Database(
        memory=True, tracer=lambda sql, params: collected.append((sql, params))
    )
    db["dogs"].insert({"name": "Cleopaws"})
    db["dogs"].enable_fts(["name"])
    db["dogs"].search("Cleopaws")
    # Expected trace: the initial pragma, introspection queries, the CREATE
    # TABLE and INSERT for the row, then the FTS5 table setup and backfill.
    assert collected == [
        ("PRAGMA recursive_triggers=on;", None),
        ("select name from sqlite_master where type = 'view'", None),
        ("select name from sqlite_master where type = 'table'", None),
        ("CREATE TABLE [dogs] (\n   [name] TEXT\n);\n        ", None),
        ("select name from sqlite_master where type = 'view'", None),
        ("INSERT INTO [dogs] ([name]) VALUES (?);", ["Cleopaws"]),
        ("select name from sqlite_master where type = 'view'", None),
        (
            "CREATE VIRTUAL TABLE [dogs_fts] USING FTS5 (\n    [name],\n    content=[dogs]\n)",
            None,
        ),
        (
            "INSERT INTO [dogs_fts] (rowid, [name])\n    SELECT rowid, [name] FROM [dogs];",
            None,
        ),
        ("select name from sqlite_master where type = 'view'", None),
    ]
def test_with_tracer():
    """db.tracer() as a context manager captures SQL only inside the block."""
    collected = []
    tracer = lambda sql, params: collected.append((sql, params))
    db = Database(memory=True)
    db["dogs"].insert({"name": "Cleopaws"})
    db["dogs"].enable_fts(["name"])
    # Nothing is traced before the with-block is entered.
    assert len(collected) == 0
    with db.tracer(tracer):
        list(db["dogs"].search("Cleopaws"))
    # The search issues exactly five statements: introspection, FTS table
    # detection, schema lookup, and the ranked match query itself.
    assert len(collected) == 5
    assert collected == [
        ("select name from sqlite_master where type = 'view'", None),
        (
            "SELECT name FROM sqlite_master\n    WHERE rootpage = 0\n    AND (\n        sql LIKE '%VIRTUAL TABLE%USING FTS%content=%dogs%'\n        OR (\n            tbl_name = \"dogs\"\n            AND sql LIKE '%VIRTUAL TABLE%USING FTS%'\n        )\n    )",
            None,
        ),
        ("select name from sqlite_master where type = 'view'", None),
        ("select sql from sqlite_master where name = ?", ("dogs_fts", )),
        (
            "with original as (\n    select\n        rowid,\n        *\n    from [dogs]\n)\nselect\n    [original].*\nfrom\n    [original]\n    join [dogs_fts] on [original].rowid = [dogs_fts].rowid\nwhere\n    [dogs_fts] match :query\norder by\n    [dogs_fts].rank",
            {
                "query": "Cleopaws"
            },
        ),
    ]
    # Outside the with block collected should not be appended to
    db["dogs"].insert({"name": "Cleopaws"})
    assert len(collected) == 5
def test_upsert_alter(db_path, tmpdir):
    """Upserting a row with a brand-new column fails unless --alter is passed."""
    json_path = str(tmpdir / "dogs.json")
    db = Database(db_path)
    open(json_path, "w").write(json.dumps([{"id": 1, "name": "Cleo"}]))
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert result.exit_code == 0, result.output
    # "age" is not yet a column, so a plain upsert must error out.
    open(json_path, "w").write(json.dumps([{"id": 1, "age": 5}]))
    result = CliRunner().invoke(
        cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert result.exit_code == 1
    assert str(result.exception) == "no such column: age"
    # With --alter the column is added and the upsert succeeds.
    result = CliRunner().invoke(
        cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id", "--alter"]
    )
    assert result.exit_code == 0
    assert db.execute_returning_dicts("select * from dogs order by id") == [
        {"id": 1, "name": "Cleo", "age": 5}
    ]
def test_upsert(db_path, tmpdir):
    """Upsert with --pk updates existing rows in place.

    Inserts two dogs, then upserts new ages for the same primary keys and
    verifies the rows were updated rather than duplicated.
    """
    json_path = str(tmpdir / "dogs.json")
    db = Database(db_path)
    insert_dogs = [
        {"id": 1, "name": "Cleo", "age": 4},
        {"id": 2, "name": "Nixie", "age": 4},
    ]
    open(json_path, "w").write(json.dumps(insert_dogs))
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert 0 == result.exit_code, result.output
    assert 2 == db["dogs"].count
    # Now run the upsert to update just their ages
    upsert_dogs = [
        {"id": 1, "age": 5},
        {"id": 2, "age": 5},
    ]
    # BUG FIX: this previously wrote insert_dogs again, so the upsert was a
    # no-op and the test never exercised the update path.
    open(json_path, "w").write(json.dumps(upsert_dogs))
    result = CliRunner().invoke(
        cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert 0 == result.exit_code, result.output
    # Ages updated in place; names untouched; still exactly two rows.
    assert [
        {"id": 1, "name": "Cleo", "age": 5},
        {"id": 2, "name": "Nixie", "age": 5},
    ] == db.execute_returning_dicts("select * from dogs order by id")
def save_playlist(database, table, date=None, since=None, until=None, delay=1):
    """
    Download daily playlists, for a date or a range
    """
    # Work out which dates to fetch: today by default, a single date,
    # or every date in the since..until range.
    if not any([date, since, until]):
        dates = [datetime.date.today()]
    elif date:
        dates = [date.date()]
    elif since and until:
        dates = list(date_range(since.date(), until.date()))
    elif since or until:
        raise ValueError(
            "Invalid dates. Please provide either a single date, or both since and until arguments."
        )
    if not isinstance(database, Database):
        database = Database(database)
    table = database.table(table, extracts={"artist": "artists"})
    for date in dates:
        click.echo(f"Downloading playlist for {date}")
        table.upsert_all(scrape(date), pk="datetime")
        # Be polite to the server when fetching more than one day.
        if len(dates) > 1:
            time.sleep(delay)
def top_product_search_matches(query_product, top_output=10):
    """
    Returns a list of the top_output product names (strings), given a
    query_product string that the user is filling out from the webapp form.

    Parameters
    ----------
    query_product: str
        User typed (or clicked from proposal suggestion) product name
    top_output: int
        number of top hits returned

    Returns
    -------
    list of dictionaries (product names)
        output -> [<product>,...]
        <product>-> {"product_name": str}
    """
    db = Database(join(LNHPD.MODEL_FOLDER, LNHPD.SQLITE_FILE))
    query_product = LNHPD.__token_removal__(query_product)
    # SECURITY FIX: the user-derived MATCH expression was previously
    # f-string-interpolated into the SQL text; bind it as a parameter so
    # crafted input cannot alter the query.
    match_expr = f'product_name:"{query_product}"*'
    query = ("SELECT DISTINCT product_name FROM search "
             "WHERE product_name "
             "MATCH ? "
             "ORDER BY rank, product_name LIMIT ?")
    return LNHPD.__assoc_query__(db, query, [match_expr, top_output])
def oscr_to_zip(zip_location, output_db="oscr.db", recreate=True):
    """Load every CSV in the given zip archive into the "oscr" table, then
    set up full-text search, indexes and the predefined views."""
    db = Database(output_db, recreate=recreate)
    with ZipFile(zip_location) as archive:
        for entry in archive.filelist:
            if not entry.filename.endswith(".csv"):
                continue
            with archive.open(entry) as csvfile:
                reader = csv.DictReader(
                    io.TextIOWrapper(csvfile, encoding="utf8"))
                cleaned = (dict(clean_row(row)) for row in reader)
                db["oscr"].insert_all(
                    tqdm.tqdm(cleaned),
                    ignore=True,
                    batch_size=100000,
                    pk="Charity Number",
                )
    db["oscr"].enable_fts(["Charity Name", "Objectives"],
                          replace=True,
                          create_triggers=True)
    # Index the columns most commonly filtered on.
    for columns in (
        ["Charity Status"],
        ["Constitutional Form"],
        ["Geographical Spread"],
        ["Main Operating Location"],
    ):
        db["oscr"].create_index(columns, if_not_exists=True)
    for view_name, view_def in VIEWS.items():
        print("Inserting view: {}".format(view_name))
        db.create_view(view_name, view_def, replace=True)
def validate_md(user_medical_data):
    """
    Validate every ingredient in a user_medical_data list.

    Parameters
    ----------
    user_medical_data: list of <ingredients>
        Medical data information, a list of ingredients.
        [
            {"ingredient_name":"name A"},
            {"ingredient_name":"name B"},
            ...
        ]

    Returns
    -------
    list
        The same medical_data list with each ingredient flagged as
        compliant, nhp, or a banned substance.
        output -> [<ingredient> ...]
        <ingredient> -> {
            "ingredient": str,
            "_pdl": boolean,
            "_cdsa": boolean}
    """
    db = Database(join(LNHPD.MODEL_FOLDER, LNHPD.SQLITE_FILE))
    # Flag each ingredient in place against the PDL and CDSA lists.
    for ingredient in user_medical_data:
        name = ingredient['ingredient_name']
        ingredient['_pdl'] = LNHPD.__is_pdl__(db, name)
        ingredient['_cdsa'] = LNHPD.__is_cdsa__(db, name)
    return user_medical_data
def get_stats(dir="."):
    """Print the total number and combined size of ebook formats found in
    every per-site .db file in *dir* (index.db excluded)."""
    dbs = []
    size = 0
    count = 0
    for f in os.listdir(dir):
        if not f.endswith(".db"):
            continue
        # index.db is the aggregate index, not a per-site database.
        if f == "index.db":
            continue
        path = Path(dir) / f
        dbs.append(Database(path))
    for db in dbs:
        for i, ebook in enumerate(db["ebooks"].rows):
            uuid = ebook['uuid']
            title = ebook['title']  # NOTE(review): currently unused
            # "formats" is stored as a JSON-encoded list of format names.
            formats = json.loads(ebook['formats'])
            # print(formats)
            for f in formats:
                # assumes each format name is also a column holding that
                # format's file size — TODO confirm against the schema
                if f in ebook:
                    if ebook[f]:
                        size += ebook[f]
                        count += 1
                        # print (f'\r{count} {f} --> {uuid}: {title}', end ='')
                        # print (f'\r{count} : {uuid} --> {f}', end='')
                        print(f'\r{count} formats - ebook : {uuid}', end='')
    print()
    print("Total count of formats:", humanize.intcomma(count))
    print("Total size:", hsize(size))
    print()
def test_add_foreign_key(db_path, args, assert_message):
    """`add-foreign-key` creates the FK once and errors on re-adds or bad columns."""
    db = Database(db_path)
    db["authors"].insert_all(
        [{"id": 1, "name": "Sally"}, {"id": 2, "name": "Asheesh"}], pk="id"
    )
    db["books"].insert_all(
        [
            {"title": "Hedgehogs of the world", "author_id": 1},
            {"title": "How to train your wolf", "author_id": 2},
        ]
    )
    result = CliRunner().invoke(cli.cli, ["add-foreign-key", db_path] + args)
    assert result.exit_code == 0, assert_message
    assert db["books"].foreign_keys == [
        ForeignKey(
            table="books", column="author_id", other_table="authors", other_column="id"
        )
    ]
    # Error if we try to add it twice:
    result = CliRunner().invoke(
        cli.cli, ["add-foreign-key", db_path, "books", "author_id", "authors", "id"]
    )
    assert result.exit_code != 0
    assert (
        result.output.strip()
        == "Error: Foreign key already exists for author_id => authors.id"
    )
    # Error if we try against an invalid column
    result = CliRunner().invoke(
        cli.cli, ["add-foreign-key", db_path, "books", "author_id", "authors", "bad"]
    )
    assert result.exit_code != 0
    assert result.output.strip() == "Error: No such column: authors.bad"
def setup_DB(dbname="test_vle_course_scraper_db.db", newdb=False):
    """Create a new database and database connection.

    Parameters
    ----------
    dbname: str
        Path of the SQLite file to open.
    newdb: bool
        If True and the file already exists, delete it first so the
        database is rebuilt from scratch.

    Returns
    -------
    Database
        The open database (also stored in the module-level DB global).
    """
    # Need to find a better way to do this
    global DB
    # At the moment this doesn't create a new database
    # if the db already exists we just reuse it.
    # If we are reusing a database, we should upsert not insert,
    # but this also means we need to identify primary keys in tables?
    # Should really find a way to require a confirmation for this?
    if newdb and os.path.isfile(dbname):
        # BUG FIX: was print("Deleting old database: {}", dbname), which
        # printed the literal braces and the name as a second argument.
        print("Deleting old database: {}".format(dbname))
        os.remove(dbname)
    print("Creating database connection: {}".format(dbname))
    DB = Database(dbname)
    print('Create tables...')
    c = DB.conn.cursor()
    c.execute(create_htmlxml)
    c.execute(create_xmlfigures)
    c.execute(create_imagetest)
    return DB
def test_sqlite_version():
    """db.sqlite_version is a tuple matching sqlite_version()'s dotted string."""
    db = Database(memory=True)
    version = db.sqlite_version
    assert isinstance(version, tuple)
    reported = next(db.query("select sqlite_version() as v"))["v"]
    assert reported == ".".join(str(part) for part in version)
def build(paths, dbname, table):
    """
    Load markdown files into a SQLite database

    Based on https://github.com/simonw/markdown-to-sqlite, modified to use
    markdown extensions.

    Parameters
    ----------
    paths: iterable of paths to markdown files with YAML front matter
    dbname: path of the SQLite database to write to
    table: name of the table to upsert documents into
    """
    db = Database(dbname)
    md = markdown.Markdown(
        extensions=["fenced_code", "codehilite"],
        extension_configs={"codehilite": {"guess_lang": "False"}},
    )
    docs = []
    for path in paths:
        # FIX: close the file handle deterministically; open(path) was
        # previously passed in without ever being closed.
        with open(path) as fp:
            metadata, text = yamldown.load(fp)
        html = md.convert(text)
        doc = {
            # Stable id derived from the file path, so re-runs upsert.
            "_id": hashlib.sha1(str(path).encode("utf8")).hexdigest(),
            "_path": str(path),
            "text": text,
            "html": html,
            **(metadata or {}),
        }
        docs.append(doc)
    db[table].upsert_all(docs, pk="_id")
def test_upsert(db_path, tmpdir):
    """Upsert updates rows with matching pks and inserts brand-new ones."""
    # Populate dogs with 20 rows first.
    test_insert_multiple_with_primary_key(db_path, tmpdir)
    json_path = str(tmpdir / "upsert.json")
    db = Database(db_path)
    assert db["dogs"].count == 20
    upsert_dogs = [
        {
            "id": 1,
            "name": "Upserted 1",
            "age": 4
        },
        {
            "id": 2,
            "name": "Upserted 2",
            "age": 4
        },
        {
            "id": 21,
            "name": "Fresh insert 21",
            "age": 6
        },
    ]
    open(json_path, "w").write(json.dumps(upsert_dogs))
    result = CliRunner().invoke(
        cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert result.exit_code == 0
    # Two rows updated in place plus one new row appended.
    assert db["dogs"].count == 21
    fetched = db.execute_returning_dicts(
        "select * from dogs where id in (1, 2, 21) order by id"
    )
    assert fetched == upsert_dogs
def top_npn_search_matches(query_npn, top_output=10):
    """
    Returns a list of the top_output npn (strings), given a query_npn
    string that the user is filling out from the webapp form.

    Parameters
    ----------
    query_npn: str
        User typed (or clicked from proposal suggestion) npn
    top_output: int
        number of top hits returned

    Returns
    -------
    list of dictionaries (npn)
        output -> [<npn>]
        <npn> -> {"npn": str}
    """
    db = Database(join(LNHPD.MODEL_FOLDER, LNHPD.SQLITE_FILE))
    query_npn = LNHPD.__token_removal__(query_npn)
    # SECURITY FIX: the user-derived MATCH expression was previously
    # f-string-interpolated into the SQL text; bind it as a parameter so
    # crafted input cannot alter the query.
    match_expr = f'npn:"{query_npn}"*'
    query = ("SELECT DISTINCT npn FROM search "
             "WHERE search "
             "MATCH ? "
             "ORDER BY rank, npn LIMIT ?")
    return LNHPD.__assoc_query__(db, query, [match_expr, top_output])
def test_query_json_with_json_cols(db_path):
    """Without --json-cols nested JSON comes back as an escaped string;
    with --json-cols it is decoded into real JSON structures."""
    db = Database(db_path)
    with db.conn:
        db["dogs"].insert(
            {
                "id": 1,
                "name": "Cleo",
                "friends": [{"name": "Pancakes"}, {"name": "Bailey"}],
            }
        )
    result = CliRunner().invoke(
        cli.cli, [db_path, "select id, name, friends from dogs"]
    )
    # Default behaviour: the "friends" column is a JSON-encoded string.
    assert (
        r"""
    [{"id": 1, "name": "Cleo", "friends": "[{\"name\": \"Pancakes\"}, {\"name\": \"Bailey\"}]"}]
    """.strip()
        == result.output.strip()
    )
    # With --json-cols:
    result = CliRunner().invoke(
        cli.cli, [db_path, "select id, name, friends from dogs", "--json-cols"]
    )
    expected = r"""
    [{"id": 1, "name": "Cleo", "friends": [{"name": "Pancakes"}, {"name": "Bailey"}]}]
    """.strip()
    assert expected == result.output.strip()
    # Test rows command too
    result_rows = CliRunner().invoke(cli.cli, ["rows", db_path, "dogs", "--json-cols"])
    assert expected == result_rows.output.strip()
def __sqlite_initialize__(data):
    """Rebuild the LNHPD SQLite file and its FTS5 autocomplete index from *data*."""
    filename = join(LNHPD.MODEL_FOLDER, LNHPD.SQLITE_FILE)
    # Persist the raw payload alongside the database for later reloading.
    with open(join(LNHPD.MODEL_FOLDER, 'lnhpd.json'), 'w', encoding='utf8') as w:
        json.dump(data, w, separators=(',', ':'), indent=4, ensure_ascii=False)
    # with open(join(LNHPD.MODEL_FOLDER, 'lnhpd.json'),'r', encoding='utf8') as r:
    #     data = json.load(r)
    # Start from a clean file so stale rows never survive a rebuild.
    if exists(filename):
        os.remove(filename)
    #AUTOCOMPLETE NPN, COMPANY NAME, PRODUCT_NAME
    db = Database(filename)
    db["products"].insert_all(data)
    c = db.conn.cursor()
    c.execute(
        '''CREATE VIRTUAL TABLE search USING fts5(product_name, company_name, npn, lnhpd_id);'''
    )
    # Populate the FTS table; licence_number is exposed under the alias
    # "npn" to match the search schema.
    c.execute("""
        INSERT INTO search (product_name, company_name, npn, lnhpd_id)
        SELECT product_name, company_name, licence_number npn, lnhpd_id
        FROM products
        """
    )
    db.conn.commit()
    db.conn.close()
def init_index_db(dir="."):
    """Open index.db in *dir*, creating the FTS-enabled summary table on first use."""
    db_index = Database(Path(dir) / "index.db")
    if not "summary" in db_index.table_names():
        # All summary fields are stored as text, keyed by the ebook uuid.
        schema = {
            "uuid": str,
            "title": str,
            "authors": str,
            "year": str,
            "series": str,
            "language": str,
            "links": str,
            "publisher": str,
            "tags": str,
            "identifiers": str,
            "formats": str,
        }
        db_index["summary"].create(schema, pk="uuid")
        db_index["summary"].enable_fts([
            "title",
            "authors",
            "series",
            "language",
            "identifiers",
            "tags",
            "publisher",
            "formats",
            "year",
        ])
    return db_index
def top_company_search_matches(query_company, top_output=10):
    """
    Returns a list of the top_output company names (strings), given a
    query_company string that the user is filling out from the webapp form.

    Parameters
    ----------
    query_company: str
        User typed (or clicked from proposal suggestion) company name
    top_output: int
        number of top hits returned

    Returns
    -------
    list of dictionaries (company names)
        output -> [<company>]
        <company> -> {"company_name": str}
    """
    db = Database(join(LNHPD.MODEL_FOLDER, LNHPD.SQLITE_FILE))
    query_company = LNHPD.__token_removal__(query_company)
    # SECURITY FIX: the user-derived MATCH expression was previously
    # f-string-interpolated into the SQL text; bind it as a parameter so
    # crafted input cannot alter the query.
    match_expr = f'company_name:"{query_company}"*'
    query = ("SELECT DISTINCT company_name FROM search "
             "WHERE search "
             "MATCH ? "
             "ORDER BY rank, company_name LIMIT ?")
    return LNHPD.__assoc_query__(db, query, [match_expr, top_output])
def test_insert_csv_tsv(content, option, db_path, tmpdir):
    """Inserting CSV/TSV content loads every value as text."""
    db = Database(db_path)
    file_path = str(tmpdir / "insert.csv-tsv")
    open(file_path, "w").write(content)
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "data", file_path, option]
    )
    assert result.exit_code == 0
    assert list(db["data"].rows) == [{"foo": "1", "bar": "2", "baz": "3"}]
def init_sites_db(dir="."):
    """Open sites.db in *dir*, creating the sites table on first use."""
    db = Database(Path(dir) / "sites.db")
    if "sites" not in db.table_names():
        schema = {
            "uuid": str,
            "url": str,
            "type": str,
            "hostnames": str,
            "ports": str,
            "country": int,
            "isp": str,
            "version": str,
            "status": str,
            "last_online": str,
            "last_check": str,
            "error": int,
        }
        db["sites"].create(schema, pk="uuid")
    # Re-bind the table with write-friendly defaults for subsequent use.
    db.table("sites", pk='uuid', batch_size=100, alter=True)
    return db
def test_insert_alter(db_path, tmpdir):
    """Inserting rows with an unknown column fails unless --alter is used."""
    result = CliRunner().invoke(
        cli.cli,
        ["insert", db_path, "from_json_nl", "-", "--nl"],
        input='{"foo": "bar", "n": 1}\n{"foo": "baz", "n": 2}',
    )
    assert result.exit_code == 0, result.output
    # A row with a column ("baz") that does not exist yet must error out.
    result = CliRunner().invoke(
        cli.cli,
        ["insert", db_path, "from_json_nl", "-", "--nl"],
        input='{"foo": "bar", "baz": 5}',
    )
    assert result.exit_code != 0, result.output
    # Re-running with --alter adds the column and succeeds.
    result = CliRunner().invoke(
        cli.cli,
        ["insert", db_path, "from_json_nl", "-", "--nl", "--alter"],
        input='{"foo": "bar", "baz": 5}',
    )
    assert result.exit_code == 0, result.output
    # Sanity check the database itself.
    db = Database(db_path)
    assert db["from_json_nl"].columns_dict == {"foo": str, "n": int, "baz": int}
    assert db.execute_returning_dicts("select foo, n, baz from from_json_nl") == [
        {"foo": "bar", "n": 1, "baz": None},
        {"foo": "baz", "n": 2, "baz": None},
        {"foo": "bar", "baz": 5, "n": None},
    ]