def cli(dbf_paths, sqlite_db, table, verbose, name, trim):
    """
    Convert DBF files (dBase, FoxPro etc) to SQLite

    https://github.com/simonw/dbf-to-sqlite
    """
    if table and len(dbf_paths) > 1:
        raise click.ClickException("--table only works with a single DBF file")
    db = Database(sqlite_db)
    for path in dbf_paths:
        table_name = table if table else Path(path).stem
        if name:
            table_name = path
        if verbose:
            click.echo('Загрузка {} в таблицу "{}"'.format(path, table_name))
        # Use a dedicated local: assigning to `table` here would clobber the
        # --table option and corrupt table_name on subsequent iterations.
        dbf_table = dbf.Table(str(path))
        dbf_table.open()
        columns = dbf_table.field_names
        db[table_name].insert_all(
            dict(zip(columns, list(row))) for row in dbf_table
        )
        dbf_table.close()
        if trim:
            for col in columns:
                # SQL parameters cannot stand in for identifiers, so the
                # (trusted) table/column names are interpolated directly.
                db.conn.execute(
                    "update [{}] set [{}] = rtrim([{}])".format(
                        table_name, col, col
                    )
                )
    db.vacuum()
def test_enable_fts_error_message_on_views():
    # enable_fts() must refuse to operate on a view with a clear message.
    db = Database(memory=True)
    db.create_view("hello", "select 1 + 1")
    with pytest.raises(NotImplementedError) as excinfo:
        db["hello"].enable_fts()
    expected = "enable_fts() is supported on tables but not on views"
    assert expected == excinfo.value.args[0]
def save_playlist(database, table, date=None, since=None, until=None, delay=1):
    """
    Download daily playlists, for a date or a range
    """
    # Resolve the requested dates: an explicit date wins, then a full
    # since/until range; a lone since or until is an error; default is today.
    if date:
        dates = [date.date()]
    elif since and until:
        dates = list(date_range(since.date(), until.date()))
    elif since or until:
        raise ValueError(
            "Invalid dates. Please provide either a single date, or both since and until arguments."
        )
    else:
        dates = [datetime.date.today()]
    if not isinstance(database, Database):
        database = Database(database)
    table = database.table(table, extracts={"artist": "artists"})
    for date in dates:
        click.echo(f"Downloading playlist for {date}")
        table.upsert_all(scrape(date), pk="datetime")
        # Be polite to the remote server when fetching more than one day.
        if len(dates) > 1:
            time.sleep(delay)
def init_sites_db(dir="."):
    """
    Open (creating it if necessary) the sites database in *dir*.

    Ensures a "sites" table keyed by uuid exists and returns the Database.
    """
    path = Path(dir) / "sites.db"
    db = Database(path)
    if "sites" not in db.table_names():
        db["sites"].create(
            {
                "uuid": str,
                "url": str,
                "type": str,
                "hostnames": str,
                "ports": str,
                "country": int,
                "isp": str,
                "version": str,
                "status": str,
                "last_online": str,
                "last_check": str,
                "error": int,
                # TODO: add the most common formats
            },
            pk="uuid",
        )
    return db
def test_insert_replace(db_path, tmpdir):
    # Seed 20 dogs using the multiple-insert helper, then replace two of
    # them and add a fresh row via `insert --replace`.
    test_insert_multiple_with_primary_key(db_path, tmpdir)
    json_path = str(tmpdir / "insert-replace.json")
    db = Database(db_path)
    assert db["dogs"].count == 20
    insert_replace_dogs = [
        {"id": 1, "name": "Insert replaced 1", "age": 4},
        {"id": 2, "name": "Insert replaced 2", "age": 4},
        {"id": 21, "name": "Fresh insert 21", "age": 6},
    ]
    open(json_path, "w").write(json.dumps(insert_replace_dogs))
    result = CliRunner().invoke(
        cli.cli,
        ["insert", db_path, "dogs", json_path, "--pk", "id", "--replace"],
    )
    assert result.exit_code == 0, result.output
    assert db["dogs"].count == 21
    rows = list(
        db.query("select * from dogs where id in (1, 2, 21) order by id")
    )
    assert rows == insert_replace_dogs
def test_sqlite_version():
    # db.sqlite_version should be a tuple matching sqlite_version()'s string.
    db = Database(memory=True)
    version = db.sqlite_version
    assert isinstance(version, tuple)
    reported = next(db.query("select sqlite_version() as v"))["v"]
    assert reported == ".".join(str(part) for part in version)
def test_with_tracer():
    # Tracer records every (sql, params) pair the Database executes while
    # the db.tracer() context manager is active.
    collected = []
    tracer = lambda sql, params: collected.append((sql, params))
    db = Database(memory=True)
    db["dogs"].insert({"name": "Cleopaws"})
    db["dogs"].enable_fts(["name"])
    # Nothing is recorded before entering the tracer context.
    assert len(collected) == 0
    with db.tracer(tracer):
        db["dogs"].search("Cleopaws")
    # search() issues exactly two statements: the view lookup and the
    # FTS-backed select.
    assert len(collected) == 2
    assert collected == [
        ("select name from sqlite_master where type = 'view'", None),
        (
            'select * from "dogs" where rowid in (\n select rowid from [dogs_fts]\n where [dogs_fts] match :search\n)\norder by rowid',
            ("Cleopaws", ),
        ),
    ]
    # Outside the with block collected should not be appended to
    db["dogs"].insert({"name": "Cleopaws"})
    assert len(collected) == 2
def test_upsert(db_path, tmpdir):
    """Upserting by pk should update only the supplied columns."""
    json_path = str(tmpdir / "dogs.json")
    db = Database(db_path)
    insert_dogs = [
        {"id": 1, "name": "Cleo", "age": 4},
        {"id": 2, "name": "Nixie", "age": 4},
    ]
    open(json_path, "w").write(json.dumps(insert_dogs))
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert 0 == result.exit_code, result.output
    assert 2 == db["dogs"].count
    # Now run the upsert to update just their ages
    upsert_dogs = [
        {"id": 1, "age": 5},
        {"id": 2, "age": 5},
    ]
    # Bug fix: previously this wrote insert_dogs again, making the upsert a
    # no-op and leaving the final assertion checking the pre-upsert ages.
    open(json_path, "w").write(json.dumps(upsert_dogs))
    result = CliRunner().invoke(
        cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert 0 == result.exit_code, result.output
    assert [
        {"id": 1, "name": "Cleo", "age": 5},
        {"id": 2, "name": "Nixie", "age": 5},
    ] == db.execute_returning_dicts("select * from dogs order by id")
def test_with_tracer():
    # Tracer records every (sql, params) pair the Database executes while
    # the db.tracer() context manager is active.
    collected = []
    tracer = lambda sql, params: collected.append((sql, params))
    db = Database(memory=True)
    db["dogs"].insert({"name": "Cleopaws"})
    db["dogs"].enable_fts(["name"])
    # Nothing is recorded before entering the tracer context.
    assert len(collected) == 0
    with db.tracer(tracer):
        list(db["dogs"].search("Cleopaws"))
    # search() issues five statements here: view lookups, FTS table
    # detection, schema lookup and the ranked FTS query itself.
    assert len(collected) == 5
    assert collected == [
        ("select name from sqlite_master where type = 'view'", None),
        (
            "SELECT name FROM sqlite_master\n WHERE rootpage = 0\n AND (\n sql LIKE '%VIRTUAL TABLE%USING FTS%content=%dogs%'\n OR (\n tbl_name = \"dogs\"\n AND sql LIKE '%VIRTUAL TABLE%USING FTS%'\n )\n )",
            None,
        ),
        ("select name from sqlite_master where type = 'view'", None),
        ("select sql from sqlite_master where name = ?", ("dogs_fts", )),
        (
            "with original as (\n select\n rowid,\n *\n from [dogs]\n)\nselect\n [original].*\nfrom\n [original]\n join [dogs_fts] on [original].rowid = [dogs_fts].rowid\nwhere\n [dogs_fts] match :query\norder by\n [dogs_fts].rank",
            {
                "query": "Cleopaws"
            },
        ),
    ]
    # Outside the with block collected should not be appended to
    db["dogs"].insert({"name": "Cleopaws"})
    assert len(collected) == 5
def test_upsert_alter(db_path, tmpdir):
    # Upserting a new column should fail without --alter and succeed with it.
    json_path = str(tmpdir / "dogs.json")
    db = Database(db_path)
    open(json_path, "w").write(json.dumps([{"id": 1, "name": "Cleo"}]))
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert result.exit_code == 0, result.output
    # Should fail with error code if no --alter
    open(json_path, "w").write(json.dumps([{"id": 1, "age": 5}]))
    upsert_args = ["upsert", db_path, "dogs", json_path, "--pk", "id"]
    result = CliRunner().invoke(cli.cli, upsert_args)
    assert result.exit_code == 1
    assert str(result.exception) == "no such column: age"
    # Should succeed with --alter
    result = CliRunner().invoke(cli.cli, upsert_args + ["--alter"])
    assert result.exit_code == 0
    assert db.execute_returning_dicts("select * from dogs order by id") == [
        {"id": 1, "name": "Cleo", "age": 5},
    ]
def init_index_db(dir="."):
    """
    Open (creating it if necessary) the search index database in *dir*.

    Creates a "summary" table keyed by uuid with a full-text index over its
    descriptive columns, and returns the Database.
    """
    path = Path(dir) / "index.db"
    db_index = Database(path)
    if "summary" not in db_index.table_names():
        db_index["summary"].create(
            {
                "uuid": str,
                "title": str,
                "authors": str,
                "year": str,
                "series": str,
                "language": str,
                "links": str,
                "publisher": str,
                "tags": str,
                "identifiers": str,
                "formats": str,
            },
            pk="uuid",
        )
        # Only configure FTS when the table is first created: enable_fts()
        # raises if the FTS shadow tables already exist, which previously
        # made a second call to init_index_db() fail.
        db_index["summary"].enable_fts(
            [
                "title",
                "authors",
                "series",
                "language",
                "identifiers",
                "tags",
                "publisher",
                "formats",
                "year",
            ]
        )
    return db_index
def test_insert_truncate(db_path):
    # First insert two rows, then verify --truncate replaces them entirely.
    runner = CliRunner()
    result = runner.invoke(
        cli.cli,
        ["insert", db_path, "from_json_nl", "-", "--nl", "--batch-size=1"],
        input='{"foo": "bar", "n": 1}\n{"foo": "baz", "n": 2}',
    )
    assert result.exit_code == 0, result.output
    db = Database(db_path)
    query = "select foo, n from from_json_nl"
    assert db.execute_returning_dicts(query) == [
        {"foo": "bar", "n": 1},
        {"foo": "baz", "n": 2},
    ]
    # Truncate and insert new rows
    result = runner.invoke(
        cli.cli,
        [
            "insert",
            db_path,
            "from_json_nl",
            "-",
            "--nl",
            "--truncate",
            "--batch-size=1",
        ],
        input='{"foo": "bam", "n": 3}\n{"foo": "bat", "n": 4}',
    )
    assert result.exit_code == 0, result.output
    assert db.execute_returning_dicts(query) == [
        {"foo": "bam", "n": 3},
        {"foo": "bat", "n": 4},
    ]
def oscr_to_zip(zip_location, output_db="oscr.db", recreate=True):
    """Load OSCR charity CSVs from a zip archive into a SQLite database."""
    db = Database(output_db, recreate=recreate)
    with ZipFile(zip_location) as z:
        csv_members = [m for m in z.filelist if m.filename.endswith(".csv")]
        for member in csv_members:
            with z.open(member) as csvfile:
                reader = csv.DictReader(
                    io.TextIOWrapper(csvfile, encoding="utf8")
                )
                cleaned = (dict(clean_row(row)) for row in reader)
                db["oscr"].insert_all(
                    tqdm.tqdm(cleaned),
                    ignore=True,
                    batch_size=100000,
                    pk="Charity Number",
                )
    # Full-text search over the descriptive columns.
    db["oscr"].enable_fts(
        ["Charity Name", "Objectives"], replace=True, create_triggers=True
    )
    # Single-column indexes for the common filter fields.
    for index_columns in (
        ["Charity Status"],
        ["Constitutional Form"],
        ["Geographical Spread"],
        ["Main Operating Location"],
    ):
        db["oscr"].create_index(index_columns, if_not_exists=True)
    for view_name, view_def in VIEWS.items():
        print("Inserting view: {}".format(view_name))
        db.create_view(view_name, view_def, replace=True)
def test_insert_convert_text_returning_iterator(db_path):
    # A --convert expression returning a generator should yield one row per
    # produced dict.
    result = CliRunner().invoke(
        cli.cli,
        [
            "insert",
            db_path,
            "text",
            "-",
            "--text",
            "--convert",
            '({"word": w} for w in text.split())',
        ],
        input="A bunch of words",
    )
    assert result.exit_code == 0, result.output
    db = Database(db_path)
    words = [row["word"] for row in db.query("select [word] from [text]")]
    assert words == ["A", "bunch", "of", "words"]
def test_insert_alter(db_path, tmpdir):
    runner = CliRunner()
    base_args = ["insert", db_path, "from_json_nl", "-", "--nl"]
    result = runner.invoke(
        cli.cli,
        base_args,
        input='{"foo": "bar", "n": 1}\n{"foo": "baz", "n": 2}',
    )
    assert result.exit_code == 0, result.output
    # Should get an error with incorrect shaped additional data
    result = runner.invoke(cli.cli, base_args, input='{"foo": "bar", "baz": 5}')
    assert result.exit_code != 0, result.output
    # If we run it again with --alter it should work correctly
    result = runner.invoke(
        cli.cli, base_args + ["--alter"], input='{"foo": "bar", "baz": 5}'
    )
    assert result.exit_code == 0, result.output
    # Sanity check the database itself
    db = Database(db_path)
    assert db["from_json_nl"].columns_dict == {"foo": str, "n": int, "baz": int}
    assert db.execute_returning_dicts(
        "select foo, n, baz from from_json_nl"
    ) == [
        {"foo": "bar", "n": 1, "baz": None},
        {"foo": "baz", "n": 2, "baz": None},
        {"foo": "bar", "baz": 5, "n": None},
    ]
def test_upsert(db_path, tmpdir):
    # Seed 20 dogs, then upsert two replacements plus one fresh row.
    test_insert_multiple_with_primary_key(db_path, tmpdir)
    json_path = str(tmpdir / "upsert.json")
    db = Database(db_path)
    assert db["dogs"].count == 20
    upsert_dogs = [
        {"id": 1, "name": "Upserted 1", "age": 4},
        {"id": 2, "name": "Upserted 2", "age": 4},
        {"id": 21, "name": "Fresh insert 21", "age": 6},
    ]
    open(json_path, "w").write(json.dumps(upsert_dogs))
    result = CliRunner().invoke(
        cli.cli, ["upsert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert result.exit_code == 0
    assert db["dogs"].count == 21
    assert db.execute_returning_dicts(
        "select * from dogs where id in (1, 2, 21) order by id"
    ) == upsert_dogs
def test_recreate(tmpdir, use_path, file_exists):
    # recreate=True must wipe any existing database, whether the path is
    # given as a str or a pathlib.Path.
    filepath = str(tmpdir / "data.db")
    if use_path:
        filepath = pathlib.Path(filepath)
    if file_exists:
        Database(filepath)["t1"].insert({"foo": "bar"})
        assert Database(filepath).table_names() == ["t1"]
    Database(filepath, recreate=True)["t2"].insert({"foo": "bar"})
    assert Database(filepath).table_names() == ["t2"]
def existing_db():
    """Return an in-memory database pre-populated with a three-row foo table."""
    database = Database(memory=True)
    # Use single quotes for SQL string literals: double-quoted strings are a
    # deprecated SQLite misfeature (DQS) and fail where it is disabled.
    database.executescript("""
    CREATE TABLE foo (text TEXT);
    INSERT INTO foo (text) values ('one');
    INSERT INTO foo (text) values ('two');
    INSERT INTO foo (text) values ('three');
    """)
    return database
def test_drop_table():
    # drop-table should remove an existing table.
    runner = CliRunner()
    with runner.isolated_filesystem():
        db = Database("test.db")
        db["t"].create({"pk": int}, pk="pk")
        assert "t" in db.table_names()
        result = runner.invoke(cli.cli, ["drop-table", "test.db", "t"])
        assert result.exit_code == 0
        assert "t" not in db.table_names()
def test_reset_counts_cli(counts_db_path):
    # Corrupt a cached count, then verify reset-counts repairs it.
    db = Database(counts_db_path)
    for table_name in ("foo", "bar"):
        db[table_name].enable_counts()
    expected = {"foo": 1, "bar": 2}
    assert db.cached_counts() == expected
    db["_counts"].update("foo", {"count": 3})
    result = CliRunner().invoke(cli.cli, ["reset-counts", counts_db_path])
    assert result.exit_code == 0
    assert db.cached_counts() == expected
def init_site_db(site, _uuid="", dir="."):
    """
    Create (or open) the per-site database "<uuid>.db" in *dir*.

    Ensures the "site", "ebooks" and "libraries" tables exist and returns
    the Database. A caller-supplied *_uuid* is reused; otherwise a fresh
    uuid4 is generated.
    """
    s_uuid = str(_uuid) if _uuid else str(uuid.uuid4())
    path = Path(dir) / (s_uuid + ".db")
    db = Database(path)
    if "site" not in db.table_names():
        db["site"].insert(
            {
                "uuid": s_uuid,
                "urls": [site],
                "schema_version": 1,
            }
        )
    if "ebooks" not in db.table_names():
        db["ebooks"].create(
            {
                "uuid": str,
                "id": int,
                # TODO: manage library ids as integers to survive a library
                # rename on the remote site
                "library": str,
                "title": str,
                "authors": str,
                "series": str,
                "series_index": int,
                "language": str,
                "desc": str,
                "identifiers": str,
                "tags": str,
                "publisher": str,
                "pubdate": str,
                "last_modified": str,
                "timestamp": str,
                "formats": str,
                "cover": int,
                # TODO: add the most common formats to avoid alter tables
            },
            pk="uuid",
        )
    if "libraries" not in db.table_names():
        db["libraries"].create({"id": int, "names": str}, pk="id")
    return db
def test_insert_binary_base64(db_path):
    # A {"$base64": ...} wrapper should decode to a BLOB column value.
    input_json = r'{"content": {"$base64": true, "encoded": "aGVsbG8="}}'
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "files", "-"], input=input_json
    )
    assert result.exit_code == 0, result.output
    rows = list(Database(db_path).query("select content from files"))
    assert rows == [{"content": b"hello"}]
def test_add_column(db_path, col_name, col_type, expected_schema):
    db = Database(db_path)
    db.create_table("dogs", {"name": str})
    assert collapse_whitespace(db["dogs"].schema) == (
        "CREATE TABLE [dogs] ( [name] TEXT )"
    )
    args = ["add-column", db_path, "dogs", col_name]
    if col_type is not None:
        args.append(col_type)
    result = CliRunner().invoke(cli.cli, args)
    assert result.exit_code == 0
    assert collapse_whitespace(db["dogs"].schema) == expected_schema
def test_insert_text(db_path):
    # --text stores the entire stdin payload as a single row.
    content = 'First line\nSecond line\n{"foo": "baz"}'
    result = CliRunner().invoke(
        cli.cli,
        ["insert", db_path, "from_text", "-", "--text"],
        input=content,
    )
    assert result.exit_code == 0, result.output
    db = Database(db_path)
    assert list(db.query("select text from from_text")) == [{"text": content}]
def test_insert_multiple_with_primary_key(db_path, tmpdir):
    # Insert 20 dogs with --pk and verify contents plus the pk metadata.
    json_path = str(tmpdir / "dogs.json")
    dogs = [
        {"id": i, "name": "Cleo {}".format(i), "age": i + 3}
        for i in range(1, 21)
    ]
    open(json_path, "w").write(json.dumps(dogs))
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert result.exit_code == 0
    db = Database(db_path)
    assert db.execute_returning_dicts("select * from dogs order by id") == dogs
    assert db["dogs"].pks == ["id"]
def test_insert_simple(tmpdir):
    json_path = str(tmpdir / "dog.json")
    db_path = str(tmpdir / "dogs.db")
    open(json_path, "w").write(json.dumps({"name": "Cleo", "age": 4}))
    result = CliRunner().invoke(cli.cli, ["insert", db_path, "dogs", json_path])
    assert result.exit_code == 0
    db = Database(db_path)
    assert db.execute_returning_dicts("select * from dogs") == [
        {"age": 4, "name": "Cleo"}
    ]
    assert db.table_names() == ["dogs"]
    # No primary key was specified, so no index should be created either.
    assert db["dogs"].indexes == []
def test_insert_with_primary_key(db_path, tmpdir):
    json_path = str(tmpdir / "dog.json")
    open(json_path, "w").write(json.dumps({"id": 1, "name": "Cleo", "age": 4}))
    result = CliRunner().invoke(
        cli.cli, ["insert", db_path, "dogs", json_path, "--pk", "id"]
    )
    assert result.exit_code == 0
    db = Database(db_path)
    assert db.execute_returning_dicts("select * from dogs") == [
        {"id": 1, "age": 4, "name": "Cleo"}
    ]
    # The --pk column should be recorded as the table's primary key.
    assert db["dogs"].pks == ["id"]
def test_insert_newline_delimited(db_path):
    # --nl parses one JSON object per input line.
    result = CliRunner().invoke(
        cli.cli,
        ["insert", db_path, "from_json_nl", "-", "--nl"],
        input='{"foo": "bar", "n": 1}\n{"foo": "baz", "n": 2}',
    )
    assert result.exit_code == 0, result.output
    db = Database(db_path)
    rows = db.execute_returning_dicts("select foo, n from from_json_nl")
    assert rows == [{"foo": "bar", "n": 1}, {"foo": "baz", "n": 2}]
def test_create_view_error_if_view_exists():
    # Creating a view that already exists should fail with a helpful error.
    runner = CliRunner()
    with runner.isolated_filesystem():
        db = Database("test.db")
        db.create_view("version", "select sqlite_version() + 1")
        result = runner.invoke(
            cli.cli,
            ["create-view", "test.db", "version", "select sqlite_version()"],
        )
        assert result.exit_code == 1
        expected = (
            'Error: View "version" already exists. Use --replace to delete and replace it.'
        )
        assert result.output.strip() == expected
def test_insert_analyze(db_path):
    # --analyze should populate sqlite_stat1 after the insert.
    db = Database(db_path)
    db["rows"].insert({"foo": "x", "n": 3})
    db["rows"].create_index(["n"])
    assert "sqlite_stat1" not in db.table_names()
    result = CliRunner().invoke(
        cli.cli,
        ["insert", db_path, "rows", "-", "--nl", "--analyze"],
        input='{"foo": "bar", "n": 1}\n{"foo": "baz", "n": 2}',
    )
    assert result.exit_code == 0, result.output
    assert "sqlite_stat1" in db.table_names()