def _mount_mysql(repository):
    """Mount the origin MySQL database via mysql_fdw with an explicit table schema.

    Only exercised for basic mounting in tests, so the result is never imported.
    """
    # Explicit column layout for the single "mushrooms" table on the origin.
    mushroom_columns = [
        TableColumn(1, "mushroom_id", "integer", False),
        TableColumn(2, "name", "character varying (20)", False),
        TableColumn(3, "discovery", "timestamp", False),
        TableColumn(4, "friendly", "boolean", False),
        TableColumn(5, "binary_data", "bytea", False),
        TableColumn(6, "varbinary_data", "bytea", False),
    ]
    connection_params = {
        "host": "mysqlorigin",
        "port": 3306,
        "username": "******",
        "password": "******",
        "dbname": "mysqlschema",
    }
    mount(
        repository.to_schema(),
        "mysql_fdw",
        connection_params,
        tables={"mushrooms": (mushroom_columns, {})},
    )
def _mount_mongo(repository):
    """Mount a Mongo collection into a temporary schema, import it into *repository*,
    then drop the staging schema."""
    handler_options = {
        "server": "mongoorigin",
        "port": 27017,
        "username": "******",
        "password": "******",
        "stuff": {
            "db": "origindb",
            "coll": "stuff",
            "schema": {"name": "text", "duration": "numeric", "happy": "boolean"},
        },
    }
    mount("tmp", "mongo_fdw", handler_options)
    # Import everything from the staging mountpoint as foreign tables and check it out.
    repository.import_tables([], R("tmp"), [], foreign_tables=True, do_checkout=True)
    R("tmp").delete()
def _mount_mysql(repository):
    """Mount a remote MySQL schema via mysql_fdw.

    We don't use this one in tests beyond basic mounting, so no point importing it.
    """
    # NOTE(review): the handler kwargs were previously wrapped in a redundant
    # dict(dict(...)) — the inner dict() call added nothing and has been removed.
    mount(
        repository.to_schema(),
        "mysql_fdw",
        dict(
            server="mysqlorigin",
            port=3306,
            username="******",
            password="******",
            remote_schema="mysqlschema",
        ),
    )
def _mount_postgres(repository, tables=None):
    """Mount the origin Postgres database into a temporary schema, import its
    tables into *repository*, then delete the staging schema.

    :param tables: Optional table list forwarded to the FDW handler (None = all).
    """
    mount(
        "tmp",
        "postgres_fdw",
        {
            "host": "pgorigin",
            "port": 5432,
            "username": "******",
            "password": "******",
            "dbname": "origindb",
            "remote_schema": "public",
            "tables": tables,
        },
    )
    # Pull the foreign tables into the target repository and check them out.
    repository.import_tables([], R("tmp"), [], foreign_tables=True, do_checkout=True)
    R("tmp").delete()
def test_socrata_mounting_error():
    """An error from the Socrata API during mounting surfaces as RepositoryNotFoundError."""
    socrata = MagicMock(spec=Socrata)
    socrata.datasets.side_effect = Exception(
        "Unknown response format: text/html; charset=utf-8"
    )
    mount_params = {
        "domain": "example.com",
        "tables": {"some_table": "xzkq-xp2w"},
        "app_token": "some_token",
    }
    with mock.patch("sodapy.Socrata", return_value=socrata):
        with pytest.raises(RepositoryNotFoundError):
            mount("test/pg_mount", "socrata", mount_params)
def test_mount_elasticsearch(local_engine_empty):
    # No ES running in this stack: this only checks that the FDW can be instantiated
    # and that the declared table spec is reflected in the mounted schema.
    repo = Repository("test", "es_mount")
    table_spec = {
        "table_1": {
            "schema": {
                "id": "text",
                "@timestamp": "timestamp",
                "query": "text",
                "col_1": "text",
                "col_2": "boolean",
            },
            "index": "index-pattern*",
            "rowid_column": "id",
            "query_column": "query",
        }
    }
    expected_schema = [
        TableColumn(ordinal=1, name="id", pg_type="text", is_pk=False, comment=None),
        TableColumn(
            ordinal=2,
            name="@timestamp",
            pg_type="timestamp without time zone",
            is_pk=False,
            comment=None,
        ),
        TableColumn(ordinal=3, name="query", pg_type="text", is_pk=False, comment=None),
        TableColumn(ordinal=4, name="col_1", pg_type="text", is_pk=False, comment=None),
        TableColumn(ordinal=5, name="col_2", pg_type="boolean", is_pk=False, comment=None),
    ]
    try:
        mount(
            repo.to_schema(),
            "elasticsearch",
            {
                "username": None,
                "password": None,
                "server": "elasticsearch",
                "port": 9200,
                "table_spec": table_spec,
            },
        )
        actual_schema = get_engine().get_full_table_schema(repo.to_schema(), "table_1")
        assert actual_schema == expected_schema
    finally:
        repo.delete()
def test_socrata_smoke(domain, dataset_id, local_engine_empty):
    # This relies on the Socrata API being available, but good to smoke test some popular
    # datasets to make sure the mounting works end-to-end.
    try:
        mount(
            "socrata_mount",
            "socrata",
            {"domain": domain, "tables": {"data": dataset_id}},
        )
        rows = local_engine_empty.run_sql("SELECT * FROM socrata_mount.data LIMIT 10")
        assert len(rows) == 10
    finally:
        # Always drop the mounted schema, even if the assertion fails.
        local_engine_empty.delete_schema("socrata_mount")
def test_socrata_mounting_slug(local_engine_empty):
    """Without an explicit table map, table names are derived from dataset metadata slugs."""
    with open(os.path.join(INGESTION_RESOURCES, "socrata/find_datasets.json"), "r") as f:
        socrata_meta = json.load(f)
    socrata = MagicMock(spec=Socrata)
    socrata.datasets.return_value = socrata_meta
    with mock.patch("sodapy.Socrata", return_value=socrata):
        mount(
            "test/pg_mount",
            "socrata",
            {"domain": "example.com", "app_token": "some_token"},
        )
    assert local_engine_empty.get_all_tables("test/pg_mount") == [
        "current_employee_names_salaries_and_position_xzkq_xp2w"
    ]
def test_socrata_mounting_missing_tables():
    """Requesting a dataset ID absent from the domain's metadata raises a descriptive ValueError."""
    with open(os.path.join(INGESTION_RESOURCES, "socrata/find_datasets.json"), "r") as f:
        socrata_meta = json.load(f)
    socrata = MagicMock(spec=Socrata)
    socrata.datasets.return_value = socrata_meta
    mount_params = {
        "domain": "example.com",
        "tables": {"some_table": "wrong_id"},
        "app_token": "some_token",
    }
    with mock.patch("sodapy.Socrata", return_value=socrata):
        with pytest.raises(ValueError) as e:
            mount("test/pg_mount", "socrata", mount_params)
    assert "Some Socrata tables couldn't be found! Missing tables: xzkq-xp2w" in str(e.value)
def _execute_db_import(conn_string, fdw_name, fdw_params, table_names, target_mountpoint,
                       table_aliases, table_queries) -> ProvenanceLine:
    """Mount a foreign database into a staging schema, import the requested tables
    into *target_mountpoint*, and always drop the staging schema afterwards.

    :param conn_string: Regex match object for the connection string (or None) —
        assumed; its .group() is forwarded to conn_string_to_dict. TODO confirm with caller.
    :param fdw_params: JSON string of extra FDW handler parameters.
    :return: Provenance entry of type "MOUNT".
    """
    staging = Repository.from_schema(fdw_name + "_tmp_staging")
    try:
        handler_kwargs = json.loads(fdw_params)
        # Connection-string parameters take precedence over the JSON handler params.
        handler_kwargs.update(
            conn_string_to_dict(conn_string.group() if conn_string else None)
        )
        mount(staging.to_schema(), fdw_name, handler_kwargs)
        # The foreign database is a moving target, so the new image hash is random.
        # Maybe in the future, when the object hash is a function of its contents,
        # we can be smarter here...
        target_hash = "{:064x}".format(getrandbits(256))
        target_mountpoint.import_tables(
            table_aliases,
            staging,
            table_names,
            target_hash=target_hash,
            foreign_tables=True,
            table_queries=table_queries,
        )
        return {"type": "MOUNT"}
    finally:
        staging.delete()
def test_socrata_mounting(local_engine_empty):
    """Mount a mocked Socrata dataset; verify the inferred schema and the stored column map."""
    with open(os.path.join(INGESTION_RESOURCES, "socrata/find_datasets.json"), "r") as f:
        socrata_meta = json.load(f)
    socrata = MagicMock(spec=Socrata)
    socrata.datasets.return_value = socrata_meta
    with mock.patch("sodapy.Socrata", return_value=socrata):
        mount(
            "test/pg_mount",
            "socrata",
            {
                "domain": "example.com",
                "tables": {"some_table": "xzkq-xp2w"},
                "app_token": "some_token",
            },
        )
    expected_schema = [
        TableColumn(
            ordinal=1, name=":id", pg_type="text", is_pk=False, comment="Socrata column ID"
        ),
        TableColumn(
            ordinal=2,
            name="full_or_part_time",
            pg_type="text",
            is_pk=False,
            comment="Whether the employee was employed full- (F) or part-time (P).",
        ),
        TableColumn(
            ordinal=3, name="hourly_rate", pg_type="numeric", is_pk=False, comment=mock.ANY
        ),
        TableColumn(
            ordinal=4, name="salary_or_hourly", pg_type="text", is_pk=False, comment=mock.ANY
        ),
        TableColumn(
            ordinal=5,
            name="job_titles",
            pg_type="text",
            is_pk=False,
            comment="Title of employee at the time when the data was updated.",
        ),
        TableColumn(
            ordinal=6, name="typical_hours", pg_type="numeric", is_pk=False, comment=mock.ANY
        ),
        TableColumn(
            ordinal=7, name="annual_salary", pg_type="numeric", is_pk=False, comment=mock.ANY
        ),
        TableColumn(
            ordinal=8, name=_long_name_col_sg, pg_type="text", is_pk=False, comment=mock.ANY
        ),
        TableColumn(
            ordinal=9,
            name="department",
            pg_type="text",
            is_pk=False,
            comment="Department where employee worked.",
        ),
    ]
    assert (
        local_engine_empty.get_full_table_schema("test/pg_mount", "some_table")
        == expected_schema
    )
    # The over-long Socrata column name should be recorded in the FDW's column_map option.
    column_map = local_engine_empty.run_sql(
        "SELECT option_value FROM information_schema.foreign_table_options "
        "WHERE foreign_table_name = 'some_table' "
        "AND foreign_table_schema = 'test/pg_mount' "
        "AND option_name = 'column_map'"
    )
    assert column_map == [(f'{{"{_long_name_col_sg}": "{_long_name_col}"}}',)]
def _callback(schema, connection, handler_options):
    # Merge the parsed connection-string parameters into the handler options
    # (note: mutates handler_options in place) and mount the resulting schema.
    # handler_name is captured from the enclosing scope — presumably the mount
    # handler selected by the caller; verify against the surrounding function.
    handler_options.update(conn_string_to_dict(connection))
    mount(schema, mount_handler=handler_name, handler_kwargs=handler_options)