def test_export_database_command_invalid_database(self, mock_g):
    """Exporting a non-existent database id must raise DatabaseNotFoundError."""
    mock_g.user = security_manager.find_user("admin")
    # -1 is never a valid primary key, so the command should fail lazily
    # when the generator is first advanced.
    export = ExportDatabasesCommand([-1]).run()
    with self.assertRaises(DatabaseNotFoundError):
        next(export)
def test_export_database_command_no_access(self, mock_g):
    """A user without access to a database must not be able to export it."""
    mock_g.user = security_manager.find_user("gamma")
    database = get_example_database()
    export = ExportDatabasesCommand([database.id]).run()
    # Inaccessible databases are indistinguishable from missing ones:
    # the command raises DatabaseNotFoundError on first iteration.
    with self.assertRaises(DatabaseNotFoundError):
        next(export)
def test_export_database_command_no_related(self, mock_g):
    """
    Test that only databases are exported when export_related=False.
    """
    mock_g.user = security_manager.find_user("admin")
    example_db = get_example_database()
    # Fix: dropped the unused local `db_uuid = example_db.uuid` — nothing in
    # this test asserts on the uuid.
    command = ExportDatabasesCommand([example_db.id], export_related=False)
    contents = dict(command.run())

    # Only the export metadata and the database YAML should be present;
    # related datasets must be skipped when export_related=False.
    prefixes = {path.split("/")[0] for path in contents}
    assert "metadata.yaml" in prefixes
    assert "databases" in prefixes
    assert "datasets" not in prefixes
def test_export_models_command(self, mock_g):
    """The exported metadata.yaml must carry version, type and timestamp."""
    mock_g.user = security_manager.find_user("admin")
    database = get_example_database()
    # Freeze the clock so the exported timestamp is deterministic.
    with freeze_time("2020-01-01T00:00:00Z"):
        contents = dict(ExportDatabasesCommand([database.id]).run())
    metadata = yaml.safe_load(contents["metadata.yaml"])
    expected = {
        "version": "1.0.0",
        "type": "Database",
        "timestamp": "2020-01-01T00:00:00+00:00",
    }
    assert metadata == expected
def export(self, **kwargs: Any) -> Response:
    """Export database(s) with associated datasets
    ---
    get:
      description: Download database(s) and associated dataset(s) as a zip file
      parameters:
      - in: query
        name: q
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/get_export_ids_schema'
      responses:
        200:
          description: A zip file with database(s) and dataset(s) as YAML
          content:
            application/zip:
              schema:
                type: string
                format: binary
        401:
          $ref: '#/components/responses/401'
        404:
          $ref: '#/components/responses/404'
        500:
          $ref: '#/components/responses/500'
    """
    # NOTE: the YAML section of the docstring above is the OpenAPI spec for
    # this endpoint (consumed by the API doc generator) — keep it intact.
    # Optional cookie token; presumably the frontend uses it to detect that
    # the download finished — TODO confirm against the caller.
    token = request.args.get("token")
    # Ids decoded from the rison-encoded `q` query parameter.
    requested_ids = kwargs["rison"]
    # NOTE(review): datetime.now() is naive local time — the timestamp is
    # only used to name the archive, so this is cosmetic.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    root = f"database_export_{timestamp}"
    filename = f"{root}.zip"

    buf = BytesIO()
    with ZipFile(buf, "w") as bundle:
        try:
            # Stream each (relative path, YAML text) pair produced by the
            # command into the in-memory zip, under a common root directory.
            for file_name, file_content in ExportDatabasesCommand(
                requested_ids
            ).run():
                with bundle.open(f"{root}/{file_name}", "w") as fp:
                    fp.write(file_content.encode())
        except DatabaseNotFoundError:
            # Unknown id (or a database the user cannot see) -> 404.
            return self.response_404()
    buf.seek(0)

    response = send_file(
        buf,
        mimetype="application/zip",
        as_attachment=True,
        attachment_filename=filename,
    )
    if token:
        # Short-lived cookie keyed by the caller-supplied token.
        response.set_cookie(token, "done", max_age=600)
    return response
def test_export_database_command_key_order(self, mock_g):
    """The keys in the exported YAML must follow the export_fields order."""
    mock_g.user = security_manager.find_user("admin")
    database = get_example_database()
    contents = dict(ExportDatabasesCommand([database.id]).run())
    exported = yaml.safe_load(contents["databases/examples.yaml"])
    # yaml.safe_load preserves mapping order, so key order is observable.
    expected_order = [
        "database_name",
        "sqlalchemy_uri",
        "cache_timeout",
        "expose_in_sqllab",
        "allow_run_async",
        "allow_ctas",
        "allow_cvas",
        "allow_csv_upload",
        "extra",
        "uuid",
        "version",
    ]
    assert list(exported.keys()) == expected_order
def test_export_database_command(self, mock_g):
    """Export the examples DB and verify the database and birth_names YAML."""
    mock_g.user = security_manager.find_user("admin")
    example_db = get_example_database()
    # The dataset YAML references its parent database by uuid (see
    # `database_uuid` in the expected metadata below).
    db_uuid = example_db.uuid
    command = ExportDatabasesCommand([example_db.id])
    contents = dict(command.run())

    # TODO: this list shouldn't depend on the order in which unit tests are run
    # or on the backend; for now use a stable subset
    core_files = {
        "metadata.yaml",
        "databases/examples.yaml",
        "datasets/examples/energy_usage.yaml",
        "datasets/examples/wb_health_population.yaml",
        "datasets/examples/birth_names.yaml",
    }
    expected_extra = {
        "engine_params": {},
        "metadata_cache_timeout": {},
        "metadata_params": {},
        "schemas_allowed_for_csv_upload": [],
    }
    # Presto carries backend-specific connection args in `extra`.
    if backend() == "presto":
        expected_extra = {
            "engine_params": {"connect_args": {"poll_interval": 0.1}}
        }

    assert core_files.issubset(set(contents.keys()))

    # Column types in the export vary with the backend in use.
    if example_db.backend == "postgresql":
        ds_type = "TIMESTAMP WITHOUT TIME ZONE"
    elif example_db.backend == "hive":
        ds_type = "TIMESTAMP"
    elif example_db.backend == "presto":
        ds_type = "VARCHAR(255)"
    else:
        ds_type = "DATETIME"
    if example_db.backend == "mysql":
        big_int_type = "BIGINT(20)"
    else:
        big_int_type = "BIGINT"

    metadata = yaml.safe_load(contents["databases/examples.yaml"])
    assert metadata == ({
        "allow_csv_upload": True,
        "allow_ctas": True,
        "allow_cvas": True,
        "allow_run_async": False,
        "cache_timeout": None,
        "database_name": "examples",
        "expose_in_sqllab": True,
        "extra": expected_extra,
        "sqlalchemy_uri": example_db.sqlalchemy_uri,
        "uuid": str(example_db.uuid),
        "version": "1.0.0",
    })

    metadata = yaml.safe_load(
        contents["datasets/examples/birth_names.yaml"])
    # Drop the uuid before comparing (not asserted; presumably it varies
    # per environment — confirm).
    metadata.pop("uuid")
    # Column order is not guaranteed, so both sides are sorted by name
    # before the comparison (see the matching sort on expected_metadata).
    metadata["columns"].sort(key=lambda x: x["column_name"])
    expected_metadata = {
        "cache_timeout": None,
        "columns": [
            {
                "column_name": "ds",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": True,
                "python_date_format": None,
                "type": ds_type,
                "verbose_name": None,
            },
            {
                "column_name": "gender",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": "STRING" if example_db.backend == "hive" else "VARCHAR(16)",
                "verbose_name": None,
            },
            {
                "column_name": "name",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": "STRING" if example_db.backend == "hive" else "VARCHAR(255)",
                "verbose_name": None,
            },
            {
                "column_name": "num",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": big_int_type,
                "verbose_name": None,
            },
            {
                "column_name": "num_california",
                "description": None,
                "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": None,
                "verbose_name": None,
            },
            {
                "column_name": "state",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": "STRING" if example_db.backend == "hive" else "VARCHAR(10)",
                "verbose_name": None,
            },
            {
                "column_name": "num_boys",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": big_int_type,
                "verbose_name": None,
            },
            {
                "column_name": "num_girls",
                "description": None,
                "expression": None,
                "filterable": True,
                "groupby": True,
                "is_active": True,
                "is_dttm": False,
                "python_date_format": None,
                "type": big_int_type,
                "verbose_name": None,
            },
        ],
        "database_uuid": str(db_uuid),
        "default_endpoint": None,
        "description": "",
        "extra": None,
        "fetch_values_predicate": None,
        "filter_select_enabled": True,
        "main_dttm_col": "ds",
        "metrics": [
            {
                "d3format": None,
                "description": None,
                "expression": "COUNT(*)",
                "extra": None,
                "metric_name": "count",
                "metric_type": "count",
                "verbose_name": "COUNT(*)",
                "warning_text": None,
            },
            {
                "d3format": None,
                "description": None,
                "expression": "SUM(num)",
                "extra": None,
                "metric_name": "sum__num",
                "metric_type": None,
                "verbose_name": None,
                "warning_text": None,
            },
        ],
        "offset": 0,
        "params": None,
        "schema": None,
        "sql": None,
        "table_name": "birth_names",
        "template_params": None,
        "version": "1.0.0",
    }
    expected_metadata["columns"].sort(key=lambda x: x["column_name"])
    assert metadata == expected_metadata
def test_export_database_command(self, mock_g):
    """Export the examples DB and verify the customized birth_names YAML.

    NOTE(review): the expected values here (description "Adding a DESCRip",
    offset 66, cache_timeout 55, the "ratio" metric, …) presumably come from
    a fixture imported by the test setup — confirm against the enclosing
    class. Also note this method shares its name with another test in this
    file; they are assumed to live in different test classes.
    """
    mock_g.user = security_manager.find_user("admin")
    example_db = get_example_database()
    command = ExportDatabasesCommand([example_db.id])
    contents = dict(command.run())

    # TODO: this list shouldn't depend on the order in which unit tests are run
    # or on the backend; for now use a stable subset
    core_files = {
        "metadata.yaml",
        "databases/examples.yaml",
        "datasets/examples/energy_usage.yaml",
        "datasets/examples/wb_health_population.yaml",
        "datasets/examples/birth_names.yaml",
    }
    expected_extra = {
        "engine_params": {},
        "metadata_cache_timeout": {},
        "metadata_params": {},
        "schemas_allowed_for_csv_upload": [],
    }
    # Presto carries backend-specific connection args in `extra`.
    if backend() == "presto":
        expected_extra = {
            "engine_params": {"connect_args": {"poll_interval": 0.1}}
        }

    assert core_files.issubset(set(contents.keys()))

    metadata = yaml.safe_load(contents["databases/examples.yaml"])
    assert metadata == ({
        "allow_csv_upload": True,
        "allow_ctas": True,
        "allow_cvas": True,
        "allow_run_async": False,
        "cache_timeout": None,
        "database_name": "examples",
        "expose_in_sqllab": True,
        "extra": expected_extra,
        "sqlalchemy_uri": example_db.sqlalchemy_uri,
        "uuid": str(example_db.uuid),
        "version": "1.0.0",
    })

    metadata = yaml.safe_load(
        contents["datasets/examples/birth_names.yaml"])
    # Drop the uuid before comparing (not asserted; presumably it varies
    # per environment — confirm).
    metadata.pop("uuid")
    assert metadata == {
        "table_name": "birth_names",
        "main_dttm_col": None,
        "description": "Adding a DESCRip",
        "default_endpoint": "",
        "offset": 66,
        "cache_timeout": 55,
        "schema": "",
        "sql": "",
        "params": None,
        "template_params": None,
        "filter_select_enabled": True,
        "fetch_values_predicate": None,
        "extra": None,
        "metrics": [
            {
                "metric_name": "ratio",
                "verbose_name": "Ratio Boys/Girls",
                "metric_type": None,
                "expression": "sum(sum_boys) / sum(sum_girls)",
                "description": "This represents the ratio of boys/girls",
                "d3format": ".2%",
                "extra": None,
                "warning_text": "no warning",
            },
            {
                "metric_name": "sum__num",
                "verbose_name": "Babies",
                "metric_type": None,
                "expression": "SUM(num)",
                "description": "",
                "d3format": "",
                "extra": None,
                "warning_text": "",
            },
            {
                "metric_name": "count",
                "verbose_name": "",
                "metric_type": None,
                "expression": "count(1)",
                "description": None,
                "d3format": None,
                "extra": None,
                "warning_text": None,
            },
        ],
        "columns": [
            {
                "column_name": "num_california",
                "verbose_name": None,
                "is_dttm": False,
                "is_active": None,
                "type": "NUMBER",
                "groupby": False,
                "filterable": False,
                "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "ds",
                "verbose_name": "",
                "is_dttm": True,
                "is_active": None,
                "type": "DATETIME",
                "groupby": True,
                "filterable": True,
                "expression": "",
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "sum_girls",
                "verbose_name": None,
                "is_dttm": False,
                "is_active": None,
                "type": "BIGINT(20)",
                "groupby": False,
                "filterable": False,
                "expression": "",
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "gender",
                "verbose_name": None,
                "is_dttm": False,
                "is_active": None,
                "type": "VARCHAR(16)",
                "groupby": True,
                "filterable": True,
                "expression": "",
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "state",
                "verbose_name": None,
                "is_dttm": None,
                "is_active": None,
                "type": "VARCHAR(10)",
                "groupby": True,
                "filterable": True,
                "expression": None,
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "sum_boys",
                "verbose_name": None,
                "is_dttm": None,
                "is_active": None,
                "type": "BIGINT(20)",
                "groupby": True,
                "filterable": True,
                "expression": None,
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "num",
                "verbose_name": None,
                "is_dttm": None,
                "is_active": None,
                "type": "BIGINT(20)",
                "groupby": True,
                "filterable": True,
                "expression": None,
                "description": None,
                "python_date_format": None,
            },
            {
                "column_name": "name",
                "verbose_name": None,
                "is_dttm": None,
                "is_active": None,
                "type": "VARCHAR(255)",
                "groupby": True,
                "filterable": True,
                "expression": None,
                "description": None,
                "python_date_format": None,
            },
        ],
        "version": "1.0.0",
        "database_uuid": str(example_db.uuid),
    }