def test_import_v1_dataset_validation(self):
    """Validation errors raised while importing a dataset bundle."""
    # A bundle without metadata.yaml must be rejected.
    bundle = {
        "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
    }
    cmd = v1.ImportDatasetsCommand(bundle)
    with pytest.raises(IncorrectVersionError) as exc_info:
        cmd.run()
    assert str(exc_info.value) == "Missing metadata.yaml"

    # The declared bundle version must be exactly 1.0.0.
    wrong_version = {
        "version": "2.0.0",
        "type": "SqlaTable",
        "timestamp": "2020-11-04T21:27:44.423819+00:00",
    }
    bundle["metadata.yaml"] = yaml.safe_dump(wrong_version)
    cmd = v1.ImportDatasetsCommand(bundle)
    with pytest.raises(IncorrectVersionError) as exc_info:
        cmd.run()
    assert str(exc_info.value) == "Must be equal to 1.0.0."

    # The metadata type must be SqlaTable (a database bundle is rejected).
    bundle["metadata.yaml"] = yaml.safe_dump(database_metadata_config)
    cmd = v1.ImportDatasetsCommand(bundle)
    with pytest.raises(CommandInvalidError) as exc_info:
        cmd.run()
    assert str(exc_info.value) == "Error importing dataset"
    assert exc_info.value.normalized_messages() == {
        "metadata.yaml": {"type": ["Must be equal to SqlaTable."]}
    }

    # Database files referenced by the bundle are validated as well.
    invalid_database = database_config.copy()
    del invalid_database["database_name"]
    bundle["metadata.yaml"] = yaml.safe_dump(dataset_metadata_config)
    bundle["databases/imported_database.yaml"] = yaml.safe_dump(invalid_database)
    cmd = v1.ImportDatasetsCommand(bundle)
    with pytest.raises(CommandInvalidError) as exc_info:
        cmd.run()
    assert str(exc_info.value) == "Error importing dataset"
    assert exc_info.value.normalized_messages() == {
        "databases/imported_database.yaml": {
            "database_name": ["Missing data for required field."],
        }
    }
def test_import_v1_dataset_existing_database(self):
    """Importing a dataset succeeds when its database already exists."""
    # Import only the database first, so it pre-exists the dataset import.
    database_bundle = {
        "metadata.yaml": yaml.safe_dump(database_metadata_config),
        "databases/imported_database.yaml": yaml.safe_dump(database_config),
    }
    ImportDatabasesCommand(database_bundle).run()
    imported_db = (
        db.session.query(Database).filter_by(uuid=database_config["uuid"]).one()
    )
    assert len(imported_db.tables) == 0

    # Now import the dataset bundle; it should attach to the existing database.
    dataset_bundle = {
        "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
        "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
        "databases/imported_database.yaml": yaml.safe_dump(database_config),
    }
    v1.ImportDatasetsCommand(dataset_bundle, overwrite=True).run()
    imported_db = (
        db.session.query(Database).filter_by(uuid=database_config["uuid"]).one()
    )
    assert len(imported_db.tables) == 1

    # Clean up the imported rows.
    db.session.delete(imported_db.tables[0])
    db.session.delete(imported_db)
    db.session.commit()
def test_import_v1_dataset(self):
    """A v1 dataset bundle imports the dataset and its database."""
    bundle = {
        "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
        "databases/imported_database.yaml": yaml.safe_dump(database_config),
        "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
    }
    v1.ImportDatasetsCommand(bundle).run()

    imported = (
        db.session.query(SqlaTable).filter_by(uuid=dataset_config["uuid"]).one()
    )

    # Top-level dataset attributes.
    assert imported.table_name == "imported_dataset"
    assert imported.main_dttm_col is None
    assert imported.description == "This is a dataset that was exported"
    assert imported.default_endpoint == ""
    assert imported.offset == 66
    assert imported.cache_timeout == 55
    assert imported.schema == ""
    assert imported.sql == ""
    assert imported.params is None
    assert imported.template_params is None
    assert imported.filter_select_enabled
    assert imported.fetch_values_predicate is None
    assert imported.extra is None

    # The referenced database is imported alongside the dataset.
    assert str(imported.database.uuid) == "b8a1ccd3-779d-4ab7-8ad8-9ab119d7fe89"

    # Metric attributes.
    assert len(imported.metrics) == 1
    imported_metric = imported.metrics[0]
    assert imported_metric.metric_name == "count"
    assert imported_metric.verbose_name == ""
    assert imported_metric.metric_type is None
    assert imported_metric.expression == "count(1)"
    assert imported_metric.description is None
    assert imported_metric.d3format is None
    assert imported_metric.extra is None
    assert imported_metric.warning_text is None

    # Column attributes.
    assert len(imported.columns) == 1
    imported_column = imported.columns[0]
    assert imported_column.column_name == "cnt"
    assert imported_column.verbose_name == "Count of something"
    assert not imported_column.is_dttm
    assert imported_column.is_active  # imported columns are set to active
    assert imported_column.type == "NUMBER"
    assert not imported_column.groupby
    assert imported_column.filterable
    assert imported_column.expression == ""
    assert imported_column.description is None
    assert imported_column.python_date_format is None

    # Clean up the imported rows.
    db.session.delete(imported)
    db.session.delete(imported.database)
    db.session.commit()
def test_import_v1_dataset_multiple(self):
    """Test that a dataset can be imported multiple times.

    Re-running the import with ``overwrite=True`` must be idempotent, and a
    subsequent import with renamed metrics/columns must replace (not append
    to) the existing ones.
    """
    import copy

    contents = {
        "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
        "databases/imported_database.yaml": yaml.safe_dump(database_config),
        "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
    }
    command = v1.ImportDatasetsCommand(contents, overwrite=True)
    command.run()
    command.run()
    dataset = (
        db.session.query(SqlaTable).filter_by(uuid=dataset_config["uuid"]).one()
    )
    assert dataset.table_name == "imported_dataset"

    # test that columns and metrics sync, ie, old ones not in the import
    # are removed
    # BUGFIX: use deepcopy instead of dict.copy() — a shallow copy shares the
    # nested metric/column dicts, so mutating them below would silently
    # modify the module-level dataset_config fixture and leak into other
    # tests.
    new_config = copy.deepcopy(dataset_config)
    new_config["metrics"][0]["metric_name"] = "count2"
    new_config["columns"][0]["column_name"] = "cnt2"
    contents = {
        "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
        "databases/imported_database.yaml": yaml.safe_dump(database_config),
        "datasets/imported_dataset.yaml": yaml.safe_dump(new_config),
    }
    command = v1.ImportDatasetsCommand(contents, overwrite=True)
    command.run()
    dataset = (
        db.session.query(SqlaTable).filter_by(uuid=dataset_config["uuid"]).one()
    )
    # The old metric/column were replaced by the renamed ones.
    assert len(dataset.metrics) == 1
    assert dataset.metrics[0].metric_name == "count2"
    assert len(dataset.columns) == 1
    assert dataset.columns[0].column_name == "cnt2"

    # Clean up the imported rows.
    db.session.delete(dataset)
    db.session.delete(dataset.database)
    db.session.commit()
def test_import_v1_dataset(self, mock_g):
    """A v1 dataset bundle imports with owners, extra JSON, and metrics."""
    mock_g.user = security_manager.find_user("admin")

    bundle = {
        "metadata.yaml": yaml.safe_dump(dataset_metadata_config),
        "databases/imported_database.yaml": yaml.safe_dump(database_config),
        "datasets/imported_dataset.yaml": yaml.safe_dump(dataset_config),
    }
    v1.ImportDatasetsCommand(bundle).run()

    imported = (
        db.session.query(SqlaTable).filter_by(uuid=dataset_config["uuid"]).one()
    )

    # Top-level dataset attributes.
    assert imported.table_name == "imported_dataset"
    assert imported.main_dttm_col is None
    assert imported.description == "This is a dataset that was exported"
    assert imported.default_endpoint == ""
    assert imported.offset == 66
    assert imported.cache_timeout == 55
    assert imported.schema == ""
    assert imported.sql == ""
    assert imported.params is None
    assert imported.template_params == "{}"
    assert imported.filter_select_enabled
    assert imported.fetch_values_predicate is None
    assert (
        imported.extra
        == '{"certification": {"certified_by": "Data Platform Team", "details": "This table is the source of truth."}, "warning_markdown": "This is a warning."}'
    )

    # user should be included as one of the owners
    assert imported.owners == [mock_g.user]

    # The referenced database is imported alongside the dataset.
    assert str(imported.database.uuid) == "b8a1ccd3-779d-4ab7-8ad8-9ab119d7fe89"

    # Metric attributes.
    assert len(imported.metrics) == 1
    imported_metric = imported.metrics[0]
    assert imported_metric.metric_name == "count"
    assert imported_metric.verbose_name == ""
    assert imported_metric.metric_type is None
    assert imported_metric.expression == "count(1)"
    assert imported_metric.description is None
    assert imported_metric.d3format is None
    assert imported_metric.extra == "{}"
    assert imported_metric.warning_text is None

    # Column attributes.
    assert len(imported.columns) == 1
    imported_column = imported.columns[0]
    assert imported_column.column_name == "cnt"
    assert imported_column.verbose_name == "Count of something"
    assert not imported_column.is_dttm
    assert imported_column.is_active  # imported columns are set to active
    assert imported_column.type == "NUMBER"
    assert not imported_column.groupby
    assert imported_column.filterable
    assert imported_column.expression == ""
    assert imported_column.description is None
    assert imported_column.python_date_format is None

    # Clear ownership before deleting so the cleanup can proceed.
    imported.owners = []
    imported.database.owners = []
    db.session.delete(imported)
    db.session.delete(imported.database)
    db.session.commit()