def test_create_virtual_sqlatable(
    app_context: None,
    mocker: MockFixture,
    session: Session,
    sample_columns: Dict["TableColumn", Dict[str, Any]],
    sample_metrics: Dict["SqlMetric", Dict[str, Any]],
    columns_default: Dict[str, Any],
) -> None:
    """
    Test shadow write when creating a new ``SqlaTable``.

    When a new virtual ``SqlaTable`` is created, new models should also be
    created for ``Dataset`` and ``Column``.
    """
    # patch session
    mocker.patch(
        "superset.security.SupersetSecurityManager.get_session", return_value=session
    )

    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    user1 = get_test_user(1, "abc")
    physical_table_columns: List[Dict[str, Any]] = [
        dict(
            name="ds",
            is_temporal=True,
            type="TIMESTAMP",
            expression="ds",
            is_physical=True,
        ),
        dict(name="num_boys", type="INTEGER", expression="num_boys", is_physical=True),
        dict(name="revenue", type="INTEGER", expression="revenue", is_physical=True),
        dict(name="expenses", type="INTEGER", expression="expenses", is_physical=True),
    ]

    # create a physical ``Table`` that the virtual dataset points to
    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    table = Table(
        name="some_table",
        schema="my_schema",
        catalog=None,
        database=database,
        columns=[
            Column(**props, created_by=user1, changed_by=user1)
            for props in physical_table_columns
        ],
    )
    session.add(table)
    session.commit()

    assert session.query(Table).count() == 1
    assert session.query(Dataset).count() == 0

    # create virtual dataset
    columns = list(sample_columns.keys())
    metrics = list(sample_metrics.keys())
    expected_table_columns = list(sample_columns.values())
    expected_metric_columns = list(sample_metrics.values())

    sqla_table = SqlaTable(
        created_by=user1,
        changed_by=user1,
        owners=[user1],
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql="""
SELECT
  ds,
  num_boys,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
        params=json.dumps(
            {
                "remote_id": 64,
                "database_name": "examples",
                "import_time": 1606677834,
            }
        ),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    session.flush()

    # should not add a new table
    assert session.query(Table).count() == 1
    assert session.query(Dataset).count() == 1

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on"}

    column_schema = ColumnSchema()
    actual_columns = [
        {k: v for k, v in column_schema.dump(column).items() if k not in ignored_keys}
        for column in session.query(Column).all()
    ]
    num_physical_columns = len(physical_table_columns)
    num_dataset_table_columns = len(columns)
    num_dataset_metric_columns = len(metrics)
    assert (
        len(actual_columns)
        == num_physical_columns
        + num_dataset_table_columns
        + num_dataset_metric_columns
    )

    for i, column in enumerate(table.columns):
        assert actual_columns[i] == {
            **columns_default,
            **physical_table_columns[i],
            "id": i + 1,
            "uuid": str(column.uuid),
            "tables": [1],
        }

    offset = num_physical_columns
    for i, column in enumerate(sqla_table.columns):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_table_columns[i],
            "id": i + offset + 1,
            "uuid": str(column.uuid),
            "is_physical": False,
            "datasets": [1],
        }

    offset = num_physical_columns + num_dataset_table_columns
    for i, metric in enumerate(sqla_table.metrics):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_metric_columns[i],
            "id": i + offset + 1,
            "uuid": str(metric.uuid),
            "datasets": [1],
        }

    # check that dataset was created, and has a reference to the table
    dataset_schema = DatasetSchema()
    datasets = [
        {k: v for k, v in dataset_schema.dump(dataset).items() if k not in ignored_keys}
        for dataset in session.query(Dataset).all()
    ]
    assert len(datasets) == 1
    assert datasets[0] == {
        "id": 1,
        "database": 1,
        "uuid": str(sqla_table.uuid),
        "name": "old_dataset",
        "changed_by": 1,
        "created_by": 1,
        "owners": [1],
        "columns": [5, 6, 7, 8, 9, 10],
        "is_physical": False,
        "tables": [1],
        "extra_json": "{}",
        "external_url": None,
        "is_managed_externally": False,
        "expression": """
SELECT
  ds,
  num_boys,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
    }
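
# For context: the shadow writes exercised in these tests are driven by
# SQLAlchemy ORM events registered in Superset itself. Below is a minimal,
# self-contained sketch of that pattern, not Superset's actual hook;
# ``LegacySketch``, ``ShadowSketch``, and ``_shadow_write_sketch`` are
# illustrative names that do not exist in the codebase.
import sqlalchemy as sa
from sqlalchemy.orm import declarative_base

SketchBase = declarative_base()


class LegacySketch(SketchBase):  # stands in for ``SqlaTable``
    __tablename__ = "sketch_legacy"
    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String(250))


class ShadowSketch(SketchBase):  # stands in for ``Dataset``
    __tablename__ = "sketch_shadow"
    id = sa.Column(sa.Integer, primary_key=True)
    name = sa.Column(sa.String(250))


@sa.event.listens_for(LegacySketch, "after_insert")
def _shadow_write_sketch(mapper, connection, target):
    # Runs in the same flush as the ``LegacySketch`` INSERT. ``after_insert``
    # listeners must write through ``connection`` rather than a new session,
    # so the shadow row is committed (or rolled back) with the original.
    connection.execute(ShadowSketch.__table__.insert().values(name=target.name))
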
def test_create_physical_sqlatable(
    app_context: None,
    session: Session,
    sample_columns: Dict["TableColumn", Dict[str, Any]],
    sample_metrics: Dict["SqlMetric", Dict[str, Any]],
    columns_default: Dict[str, Any],
) -> None:
    """
    Test shadow write when creating a new ``SqlaTable``.

    When a new physical ``SqlaTable`` is created, new models should also be
    created for ``Dataset``, ``Table``, and ``Column``.
    """
    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table
    from superset.tables.schemas import TableSchema

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    user1 = get_test_user(1, "abc")
    columns = list(sample_columns.keys())
    metrics = list(sample_metrics.keys())
    expected_table_columns = list(sample_columns.values())
    expected_metric_columns = list(sample_metrics.values())

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=Database(database_name="my_database", sqlalchemy_uri="sqlite://"),
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql=None,
        params=json.dumps(
            {
                "remote_id": 64,
                "database_name": "examples",
                "import_time": 1606677834,
            }
        ),
        created_by=user1,
        changed_by=user1,
        owners=[user1],
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    session.flush()

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on"}

    # check that columns were created
    column_schema = ColumnSchema()
    actual_columns = [
        {k: v for k, v in column_schema.dump(column).items() if k not in ignored_keys}
        for column in session.query(Column).all()
    ]
    num_physical_columns = len(
        [col for col in expected_table_columns if col.get("is_physical")]
    )
    num_dataset_table_columns = len(columns)
    num_dataset_metric_columns = len(metrics)
    assert (
        len(actual_columns)
        == num_physical_columns
        + num_dataset_table_columns
        + num_dataset_metric_columns
    )

    # table columns are created before dataset columns
    offset = 0
    for i in range(num_physical_columns):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_table_columns[i],
            "id": i + offset + 1,
            # physical columns of the table have their own uuid
            "uuid": actual_columns[i + offset]["uuid"],
            "is_physical": True,
            # table columns do not have creators
            "created_by": None,
            "tables": [1],
        }

    offset += num_physical_columns
    for i, column in enumerate(sqla_table.columns):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_table_columns[i],
            "id": i + offset + 1,
            # dataset columns reuse the uuid of the corresponding ``TableColumn``
            "uuid": str(column.uuid),
            "datasets": [1],
        }

    offset += num_dataset_table_columns
    for i, metric in enumerate(sqla_table.metrics):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_metric_columns[i],
            "id": i + offset + 1,
            "uuid": str(metric.uuid),
            "datasets": [1],
        }

    # check that table was created
    table_schema = TableSchema()
    tables = [
        {
            k: v
            for k, v in table_schema.dump(table).items()
            if k not in (ignored_keys | {"uuid"})
        }
        for table in session.query(Table).all()
    ]
    assert len(tables) == 1
    assert tables[0] == {
        "id": 1,
        "database": 1,
        "created_by": 1,
        "changed_by": 1,
        "datasets": [1],
        "columns": [1, 2, 3],
        "extra_json": "{}",
        "catalog": None,
        "schema": "my_schema",
        "name": "old_dataset",
        "is_managed_externally": False,
        "external_url": None,
    }

    # check that dataset was created
    dataset_schema = DatasetSchema()
    datasets = [
        {k: v for k, v in dataset_schema.dump(dataset).items() if k not in ignored_keys}
        for dataset in session.query(Dataset).all()
    ]
    assert len(datasets) == 1
    assert datasets[0] == {
        "id": 1,
        "uuid": str(sqla_table.uuid),
        "created_by": 1,
        "changed_by": 1,
        "owners": [1],
        "name": "old_dataset",
        "columns": [4, 5, 6, 7, 8, 9],
        "is_physical": True,
        "database": 1,
        "tables": [1],
        "extra_json": "{}",
        "expression": "old_dataset",
        "is_managed_externally": False,
        "external_url": None,
    }
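
# The ``sample_columns`` and ``sample_metrics`` fixtures consumed above are
# defined elsewhere (e.g. in a conftest). Below is a minimal sketch of the
# shape these tests assume: each ORM object maps to the dict that
# ``ColumnSchema`` is expected to dump for it. The fixture name and the field
# values are illustrative only.
import pytest


@pytest.fixture
def sample_columns_sketch() -> Dict["TableColumn", Dict[str, Any]]:
    from superset.connectors.sqla.models import TableColumn

    return {
        TableColumn(column_name="ds", is_dttm=True, type="TIMESTAMP"): {
            "name": "ds",
            "is_temporal": True,
            "type": "TIMESTAMP",
            "expression": "ds",
            "is_physical": True,
        },
    }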