Example #1
0
def test_create_virtual_sqlatable(
    app_context: None,
    mocker: MockFixture,
    session: Session,
    sample_columns: Dict["TableColumn", Dict[str, Any]],
    sample_metrics: Dict["SqlMetric", Dict[str, Any]],
    columns_default: Dict[str, Any],
) -> None:
    """
    Test shadow write when creating a new ``SqlaTable``.

    When a new virtual ``SqlaTable`` is created, new models should also be created for
    ``Dataset`` and ``Column``.
    """
    # patch session
    mocker.patch(
        "superset.security.SupersetSecurityManager.get_session", return_value=session
    )

    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table

    bind = session.get_bind()
    Dataset.metadata.create_all(bind)  # pylint: disable=no-member
    user = get_test_user(1, "abc")
    physical_props: List[Dict[str, Any]] = [
        {
            "name": "ds",
            "is_temporal": True,
            "type": "TIMESTAMP",
            "expression": "ds",
            "is_physical": True,
        },
        {
            "name": "num_boys",
            "type": "INTEGER",
            "expression": "num_boys",
            "is_physical": True,
        },
        {
            "name": "revenue",
            "type": "INTEGER",
            "expression": "revenue",
            "is_physical": True,
        },
        {
            "name": "expenses",
            "type": "INTEGER",
            "expression": "expenses",
            "is_physical": True,
        },
    ]
    # the virtual dataset below points at this physical ``Table``
    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    table = Table(
        name="some_table",
        schema="my_schema",
        catalog=None,
        database=database,
        columns=[
            Column(created_by=user, changed_by=user, **props)
            for props in physical_props
        ],
    )
    session.add(table)
    session.commit()

    assert session.query(Table).count() == 1
    assert session.query(Dataset).count() == 0

    # create virtual dataset
    columns = list(sample_columns)
    metrics = list(sample_metrics)
    expected_table_columns = list(sample_columns.values())
    expected_metric_columns = list(sample_metrics.values())

    sqla_table = SqlaTable(
        created_by=user,
        changed_by=user,
        owners=[user],
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql="""
SELECT
  ds,
  num_boys,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
        params=json.dumps(
            {
                "remote_id": 64,
                "database_name": "examples",
                "import_time": 1606677834,
            }
        ),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    session.flush()

    # should not add a new table
    assert session.query(Table).count() == 1
    assert session.query(Dataset).count() == 1

    # ignore these keys when comparing results
    skipped = {"created_on", "changed_on"}
    dump_column = ColumnSchema().dump
    actual_columns = [
        {key: value for key, value in dump_column(col).items() if key not in skipped}
        for col in session.query(Column).all()
    ]
    n_physical = len(physical_props)
    n_dataset_cols = len(columns)
    n_dataset_metrics = len(metrics)
    assert len(actual_columns) == n_physical + n_dataset_cols + n_dataset_metrics

    # physical table columns come first in the shadow-write output
    for idx, (col, props) in enumerate(zip(table.columns, physical_props)):
        assert actual_columns[idx] == {
            **columns_default,
            **props,
            "id": idx + 1,
            "uuid": str(col.uuid),
            "tables": [1],
        }

    # then the dataset's own (virtual) columns
    base = n_physical
    for idx, (col, expected) in enumerate(
        zip(sqla_table.columns, expected_table_columns)
    ):
        assert actual_columns[base + idx] == {
            **columns_default,
            **expected,
            "id": base + idx + 1,
            "uuid": str(col.uuid),
            "is_physical": False,
            "datasets": [1],
        }

    # finally the dataset's metric columns
    base = n_physical + n_dataset_cols
    for idx, (metric, expected) in enumerate(
        zip(sqla_table.metrics, expected_metric_columns)
    ):
        assert actual_columns[base + idx] == {
            **columns_default,
            **expected,
            "id": base + idx + 1,
            "uuid": str(metric.uuid),
            "datasets": [1],
        }

    # check that dataset was created, and has a reference to the table
    dump_dataset = DatasetSchema().dump
    datasets = [
        {key: value for key, value in dump_dataset(ds).items() if key not in skipped}
        for ds in session.query(Dataset).all()
    ]
    assert len(datasets) == 1
    assert datasets[0] == {
        "id": 1,
        "database": 1,
        "uuid": str(sqla_table.uuid),
        "name": "old_dataset",
        "changed_by": 1,
        "created_by": 1,
        "owners": [1],
        "columns": [5, 6, 7, 8, 9, 10],
        "is_physical": False,
        "tables": [1],
        "extra_json": "{}",
        "external_url": None,
        "is_managed_externally": False,
        "expression": """
SELECT
  ds,
  num_boys,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
    }
Example #2
0
def test_create_physical_sqlatable(
    app_context: None,
    session: Session,
    sample_columns: Dict["TableColumn", Dict[str, Any]],
    sample_metrics: Dict["SqlMetric", Dict[str, Any]],
    columns_default: Dict[str, Any],
) -> None:
    """
    Test shadow write when creating a new ``SqlaTable``.

    When a new physical ``SqlaTable`` is created, new models should also be created for
    ``Dataset``, ``Table``, and ``Column``.
    """
    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table
    from superset.tables.schemas import TableSchema

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member
    user1 = get_test_user(1, "abc")
    columns = list(sample_columns.keys())
    metrics = list(sample_metrics.keys())
    expected_table_columns = list(sample_columns.values())
    expected_metric_columns = list(sample_metrics.values())

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=Database(database_name="my_database", sqlalchemy_uri="sqlite://"),
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql=None,
        params=json.dumps(
            {
                "remote_id": 64,
                "database_name": "examples",
                "import_time": 1606677834,
            }
        ),
        created_by=user1,
        changed_by=user1,
        owners=[user1],
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    session.flush()

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on"}

    # check that columns were created
    column_schema = ColumnSchema()
    actual_columns = [
        {k: v for k, v in column_schema.dump(column).items() if k not in ignored_keys}
        for column in session.query(Column).all()
    ]
    # count the physical sample columns via truthiness instead of `== True`
    # (PEP 8 / flake8 E712: avoid equality comparisons to True)
    num_physical_columns = sum(
        1 for col in expected_table_columns if col.get("is_physical")
    )
    num_dataset_table_columns = len(columns)
    num_dataset_metric_columns = len(metrics)
    assert (
        len(actual_columns)
        == num_physical_columns + num_dataset_table_columns + num_dataset_metric_columns
    )

    # table columns are created before dataset columns are created
    # NOTE(review): this indexing assumes the physical entries appear first in
    # ``expected_table_columns`` — confirm against the ``sample_columns`` fixture
    offset = 0
    for i in range(num_physical_columns):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_table_columns[i],
            "id": i + offset + 1,
            # physical columns for table have its own uuid
            "uuid": actual_columns[i + offset]["uuid"],
            "is_physical": True,
            # table columns do not have creators
            "created_by": None,
            "tables": [1],
        }

    offset += num_physical_columns
    for i, column in enumerate(sqla_table.columns):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_table_columns[i],
            "id": i + offset + 1,
            # columns for dataset reuses the same uuid of TableColumn
            "uuid": str(column.uuid),
            "datasets": [1],
        }

    offset += num_dataset_table_columns
    for i, metric in enumerate(sqla_table.metrics):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_metric_columns[i],
            "id": i + offset + 1,
            "uuid": str(metric.uuid),
            "datasets": [1],
        }

    # check that table was created
    table_schema = TableSchema()
    tables = [
        {
            k: v
            for k, v in table_schema.dump(table).items()
            # the shadow table gets a fresh uuid, so exclude it from comparison
            if k not in (ignored_keys | {"uuid"})
        }
        for table in session.query(Table).all()
    ]
    assert len(tables) == 1
    assert tables[0] == {
        "id": 1,
        "database": 1,
        "created_by": 1,
        "changed_by": 1,
        "datasets": [1],
        "columns": [1, 2, 3],
        "extra_json": "{}",
        "catalog": None,
        "schema": "my_schema",
        "name": "old_dataset",
        "is_managed_externally": False,
        "external_url": None,
    }

    # check that dataset was created
    dataset_schema = DatasetSchema()
    datasets = [
        {k: v for k, v in dataset_schema.dump(dataset).items() if k not in ignored_keys}
        for dataset in session.query(Dataset).all()
    ]
    assert len(datasets) == 1
    assert datasets[0] == {
        "id": 1,
        "uuid": str(sqla_table.uuid),
        "created_by": 1,
        "changed_by": 1,
        "owners": [1],
        "name": "old_dataset",
        "columns": [4, 5, 6, 7, 8, 9],
        "is_physical": True,
        "database": 1,
        "tables": [1],
        "extra_json": "{}",
        # for a physical dataset the expression is simply the table name
        "expression": "old_dataset",
        "is_managed_externally": False,
        "external_url": None,
    }