def test_cascade_delete_table(app_context: None, session: Session) -> None:
    """
    Test that deleting ``Table`` also deletes its columns.
    """
    from superset.columns.models import Column
    from superset.models.core import Database
    from superset.tables.models import Table

    bind = session.get_bind()
    Table.metadata.create_all(bind)  # pylint: disable=no-member

    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    geo_columns = [
        Column(name="longitude", expression="longitude"),
        Column(name="latitude", expression="latitude"),
    ]
    table = Table(
        name="my_table",
        schema="my_schema",
        catalog="my_catalog",
        database=database,
        columns=geo_columns,
    )
    session.add(table)
    session.flush()

    # both columns were persisted alongside the table
    assert len(session.query(Column).all()) == 2

    session.delete(table)
    session.flush()

    # the delete cascaded to the table's columns
    assert len(session.query(Column).all()) == 0
# Example #2
def test_dataset_model(app_context: None, session: Session) -> None:
    """
    Test basic attributes of a ``Dataset``.
    """
    from superset.columns.models import Column
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.tables.models import Table

    bind = session.get_bind()
    Dataset.metadata.create_all(bind)  # pylint: disable=no-member

    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    table = Table(
        name="my_table",
        schema="my_schema",
        catalog="my_catalog",
        database=database,
        columns=[
            Column(name="longitude", expression="longitude"),
            Column(name="latitude", expression="latitude"),
        ],
    )
    session.add(table)
    session.flush()

    # virtual dataset defined on top of the physical table
    expression = """
SELECT array_agg(array[longitude,latitude]) AS position
FROM my_catalog.my_schema.my_table
"""
    dataset = Dataset(
        database=database,
        name="positions",
        expression=expression,
        tables=[table],
        columns=[
            Column(name="position",
                   expression="array_agg(array[longitude,latitude])"),
        ],
    )
    session.add(dataset)
    session.flush()

    assert dataset.id == 1
    assert dataset.uuid is not None

    assert dataset.name == "positions"
    assert dataset.expression == expression

    # relationships round-trip
    assert [related.name for related in dataset.tables] == ["my_table"]
    assert [related.name for related in dataset.columns] == ["position"]
# Example #3
def test_cascade_delete_dataset(app_context: None, session: Session) -> None:
    """
    Test that deleting ``Dataset`` also deletes its columns.
    """
    from superset.columns.models import Column
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.tables.models import Table

    bind = session.get_bind()
    Dataset.metadata.create_all(bind)  # pylint: disable=no-member

    table = Table(
        name="my_table",
        schema="my_schema",
        catalog="my_catalog",
        database=Database(database_name="my_database", sqlalchemy_uri="sqlite://"),
        columns=[
            Column(name="longitude", expression="longitude"),
            Column(name="latitude", expression="latitude"),
        ],
    )
    session.add(table)
    session.flush()

    dataset = Dataset(
        name="positions",
        expression="""
SELECT array_agg(array[longitude,latitude]) AS position
FROM my_catalog.my_schema.my_table
""",
        database=table.database,
        tables=[table],
        columns=[
            Column(name="position",
                   expression="array_agg(array[longitude,latitude])"),
        ],
    )
    session.add(dataset)
    session.flush()

    # two physical columns plus the dataset's derived one
    assert len(session.query(Column).all()) == 3

    session.delete(dataset)
    session.flush()

    # only the dataset's column goes away; the table's columns survive
    assert len(session.query(Column).all()) == 2
# Example #4
def test_column_model(app_context: None, session: Session) -> None:
    """
    Test basic attributes of a ``Column``.
    """
    from superset.columns.models import Column

    bind = session.get_bind()
    Column.metadata.create_all(bind)  # pylint: disable=no-member

    column = Column(name="ds", type="TIMESTAMP", expression="ds")
    session.add(column)
    session.flush()

    assert column.id == 1
    assert column.uuid is not None

    # explicitly set attributes round-trip
    assert column.name == "ds"
    assert column.type == "TIMESTAMP"
    assert column.expression == "ds"

    # test that default values are set correctly
    for attr in ("description", "warning_text", "unit"):
        assert getattr(column, attr) is None
    for attr in (
        "is_temporal",
        "is_spatial",
        "is_partition",
        "is_aggregation",
        "is_additive",
    ):
        assert getattr(column, attr) is False
    assert column.is_increase_desired is True
def test_table_model(session: Session) -> None:
    """
    Test basic attributes of a ``Table``.
    """
    from superset.columns.models import Column
    from superset.models.core import Database
    from superset.tables.models import Table

    bind = session.get_bind()
    Table.metadata.create_all(bind)  # pylint: disable=no-member

    ds_column = Column(
        name="ds",
        type="TIMESTAMP",
        expression="ds",
    )
    table = Table(
        name="my_table",
        schema="my_schema",
        catalog="my_catalog",
        database=Database(database_name="my_database", sqlalchemy_uri="test://"),
        columns=[ds_column],
    )
    session.add(table)
    session.flush()

    # identifiers assigned on flush
    assert table.id == 1
    assert table.uuid is not None
    assert table.database_id == 1

    # explicitly set attributes round-trip
    assert table.catalog == "my_catalog"
    assert table.schema == "my_schema"
    assert table.name == "my_table"
    assert [column.name for column in table.columns] == ["ds"]
# Example #6
 def update_or_create_column(column_meta: Dict[str, Any]) -> Column:
     """Reuse the existing ``Column`` for this name or build a new one,
     then sync its attributes from the fresh table metadata."""
     name: str = column_meta["name"]
     try:
         column = existing_columns[name]
     except KeyError:
         column = Column(name=name)
     column.type = column_meta["type"]
     column.is_temporal = column_meta["is_dttm"]
     column.expression = quote_identifier(name)
     column.is_aggregation = False
     column.is_physical = True
     column.is_spatial = False
     column.is_partition = False  # TODO: update with accurate is_partition
     return column
# Example #7
def session_with_data(session: Session) -> Iterator[Session]:
    """Populate the session with one instance of each core model and yield it."""
    from superset.columns.models import Column
    from superset.connectors.sqla.models import SqlaTable, TableColumn
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.models.sql_lab import Query, SavedQuery
    from superset.tables.models import Table

    bind = session.get_bind()
    SqlaTable.metadata.create_all(bind)  # pylint: disable=no-member

    db = Database(database_name="my_database", sqlalchemy_uri="sqlite://")

    sqla_table = SqlaTable(
        table_name="my_sqla_table",
        columns=[TableColumn(column_name="a", type="INTEGER")],
        metrics=[],
        database=db,
    )

    query_obj = Query(
        client_id="foo",
        database=db,
        tab_name="test_tab",
        sql_editor_id="test_editor_id",
        sql="select * from bar",
        select_sql="select * from bar",
        executed_sql="select * from bar",
        limit=100,
        select_as_cta=False,
        rows=100,
        error_message="none",
        results_key="abc",
    )

    saved_query = SavedQuery(database=db, sql="select * from foo")

    table = Table(
        name="my_table",
        schema="my_schema",
        catalog="my_catalog",
        database=db,
        columns=[],
    )

    dataset = Dataset(
        database=db,
        name="positions",
        expression="""
SELECT array_agg(array[longitude,latitude]) AS position
FROM my_catalog.my_schema.my_table
""",
        tables=[table],
        columns=[
            Column(name="position",
                   expression="array_agg(array[longitude,latitude])"),
        ],
    )

    session.add_all([dataset, table, saved_query, query_obj, db, sqla_table])
    session.flush()
    yield session
def test_create_virtual_sqlatable(mocker: MockFixture, app_context: None,
                                  session: Session) -> None:
    """
    Test shadow write when creating a new ``SqlaTable``.

    When a new virtual ``SqlaTable`` is created, new models should also be created for
    ``Dataset`` and ``Column``.
    """
    # patch session
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)

    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    # create the ``Table`` that the virtual dataset points to
    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    table = Table(
        name="some_table",
        schema="my_schema",
        catalog=None,
        database=database,
        columns=[
            Column(name="ds", is_temporal=True, type="TIMESTAMP"),
            Column(name="user_id", type="INTEGER"),
            Column(name="revenue", type="INTEGER"),
            Column(name="expenses", type="INTEGER"),
        ],
    )
    session.add(table)
    session.commit()

    # create virtual dataset
    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="user_id", type="INTEGER"),
        TableColumn(column_name="revenue", type="INTEGER"),
        TableColumn(column_name="expenses", type="INTEGER"),
        TableColumn(column_name="profit",
                    type="INTEGER",
                    expression="revenue-expenses"),
    ]
    metrics = [
        SqlMetric(metric_name="cnt", expression="COUNT(*)"),
    ]

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql="""
SELECT
  ds,
  user_id,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
        params=json.dumps({
            "remote_id": 64,
            "database_name": "examples",
            "import_time": 1606677834,
        }),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    session.flush()

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on", "uuid"}

    # check that columns were created
    column_schema = ColumnSchema()
    column_schemas = [
        {k: v for k, v in column_schema.dump(column).items() if k not in ignored_keys}
        for column in session.query(Column).all()
    ]

    def expected_column(  # helper so the shared defaults are stated once
        id_: int,
        name: str,
        type_: str,
        expression: Any,
        is_physical: bool,
        is_temporal: bool = False,
        is_aggregation: bool = False,
    ) -> Dict[str, Any]:
        """Build the serialized form of a ``Column`` with default values filled in."""
        return {
            "id": id_,
            "name": name,
            "type": type_,
            "expression": expression,
            "is_physical": is_physical,
            "is_temporal": is_temporal,
            "is_aggregation": is_aggregation,
            # defaults shared by every column in this test
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "changed_by": None,
            "external_url": None,
            "is_managed_externally": False,
        }

    assert column_schemas == [
        # physical columns from the ``Table``
        expected_column(1, "ds", "TIMESTAMP", None, True, is_temporal=True),
        expected_column(2, "user_id", "INTEGER", None, True),
        expected_column(3, "revenue", "INTEGER", None, True),
        expected_column(4, "expenses", "INTEGER", None, True),
        # virtual columns shadow-written from the ``SqlaTable``
        expected_column(5, "ds", "TIMESTAMP", "ds", False, is_temporal=True),
        expected_column(6, "user_id", "INTEGER", "user_id", False),
        expected_column(7, "revenue", "INTEGER", "revenue", False),
        expected_column(8, "expenses", "INTEGER", "expenses", False),
        expected_column(9, "profit", "INTEGER", "revenue-expenses", False),
        # metric shadow-written as an aggregation column
        expected_column(10, "cnt", "Unknown", "COUNT(*)", False,
                        is_aggregation=True),
    ]

    # check that dataset was created, and has a reference to the table
    dataset_schema = DatasetSchema()
    datasets = [
        {k: v for k, v in dataset_schema.dump(dataset).items() if k not in ignored_keys}
        for dataset in session.query(Dataset).all()
    ]
    assert datasets == [{
        "id": 1,
        "sqlatable_id": 1,
        "name": "old_dataset",
        "changed_by": None,
        "created_by": None,
        "columns": [5, 6, 7, 8, 9, 10],
        "is_physical": False,
        "tables": [1],
        "extra_json": "{}",
        "external_url": None,
        "is_managed_externally": False,
        "expression": """
SELECT
  ds,
  user_id,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
    }]
def test_update_virtual_sqlatable_references(mocker: MockFixture,
                                             app_context: None,
                                             session: Session) -> None:
    """
    Test that changing the SQL of a virtual ``SqlaTable`` updates ``Dataset``.

    When the SQL is modified the list of referenced tables should be updated in the new
    ``Dataset`` model.
    """
    # patch session
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)

    from superset.columns.models import Column
    from superset.connectors.sqla.models import SqlaTable, TableColumn
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.tables.models import Table

    bind = session.get_bind()
    Dataset.metadata.create_all(bind)  # pylint: disable=no-member

    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")

    def make_table(name: str, column_name: str) -> Table:
        """Build a physical table with a single integer column."""
        return Table(
            name=name,
            schema="my_schema",
            catalog=None,
            database=database,
            columns=[Column(name=column_name, type="INTEGER")],
        )

    table1 = make_table("table_a", "a")
    table2 = make_table("table_b", "b")
    session.add(table1)
    session.add(table2)
    session.commit()

    # virtual dataset initially referencing only table_a
    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=[TableColumn(column_name="a", type="INTEGER")],
        database=database,
        schema="my_schema",
        sql="SELECT a FROM table_a",
    )
    session.add(sqla_table)
    session.flush()

    # the shadow-written dataset should reference table1 only
    dataset = session.query(Dataset).one()
    assert dataset.tables == [table1]

    # widen the SQL so it also references table_b
    sqla_table.sql = "SELECT a, b FROM table_a JOIN table_b"
    session.flush()

    # the dataset should now reference both tables and the new SQL
    dataset = session.query(Dataset).one()
    assert dataset.tables == [table1, table2]
    assert dataset.expression == "SELECT a, b FROM table_a JOIN table_b"
# Example #10
def test_create_virtual_sqlatable(
    app_context: None,
    mocker: MockFixture,
    session: Session,
    sample_columns: Dict["TableColumn", Dict[str, Any]],
    sample_metrics: Dict["SqlMetric", Dict[str, Any]],
    columns_default: Dict[str, Any],
) -> None:
    """
    Test shadow write when creating a new ``SqlaTable``.

    When a new virtual ``SqlaTable`` is created, new models should also be created for
    ``Dataset`` and ``Column``.

    ``sample_columns``/``sample_metrics`` map input ``TableColumn``/``SqlMetric``
    objects to the dict each is expected to serialize to; ``columns_default``
    carries the serialized default values shared by all columns.
    """
    # patch session
    mocker.patch(
        "superset.security.SupersetSecurityManager.get_session", return_value=session
    )

    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member
    # user recorded as creator/changer/owner on the new models
    user1 = get_test_user(1, "abc")
    physical_table_columns: List[Dict[str, Any]] = [
        dict(
            name="ds",
            is_temporal=True,
            type="TIMESTAMP",
            expression="ds",
            is_physical=True,
        ),
        dict(name="num_boys", type="INTEGER", expression="num_boys", is_physical=True),
        dict(name="revenue", type="INTEGER", expression="revenue", is_physical=True),
        dict(name="expenses", type="INTEGER", expression="expenses", is_physical=True),
    ]
    # create a physical ``Table`` that the virtual dataset points to
    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    table = Table(
        name="some_table",
        schema="my_schema",
        catalog=None,
        database=database,
        columns=[
            Column(**props, created_by=user1, changed_by=user1)
            for props in physical_table_columns
        ],
    )
    session.add(table)
    session.commit()

    # sanity check: only the physical table exists so far, no dataset
    assert session.query(Table).count() == 1
    assert session.query(Dataset).count() == 0

    # create virtual dataset
    columns = list(sample_columns.keys())
    metrics = list(sample_metrics.keys())
    expected_table_columns = list(sample_columns.values())
    expected_metric_columns = list(sample_metrics.values())

    sqla_table = SqlaTable(
        created_by=user1,
        changed_by=user1,
        owners=[user1],
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql="""
SELECT
  ds,
  num_boys,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
        params=json.dumps(
            {
                "remote_id": 64,
                "database_name": "examples",
                "import_time": 1606677834,
            }
        ),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    session.flush()

    # should not add a new table
    assert session.query(Table).count() == 1
    assert session.query(Dataset).count() == 1

    # ignore these keys when comparing results
    # (uuid is NOT ignored here: it is checked against the source models below)
    ignored_keys = {"created_on", "changed_on"}
    column_schema = ColumnSchema()
    actual_columns = [
        {k: v for k, v in column_schema.dump(column).items() if k not in ignored_keys}
        for column in session.query(Column).all()
    ]
    # rows are expected in insertion order:
    # physical table columns, then dataset columns, then metric columns
    num_physical_columns = len(physical_table_columns)
    num_dataset_table_columns = len(columns)
    num_dataset_metric_columns = len(metrics)
    assert (
        len(actual_columns)
        == num_physical_columns + num_dataset_table_columns + num_dataset_metric_columns
    )

    # physical columns come first and keep their input properties
    for i, column in enumerate(table.columns):
        assert actual_columns[i] == {
            **columns_default,
            **physical_table_columns[i],
            "id": i + 1,
            "uuid": str(column.uuid),
            "tables": [1],
        }

    # next come the shadow-written dataset columns (non-physical)
    offset = num_physical_columns
    for i, column in enumerate(sqla_table.columns):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_table_columns[i],
            "id": i + offset + 1,
            "uuid": str(column.uuid),
            "is_physical": False,
            "datasets": [1],
        }

    # finally the metrics, also shadow-written as columns
    offset = num_physical_columns + num_dataset_table_columns
    for i, metric in enumerate(sqla_table.metrics):
        assert actual_columns[i + offset] == {
            **columns_default,
            **expected_metric_columns[i],
            "id": i + offset + 1,
            "uuid": str(metric.uuid),
            "datasets": [1],
        }

    # check that dataset was created, and has a reference to the table
    dataset_schema = DatasetSchema()
    datasets = [
        {k: v for k, v in dataset_schema.dump(dataset).items() if k not in ignored_keys}
        for dataset in session.query(Dataset).all()
    ]
    assert len(datasets) == 1
    assert datasets[0] == {
        "id": 1,
        "database": 1,
        "uuid": str(sqla_table.uuid),
        "name": "old_dataset",
        "changed_by": 1,
        "created_by": 1,
        "owners": [1],
        "columns": [5, 6, 7, 8, 9, 10],
        "is_physical": False,
        "tables": [1],
        "extra_json": "{}",
        "external_url": None,
        "is_managed_externally": False,
        "expression": """
SELECT
  ds,
  num_boys,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
    }