Code example #1
File: conftest.py (project: dodopizza/superset)
from typing import Any, Dict  # needed for the annotations below


def sample_metrics() -> Dict["SqlMetric", Dict[str, Any]]:
    from superset.connectors.sqla.models import SqlMetric

    return {
        SqlMetric(metric_name="cnt",
                  expression="COUNT(*)",
                  metric_type="COUNT"): {
            "name": "cnt",
            "expression": "COUNT(*)",
            "extra_json": '{"metric_type": "COUNT"}',
            "type": "UNKNOWN",
            "is_additive": True,
            "is_aggregation": True,
            "is_filterable": False,
            "is_physical": False,
        },
        SqlMetric(metric_name="avg revenue",
                  expression="AVG(revenue)",
                  metric_type="AVG"): {
            "name": "avg revenue",
            "expression": "AVG(revenue)",
            "extra_json": '{"metric_type": "AVG"}',
            "type": "UNKNOWN",
            "is_additive": False,
            "is_aggregation": True,
            "is_filterable": False,
            "is_physical": False,
        },
    }
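The mapping above pairs each SqlMetric with the attributes its corresponding record is expected to carry. A minimal sketch of how a test might consume such a fixture; the assert_metrics_match helper and the name-based lookup are assumptions for illustration, not part of Superset's test suite:

def assert_metrics_match(sample_metrics, actual_columns):
    # For each metric, locate its counterpart by name and compare every
    # expected attribute against the object that was actually created.
    for metric, expected in sample_metrics.items():
        actual = next(c for c in actual_columns if c.name == expected["name"])
        for attr, value in expected.items():
            assert getattr(actual, attr) == value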
Code example #2
    def create_table(self,
                     name,
                     schema='',
                     id=0,
                     cols_names=[],
                     metric_names=[]):
        database_name = 'main'
        name = '{0}{1}'.format(NAME_PREFIX, name)
        params = {DBREF: id, 'database_name': database_name}

        dict_rep = {
            'database_id': get_main_database(db.session).id,
            'table_name': name,
            'schema': schema,
            'id': id,
            'params': json.dumps(params),
            'columns': [{
                'column_name': c
            } for c in cols_names],
            'metrics': [{
                'metric_name': c
            } for c in metric_names],
        }

        table = SqlaTable(
            id=id,
            schema=schema,
            table_name=name,
            params=json.dumps(params),
        )
        for col_name in cols_names:
            table.columns.append(TableColumn(column_name=col_name))
        for metric_name in metric_names:
            table.metrics.append(SqlMetric(metric_name=metric_name))
        return table, dict_rep
Code example #3
    def create_table(
        self, name, schema=None, id=0, cols_names=[], cols_uuids=None, metric_names=[]
    ):
        database_name = "main"
        name = "{0}{1}".format(NAME_PREFIX, name)
        params = {DBREF: id, "database_name": database_name}

        if cols_uuids is None:
            cols_uuids = [None] * len(cols_names)

        dict_rep = {
            "database_id": get_example_database().id,
            "table_name": name,
            "schema": schema,
            "id": id,
            "params": json.dumps(params),
            "columns": [
                {"column_name": c, "uuid": u} for c, u in zip(cols_names, cols_uuids)
            ],
            "metrics": [{"metric_name": c, "expression": ""} for c in metric_names],
        }

        table = SqlaTable(
            id=id, schema=schema, table_name=name, params=json.dumps(params)
        )
        for col_name, uuid in zip(cols_names, cols_uuids):
            table.columns.append(TableColumn(column_name=col_name, uuid=uuid))
        for metric_name in metric_names:
            table.metrics.append(SqlMetric(metric_name=metric_name, expression=""))
        return table, dict_rep
Code example #4
    def test_query_cache_key_changes_when_metric_is_updated(self):
        self.login(username="******")
        payload = get_query_context("birth_names")

        # make temporary change and revert it to refresh the changed_on property
        datasource = DatasourceDAO.get_datasource(
            session=db.session,
            datasource_type=DatasourceType(payload["datasource"]["type"]),
            datasource_id=payload["datasource"]["id"],
        )

        datasource.metrics.append(SqlMetric(metric_name="foo", expression="select 1;"))
        db.session.commit()

        # construct baseline query_cache_key
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key_original = query_context.query_cache_key(query_object)

        # wait a second since mysql records timestamps in second granularity
        time.sleep(1)

        datasource.metrics[0].expression = "select 2;"
        db.session.commit()

        # create new QueryContext with unchanged attributes, extract new query_cache_key
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        cache_key_new = query_context.query_cache_key(query_object)

        datasource.metrics = []
        db.session.commit()

        # the new cache_key should be different due to updated datasource
        self.assertNotEqual(cache_key_original, cache_key_new)
Code example #5
def _add_table_metrics(datasource: SqlaTable) -> None:
    # By accessing the attribute first, we make sure `datasource.columns` and
    # `datasource.metrics` are already loaded. Otherwise accessing them later
    # may trigger an unnecessary and unexpected `after_update` event.
    columns, metrics = datasource.columns, datasource.metrics

    if not any(col.column_name == "num_california" for col in columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    if not any(col.metric_name == "sum__num" for col in metrics):
        col = str(column("num").compile(db.engine))
        metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    for col in columns:
        if col.column_name == "ds":
            col.is_dttm = True
            break
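The column(...).compile(db.engine) idiom above renders an identifier using the connected dialect's quoting rules, which matters when a name collides with a SQL keyword or starts with a digit. A standalone sketch, using an in-memory SQLite engine as a stand-in for Superset's db.engine:

from sqlalchemy import column, create_engine

engine = create_engine("sqlite://")  # stand-in for Superset's db.engine

print(str(column("num").compile(engine)))     # num    - plain names pass through
print(str(column("select").compile(engine)))  # "select" - reserved words are quoted
print(str(column("2004").compile(engine)))    # "2004"   - digit-leading names are quoted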
Code example #6
File: utils.py (project: zuxqoj/incubator-superset)
def decode_dashboards(o):
    """
    Function to be passed into json.loads as its object_hook parameter.
    Recreates dashboard objects from their JSON representation.
    """
    import superset.models.core as models
    from superset.connectors.sqla.models import (
        SqlaTable, SqlMetric, TableColumn,
    )

    if '__Dashboard__' in o:
        d = models.Dashboard()
        d.__dict__.update(o['__Dashboard__'])
        return d
    elif '__Slice__' in o:
        d = models.Slice()
        d.__dict__.update(o['__Slice__'])
        return d
    elif '__TableColumn__' in o:
        d = TableColumn()
        d.__dict__.update(o['__TableColumn__'])
        return d
    elif '__SqlaTable__' in o:
        d = SqlaTable()
        d.__dict__.update(o['__SqlaTable__'])
        return d
    elif '__SqlMetric__' in o:
        d = SqlMetric()
        d.__dict__.update(o['__SqlMetric__'])
        return d
    elif '__datetime__' in o:
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    else:
        return o
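As the docstring notes, the decoder is wired into json.loads via the object_hook parameter. A hedged usage sketch; the exported_payload string is a hypothetical fragment of an export, and loading it requires a configured Superset environment for the model imports to succeed:

import json

# Hypothetical payload; real exports wrap each model under a "__ClassName__" key.
exported_payload = '{"__SqlMetric__": {"metric_name": "cnt", "expression": "COUNT(*)"}}'

metric = json.loads(exported_payload, object_hook=decode_dashboards)
print(metric.metric_name, metric.expression)  # cnt COUNT(*)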
Code example #7
def decode_dashboards(  # pylint: disable=too-many-return-statements
        o: Dict[str, Any]) -> Any:
    """
    Function to be passed into json.loads as its object_hook parameter.
    Recreates dashboard objects from their JSON representation.
    """
    from superset.connectors.druid.models import (
        DruidCluster,
        DruidColumn,
        DruidDatasource,
        DruidMetric,
    )

    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    if "__Slice__" in o:
        return Slice(**o["__Slice__"])
    if "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    if "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    if "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    if "__DruidCluster__" in o:
        return DruidCluster(**o["__DruidCluster__"])
    if "__DruidColumn__" in o:
        return DruidColumn(**o["__DruidColumn__"])
    if "__DruidDatasource__" in o:
        return DruidDatasource(**o["__DruidDatasource__"])
    if "__DruidMetric__" in o:
        return DruidMetric(**o["__DruidMetric__"])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")

    return o
Code example #8
def _create_energy_table(df: DataFrame, table_name: str):
    database = get_example_database()

    table_description = "Energy consumption"
    schema = {"source": String(255), "target": String(255), "value": Float()}
    table = create_table_for_dashboard(df, table_name, database, schema,
                                       table_description)
    table.fetch_metadata()

    if not any(col.metric_name == "sum__value" for col in table.metrics):
        col = str(column("value").compile(db.engine))
        table.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})"))

    db.session.merge(table)
    db.session.commit()
    table.fetch_metadata()

    for slice_data in _get_energy_slices():
        _create_and_commit_energy_slice(
            table,
            slice_data["slice_title"],
            slice_data["viz_type"],
            slice_data["params"],
        )
Code example #9
    def create_table(self, name, schema=None, id=0, cols_names=[], metric_names=[]):
        params = {"remote_id": id, "database_name": "examples"}
        table = SqlaTable(
            id=id, schema=schema, table_name=name, params=json.dumps(params)
        )
        for col_name in cols_names:
            table.columns.append(TableColumn(column_name=col_name))
        for metric_name in metric_names:
            table.metrics.append(SqlMetric(metric_name=metric_name, expression=""))
        return table
Code example #10
def physical_dataset():
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    example_database = get_example_database()
    engine = example_database.get_sqla_engine()
    # sqlite can only execute one statement at a time
    engine.execute("""
        CREATE TABLE IF NOT EXISTS physical_dataset(
          col1 INTEGER,
          col2 VARCHAR(255),
          col3 DECIMAL(4,2),
          col4 VARCHAR(255),
          col5 TIMESTAMP
        );
        """)
    engine.execute("""
        INSERT INTO physical_dataset values
        (0, 'a', 1.0, NULL, '2000-01-01 00:00:00'),
        (1, 'b', 1.1, NULL, '2000-01-02 00:00:00'),
        (2, 'c', 1.2, NULL, '2000-01-03 00:00:00'),
        (3, 'd', 1.3, NULL, '2000-01-04 00:00:00'),
        (4, 'e', 1.4, NULL, '2000-01-05 00:00:00'),
        (5, 'f', 1.5, NULL, '2000-01-06 00:00:00'),
        (6, 'g', 1.6, NULL, '2000-01-07 00:00:00'),
        (7, 'h', 1.7, NULL, '2000-01-08 00:00:00'),
        (8, 'i', 1.8, NULL, '2000-01-09 00:00:00'),
        (9, 'j', 1.9, NULL, '2000-01-10 00:00:00');
    """)

    dataset = SqlaTable(
        table_name="physical_dataset",
        database=example_database,
    )
    TableColumn(column_name="col1", type="INTEGER", table=dataset)
    TableColumn(column_name="col2", type="VARCHAR(255)", table=dataset)
    TableColumn(column_name="col3", type="DECIMAL(4,2)", table=dataset)
    TableColumn(column_name="col4", type="VARCHAR(255)", table=dataset)
    TableColumn(column_name="col5",
                type="TIMESTAMP",
                is_dttm=True,
                table=dataset)
    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)
    db.session.commit()

    yield dataset

    engine.execute("""
        DROP TABLE physical_dataset;
    """)
    dataset = db.session.query(SqlaTable).filter_by(
        table_name="physical_dataset").all()
    for ds in dataset:
        db.session.delete(ds)
    db.session.commit()
Code example #11
class SqlaConnectorTestCase(BaseConnectorTestCase):
    columns = [
        TableColumn(column_name='region', type='VARCHAR(20)'),
        TableColumn(column_name='district', type='VARCHAR(20)'),
        TableColumn(column_name='project', type='VARCHAR(20)'),
        TableColumn(column_name='received', type='DATE', is_dttm=True),
        TableColumn(column_name='value', type='BIGINT'),
    ]
    metrics = [
        SqlMetric(metric_name='sum__value', metric_type='sum',
                  expression='SUM(value)'),
        SqlMetric(metric_name='avg__value', metric_type='avg',
                  expression='AVG(value)'),
        SqlMetric(metric_name='ratio', metric_type='avg',
                  expression='AVG(value/value2)'),
        SqlMetric(metric_name='value_percentage', metric_type='custom',
                  expression='SUM(value)/SUM(value + value2)'),
        SqlMetric(metric_name='category_percentage', metric_type='custom',
                  expression="SUM(CASE WHEN category='CategoryA' THEN 1 ELSE 0 END)/"
                             'CAST(COUNT(*) AS REAL)'),
    ]

    def setUp(self):
        super(SqlaConnectorTestCase, self).setUp()
        sqlalchemy_uri = 'sqlite:////tmp/test.db'
        database = Database(
            database_name='test_database',
            sqlalchemy_uri=sqlalchemy_uri)
        self.connection = database.get_sqla_engine().connect()
        self.datasource = SqlaTable(table_name='test_datasource',
                                    database=database,
                                    columns=self.columns,
                                    metrics=self.metrics)
        with database.get_sqla_engine().begin() as connection:
            self.df.to_sql(self.datasource.table_name,
                           connection,
                           if_exists='replace',
                           index=False,
                           dtype={'received': Date})
Code example #12
    def create_table(
            self, name, schema='', id=0, cols_names=[], metric_names=[]):
        params = {'remote_id': id, 'database_name': 'main'}
        table = SqlaTable(
            id=id,
            schema=schema,
            table_name=name,
            params=json.dumps(params)
        )
        for col_name in cols_names:
            table.columns.append(
                TableColumn(column_name=col_name))
        for metric_name in metric_names:
            table.metrics.append(SqlMetric(metric_name=metric_name))
        return table
Code example #13
def text_column_table():
    with app.app_context():
        table = SqlaTable(
            table_name="text_column_table",
            sql=("SELECT 'foo' as foo "
                 "UNION SELECT '' "
                 "UNION SELECT NULL "
                 "UNION SELECT 'null' "
                 "UNION SELECT '\"text in double quotes\"' "
                 "UNION SELECT '''text in single quotes''' "
                 "UNION SELECT 'double quotes \" in text' "
                 "UNION SELECT 'single quotes '' in text' "),
            database=get_example_database(),
        )
        TableColumn(column_name="foo", type="VARCHAR(255)", table=table)
        SqlMetric(metric_name="count", expression="count(*)", table=table)
        yield table
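The fixture's SQL leans on standard SQL escaping: inside a single-quoted literal, a single quote is written by doubling it, so '''text in single quotes''' yields 'text in single quotes'. A quick standalone check with the sqlite3 module from the standard library:

import sqlite3

conn = sqlite3.connect(":memory:")
# The doubled quotes are unescaped by the SQL parser, not by Python.
print(conn.execute("SELECT '''text in single quotes'''").fetchone()[0])
# -> 'text in single quotes'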
Code example #14
def test_update_sqlatable_metric(mocker: MockFixture, app_context: None,
                                 session: Session) -> None:
    """
    Test that updating a ``SqlaTable`` also updates the corresponding ``Dataset``.

    For this test we check that updating the SQL expression in a metric belonging to a
    ``SqlaTable`` is reflected in the ``Dataset`` metric.
    """
    # patch session
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)

    from superset.columns.models import Column
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.tables.models import Table

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
    ]
    metrics = [
        SqlMetric(metric_name="cnt", expression="COUNT(*)"),
    ]
    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        database=Database(database_name="my_database",
                          sqlalchemy_uri="sqlite://"),
    )
    session.add(sqla_table)
    session.flush()

    # check that the metric was created
    column = session.query(Column).filter_by(is_physical=False).one()
    assert column.expression == "COUNT(*)"

    # change the metric definition
    sqla_table.metrics[0].expression = "MAX(ds)"
    session.flush()

    assert column.expression == "MAX(ds)"
Code example #15
def test_should_generate_closed_and_open_time_filter_range():
    with app.app_context():
        if backend() != "postgresql":
            pytest.skip(
                f"{backend()} has different dialect for datetime column")

        table = SqlaTable(
            table_name="temporal_column_table",
            sql=("SELECT '2021-12-31'::timestamp as datetime_col "
                 "UNION SELECT '2022-01-01'::timestamp "
                 "UNION SELECT '2022-03-10'::timestamp "
                 "UNION SELECT '2023-01-01'::timestamp "
                 "UNION SELECT '2023-03-10'::timestamp "),
            database=get_example_database(),
        )
        TableColumn(
            column_name="datetime_col",
            type="TIMESTAMP",
            table=table,
            is_dttm=True,
        )
        SqlMetric(metric_name="count", expression="count(*)", table=table)
        result_object = table.query({
            "metrics": ["count"],
            "is_timeseries": False,
            "filter": [],
            "from_dttm": datetime(2022, 1, 1),
            "to_dttm": datetime(2023, 1, 1),
            "granularity": "datetime_col",
        })
        """ >>> result_object.query
                SELECT count(*) AS count
                FROM
                  (SELECT '2021-12-31'::timestamp as datetime_col
                   UNION SELECT '2022-01-01'::timestamp
                   UNION SELECT '2022-03-10'::timestamp
                   UNION SELECT '2023-01-01'::timestamp
                   UNION SELECT '2023-03-10'::timestamp) AS virtual_table
                WHERE datetime_col >= TO_TIMESTAMP('2022-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
                  AND datetime_col < TO_TIMESTAMP('2023-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
        """
        assert result_object.df.iloc[0]["count"] == 2
Code example #16
File: v0.py (project: dodopizza/superset)
def decode_dashboards(o: Dict[str, Any]) -> Any:
    """
    Function to be passed into json.loads as its object_hook parameter.
    Recreates dashboard objects from their JSON representation.
    """

    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    if "__Slice__" in o:
        return Slice(**o["__Slice__"])
    if "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    if "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    if "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")

    return o
Code example #17
def virtual_dataset():
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    dataset = SqlaTable(
        table_name="virtual_dataset",
        sql=
        ("SELECT 0 as col1, 'a' as col2, 1.0 as col3, NULL as col4, '2000-01-01 00:00:00' as col5 "
         "UNION ALL "
         "SELECT 1, 'b', 1.1, NULL, '2000-01-02 00:00:00' "
         "UNION ALL "
         "SELECT 2 as col1, 'c' as col2, 1.2, NULL, '2000-01-03 00:00:00' "
         "UNION ALL "
         "SELECT 3 as col1, 'd' as col2, 1.3, NULL, '2000-01-04 00:00:00' "
         "UNION ALL "
         "SELECT 4 as col1, 'e' as col2, 1.4, NULL, '2000-01-05 00:00:00' "
         "UNION ALL "
         "SELECT 5 as col1, 'f' as col2, 1.5, NULL, '2000-01-06 00:00:00' "
         "UNION ALL "
         "SELECT 6 as col1, 'g' as col2, 1.6, NULL, '2000-01-07 00:00:00' "
         "UNION ALL "
         "SELECT 7 as col1, 'h' as col2, 1.7, NULL, '2000-01-08 00:00:00' "
         "UNION ALL "
         "SELECT 8 as col1, 'i' as col2, 1.8, NULL, '2000-01-09 00:00:00' "
         "UNION ALL "
         "SELECT 9 as col1, 'j' as col2, 1.9, NULL, '2000-01-10 00:00:00' "),
        database=get_example_database(),
    )
    TableColumn(column_name="col1", type="INTEGER", table=dataset)
    TableColumn(column_name="col2", type="VARCHAR(255)", table=dataset)
    TableColumn(column_name="col3", type="DECIMAL(4,2)", table=dataset)
    TableColumn(column_name="col4", type="VARCHAR(255)", table=dataset)
    # Different database dialect datetime type is not consistent, so temporarily use varchar
    TableColumn(column_name="col5", type="VARCHAR(255)", table=dataset)

    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)

    yield dataset

    db.session.delete(dataset)
    db.session.commit()
Code example #18
def load_birth_names(only_metadata: bool = False,
                     force: bool = False,
                     sample: bool = False) -> None:
    """Loading birth name dataset from a zip file in the repo"""
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        load_data(tbl_name, database, sample=sample)

    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True
    obj.fetch_metadata()

    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()

    slices, _ = create_slices(obj)
    create_dashboard(slices)
Code example #19
def decode_dashboards(o):
    """
    Function to be passed into json.loads as its object_hook parameter.
    Recreates dashboard objects from their JSON representation.
    """
    import superset.models.core as models

    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    elif "__Slice__" in o:
        return Slice(**o["__Slice__"])
    elif "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    elif "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    elif "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    elif "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    else:
        return o
Code example #20
def _add_table_metrics(datasource: "BaseDatasource") -> None:
    if not any(col.column_name == "num_california"
               for col in datasource.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        datasource.columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    if not any(col.metric_name == "sum__num" for col in datasource.metrics):
        col = str(column("num").compile(db.engine))
        datasource.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    for col in datasource.columns:
        if col.column_name == "ds":
            col.is_dttm = True  # type: ignore
            break
Code example #21
def virtual_dataset_comma_in_column_value():
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    dataset = SqlaTable(
        table_name="virtual_dataset",
        sql=("SELECT 'col1,row1' as col1, 'col2, row1' as col2 "
             "UNION ALL "
             "SELECT 'col1,row2' as col1, 'col2, row2' as col2 "
             "UNION ALL "
             "SELECT 'col1,row3' as col1, 'col2, row3' as col2 "),
        database=get_example_database(),
    )
    TableColumn(column_name="col1", type="VARCHAR(255)", table=dataset)
    TableColumn(column_name="col2", type="VARCHAR(255)", table=dataset)

    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)

    yield dataset

    db.session.delete(dataset)
    db.session.commit()
Code example #22
    def create_table(self, name, schema="", id=0, cols_names=[], metric_names=[]):
        database_name = "main"
        name = "{0}{1}".format(NAME_PREFIX, name)
        params = {DBREF: id, "database_name": database_name}

        dict_rep = {
            "database_id": get_main_database().id,
            "table_name": name,
            "schema": schema,
            "id": id,
            "params": json.dumps(params),
            "columns": [{"column_name": c} for c in cols_names],
            "metrics": [{"metric_name": c, "expression": ""} for c in metric_names],
        }

        table = SqlaTable(
            id=id, schema=schema, table_name=name, params=json.dumps(params)
        )
        for col_name in cols_names:
            table.columns.append(TableColumn(column_name=col_name))
        for metric_name in metric_names:
            table.metrics.append(SqlMetric(metric_name=metric_name, expression=""))
        return table, dict_rep
Code example #23
def decode_dashboards(o):
    """
    Function to be passed into json.loads as its object_hook parameter.
    Recreates dashboard objects from their JSON representation.
    """
    import superset.models.core as models
    from superset import security_manager
    from superset.connectors.sqla.models import (
        SqlaTable,
        SqlMetric,
        TableColumn,
    )

    if '__Dashboard__' in o:
        return models.Dashboard(**o['__Dashboard__'])
    elif '__Slice__' in o:
        return models.Slice(**o['__Slice__'])
    elif '__TableColumn__' in o:
        return TableColumn(**o['__TableColumn__'])
    elif '__SqlaTable__' in o:
        return SqlaTable(**o['__SqlaTable__'])
    elif '__SqlMetric__' in o:
        return SqlMetric(**o['__SqlMetric__'])
    elif '__Database__' in o:
        return models.Database(**o['__Database__'])
    elif '__User__' in o:
        return security_manager.user_model(**o['__User__'])
    elif '__Role__' in o:
        return security_manager.role_model(**o['__Role__'])
    elif '__ViewMenu__' in o:
        return security_manager.viewmenu_model(**o['__ViewMenu__'])
    elif '__PermissionView__' in o:
        return security_manager.permissionview_model(**o['__PermissionView__'])
    elif '__datetime__' in o:
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    else:
        return o
Code example #24
def _create_energy_table():
    table = create_table_metadata(
        table_name=ENERGY_USAGE_TBL_NAME,
        database=get_example_database(),
        table_description="Energy consumption",
    )
    table.fetch_metadata()

    if not any(col.metric_name == "sum__value" for col in table.metrics):
        col = str(column("value").compile(db.engine))
        table.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})"))

    db.session.merge(table)
    db.session.commit()
    table.fetch_metadata()

    for slice_data in _get_energy_slices():
        _create_and_commit_energy_slice(
            table,
            slice_data["slice_title"],
            slice_data["viz_type"],
            slice_data["params"],
        )
Code example #25
def load_country_map_data(only_metadata: bool = False,
                          force: bool = False) -> None:
    """Loading data for map with country map"""
    tbl_name = "birth_france_by_region"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        csv_bytes = get_example_data("birth_france_data_for_country_map.csv",
                                     make_bytes=False)
        data = pd.read_csv(csv_bytes, encoding="utf-8")
        data["dttm"] = datetime.datetime.now().date()
        data.to_sql(  # pylint: disable=no-member
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "DEPT_ID": String(10),
                "2003": BigInteger,
                "2004": BigInteger,
                "2005": BigInteger,
                "2006": BigInteger,
                "2007": BigInteger,
                "2008": BigInteger,
                "2009": BigInteger,
                "2010": BigInteger,
                "2011": BigInteger,
                "2012": BigInteger,
                "2013": BigInteger,
                "2014": BigInteger,
                "dttm": Date(),
            },
            index=False,
        )
        print("Done loading table!")
        print("-" * 80)

    print("Creating table reference")
    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        obj = TBL(table_name=tbl_name)
    obj.main_dttm_col = "dttm"
    obj.database = database
    if not any(col.metric_name == "avg__2004" for col in obj.metrics):
        col = str(column("2004").compile(db.engine))
        obj.metrics.append(
            SqlMetric(metric_name="avg__2004", expression=f"AVG({col})"))
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        "granularity_sqla": "",
        "since": "",
        "until": "",
        "viz_type": "country_map",
        "entity": "DEPT_ID",
        "metric": {
            "expressionType": "SIMPLE",
            "column": {
                "type": "INT",
                "column_name": "2004"
            },
            "aggregate": "AVG",
            "label": "Boys",
            "optionName": "metric_112342",
        },
        "row_limit": 500000,
    }

    print("Creating a slice")
    slc = Slice(
        slice_name="Birth in France by department in 2016",
        viz_type="country_map",
        datasource_type="table",
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
Code example #26
def load_energy(only_metadata: bool = False,
                force: bool = False,
                sample: bool = False) -> None:
    """Loads an energy related dataset to use with sankey and graphs"""
    tbl_name = "energy_usage"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("energy.json.gz")
        pdf = pd.read_json(data)
        pdf = pdf.head(100) if sample else pdf
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "source": String(255),
                "target": String(255),
                "value": Float()
            },
            index=False,
            method="multi",
        )

    print("Creating table [wb_health_population] reference")
    table = get_table_connector_registry()
    tbl = db.session.query(table).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = table(table_name=tbl_name)
    tbl.description = "Energy consumption"
    tbl.database = database
    tbl.filter_select_enabled = True

    if not any(col.metric_name == "sum__value" for col in tbl.metrics):
        col = str(column("value").compile(db.engine))
        tbl.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})"))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    slc = Slice(
        slice_name="Energy Sankey",
        viz_type="sankey",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Energy Sankey",
            "viz_type": "sankey"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Energy Force Layout",
        viz_type="graph_chart",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "source": "source",
            "target": "target",
            "edgeLength": 400,
            "repulsion": 1000,
            "layout": "force",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Force",
            "viz_type": "graph_chart"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Heatmap",
        viz_type="heatmap",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "all_columns_x": "source",
            "all_columns_y": "target",
            "canvas_image_rendering": "pixelated",
            "collapsed_fieldsets": "",
            "linear_color_scheme": "blue_white_yellow",
            "metric": "sum__value",
            "normalize_across": "heatmap",
            "slice_name": "Heatmap",
            "viz_type": "heatmap",
            "xscale_interval": "1",
            "yscale_interval": "1"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
Code example #27
File: birth_names.py (project: MK0820/LaiZu)
def load_birth_names(only_metadata=False, force=False):
    """Loading birth name dataset from a zip file in the repo"""
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        pdf = pd.read_json(get_example_data("birth_names.json.gz"))
        pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "ds": DateTime,
                "gender": String(16),
                "state": String(10),
                "name": String(255),
            },
            index=False,
        )
        print("Done loading table!")
        print("-" * 80)

    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True

    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "ds",
        "groupby": [],
        "metric": "sum__num",
        "metrics": [{
            "expressionType": "SIMPLE",
            "column": {"column_name": "num", "type": "BIGINT"},
            "aggregate": "SUM",
            "label": "Births",
            "optionName": "metric_11",
        }],
        "row_limit": config.get("ROW_LIMIT"),
        "since": "100 years ago",
        "until": "now",
        "viz_type": "table",
        "where": "",
        "markup_type": "markdown",
    }

    admin = security_manager.find_user("admin")

    print("Creating some slices")
    slices = [
        Slice(
            slice_name="Girls",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["girl"]
                }],
                row_limit=50,
                timeseries_limit_metric="sum__num",
            ),
        ),
        Slice(
            slice_name="Boys",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["boy"]
                }],
                row_limit=50,
            ),
        ),
        Slice(
            slice_name="Participants",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number",
                granularity_sqla="ds",
                compare_lag="5",
                compare_suffix="over 5Y",
            ),
        ),
        Slice(
            slice_name="Genders",
            viz_type="pie",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="pie",
                                  groupby=["gender"]),
        ),
        Slice(
            slice_name="Genders by State",
            viz_type="dist_bar",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[{
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": ["other"],
                    "operator": "not in",
                    "subject": "state",
                }],
                viz_type="dist_bar",
                metrics=[
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_boys",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Boys",
                        "optionName": "metric_11",
                    },
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_girls",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Girls",
                        "optionName": "metric_12",
                    },
                ],
                groupby=["state"],
            ),
        ),
        Slice(
            slice_name="Trends",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                groupby=["name"],
                granularity_sqla="ds",
                rich_tooltip=True,
                show_legend=True,
            ),
        ),
        Slice(
            slice_name="Average and Sum Trends",
            viz_type="dual_line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="dual_line",
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num",
                        "type": "BIGINT(20)"
                    },
                    "aggregate": "AVG",
                    "label": "AVG(num)",
                    "optionName": "metric_vgops097wej_g8uff99zhk7",
                },
                metric_2="sum__num",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Title",
            viz_type="markup",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="markup",
                markup_type="html",
                code="""\
    <div style='text-align:center'>
        <h1>Birth Names Dashboard</h1>
        <p>
            The source dataset came from
            <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a>
        </p>
        <img src="/static/assets/images/babies.png">
    </div>
    """,
            ),
        ),
        Slice(
            slice_name="Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
            ),
        ),
        Slice(
            slice_name="Pivot Table",
            viz_type="pivot_table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults,
                                  viz_type="pivot_table",
                                  groupby=["name"],
                                  columns=["state"]),
        ),
        Slice(
            slice_name="Number of Girls",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number_total",
                granularity_sqla="ds",
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["girl"]
                }],
                subheader="total female participants",
            ),
        ),
        Slice(
            slice_name="Number of California Births",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                viz_type="big_number_total",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Top 10 California Names Timeseries",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=[{
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                }],
                viz_type="line",
                granularity_sqla="ds",
                groupby=["name"],
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                limit="10",
            ),
        ),
        Slice(
            slice_name="Names Sorted by Num in California",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                row_limit=50,
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
            ),
        ),
        Slice(
            slice_name="Num Births Trend",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="line"),
        ),
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
            ),
        ),
    ]
    for slc in slices:
        merge_slice(slc)

    print("Creating a dashboard")
    dash = db.session.query(Dash).filter_by(slug="births").first()

    if not dash:
        dash = Dash()
        db.session.add(dash)
    dash.published = True
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
{
    "CHART-0dd270f0": {
        "meta": {
            "chartId": 51,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-0dd270f0",
        "children": []
    },
    "CHART-a3c21bcc": {
        "meta": {
            "chartId": 52,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-a3c21bcc",
        "children": []
    },
    "CHART-976960a5": {
        "meta": {
            "chartId": 53,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-976960a5",
        "children": []
    },
    "CHART-58575537": {
        "meta": {
            "chartId": 54,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-58575537",
        "children": []
    },
    "CHART-e9cd8f0b": {
        "meta": {
            "chartId": 55,
            "width": 8,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-e9cd8f0b",
        "children": []
    },
    "CHART-e440d205": {
        "meta": {
            "chartId": 56,
            "width": 8,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e440d205",
        "children": []
    },
    "CHART-59444e0b": {
        "meta": {
            "chartId": 57,
            "width": 3,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-59444e0b",
        "children": []
    },
    "CHART-e2cb4997": {
        "meta": {
            "chartId": 59,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e2cb4997",
        "children": []
    },
    "CHART-e8774b49": {
        "meta": {
            "chartId": 60,
            "width": 12,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e8774b49",
        "children": []
    },
    "CHART-985bfd1e": {
        "meta": {
            "chartId": 61,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-985bfd1e",
        "children": []
    },
    "CHART-17f13246": {
        "meta": {
            "chartId": 62,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-17f13246",
        "children": []
    },
    "CHART-729324f6": {
        "meta": {
            "chartId": 63,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-729324f6",
        "children": []
    },
    "COLUMN-25a865d6": {
        "meta": {
            "width": 4,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-25a865d6",
        "children": [
            "ROW-cc97c6ac",
            "CHART-e2cb4997"
        ]
    },
    "COLUMN-4557b6ba": {
        "meta": {
            "width": 8,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-4557b6ba",
        "children": [
            "ROW-d2e78e59",
            "CHART-e9cd8f0b"
        ]
    },
    "GRID_ID": {
        "type": "GRID",
        "id": "GRID_ID",
        "children": [
            "ROW-8515ace3",
            "ROW-1890385f",
            "ROW-f0b64094",
            "ROW-be9526b8"
        ]
    },
    "HEADER_ID": {
        "meta": {
            "text": "Births"
        },
        "type": "HEADER",
        "id": "HEADER_ID"
    },
    "MARKDOWN-00178c27": {
        "meta": {
            "width": 5,
            "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babies.png\\" style=\\"width:55%;\\">\\n</div>\\n",
            "height": 38
        },
        "type": "MARKDOWN",
        "id": "MARKDOWN-00178c27",
        "children": []
    },
    "ROOT_ID": {
        "type": "ROOT",
        "id": "ROOT_ID",
        "children": [
            "GRID_ID"
        ]
    },
    "ROW-1890385f": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-1890385f",
        "children": [
            "CHART-e440d205",
            "CHART-0dd270f0",
            "CHART-a3c21bcc"
        ]
    },
    "ROW-8515ace3": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-8515ace3",
        "children": [
            "COLUMN-25a865d6",
            "COLUMN-4557b6ba"
        ]
    },
    "ROW-be9526b8": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-be9526b8",
        "children": [
            "CHART-985bfd1e",
            "CHART-17f13246",
            "CHART-729324f6"
        ]
    },
    "ROW-cc97c6ac": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-cc97c6ac",
        "children": [
            "CHART-976960a5",
            "CHART-58575537"
        ]
    },
    "ROW-d2e78e59": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-d2e78e59",
        "children": [
            "MARKDOWN-00178c27",
            "CHART-59444e0b"
        ]
    },
    "ROW-f0b64094": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-f0b64094",
        "children": [
            "CHART-e8774b49"
        ]
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
        """

        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = "Births"
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "births"
    db.session.commit()
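These example loaders are plain functions, so they can be driven from a script as well as from Superset's load_examples CLI command. A hedged sketch of a direct invocation; the create_app import path is an assumption that varies across Superset versions:

from superset.app import create_app  # import path assumed; differs by version

app = create_app()
with app.app_context():
    # Re-create the table and its slices even if the table already exists.
    load_birth_names(only_metadata=False, force=True)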
Code example #28
def load_world_bank_health_n_pop(only_metadata=False, force=False):  # pylint: disable=too-many-locals
    """Loads the world bank health dataset, slices and a dashboard"""
    tbl_name = "wb_health_population"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("countries.json.gz")
        pdf = pd.read_json(data)
        pdf.columns = [col.replace(".", "_") for col in pdf.columns]
        pdf.year = pd.to_datetime(pdf.year)
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=50,
            dtype={
                "year": DateTime(),
                "country_code": String(3),
                "country_name": String(255),
                "region": String(255),
            },
            index=False,
        )

    print("Creating table [wb_health_population] reference")
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = utils.readfile(
        os.path.join(EXAMPLES_FOLDER, "countries.md"))
    tbl.main_dttm_col = "year"
    tbl.database = database
    tbl.filter_select_enabled = True

    metrics = [
        "sum__SP_POP_TOTL",
        "sum__SH_DYN_AIDS",
        "sum__SP_RUR_TOTL_ZS",
        "sum__SP_DYN_LE00_IN",
        "sum__SP_RUR_TOTL",
    ]
    for metric in metrics:
        if not any(col.metric_name == metric for col in tbl.metrics):
            aggr_func = metric[:3]
            col = str(column(metric[5:]).compile(db.engine))
            tbl.metrics.append(
                SqlMetric(metric_name=metric,
                          expression=f"{aggr_func}({col})"))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "year",
        "groupby": [],
        "metric": "sum__SP_POP_TOTL",
        "metrics": ["sum__SP_POP_TOTL"],
        "row_limit": config["ROW_LIMIT"],
        "since": "2014-01-01",
        "until": "2014-01-02",
        "time_range": "2014-01-01 : 2014-01-02",
        "markup_type": "markdown",
        "country_fieldtype": "cca3",
        "secondary_metric": {
            "aggregate": "SUM",
            "column": {
                "column_name": "SP_RUR_TOTL",
                "optionName": "_col_SP_RUR_TOTL",
                "type": "DOUBLE",
            },
            "expressionType": "SIMPLE",
            "hasCustomLabel": True,
            "label": "Rural Population",
        },
        "entity": "country_code",
        "show_bubbles": True,
    }

    print("Creating slices")
    slices = [
        Slice(
            slice_name="Region Filter",
            viz_type="filter_box",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="filter_box",
                date_filter=False,
                filter_configs=[
                    {
                        "asc": False,
                        "clearable": True,
                        "column": "region",
                        "key": "2s98dfu",
                        "metric": "sum__SP_POP_TOTL",
                        "multiple": True,
                    },
                    {
                        "asc": False,
                        "clearable": True,
                        "key": "li3j2lk",
                        "column": "country_name",
                        "metric": "sum__SP_POP_TOTL",
                        "multiple": True,
                    },
                ],
            ),
        ),
        Slice(
            slice_name="World's Population",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="2000",
                viz_type="big_number",
                compare_lag="10",
                metric="sum__SP_POP_TOTL",
                compare_suffix="over 10Y",
            ),
        ),
        Slice(
            slice_name="Most Populated Countries",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="table",
                metrics=["sum__SP_POP_TOTL"],
                groupby=["country_name"],
            ),
        ),
        Slice(
            slice_name="Growth Rate",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                since="1960-01-01",
                metrics=["sum__SP_POP_TOTL"],
                num_period_compare="10",
                groupby=["country_name"],
            ),
        ),
        Slice(
            slice_name="% Rural",
            viz_type="world_map",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="world_map",
                metric="sum__SP_RUR_TOTL_ZS",
                num_period_compare="10",
            ),
        ),
        Slice(
            slice_name="Life Expectancy VS Rural %",
            viz_type="bubble",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="bubble",
                since="2011-01-01",
                until="2011-01-02",
                series="region",
                limit=0,
                entity="country_name",
                x="sum__SP_RUR_TOTL_ZS",
                y="sum__SP_DYN_LE00_IN",
                size="sum__SP_POP_TOTL",
                max_bubble_size="50",
                adhoc_filters=[{
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": [
                        "TCA",
                        "MNP",
                        "DMA",
                        "MHL",
                        "MCO",
                        "SXM",
                        "CYM",
                        "TUV",
                        "IMY",
                        "KNA",
                        "ASM",
                        "ADO",
                        "AMA",
                        "PLW",
                    ],
                    "operator": "not in",
                    "subject": "country_code",
                }],
            ),
        ),
        Slice(
            slice_name="Rural Breakdown",
            viz_type="sunburst",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="sunburst",
                groupby=["region", "country_name"],
                since="2011-01-01",
                until="2011-01-01",
            ),
        ),
        Slice(
            slice_name="World's Pop Growth",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                viz_type="area",
                groupby=["region"],
            ),
        ),
        Slice(
            slice_name="Box plot",
            viz_type="box_plot",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                whisker_options="Min/max (no outliers)",
                x_ticks_layout="staggered",
                viz_type="box_plot",
                groupby=["region"],
            ),
        ),
        Slice(
            slice_name="Treemap",
            viz_type="treemap",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                viz_type="treemap",
                metrics=["sum__SP_POP_TOTL"],
                groupby=["region", "country_code"],
            ),
        ),
        Slice(
            slice_name="Parallel Coordinates",
            viz_type="para",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="2011-01-01",
                until="2011-01-01",
                viz_type="para",
                limit=100,
                metrics=[
                    "sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS",
                    "sum__SH_DYN_AIDS"
                ],
                secondary_metric="sum__SP_POP_TOTL",
                series="country_name",
            ),
        ),
    ]
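    # the last slice (Parallel Coordinates) is collected for the misc
    # dashboard and excluded from this one (dash.slices = slices[:-1] below)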
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World Bank's Data"
    slug = "world_health"
    dash = db.session.query(Dashboard).filter_by(slug=slug).first()

    if not dash:
        dash = Dashboard()
    dash.published = True
    js = textwrap.dedent("""\
{
    "CHART-36bfc934": {
        "children": [],
        "id": "CHART-36bfc934",
        "meta": {
            "chartId": 40,
            "height": 25,
            "sliceName": "Region Filter",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-37982887": {
        "children": [],
        "id": "CHART-37982887",
        "meta": {
            "chartId": 41,
            "height": 25,
            "sliceName": "World's Population",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-17e0f8d8": {
        "children": [],
        "id": "CHART-17e0f8d8",
        "meta": {
            "chartId": 42,
            "height": 92,
            "sliceName": "Most Populated Countries",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-2ee52f30": {
        "children": [],
        "id": "CHART-2ee52f30",
        "meta": {
            "chartId": 43,
            "height": 38,
            "sliceName": "Growth Rate",
            "width": 6
        },
        "type": "CHART"
    },
    "CHART-2d5b6871": {
        "children": [],
        "id": "CHART-2d5b6871",
        "meta": {
            "chartId": 44,
            "height": 52,
            "sliceName": "% Rural",
            "width": 7
        },
        "type": "CHART"
    },
    "CHART-0fd0d252": {
        "children": [],
        "id": "CHART-0fd0d252",
        "meta": {
            "chartId": 45,
            "height": 50,
            "sliceName": "Life Expectancy VS Rural %",
            "width": 8
        },
        "type": "CHART"
    },
    "CHART-97f4cb48": {
        "children": [],
        "id": "CHART-97f4cb48",
        "meta": {
            "chartId": 46,
            "height": 38,
            "sliceName": "Rural Breakdown",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-b5e05d6f": {
        "children": [],
        "id": "CHART-b5e05d6f",
        "meta": {
            "chartId": 47,
            "height": 50,
            "sliceName": "World's Pop Growth",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-e76e9f5f": {
        "children": [],
        "id": "CHART-e76e9f5f",
        "meta": {
            "chartId": 48,
            "height": 50,
            "sliceName": "Box plot",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-a4808bba": {
        "children": [],
        "id": "CHART-a4808bba",
        "meta": {
            "chartId": 49,
            "height": 50,
            "sliceName": "Treemap",
            "width": 8
        },
        "type": "CHART"
    },
    "COLUMN-071bbbad": {
        "children": [
            "ROW-1e064e3c",
            "ROW-afdefba9"
        ],
        "id": "COLUMN-071bbbad",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 9
        },
        "type": "COLUMN"
    },
    "COLUMN-fe3914b8": {
        "children": [
            "CHART-36bfc934",
            "CHART-37982887"
        ],
        "id": "COLUMN-fe3914b8",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 2
        },
        "type": "COLUMN"
    },
    "GRID_ID": {
        "children": [
            "ROW-46632bc2",
            "ROW-3fa26c5d",
            "ROW-812b3f13"
        ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "HEADER_ID": {
        "id": "HEADER_ID",
        "meta": {
            "text": "World's Bank Data"
        },
        "type": "HEADER"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-1e064e3c": {
        "children": [
            "COLUMN-fe3914b8",
            "CHART-2d5b6871"
        ],
        "id": "ROW-1e064e3c",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-3fa26c5d": {
        "children": [
            "CHART-b5e05d6f",
            "CHART-0fd0d252"
        ],
        "id": "ROW-3fa26c5d",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-46632bc2": {
        "children": [
            "COLUMN-071bbbad",
            "CHART-17e0f8d8"
        ],
        "id": "ROW-46632bc2",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-812b3f13": {
        "children": [
            "CHART-a4808bba",
            "CHART-e76e9f5f"
        ],
        "id": "ROW-812b3f13",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-afdefba9": {
        "children": [
            "CHART-2ee52f30",
            "CHART-97f4cb48"
        ],
        "id": "ROW-afdefba9",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
    """)
    pos = json.loads(js)
    update_slice_ids(pos, slices)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug

    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
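
The loader above leans on helpers that this listing does not define:
get_slice_json, merge_slice, and update_slice_ids. The sketches below are
assumptions about their behavior, not the project's actual implementations,
and reuse the module-level json, db, and Slice names from the example:

def get_slice_json(defaults, **kwargs):
    """Overlay per-slice overrides onto the shared defaults and dump to JSON."""
    form_data = defaults.copy()
    form_data.update(kwargs)
    return json.dumps(form_data, indent=4, sort_keys=True)

def merge_slice(slc):
    """Upsert a slice by name: drop any existing slice, then persist slc."""
    existing = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first()
    if existing:
        db.session.delete(existing)
    db.session.add(slc)
    db.session.commit()

def update_slice_ids(layout, slices):
    """Repoint each CHART node's chartId at the matching persisted Slice."""
    charts = sorted(
        (node for node in layout.values()
         if isinstance(node, dict) and node.get("type") == "CHART"),
        key=lambda node: node["meta"]["chartId"],
    )
    for chart, slc in zip(charts, slices):
        chart["meta"]["chartId"] = int(slc.id)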
Code Example #29
def load_birth_names(only_metadata: bool = False,
                     force: bool = False,
                     sample: bool = False) -> None:
    """Loading birth name dataset from a zip file in the repo"""
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
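        # load_data (not shown here) fetches the example payload and writes
        # it to the database via to_sql, like the world-bank loader above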
        load_data(tbl_name, database, sample=sample)

    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True

    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    metrics = [{
        "expressionType": "SIMPLE",
        "column": {
            "column_name": "num",
            "type": "BIGINT"
        },
        "aggregate": "SUM",
        "label": "Births",
        "optionName": "metric_11",
    }]
    metric = "sum__num"

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "ds",
        "groupby": [],
        "row_limit": config["ROW_LIMIT"],
        "since": "100 years ago",
        "until": "now",
        "viz_type": "table",
        "markup_type": "markdown",
    }

    admin = security_manager.find_user("admin")

    print("Creating some slices")
    slices = [
        Slice(
            slice_name="Participants",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number",
                granularity_sqla="ds",
                compare_lag="5",
                compare_suffix="over 5Y",
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Genders",
            viz_type="pie",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults,
                                  viz_type="pie",
                                  groupby=["gender"],
                                  metric=metric),
        ),
        Slice(
            slice_name="Trends",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                groupby=["name"],
                granularity_sqla="ds",
                rich_tooltip=True,
                show_legend=True,
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Genders by State",
            viz_type="dist_bar",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[{
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": ["other"],
                    "operator": "NOT IN",
                    "subject": "state",
                }],
                viz_type="dist_bar",
                metrics=[
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_boys",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Boys",
                        "optionName": "metric_11",
                    },
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_girls",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Girls",
                        "optionName": "metric_12",
                    },
                ],
                groupby=["state"],
            ),
        ),
        Slice(
            slice_name="Girls",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                adhoc_filters=[gen_filter("gender", "girl")],
                row_limit=50,
                timeseries_limit_metric="sum__num",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Girl Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
                adhoc_filters=[gen_filter("gender", "girl")],
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Boys",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                adhoc_filters=[gen_filter("gender", "boy")],
                row_limit=50,
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Boy Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
                adhoc_filters=[gen_filter("gender", "boy")],
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Top 10 Girl Name Share",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[gen_filter("gender", "girl")],
                comparison_type="values",
                groupby=["name"],
                limit=10,
                stacked_style="expand",
                time_grain_sqla="P1D",
                viz_type="area",
                x_axis_format="smart_date",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Top 10 Boy Name Share",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[gen_filter("gender", "boy")],
                comparison_type="values",
                groupby=["name"],
                limit=10,
                stacked_style="expand",
                time_grain_sqla="P1D",
                viz_type="area",
                x_axis_format="smart_date",
                metrics=metrics,
            ),
        ),
    ]
    misc_slices = [
        Slice(
            slice_name="Average and Sum Trends",
            viz_type="dual_line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="dual_line",
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num",
                        "type": "BIGINT(20)"
                    },
                    "aggregate": "AVG",
                    "label": "AVG(num)",
                    "optionName": "metric_vgops097wej_g8uff99zhk7",
                },
                metric_2="sum__num",
                granularity_sqla="ds",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Num Births Trend",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="line", metrics=metrics),
        ),
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Number of California Births",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                viz_type="big_number_total",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Top 10 California Names Timeseries",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=[{
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                }],
                viz_type="line",
                granularity_sqla="ds",
                groupby=["name"],
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                limit="10",
            ),
        ),
        Slice(
            slice_name="Names Sorted by Num in California",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=metrics,
                groupby=["name"],
                row_limit=50,
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
            ),
        ),
        Slice(
            slice_name="Number of Girls",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric=metric,
                viz_type="big_number_total",
                granularity_sqla="ds",
                adhoc_filters=[gen_filter("gender", "girl")],
                subheader="total female participants",
            ),
        ),
        Slice(
            slice_name="Pivot Table",
            viz_type="pivot_table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="pivot_table",
                groupby=["name"],
                columns=["state"],
                metrics=metrics,
            ),
        ),
    ]
    for slc in slices:
        merge_slice(slc)

    for slc in misc_slices:
        merge_slice(slc)
        misc_dash_slices.add(slc.slice_name)

    print("Creating a dashboard")
    dash = db.session.query(Dashboard).filter_by(slug="births").first()

    if not dash:
        dash = Dashboard()
        db.session.add(dash)
    dash.published = True
    dash.json_metadata = textwrap.dedent("""\
    {
        "label_colors": {
            "Girls": "#FF69B4",
            "Boys": "#ADD8E6",
            "girl": "#FF69B4",
            "boy": "#ADD8E6"
        }
    }""")
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
        {
          "CHART-6GdlekVise": {
            "children": [],
            "id": "CHART-6GdlekVise",
            "meta": {
              "chartId": 5547,
              "height": 50,
              "sliceName": "Top 10 Girl Name Share",
              "width": 5
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-eh0w37bWbR"
            ],
            "type": "CHART"
          },
          "CHART-6n9jxb30JG": {
            "children": [],
            "id": "CHART-6n9jxb30JG",
            "meta": {
              "chartId": 5540,
              "height": 36,
              "sliceName": "Genders by State",
              "width": 5
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW--EyBZQlDi"
            ],
            "type": "CHART"
          },
          "CHART-Jj9qh1ol-N": {
            "children": [],
            "id": "CHART-Jj9qh1ol-N",
            "meta": {
              "chartId": 5545,
              "height": 50,
              "sliceName": "Boy Name Cloud",
              "width": 4
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-kzWtcvo8R1"
            ],
            "type": "CHART"
          },
          "CHART-ODvantb_bF": {
            "children": [],
            "id": "CHART-ODvantb_bF",
            "meta": {
              "chartId": 5548,
              "height": 50,
              "sliceName": "Top 10 Boy Name Share",
              "width": 5
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-kzWtcvo8R1"
            ],
            "type": "CHART"
          },
          "CHART-PAXUUqwmX9": {
            "children": [],
            "id": "CHART-PAXUUqwmX9",
            "meta": {
              "chartId": 5538,
              "height": 34,
              "sliceName": "Genders",
              "width": 3
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-2n0XgiHDgs"
            ],
            "type": "CHART"
          },
          "CHART-_T6n_K9iQN": {
            "children": [],
            "id": "CHART-_T6n_K9iQN",
            "meta": {
              "chartId": 5539,
              "height": 36,
              "sliceName": "Trends",
              "width": 7
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW--EyBZQlDi"
            ],
            "type": "CHART"
          },
          "CHART-eNY0tcE_ic": {
            "children": [],
            "id": "CHART-eNY0tcE_ic",
            "meta": {
              "chartId": 5537,
              "height": 34,
              "sliceName": "Participants",
              "width": 3
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-2n0XgiHDgs"
            ],
            "type": "CHART"
          },
          "CHART-g075mMgyYb": {
            "children": [],
            "id": "CHART-g075mMgyYb",
            "meta": {
              "chartId": 5541,
              "height": 50,
              "sliceName": "Girls",
              "width": 3
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-eh0w37bWbR"
            ],
            "type": "CHART"
          },
          "CHART-n-zGGE6S1y": {
            "children": [],
            "id": "CHART-n-zGGE6S1y",
            "meta": {
              "chartId": 5542,
              "height": 50,
              "sliceName": "Girl Name Cloud",
              "width": 4
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-eh0w37bWbR"
            ],
            "type": "CHART"
          },
          "CHART-vJIPjmcbD3": {
            "children": [],
            "id": "CHART-vJIPjmcbD3",
            "meta": {
              "chartId": 5543,
              "height": 50,
              "sliceName": "Boys",
              "width": 3
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-kzWtcvo8R1"
            ],
            "type": "CHART"
          },
          "DASHBOARD_VERSION_KEY": "v2",
          "GRID_ID": {
            "children": [
              "ROW-2n0XgiHDgs",
              "ROW--EyBZQlDi",
              "ROW-eh0w37bWbR",
              "ROW-kzWtcvo8R1"
            ],
            "id": "GRID_ID",
            "parents": [
              "ROOT_ID"
            ],
            "type": "GRID"
          },
          "HEADER_ID": {
            "id": "HEADER_ID",
            "meta": {
              "text": "Births"
            },
            "type": "HEADER"
          },
          "MARKDOWN-zaflB60tbC": {
            "children": [],
            "id": "MARKDOWN-zaflB60tbC",
            "meta": {
              "code": "<div style=\\"text-align:center\\">  <h1>Birth Names Dashboard</h1>  <img src=\\"/static/assets/images/babies.png\\" style=\\"width:50%;\\"></div>",
              "height": 34,
              "width": 6
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID",
              "ROW-2n0XgiHDgs"
            ],
            "type": "MARKDOWN"
          },
          "ROOT_ID": {
            "children": [
              "GRID_ID"
            ],
            "id": "ROOT_ID",
            "type": "ROOT"
          },
          "ROW--EyBZQlDi": {
            "children": [
              "CHART-_T6n_K9iQN",
              "CHART-6n9jxb30JG"
            ],
            "id": "ROW--EyBZQlDi",
            "meta": {
              "background": "BACKGROUND_TRANSPARENT"
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID"
            ],
            "type": "ROW"
          },
          "ROW-2n0XgiHDgs": {
            "children": [
              "CHART-eNY0tcE_ic",
              "MARKDOWN-zaflB60tbC",
              "CHART-PAXUUqwmX9"
            ],
            "id": "ROW-2n0XgiHDgs",
            "meta": {
              "background": "BACKGROUND_TRANSPARENT"
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID"
            ],
            "type": "ROW"
          },
          "ROW-eh0w37bWbR": {
            "children": [
              "CHART-g075mMgyYb",
              "CHART-n-zGGE6S1y",
              "CHART-6GdlekVise"
            ],
            "id": "ROW-eh0w37bWbR",
            "meta": {
              "background": "BACKGROUND_TRANSPARENT"
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID"
            ],
            "type": "ROW"
          },
          "ROW-kzWtcvo8R1": {
            "children": [
              "CHART-vJIPjmcbD3",
              "CHART-Jj9qh1ol-N",
              "CHART-ODvantb_bF"
            ],
            "id": "ROW-kzWtcvo8R1",
            "meta": {
              "background": "BACKGROUND_TRANSPARENT"
            },
            "parents": [
              "ROOT_ID",
              "GRID_ID"
            ],
            "type": "ROW"
          }
        }
        """

        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    # dashboard v2 doesn't allow adding markup slices
    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = "USA Births Names"
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "births"
    db.session.commit()
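
gen_filter, used throughout the slices above, builds a SIMPLE adhoc filter
clause. A minimal sketch, assuming equality as the default operator:

def gen_filter(subject, comparator, operator="=="):
    """Build a SIMPLE adhoc filter dict for a slice's params."""
    return {
        "clause": "WHERE",
        "comparator": comparator,
        "expressionType": "SIMPLE",
        "operator": operator,
        "subject": subject,
    }

For example, gen_filter("gender", "girl") produces the filter attached to
the "Girls" and "Girl Name Cloud" slices above.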
Code Example #30
def test_create_virtual_sqlatable(mocker: MockFixture, app_context: None,
                                  session: Session) -> None:
    """
    Test shadow write when creating a new ``SqlaTable``.

    When a new virtual ``SqlaTable`` is created, new models should also be created for
    ``Dataset`` and ``Column``.
    """
    # patch session
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)

    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    # create the ``Table`` that the virtual dataset points to
    database = Database(database_name="my_database",
                        sqlalchemy_uri="sqlite://")
    table = Table(
        name="some_table",
        schema="my_schema",
        catalog=None,
        database=database,
        columns=[
            Column(name="ds", is_temporal=True, type="TIMESTAMP"),
            Column(name="user_id", type="INTEGER"),
            Column(name="revenue", type="INTEGER"),
            Column(name="expenses", type="INTEGER"),
        ],
    )
    session.add(table)
    session.commit()

    # create virtual dataset
    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="user_id", type="INTEGER"),
        TableColumn(column_name="revenue", type="INTEGER"),
        TableColumn(column_name="expenses", type="INTEGER"),
        TableColumn(column_name="profit",
                    type="INTEGER",
                    expression="revenue-expenses"),
    ]
    metrics = [
        SqlMetric(metric_name="cnt", expression="COUNT(*)"),
    ]

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql="""
SELECT
  ds,
  user_id,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
        params=json.dumps({
            "remote_id": 64,
            "database_name": "examples",
            "import_time": 1606677834,
        }),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
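    # the flush fires the shadow-write logic that mirrors this SqlaTable
    # into the new Dataset and Column models asserted on below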
    session.flush()

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on", "uuid"}

    # check that columns were created
    column_schema = ColumnSchema()
    column_schemas = [
        {k: v for k, v in column_schema.dump(column).items() if k not in ignored_keys}
        for column in session.query(Column).all()
    ]
    assert column_schemas == [
        {
            "type": "TIMESTAMP",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": None,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "ds",
            "is_physical": True,
            "changed_by": None,
            "is_temporal": True,
            "id": 1,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": None,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "user_id",
            "is_physical": True,
            "changed_by": None,
            "is_temporal": False,
            "id": 2,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": None,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "revenue",
            "is_physical": True,
            "changed_by": None,
            "is_temporal": False,
            "id": 3,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": None,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "expenses",
            "is_physical": True,
            "changed_by": None,
            "is_temporal": False,
            "id": 4,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "TIMESTAMP",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "ds",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "ds",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": True,
            "id": 5,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "user_id",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "user_id",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 6,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "revenue",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "revenue",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 7,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "expenses",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "expenses",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 8,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "revenue-expenses",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "profit",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 9,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "Unknown",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "COUNT(*)",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "cnt",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 10,
            "is_aggregation": True,
            "external_url": None,
            "is_managed_externally": False,
        },
    ]

    # check that dataset was created, and has a reference to the table
    dataset_schema = DatasetSchema()
    datasets = [
        {k: v for k, v in dataset_schema.dump(dataset).items() if k not in ignored_keys}
        for dataset in session.query(Dataset).all()
    ]
    assert datasets == [{
        "id": 1,
        "sqlatable_id": 1,
        "name": "old_dataset",
        "changed_by": None,
        "created_by": None,
        "columns": [5, 6, 7, 8, 9, 10],
        "is_physical": False,
        "tables": [1],
        "extra_json": "{}",
        "external_url": None,
        "is_managed_externally": False,
        "expression": """
SELECT
  ds,
  user_id,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table""",
    }]
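
Both assertions above repeat the same "dump the model, drop volatile keys"
pattern. A small helper (hypothetical, not part of the test) would keep the
comparisons terse:

def dump_without(schema, obj, ignored_keys):
    """Serialize a model with its marshmallow schema, minus volatile keys."""
    return {k: v for k, v in schema.dump(obj).items() if k not in ignored_keys}

column_schemas = [
    dump_without(column_schema, column, ignored_keys)
    for column in session.query(Column).all()
]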