def create_table(self, name, schema=None, id=0, cols_names=None, metric_names=None):
    """
    Build an (unsaved) ``SqlaTable`` with the given columns and metrics.

    :param name: table name for the new ``SqlaTable``
    :param schema: optional schema name
    :param id: id stored on the table and as ``remote_id`` in ``params``
    :param cols_names: iterable of column names; each becomes a ``TableColumn``
    :param metric_names: iterable of metric names; each becomes a ``SqlMetric``
        with an empty expression
    :returns: the constructed ``SqlaTable`` (not added to any session)
    """
    # None defaults replace the former mutable ``[]`` defaults, which are
    # created once and shared across calls (classic Python pitfall).
    cols_names = cols_names if cols_names is not None else []
    metric_names = metric_names if metric_names is not None else []
    params = {"remote_id": id, "database_name": "examples"}
    table = SqlaTable(
        id=id, schema=schema, table_name=name, params=json.dumps(params))
    for col_name in cols_names:
        table.columns.append(TableColumn(column_name=col_name))
    for metric_name in metric_names:
        table.metrics.append(SqlMetric(metric_name=metric_name, expression=""))
    return table
def text_column_table():
    """
    Generator fixture yielding a virtual ``SqlaTable`` whose single ``foo``
    column contains assorted text edge cases: the plain value 'foo', an empty
    string, NULL, the literal string 'null', and values with embedded
    single/double quotes.
    """
    with app.app_context():
        table = SqlaTable(
            table_name="text_column_table",
            sql=("SELECT 'foo' as foo "
                 "UNION SELECT '' "
                 "UNION SELECT NULL "
                 "UNION SELECT 'null' "
                 "UNION SELECT '\"text in double quotes\"' "
                 "UNION SELECT '''text in single quotes''' "
                 "UNION SELECT 'double quotes \" in text' "
                 "UNION SELECT 'single quotes '' in text' "),
            database=get_example_database(),
        )
        # Passing table=table presumably attaches the column/metric to the
        # table through the ORM relationship back-reference, so no explicit
        # append is needed — TODO confirm against the model definitions.
        TableColumn(column_name="foo", type="VARCHAR(255)", table=table)
        SqlMetric(metric_name="count", expression="count(*)", table=table)
        yield table
def test_update_sqlatable_metric(mocker: MockFixture, app_context: None, session: Session) -> None: """ Test that updating a ``SqlaTable`` also updates the corresponding ``Dataset``. For this test we check that updating the SQL expression in a metric belonging to a ``SqlaTable`` is reflected in the ``Dataset`` metric. """ # patch session mocker.patch("superset.security.SupersetSecurityManager.get_session", return_value=session) from superset.columns.models import Column from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn from superset.datasets.models import Dataset from superset.models.core import Database from superset.tables.models import Table engine = session.get_bind() Dataset.metadata.create_all(engine) # pylint: disable=no-member columns = [ TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"), ] metrics = [ SqlMetric(metric_name="cnt", expression="COUNT(*)"), ] sqla_table = SqlaTable( table_name="old_dataset", columns=columns, metrics=metrics, database=Database(database_name="my_database", sqlalchemy_uri="sqlite://"), ) session.add(sqla_table) session.flush() # check that the metric was created column = session.query(Column).filter_by(is_physical=False).one() assert column.expression == "COUNT(*)" # change the metric definition sqla_table.metrics[0].expression = "MAX(ds)" session.flush() assert column.expression == "MAX(ds)"
def create_table(self, name, schema="", id=0, cols_names=None, cols_uuids=None,
                 metric_names=None):
    """
    Build an (unsaved) ``SqlaTable`` plus a plain-dict representation of it.

    :param name: base table name; prefixed with ``NAME_PREFIX``
    :param schema: schema name for the table
    :param id: id stored on the table and under ``DBREF`` in ``params``
    :param cols_names: iterable of column names
    :param cols_uuids: optional iterable of per-column uuids (defaults to
        ``None`` for every column)
    :param metric_names: iterable of metric names (empty expressions)
    :returns: ``(table, dict_rep)`` tuple
    """
    # None defaults replace mutable ``[]`` defaults shared across calls.
    cols_names = cols_names if cols_names is not None else []
    metric_names = metric_names if metric_names is not None else []
    database_name = "main"
    name = "{0}{1}".format(NAME_PREFIX, name)
    params = {DBREF: id, "database_name": database_name}
    if cols_uuids is None:
        cols_uuids = [None] * len(cols_names)
    dict_rep = {
        "database_id": get_example_database().id,
        "table_name": name,
        "schema": schema,
        "id": id,
        "params": json.dumps(params),
        "columns": [{
            "column_name": c,
            "uuid": u
        } for c, u in zip(cols_names, cols_uuids)],
        "metrics": [{
            "metric_name": c,
            "expression": ""
        } for c in metric_names],
    }
    table = SqlaTable(id=id, schema=schema, table_name=name,
                      params=json.dumps(params))
    for col_name, uuid in zip(cols_names, cols_uuids):
        table.columns.append(TableColumn(column_name=col_name, uuid=uuid))
    for metric_name in metric_names:
        table.metrics.append(SqlMetric(metric_name=metric_name, expression=""))
    return table, dict_rep
def create_table(self, name, schema='', id=0, cols_names=None, metric_names=None):
    """
    Build an (unsaved) ``SqlaTable`` with the given columns and metrics.

    :returns: the constructed ``SqlaTable`` (not added to any session)
    """
    # None defaults replace mutable ``[]`` defaults shared across calls.
    cols_names = cols_names if cols_names is not None else []
    metric_names = metric_names if metric_names is not None else []
    params = {'remote_id': id, 'database_name': 'main'}
    table = SqlaTable(
        id=id,
        schema=schema,
        table_name=name,
        params=json.dumps(params),
    )
    for col_name in cols_names:
        table.columns.append(TableColumn(column_name=col_name))
    for metric_name in metric_names:
        table.metrics.append(SqlMetric(metric_name=metric_name))
    return table
def create_table(self, name, schema='', id=0, cols_names=None, metric_names=None):
    """
    Build an (unsaved) ``SqlaTable`` plus a plain-dict representation of it.

    :returns: ``(table, dict_rep)`` tuple
    """
    # None defaults replace mutable ``[]`` defaults shared across calls.
    cols_names = cols_names if cols_names is not None else []
    metric_names = metric_names if metric_names is not None else []
    database_name = 'main'
    name = '{0}{1}'.format(NAME_PREFIX, name)
    params = {DBREF: id, 'database_name': database_name}
    dict_rep = {
        'database_id': get_main_database(db.session).id,
        'table_name': name,
        'schema': schema,
        'id': id,
        'params': json.dumps(params),
        'columns': [{
            'column_name': c
        } for c in cols_names],
        'metrics': [{
            'metric_name': c,
            'expression': ''
        } for c in metric_names],
    }
    table = SqlaTable(
        id=id,
        schema=schema,
        table_name=name,
        params=json.dumps(params),
    )
    for col_name in cols_names:
        table.columns.append(TableColumn(column_name=col_name))
    for metric_name in metric_names:
        table.metrics.append(SqlMetric(metric_name=metric_name, expression=''))
    return table, dict_rep
def test_should_generate_closed_and_open_time_filter_range():
    # The generated WHERE clause (see the sample query below) uses an
    # inclusive lower bound (>=) and an exclusive upper bound (<), so only
    # 2 of the 5 rows fall inside [2022-01-01, 2023-01-01).
    with app.app_context():
        if backend() != "postgresql":
            pytest.skip(
                f"{backend()} has different dialect for datetime column")
        table = SqlaTable(
            table_name="temporal_column_table",
            sql=("SELECT '2021-12-31'::timestamp as datetime_col "
                 "UNION SELECT '2022-01-01'::timestamp "
                 "UNION SELECT '2022-03-10'::timestamp "
                 "UNION SELECT '2023-01-01'::timestamp "
                 "UNION SELECT '2023-03-10'::timestamp "),
            database=get_example_database(),
        )
        TableColumn(
            column_name="datetime_col",
            type="TIMESTAMP",
            table=table,
            is_dttm=True,
        )
        SqlMetric(metric_name="count", expression="count(*)", table=table)
        result_object = table.query({
            "metrics": ["count"],
            "is_timeseries": False,
            "filter": [],
            "from_dttm": datetime(2022, 1, 1),
            "to_dttm": datetime(2023, 1, 1),
            "granularity": "datetime_col",
        })
        """
        >>> result_object.query
        SELECT count(*) AS count
        FROM (SELECT '2021-12-31'::timestamp as datetime_col
              UNION SELECT '2022-01-01'::timestamp
              UNION SELECT '2022-03-10'::timestamp
              UNION SELECT '2023-01-01'::timestamp
              UNION SELECT '2023-03-10'::timestamp) AS virtual_table
        WHERE datetime_col >= TO_TIMESTAMP('2022-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
          AND datetime_col < TO_TIMESTAMP('2023-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
        """
        assert result_object.df.iloc[0]["count"] == 2
def decode_dashboards(o: Dict[str, Any]) -> Any:
    """
    Function to be passed into json.loads obj_hook parameter
    Recreates the dashboard object from a json representation.
    """
    # (marker, builder) pairs checked in the same order as before; the
    # builders are lambdas so a class name is only resolved for the branch
    # actually taken.
    builders = (
        ("__Dashboard__", lambda kwargs: Dashboard(**kwargs)),
        ("__Slice__", lambda kwargs: Slice(**kwargs)),
        ("__TableColumn__", lambda kwargs: TableColumn(**kwargs)),
        ("__SqlaTable__", lambda kwargs: SqlaTable(**kwargs)),
        ("__SqlMetric__", lambda kwargs: SqlMetric(**kwargs)),
    )
    for marker, build in builders:
        if marker in o:
            return build(o[marker])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
def load_birth_names(only_metadata: bool = False, force: bool = False, sample: bool = False) -> None: """Loading birth name dataset from a zip file in the repo""" # pylint: disable=too-many-locals tbl_name = "birth_names" database = get_example_database() table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): load_data(tbl_name, database, sample=sample) obj = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not obj: print(f"Creating table [{tbl_name}] reference") obj = TBL(table_name=tbl_name) db.session.add(obj) obj.main_dttm_col = "ds" obj.database = database obj.filter_select_enabled = True obj.fetch_metadata() if not any(col.column_name == "num_california" for col in obj.columns): col_state = str(column("state").compile(db.engine)) col_num = str(column("num").compile(db.engine)) obj.columns.append( TableColumn( column_name="num_california", expression= f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END", )) if not any(col.metric_name == "sum__num" for col in obj.metrics): col = str(column("num").compile(db.engine)) obj.metrics.append( SqlMetric(metric_name="sum__num", expression=f"SUM({col})")) db.session.commit() slices, _ = create_slices(obj) create_dashboard(slices)
def virtual_dataset():
    """
    Generator fixture yielding a 10-row virtual (SQL-backed) dataset with an
    int, varchar, decimal, all-NULL, and datetime-as-varchar column plus a
    ``count`` metric; cleans the dataset up after the test.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    dataset = SqlaTable(
        table_name="virtual_dataset",
        sql=
        ("SELECT 0 as col1, 'a' as col2, 1.0 as col3, NULL as col4, '2000-01-01 00:00:00' as col5 "
         "UNION ALL "
         "SELECT 1, 'b', 1.1, NULL, '2000-01-02 00:00:00' "
         "UNION ALL "
         "SELECT 2 as col1, 'c' as col2, 1.2, NULL, '2000-01-03 00:00:00' "
         "UNION ALL "
         "SELECT 3 as col1, 'd' as col2, 1.3, NULL, '2000-01-04 00:00:00' "
         "UNION ALL "
         "SELECT 4 as col1, 'e' as col2, 1.4, NULL, '2000-01-05 00:00:00' "
         "UNION ALL "
         "SELECT 5 as col1, 'f' as col2, 1.5, NULL, '2000-01-06 00:00:00' "
         "UNION ALL "
         "SELECT 6 as col1, 'g' as col2, 1.6, NULL, '2000-01-07 00:00:00' "
         "UNION ALL "
         "SELECT 7 as col1, 'h' as col2, 1.7, NULL, '2000-01-08 00:00:00' "
         "UNION ALL "
         "SELECT 8 as col1, 'i' as col2, 1.8, NULL, '2000-01-09 00:00:00' "
         "UNION ALL "
         "SELECT 9 as col1, 'j' as col2, 1.9, NULL, '2000-01-10 00:00:00' "),
        database=get_example_database(),
    )
    TableColumn(column_name="col1", type="INTEGER", table=dataset)
    TableColumn(column_name="col2", type="VARCHAR(255)", table=dataset)
    TableColumn(column_name="col3", type="DECIMAL(4,2)", table=dataset)
    TableColumn(column_name="col4", type="VARCHAR(255)", table=dataset)
    # Different database dialect datetime type is not consistent, so temporarily use varchar
    TableColumn(column_name="col5", type="VARCHAR(255)", table=dataset)

    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)

    yield dataset

    db.session.delete(dataset)
    db.session.commit()
def decode_dashboards(o):
    """
    Function to be passed into json.loads obj_hook parameter
    Recreates the dashboard object from a json representation.
    """
    # The former "import superset.models.core as models" was unused (every
    # class is referenced directly) and has been removed; the elif chain is
    # flattened since each branch returns.
    if "__Dashboard__" in o:
        return Dashboard(**o["__Dashboard__"])
    if "__Slice__" in o:
        return Slice(**o["__Slice__"])
    if "__TableColumn__" in o:
        return TableColumn(**o["__TableColumn__"])
    if "__SqlaTable__" in o:
        return SqlaTable(**o["__SqlaTable__"])
    if "__SqlMetric__" in o:
        return SqlMetric(**o["__SqlMetric__"])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    # Not a recognized marker object: hand the dict back untouched.
    return o
def _add_table_metrics(datasource: "BaseDatasource") -> None:
    """
    Ensure the example datasource carries the derived ``num_california``
    column, the ``sum__num`` metric, and a temporal flag on its ``ds`` column.
    """
    existing_columns = {col.column_name for col in datasource.columns}
    if "num_california" not in existing_columns:
        # Compile via the engine so the names render for the active dialect.
        state_name = str(column("state").compile(db.engine))
        num_name = str(column("num").compile(db.engine))
        datasource.columns.append(
            TableColumn(
                column_name="num_california",
                expression=
                f"CASE WHEN {state_name} = 'CA' THEN {num_name} ELSE 0 END",
            ))

    existing_metrics = {metric.metric_name for metric in datasource.metrics}
    if "sum__num" not in existing_metrics:
        num_name = str(column("num").compile(db.engine))
        datasource.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({num_name})"))

    # Flag the first "ds" column (if any) as a datetime column.
    ds_column = next(
        (col for col in datasource.columns if col.column_name == "ds"), None)
    if ds_column is not None:
        ds_column.is_dttm = True  # type: ignore
def virtual_dataset_comma_in_column_value():
    """
    Generator fixture yielding a 3-row virtual dataset whose cell values
    themselves contain commas (to exercise comma-sensitive parsing paths);
    cleans the dataset up after the test.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    dataset = SqlaTable(
        table_name="virtual_dataset",
        sql=("SELECT 'col1,row1' as col1, 'col2, row1' as col2 "
             "UNION ALL "
             "SELECT 'col1,row2' as col1, 'col2, row2' as col2 "
             "UNION ALL "
             "SELECT 'col1,row3' as col1, 'col2, row3' as col2 "),
        database=get_example_database(),
    )
    TableColumn(column_name="col1", type="VARCHAR(255)", table=dataset)
    TableColumn(column_name="col2", type="VARCHAR(255)", table=dataset)

    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)

    yield dataset

    db.session.delete(dataset)
    db.session.commit()
def create_table(self, name, schema="", id=0, cols_names=None, metric_names=None):
    """
    Build an (unsaved) ``SqlaTable`` plus a plain-dict representation of it.

    :returns: ``(table, dict_rep)`` tuple
    """
    # None defaults replace mutable ``[]`` defaults shared across calls.
    cols_names = cols_names if cols_names is not None else []
    metric_names = metric_names if metric_names is not None else []
    database_name = "main"
    name = "{0}{1}".format(NAME_PREFIX, name)
    params = {DBREF: id, "database_name": database_name}
    dict_rep = {
        "database_id": get_main_database().id,
        "table_name": name,
        "schema": schema,
        "id": id,
        "params": json.dumps(params),
        "columns": [{"column_name": c} for c in cols_names],
        "metrics": [{"metric_name": c, "expression": ""} for c in metric_names],
    }
    table = SqlaTable(
        id=id, schema=schema, table_name=name, params=json.dumps(params)
    )
    for col_name in cols_names:
        table.columns.append(TableColumn(column_name=col_name))
    for metric_name in metric_names:
        table.metrics.append(SqlMetric(metric_name=metric_name, expression=""))
    return table, dict_rep
def decode_dashboards(o):
    """
    Function to be passed into json.loads obj_hook parameter
    Recreates the dashboard object from a json representation.
    """
    import superset.models.core as models
    from superset import security_manager
    from superset.connectors.sqla.models import (
        SqlaTable,
        SqlMetric,
        TableColumn,
    )

    # Marker -> constructor dispatch table; iteration order matches the
    # original elif chain (dicts preserve insertion order on Python 3.7+).
    constructors = {
        '__Dashboard__': models.Dashboard,
        '__Slice__': models.Slice,
        '__TableColumn__': TableColumn,
        '__SqlaTable__': SqlaTable,
        '__SqlMetric__': SqlMetric,
        '__Database__': models.Database,
        '__User__': security_manager.user_model,
        '__Role__': security_manager.role_model,
        '__ViewMenu__': security_manager.viewmenu_model,
        '__PermissionView__': security_manager.permissionview_model,
    }
    for marker, constructor in constructors.items():
        if marker in o:
            return constructor(**o[marker])
    if '__datetime__' in o:
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    return o
def decode_dashboards(o: Dict[str, Any]) -> Any:
    """
    Function to be passed into json.loads obj_hook parameter
    Recreates the dashboard object from a json representation.
    """
    # pylint: disable=import-outside-toplevel
    from superset.connectors.druid.models import (
        DruidCluster,
        DruidColumn,
        DruidDatasource,
        DruidMetric,
    )

    # Marker -> constructor dispatch table; iteration order matches the
    # original if-chain (dicts preserve insertion order on Python 3.7+).
    constructors = {
        "__Dashboard__": Dashboard,
        "__Slice__": Slice,
        "__TableColumn__": TableColumn,
        "__SqlaTable__": SqlaTable,
        "__SqlMetric__": SqlMetric,
        "__DruidCluster__": DruidCluster,
        "__DruidColumn__": DruidColumn,
        "__DruidDatasource__": DruidDatasource,
        "__DruidMetric__": DruidMetric,
    }
    for marker, constructor in constructors.items():
        if marker in o:
            return constructor(**o[marker])
    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
def _create_energy_table():
    # Register (or refresh) the energy usage example table, backfill its
    # sum__value metric, and (re)create its example slices.
    table = create_table_metadata(
        table_name=ENERGY_USAGE_TBL_NAME,
        database=get_example_database(),
        table_description="Energy consumption",
    )
    table.fetch_metadata()

    if not any(col.metric_name == "sum__value" for col in table.metrics):
        # Compile via the engine so the name renders for the active dialect.
        col = str(column("value").compile(db.engine))
        table.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})"))

    db.session.merge(table)
    db.session.commit()
    table.fetch_metadata()

    for slice_data in _get_energy_slices():
        _create_and_commit_energy_slice(
            table,
            slice_data["slice_title"],
            slice_data["viz_type"],
            slice_data["params"],
        )
def test_query_cache_key_changes_when_metric_is_updated(self):
    """
    The query cache key must change after a datasource metric is edited,
    so stale cached results are not served for the updated datasource.
    """
    self.login(username="******")
    payload = get_query_context("birth_names")

    # make temporary change and revert it to refresh the changed_on property
    datasource = ConnectorRegistry.get_datasource(
        datasource_type=payload["datasource"]["type"],
        datasource_id=payload["datasource"]["id"],
        session=db.session,
    )

    datasource.metrics.append(
        SqlMetric(metric_name="foo", expression="select 1;"))
    db.session.commit()

    # construct baseline query_cache_key
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    cache_key_original = query_context.query_cache_key(query_object)

    # wait a second since mysql records timestamps in second granularity
    time.sleep(1)

    datasource.metrics[0].expression = "select 2;"
    db.session.commit()

    # create new QueryContext with unchanged attributes, extract new query_cache_key
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    cache_key_new = query_context.query_cache_key(query_object)

    # revert the temporary metric change
    datasource.metrics = []
    db.session.commit()

    # the new cache_key should be different due to updated datasource
    self.assertNotEqual(cache_key_original, cache_key_new)
def decode_dashboards(o):
    """
    Function to be passed into json.loads obj_hook parameter
    Recreates the dashboard object from a json representation.
    """
    import superset.models.core as models
    from superset.connectors.sqla.models import (
        SqlaTable,
        SqlMetric,
        TableColumn,
    )

    def revive(model_cls, state):
        # Same mechanism as the original inline code: build an empty model
        # and restore attributes straight into __dict__, bypassing the
        # constructor's argument handling.
        instance = model_cls()
        instance.__dict__.update(state)
        return instance

    if '__Dashboard__' in o:
        return revive(models.Dashboard, o['__Dashboard__'])
    if '__Slice__' in o:
        return revive(models.Slice, o['__Slice__'])
    if '__TableColumn__' in o:
        return revive(TableColumn, o['__TableColumn__'])
    if '__SqlaTable__' in o:
        return revive(SqlaTable, o['__SqlaTable__'])
    if '__SqlMetric__' in o:
        return revive(SqlMetric, o['__SqlMetric__'])
    if '__datetime__' in o:
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    return o
def load_birth_names():
    """Loading birth name dataset from a zip file in the repo"""
    data = get_example_data('birth_names.json.gz')
    pdf = pd.read_json(data)
    # ds arrives as epoch milliseconds
    pdf.ds = pd.to_datetime(pdf.ds, unit='ms')
    pdf.to_sql('birth_names',
               db.engine,
               if_exists='replace',
               chunksize=500,
               dtype={
                   'ds': DateTime,
                   'gender': String(16),
                   'state': String(10),
                   'name': String(255),
               },
               index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table [birth_names] reference')
    obj = db.session.query(TBL).filter_by(table_name='birth_names').first()
    if not obj:
        obj = TBL(table_name='birth_names')
    obj.main_dttm_col = 'ds'
    obj.database = get_or_create_main_db()
    obj.filter_select_enabled = True
    # backfill the derived column/metric if missing
    if not any(col.column_name == 'num_california' for col in obj.columns):
        obj.columns.append(
            TableColumn(
                column_name='num_california',
                expression="CASE WHEN state = 'CA' THEN num ELSE 0 END",
            ))
    if not any(col.metric_name == 'sum__num' for col in obj.metrics):
        obj.metrics.append(
            SqlMetric(
                metric_name='sum__num',
                expression='SUM(num)',
            ))
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    # parameter defaults shared by all slices below
    defaults = {
        'compare_lag': '10',
        'compare_suffix': 'o10Y',
        'limit': '25',
        'granularity_sqla': 'ds',
        'groupby': [],
        'metric': 'sum__num',
        'metrics': ['sum__num'],
        'row_limit': config.get('ROW_LIMIT'),
        'since': '100 years ago',
        'until': 'now',
        'viz_type': 'table',
        'where': '',
        'markup_type': 'markdown',
    }

    admin = security_manager.find_user('admin')

    print('Creating some slices')
    slices = [
        Slice(slice_name='Girls',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    groupby=['name'],
                                    filters=[{
                                        'col': 'gender',
                                        'op': 'in',
                                        'val': ['girl'],
                                    }],
                                    row_limit=50,
                                    timeseries_limit_metric='sum__num')),
        Slice(slice_name='Boys',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    groupby=['name'],
                                    filters=[{
                                        'col': 'gender',
                                        'op': 'in',
                                        'val': ['boy'],
                                    }],
                                    row_limit=50)),
        Slice(slice_name='Participants',
              viz_type='big_number',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='big_number',
                                    granularity_sqla='ds',
                                    compare_lag='5',
                                    compare_suffix='over 5Y')),
        Slice(slice_name='Genders',
              viz_type='pie',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='pie',
                                    groupby=['gender'])),
        Slice(slice_name='Genders by State',
              viz_type='dist_bar',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    adhoc_filters=[
                                        {
                                            'clause': 'WHERE',
                                            'expressionType': 'SIMPLE',
                                            'filterOptionName': '2745eae5',
                                            'comparator': ['other'],
                                            'operator': 'not in',
                                            'subject': 'state',
                                        },
                                    ],
                                    viz_type='dist_bar',
                                    metrics=[
                                        {
                                            'expressionType': 'SIMPLE',
                                            'column': {
                                                'column_name': 'sum_boys',
                                                'type': 'BIGINT(20)',
                                            },
                                            'aggregate': 'SUM',
                                            'label': 'Boys',
                                            'optionName': 'metric_11',
                                        },
                                        {
                                            'expressionType': 'SIMPLE',
                                            'column': {
                                                'column_name': 'sum_girls',
                                                'type': 'BIGINT(20)',
                                            },
                                            'aggregate': 'SUM',
                                            'label': 'Girls',
                                            'optionName': 'metric_12',
                                        },
                                    ],
                                    groupby=['state'])),
        Slice(slice_name='Trends',
              viz_type='line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='line',
                                    groupby=['name'],
                                    granularity_sqla='ds',
                                    rich_tooltip=True,
                                    show_legend=True)),
        Slice(slice_name='Average and Sum Trends',
              viz_type='dual_line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='dual_line',
                                    metric={
                                        'expressionType': 'SIMPLE',
                                        'column': {
                                            'column_name': 'num',
                                            'type': 'BIGINT(20)',
                                        },
                                        'aggregate': 'AVG',
                                        'label': 'AVG(num)',
                                        'optionName':
                                        'metric_vgops097wej_g8uff99zhk7',
                                    },
                                    metric_2='sum__num',
                                    granularity_sqla='ds')),
        Slice(slice_name='Title',
              viz_type='markup',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='markup',
                                    markup_type='html',
                                    code="""\
<div style='text-align:center'>
    <h1>Birth Names Dashboard</h1>
    <p>
        The source dataset came from
        <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a>
    </p>
    <img src='/static/assets/images/babytux.jpg'>
</div>
""")),
        Slice(slice_name='Name Cloud',
              viz_type='word_cloud',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='word_cloud',
                                    size_from='10',
                                    series='name',
                                    size_to='70',
                                    rotation='square',
                                    limit='100')),
        Slice(slice_name='Pivot Table',
              viz_type='pivot_table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='pivot_table',
                                    metrics=['sum__num'],
                                    groupby=['name'],
                                    columns=['state'])),
        Slice(slice_name='Number of Girls',
              viz_type='big_number_total',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='big_number_total',
                                    granularity_sqla='ds',
                                    filters=[{
                                        'col': 'gender',
                                        'op': 'in',
                                        'val': ['girl'],
                                    }],
                                    subheader='total female participants')),
        Slice(slice_name='Number of California Births',
              viz_type='big_number_total',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(
                  defaults,
                  metric={
                      'expressionType': 'SIMPLE',
                      'column': {
                          'column_name': 'num_california',
                          'expression':
                          "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                      },
                      'aggregate': 'SUM',
                      'label': 'SUM(num_california)',
                  },
                  viz_type='big_number_total',
                  granularity_sqla='ds')),
        Slice(slice_name='Top 10 California Names Timeseries',
              viz_type='line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(
                  defaults,
                  metrics=[{
                      'expressionType': 'SIMPLE',
                      'column': {
                          'column_name': 'num_california',
                          'expression':
                          "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                      },
                      'aggregate': 'SUM',
                      'label': 'SUM(num_california)',
                  }],
                  viz_type='line',
                  granularity_sqla='ds',
                  groupby=['name'],
                  timeseries_limit_metric={
                      'expressionType': 'SIMPLE',
                      'column': {
                          'column_name': 'num_california',
                          'expression':
                          "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                      },
                      'aggregate': 'SUM',
                      'label': 'SUM(num_california)',
                  },
                  limit='10')),
        Slice(slice_name='Names Sorted by Num in California',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(
                  defaults,
                  groupby=['name'],
                  row_limit=50,
                  timeseries_limit_metric={
                      'expressionType': 'SIMPLE',
                      'column': {
                          'column_name': 'num_california',
                          'expression':
                          "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                      },
                      'aggregate': 'SUM',
                      'label': 'SUM(num_california)',
                  })),
        Slice(slice_name='Num Births Trend',
              viz_type='line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults, viz_type='line')),
        Slice(slice_name='Daily Totals',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              created_by=admin,
              params=get_slice_json(defaults,
                                    groupby=['ds'],
                                    since='40 years ago',
                                    until='now',
                                    viz_type='table')),
    ]
    for slc in slices:
        merge_slice(slc)

    print('Creating a dashboard')
    dash = db.session.query(Dash).filter_by(dashboard_title='Births').first()

    if not dash:
        dash = Dash()
    # Dashboard v2 position JSON; whitespace inside this literal is layout
    # only — it is parsed with json.loads and re-dumped below.
    js = textwrap.dedent("""\
{
    "CHART-0dd270f0": { "meta": { "chartId": 51, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-0dd270f0", "children": [] },
    "CHART-a3c21bcc": { "meta": { "chartId": 52, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-a3c21bcc", "children": [] },
    "CHART-976960a5": { "meta": { "chartId": 53, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-976960a5", "children": [] },
    "CHART-58575537": { "meta": { "chartId": 54, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-58575537", "children": [] },
    "CHART-e9cd8f0b": { "meta": { "chartId": 55, "width": 8, "height": 38 }, "type": "CHART", "id": "CHART-e9cd8f0b", "children": [] },
    "CHART-e440d205": { "meta": { "chartId": 56, "width": 8, "height": 50 }, "type": "CHART", "id": "CHART-e440d205", "children": [] },
    "CHART-59444e0b": { "meta": { "chartId": 57, "width": 3, "height": 38 }, "type": "CHART", "id": "CHART-59444e0b", "children": [] },
    "CHART-e2cb4997": { "meta": { "chartId": 59, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-e2cb4997", "children": [] },
    "CHART-e8774b49": { "meta": { "chartId": 60, "width": 12, "height": 50 }, "type": "CHART", "id": "CHART-e8774b49", "children": [] },
    "CHART-985bfd1e": { "meta": { "chartId": 61, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-985bfd1e", "children": [] },
    "CHART-17f13246": { "meta": { "chartId": 62, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-17f13246", "children": [] },
    "CHART-729324f6": { "meta": { "chartId": 63, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-729324f6", "children": [] },
    "COLUMN-25a865d6": { "meta": { "width": 4, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-25a865d6", "children": [ "ROW-cc97c6ac", "CHART-e2cb4997" ] },
    "COLUMN-4557b6ba": { "meta": { "width": 8, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-4557b6ba", "children": [ "ROW-d2e78e59", "CHART-e9cd8f0b" ] },
    "GRID_ID": { "type": "GRID", "id": "GRID_ID", "children": [ "ROW-8515ace3", "ROW-1890385f", "ROW-f0b64094", "ROW-be9526b8" ] },
    "HEADER_ID": { "meta": { "text": "Births" }, "type": "HEADER", "id": "HEADER_ID" },
    "MARKDOWN-00178c27": { "meta": { "width": 5, "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babytux.jpg\\">\\n</div>\\n", "height": 38 }, "type": "MARKDOWN", "id": "MARKDOWN-00178c27", "children": [] },
    "ROOT_ID": { "type": "ROOT", "id": "ROOT_ID", "children": [ "GRID_ID" ] },
    "ROW-1890385f": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-1890385f", "children": [ "CHART-e440d205", "CHART-0dd270f0", "CHART-a3c21bcc" ] },
    "ROW-8515ace3": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-8515ace3", "children": [ "COLUMN-25a865d6", "COLUMN-4557b6ba" ] },
    "ROW-be9526b8": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-be9526b8", "children": [ "CHART-985bfd1e", "CHART-17f13246", "CHART-729324f6" ] },
    "ROW-cc97c6ac": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-cc97c6ac", "children": [ "CHART-976960a5", "CHART-58575537" ] },
    "ROW-d2e78e59": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-d2e78e59", "children": [ "MARKDOWN-00178c27", "CHART-59444e0b" ] },
    "ROW-f0b64094": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-f0b64094", "children": [ "CHART-e8774b49" ] },
    "DASHBOARD_VERSION_KEY": "v2"
}
""")
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != 'markup']
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = 'Births'
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = 'births'
    db.session.merge(dash)
    db.session.commit()
def test_dataset_macro(mocker: MockFixture, app_context: None) -> None:
    """
    Test the ``dataset_macro`` macro.
    """
    # pylint: disable=import-outside-toplevel
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.models.core import Database

    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="num_boys", type="INTEGER"),
        TableColumn(column_name="revenue", type="INTEGER"),
        TableColumn(column_name="expenses", type="INTEGER"),
        TableColumn(column_name="profit",
                    type="INTEGER",
                    expression="revenue-expenses"),
    ]
    metrics = [
        SqlMetric(metric_name="cnt", expression="COUNT(*)"),
    ]

    dataset = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint=
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=Database(database_name="my_database",
                          sqlalchemy_uri="sqlite://"),
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql=None,
        params=json.dumps({
            "remote_id": 64,
            "database_name": "examples",
            "import_time": 1606677834,
        }),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    DatasetDAO = mocker.patch("superset.datasets.dao.DatasetDAO")
    DatasetDAO.find_by_id.return_value = dataset

    # NOTE(review): the expected-SQL literals below are reproduced exactly as
    # they appear in this (whitespace-collapsed) source; the generator may
    # emit multi-line SQL — confirm the literals against the actual output
    # of ``dataset_macro`` before relying on them.
    assert (dataset_macro(1) ==
            """(SELECT ds AS ds, num_boys AS num_boys, revenue AS revenue, expenses AS expenses, revenue-expenses AS profit FROM my_schema.old_dataset) AS dataset_1""")

    assert (dataset_macro(1, include_metrics=True) ==
            """(SELECT ds AS ds, num_boys AS num_boys, revenue AS revenue, expenses AS expenses, revenue-expenses AS profit, COUNT(*) AS cnt FROM my_schema.old_dataset GROUP BY ds, num_boys, revenue, expenses, revenue-expenses) AS dataset_1""")

    assert (dataset_macro(1, include_metrics=True, columns=["ds"]) ==
            """(SELECT ds AS ds, COUNT(*) AS cnt FROM my_schema.old_dataset GROUP BY ds) AS dataset_1""")

    DatasetDAO.find_by_id.return_value = None
    with pytest.raises(DatasetNotFoundError) as excinfo:
        dataset_macro(1)
    assert str(excinfo.value) == "Dataset 1 not found!"
def load_birth_names(only_metadata=False, force=False):
    """Loading birth name dataset from a zip file in the repo

    Loads the data into the examples database (unless it already exists and
    ``force`` is not set), registers/updates the ``birth_names`` table
    reference, creates a set of example slices and assembles them into the
    "Births" dashboard.
    """
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    # Physical data load is skipped when only metadata is requested, or when
    # the table already exists and no reload was forced.
    if not only_metadata and (not table_exists or force):
        pdf = pd.read_json(get_example_data("birth_names.json.gz"))
        # "ds" arrives as epoch milliseconds; convert to datetimes for the DB.
        pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "ds": DateTime,
                "gender": String(16),
                "state": String(10),
                "name": String(255),
            },
            index=False,
        )
        print("Done loading table!")
        print("-" * 80)

    # Create (or refresh) the Superset table reference.
    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True

    # Quote column names through the engine's dialect so the generated SQL
    # expressions are portable across backends.
    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    # Baseline form-data shared by all slices below; individual slices
    # override keys via get_slice_json(defaults, **overrides).
    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "ds",
        "groupby": [],
        "metric": "sum__num",
        "metrics": [{
            "expressionType": "SIMPLE",
            "column": {
                "column_name": "num",
                "type": "BIGINT"
            },
            "aggregate": "SUM",
            "label": "Births",
            "optionName": "metric_11",
        }],
        "row_limit": config.get("ROW_LIMIT"),
        "since": "100 years ago",
        "until": "now",
        "viz_type": "table",
        "where": "",
        "markup_type": "markdown",
    }

    admin = security_manager.find_user("admin")

    print("Creating some slices")
    slices = [
        Slice(
            slice_name="Girls",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["girl"]
                }],
                row_limit=50,
                timeseries_limit_metric="sum__num",
            ),
        ),
        Slice(
            slice_name="Boys",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["boy"]
                }],
                row_limit=50,
            ),
        ),
        Slice(
            slice_name="Participants",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number",
                granularity_sqla="ds",
                compare_lag="5",
                compare_suffix="over 5Y",
            ),
        ),
        Slice(
            slice_name="Genders",
            viz_type="pie",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="pie", groupby=["gender"]),
        ),
        Slice(
            slice_name="Genders by State",
            viz_type="dist_bar",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[{
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": ["other"],
                    "operator": "not in",
                    "subject": "state",
                }],
                viz_type="dist_bar",
                metrics=[
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_boys",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Boys",
                        "optionName": "metric_11",
                    },
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_girls",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Girls",
                        "optionName": "metric_12",
                    },
                ],
                groupby=["state"],
            ),
        ),
        Slice(
            slice_name="Trends",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                groupby=["name"],
                granularity_sqla="ds",
                rich_tooltip=True,
                show_legend=True,
            ),
        ),
        Slice(
            slice_name="Average and Sum Trends",
            viz_type="dual_line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="dual_line",
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num",
                        "type": "BIGINT(20)"
                    },
                    "aggregate": "AVG",
                    "label": "AVG(num)",
                    "optionName": "metric_vgops097wej_g8uff99zhk7",
                },
                metric_2="sum__num",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Title",
            viz_type="markup",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="markup",
                markup_type="html",
                code="""\
<div style='text-align:center'>
<h1>Birth Names Dashboard</h1>
<p>
The source dataset came from
<a href='https://github.com/hadley/babynames' target='_blank'>[here]</a>
</p>
<img src="/static/assets/images/babies.png">
</div>
""",
            ),
        ),
        Slice(
            slice_name="Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
            ),
        ),
        Slice(
            slice_name="Pivot Table",
            viz_type="pivot_table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults,
                                  viz_type="pivot_table",
                                  groupby=["name"],
                                  columns=["state"]),
        ),
        Slice(
            slice_name="Number of Girls",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number_total",
                granularity_sqla="ds",
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["girl"]
                }],
                subheader="total female participants",
            ),
        ),
        Slice(
            slice_name="Number of California Births",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                viz_type="big_number_total",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Top 10 California Names Timeseries",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=[{
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                }],
                viz_type="line",
                granularity_sqla="ds",
                groupby=["name"],
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                limit="10",
            ),
        ),
        Slice(
            slice_name="Names Sorted by Num in California",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                row_limit=50,
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
            ),
        ),
        Slice(
            slice_name="Num Births Trend",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="line"),
        ),
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
            ),
        ),
    ]
    for slc in slices:
        merge_slice(slc)

    print("Creating a dashboard")
    dash = db.session.query(Dash).filter_by(slug="births").first()
    if not dash:
        dash = Dash()
        db.session.add(dash)
    dash.published = True

    # Dashboard v2 position payload; parsed with json.loads below and
    # re-serialized, so only the JSON content matters, not its formatting.
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
{
    "CHART-0dd270f0": {
        "meta": { "chartId": 51, "width": 2, "height": 50 },
        "type": "CHART",
        "id": "CHART-0dd270f0",
        "children": []
    },
    "CHART-a3c21bcc": {
        "meta": { "chartId": 52, "width": 2, "height": 50 },
        "type": "CHART",
        "id": "CHART-a3c21bcc",
        "children": []
    },
    "CHART-976960a5": {
        "meta": { "chartId": 53, "width": 2, "height": 25 },
        "type": "CHART",
        "id": "CHART-976960a5",
        "children": []
    },
    "CHART-58575537": {
        "meta": { "chartId": 54, "width": 2, "height": 25 },
        "type": "CHART",
        "id": "CHART-58575537",
        "children": []
    },
    "CHART-e9cd8f0b": {
        "meta": { "chartId": 55, "width": 8, "height": 38 },
        "type": "CHART",
        "id": "CHART-e9cd8f0b",
        "children": []
    },
    "CHART-e440d205": {
        "meta": { "chartId": 56, "width": 8, "height": 50 },
        "type": "CHART",
        "id": "CHART-e440d205",
        "children": []
    },
    "CHART-59444e0b": {
        "meta": { "chartId": 57, "width": 3, "height": 38 },
        "type": "CHART",
        "id": "CHART-59444e0b",
        "children": []
    },
    "CHART-e2cb4997": {
        "meta": { "chartId": 59, "width": 4, "height": 50 },
        "type": "CHART",
        "id": "CHART-e2cb4997",
        "children": []
    },
    "CHART-e8774b49": {
        "meta": { "chartId": 60, "width": 12, "height": 50 },
        "type": "CHART",
        "id": "CHART-e8774b49",
        "children": []
    },
    "CHART-985bfd1e": {
        "meta": { "chartId": 61, "width": 4, "height": 50 },
        "type": "CHART",
        "id": "CHART-985bfd1e",
        "children": []
    },
    "CHART-17f13246": {
        "meta": { "chartId": 62, "width": 4, "height": 50 },
        "type": "CHART",
        "id": "CHART-17f13246",
        "children": []
    },
    "CHART-729324f6": {
        "meta": { "chartId": 63, "width": 4, "height": 50 },
        "type": "CHART",
        "id": "CHART-729324f6",
        "children": []
    },
    "COLUMN-25a865d6": {
        "meta": { "width": 4, "background": "BACKGROUND_TRANSPARENT" },
        "type": "COLUMN",
        "id": "COLUMN-25a865d6",
        "children": [ "ROW-cc97c6ac", "CHART-e2cb4997" ]
    },
    "COLUMN-4557b6ba": {
        "meta": { "width": 8, "background": "BACKGROUND_TRANSPARENT" },
        "type": "COLUMN",
        "id": "COLUMN-4557b6ba",
        "children": [ "ROW-d2e78e59", "CHART-e9cd8f0b" ]
    },
    "GRID_ID": {
        "type": "GRID",
        "id": "GRID_ID",
        "children": [ "ROW-8515ace3", "ROW-1890385f", "ROW-f0b64094", "ROW-be9526b8" ]
    },
    "HEADER_ID": {
        "meta": { "text": "Births" },
        "type": "HEADER",
        "id": "HEADER_ID"
    },
    "MARKDOWN-00178c27": {
        "meta": { "width": 5, "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babies.png\\" style=\\"width:55%;\\">\\n</div>\\n", "height": 38 },
        "type": "MARKDOWN",
        "id": "MARKDOWN-00178c27",
        "children": []
    },
    "ROOT_ID": {
        "type": "ROOT",
        "id": "ROOT_ID",
        "children": [ "GRID_ID" ]
    },
    "ROW-1890385f": {
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW",
        "id": "ROW-1890385f",
        "children": [ "CHART-e440d205", "CHART-0dd270f0", "CHART-a3c21bcc" ]
    },
    "ROW-8515ace3": {
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW",
        "id": "ROW-8515ace3",
        "children": [ "COLUMN-25a865d6", "COLUMN-4557b6ba" ]
    },
    "ROW-be9526b8": {
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW",
        "id": "ROW-be9526b8",
        "children": [ "CHART-985bfd1e", "CHART-17f13246", "CHART-729324f6" ]
    },
    "ROW-cc97c6ac": {
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW",
        "id": "ROW-cc97c6ac",
        "children": [ "CHART-976960a5", "CHART-58575537" ]
    },
    "ROW-d2e78e59": {
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW",
        "id": "ROW-d2e78e59",
        "children": [ "MARKDOWN-00178c27", "CHART-59444e0b" ]
    },
    "ROW-f0b64094": {
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW",
        "id": "ROW-f0b64094",
        "children": [ "CHART-e8774b49" ]
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
"""
        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = "Births"
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "births"
    db.session.commit()
def load_country_map_data(only_metadata: bool = False, force: bool = False) -> None: """Loading data for map with country map""" tbl_name = "birth_france_by_region" database = utils.get_example_database() table_exists = database.has_table_by_name(tbl_name) if not only_metadata and (not table_exists or force): csv_bytes = get_example_data("birth_france_data_for_country_map.csv", make_bytes=False) data = pd.read_csv(csv_bytes, encoding="utf-8") data["dttm"] = datetime.datetime.now().date() data.to_sql( # pylint: disable=no-member tbl_name, database.get_sqla_engine(), if_exists="replace", chunksize=500, dtype={ "DEPT_ID": String(10), "2003": BigInteger, "2004": BigInteger, "2005": BigInteger, "2006": BigInteger, "2007": BigInteger, "2008": BigInteger, "2009": BigInteger, "2010": BigInteger, "2011": BigInteger, "2012": BigInteger, "2013": BigInteger, "2014": BigInteger, "dttm": Date(), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table reference") obj = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not obj: obj = TBL(table_name=tbl_name) obj.main_dttm_col = "dttm" obj.database = database if not any(col.metric_name == "avg__2004" for col in obj.metrics): col = str(column("2004").compile(db.engine)) obj.metrics.append( SqlMetric(metric_name="avg__2004", expression=f"AVG({col})")) db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { "granularity_sqla": "", "since": "", "until": "", "viz_type": "country_map", "entity": "DEPT_ID", "metric": { "expressionType": "SIMPLE", "column": { "type": "INT", "column_name": "2004" }, "aggregate": "AVG", "label": "Boys", "optionName": "metric_112342", }, "row_limit": 500000, } print("Creating a slice") slc = Slice( slice_name="Birth in France by department in 2016", viz_type="country_map", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_world_bank_health_n_pop(  # pylint: disable=too-many-locals, too-many-statements
    only_metadata: bool = False,
    force: bool = False,
    sample: bool = False,
) -> None:
    """Loads the world bank health dataset, slices and a dashboard

    :param only_metadata: skip the physical data load, only (re)create the
        Superset table reference, slices and dashboard.
    :param force: reload the physical table even if it already exists.
    :param sample: only load the first 100 rows (for fast test setups).
    """
    tbl_name = "wb_health_population"
    database = superset.utils.database.get_example_database()
    engine = database.get_sqla_engine()
    schema = inspect(engine).default_schema_name
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("countries.json.gz")
        pdf = pd.read_json(data)
        # Dots in column names break SQL identifiers; normalize them.
        pdf.columns = [col.replace(".", "_") for col in pdf.columns]
        pdf.year = pd.to_datetime(pdf.year)
        if database.backend == "presto":
            # Presto gets the timestamps pre-formatted as strings (see dtype
            # below). BUG FIX: the format previously read "%H:%M%:%S" — the
            # stray "%" emitted a literal "%:S" instead of seconds.
            pdf.year = pdf.year.dt.strftime("%Y-%m-%d %H:%M:%S")
        pdf = pdf.head(100) if sample else pdf

        pdf.to_sql(
            tbl_name,
            engine,
            schema=schema,
            if_exists="replace",
            chunksize=50,
            dtype={
                # TODO(bkyryliuk): use TIMESTAMP type for presto
                "year": DateTime if database.backend != "presto" else String(255),
                "country_code": String(3),
                "country_name": String(255),
                "region": String(255),
            },
            method="multi",
            index=False,
        )

    print("Creating table [wb_health_population] reference")
    table = get_table_connector_registry()
    tbl = db.session.query(table).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = table(table_name=tbl_name, schema=schema)
    tbl.description = utils.readfile(
        os.path.join(get_examples_folder(), "countries.md")
    )
    tbl.main_dttm_col = "year"
    tbl.database = database
    tbl.filter_select_enabled = True

    # Metric names encode "<aggregate>__<column>"; the aggregate is the first
    # three characters and the column name starts after the "__" separator.
    # NOTE: the list previously contained "sum__SH_DYN_AIDS" twice; the
    # duplicate was a no-op (guarded by the any() check below) and was removed.
    metrics = [
        "sum__SP_POP_TOTL",
        "sum__SH_DYN_AIDS",
        "sum__SP_RUR_TOTL_ZS",
        "sum__SP_DYN_LE00_IN",
        "sum__SP_RUR_TOTL",
    ]
    for metric in metrics:
        if not any(col.metric_name == metric for col in tbl.metrics):
            aggr_func = metric[:3]
            col = str(column(metric[5:]).compile(db.engine))
            tbl.metrics.append(
                SqlMetric(metric_name=metric, expression=f"{aggr_func}({col})")
            )

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    slices = create_slices(tbl)
    # The last slice is intentionally kept off the dashboard and exposed on
    # the "misc" dashboard instead.
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World Bank's Data"
    slug = "world_health"
    dash = db.session.query(Dashboard).filter_by(slug=slug).first()

    if not dash:
        dash = Dashboard()
    dash.published = True
    pos = dashboard_positions
    update_slice_ids(pos, slices)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug

    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
def load_world_bank_health_n_pop(only_metadata=False, force=False):
    # pylint: disable=too-many-locals
    """Loads the world bank health dataset, slices and a dashboard

    :param only_metadata: skip the physical data load, only (re)create the
        Superset table reference, slices and dashboard.
    :param force: reload the physical table even if it already exists.
    """
    tbl_name = "wb_health_population"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("countries.json.gz")
        pdf = pd.read_json(data)
        # Dots in column names break SQL identifiers; normalize them.
        pdf.columns = [col.replace(".", "_") for col in pdf.columns]
        pdf.year = pd.to_datetime(pdf.year)
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=50,
            dtype={
                "year": DateTime(),
                "country_code": String(3),
                "country_name": String(255),
                "region": String(255),
            },
            index=False,
        )

    print("Creating table [wb_health_population] reference")
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = utils.readfile(
        os.path.join(EXAMPLES_FOLDER, "countries.md"))
    tbl.main_dttm_col = "year"
    tbl.database = database
    tbl.filter_select_enabled = True

    # Metric names encode "<aggregate>__<column>"; the aggregate is the first
    # three characters and the column name starts after the "__" separator.
    # NOTE: the list previously contained "sum__SH_DYN_AIDS" twice; the
    # duplicate was a no-op (guarded by the any() check below) and was removed.
    metrics = [
        "sum__SP_POP_TOTL",
        "sum__SH_DYN_AIDS",
        "sum__SP_RUR_TOTL_ZS",
        "sum__SP_DYN_LE00_IN",
        "sum__SP_RUR_TOTL",
    ]
    for metric in metrics:
        if not any(col.metric_name == metric for col in tbl.metrics):
            aggr_func = metric[:3]
            col = str(column(metric[5:]).compile(db.engine))
            tbl.metrics.append(
                SqlMetric(metric_name=metric, expression=f"{aggr_func}({col})"))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    # Baseline form-data shared by all slices below.
    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "year",
        "groupby": [],
        "metric": "sum__SP_POP_TOTL",
        "metrics": ["sum__SP_POP_TOTL"],
        "row_limit": config["ROW_LIMIT"],
        "since": "2014-01-01",
        "until": "2014-01-02",
        "time_range": "2014-01-01 : 2014-01-02",
        "markup_type": "markdown",
        "country_fieldtype": "cca3",
        "secondary_metric": {
            "aggregate": "SUM",
            "column": {
                "column_name": "SP_RUR_TOTL",
                "optionName": "_col_SP_RUR_TOTL",
                "type": "DOUBLE",
            },
            "expressionType": "SIMPLE",
            "hasCustomLabel": True,
            "label": "Rural Population",
        },
        "entity": "country_code",
        "show_bubbles": True,
    }

    print("Creating slices")
    slices = [
        Slice(
            slice_name="Region Filter",
            viz_type="filter_box",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="filter_box",
                date_filter=False,
                filter_configs=[
                    {
                        "asc": False,
                        "clearable": True,
                        "column": "region",
                        "key": "2s98dfu",
                        "metric": "sum__SP_POP_TOTL",
                        "multiple": True,
                    },
                    {
                        "asc": False,
                        "clearable": True,
                        "key": "li3j2lk",
                        "column": "country_name",
                        "metric": "sum__SP_POP_TOTL",
                        "multiple": True,
                    },
                ],
            ),
        ),
        Slice(
            slice_name="World's Population",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="2000",
                viz_type="big_number",
                compare_lag="10",
                metric="sum__SP_POP_TOTL",
                compare_suffix="over 10Y",
            ),
        ),
        Slice(
            slice_name="Most Populated Countries",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="table",
                metrics=["sum__SP_POP_TOTL"],
                groupby=["country_name"],
            ),
        ),
        Slice(
            slice_name="Growth Rate",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                since="1960-01-01",
                metrics=["sum__SP_POP_TOTL"],
                num_period_compare="10",
                groupby=["country_name"],
            ),
        ),
        Slice(
            slice_name="% Rural",
            viz_type="world_map",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="world_map",
                metric="sum__SP_RUR_TOTL_ZS",
                num_period_compare="10",
            ),
        ),
        Slice(
            slice_name="Life Expectancy VS Rural %",
            viz_type="bubble",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="bubble",
                since="2011-01-01",
                until="2011-01-02",
                series="region",
                limit=0,
                entity="country_name",
                x="sum__SP_RUR_TOTL_ZS",
                y="sum__SP_DYN_LE00_IN",
                size="sum__SP_POP_TOTL",
                max_bubble_size="50",
                # Exclude small territories that would clutter the chart.
                adhoc_filters=[{
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": [
                        "TCA",
                        "MNP",
                        "DMA",
                        "MHL",
                        "MCO",
                        "SXM",
                        "CYM",
                        "TUV",
                        "IMY",
                        "KNA",
                        "ASM",
                        "ADO",
                        "AMA",
                        "PLW",
                    ],
                    "operator": "not in",
                    "subject": "country_code",
                }],
            ),
        ),
        Slice(
            slice_name="Rural Breakdown",
            viz_type="sunburst",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="sunburst",
                groupby=["region", "country_name"],
                since="2011-01-01",
                until="2011-01-01",
            ),
        ),
        Slice(
            slice_name="World's Pop Growth",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                viz_type="area",
                groupby=["region"],
            ),
        ),
        Slice(
            slice_name="Box plot",
            viz_type="box_plot",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                whisker_options="Min/max (no outliers)",
                x_ticks_layout="staggered",
                viz_type="box_plot",
                groupby=["region"],
            ),
        ),
        Slice(
            slice_name="Treemap",
            viz_type="treemap",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                viz_type="treemap",
                metrics=["sum__SP_POP_TOTL"],
                groupby=["region", "country_code"],
            ),
        ),
        Slice(
            slice_name="Parallel Coordinates",
            viz_type="para",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="2011-01-01",
                until="2011-01-01",
                viz_type="para",
                limit=100,
                metrics=[
                    "sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS", "sum__SH_DYN_AIDS"
                ],
                secondary_metric="sum__SP_POP_TOTL",
                series="country_name",
            ),
        ),
    ]
    # The last slice is intentionally kept off the dashboard and exposed on
    # the "misc" dashboard instead.
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World Bank's Data"
    slug = "world_health"
    dash = db.session.query(Dashboard).filter_by(slug=slug).first()

    if not dash:
        dash = Dashboard()
    dash.published = True

    # Dashboard v2 position payload; parsed with json.loads below and
    # re-serialized, so only the JSON content matters, not its formatting.
    js = textwrap.dedent("""\
{
    "CHART-36bfc934": {
        "children": [],
        "id": "CHART-36bfc934",
        "meta": { "chartId": 40, "height": 25, "sliceName": "Region Filter", "width": 2 },
        "type": "CHART"
    },
    "CHART-37982887": {
        "children": [],
        "id": "CHART-37982887",
        "meta": { "chartId": 41, "height": 25, "sliceName": "World's Population", "width": 2 },
        "type": "CHART"
    },
    "CHART-17e0f8d8": {
        "children": [],
        "id": "CHART-17e0f8d8",
        "meta": { "chartId": 42, "height": 92, "sliceName": "Most Populated Countries", "width": 3 },
        "type": "CHART"
    },
    "CHART-2ee52f30": {
        "children": [],
        "id": "CHART-2ee52f30",
        "meta": { "chartId": 43, "height": 38, "sliceName": "Growth Rate", "width": 6 },
        "type": "CHART"
    },
    "CHART-2d5b6871": {
        "children": [],
        "id": "CHART-2d5b6871",
        "meta": { "chartId": 44, "height": 52, "sliceName": "% Rural", "width": 7 },
        "type": "CHART"
    },
    "CHART-0fd0d252": {
        "children": [],
        "id": "CHART-0fd0d252",
        "meta": { "chartId": 45, "height": 50, "sliceName": "Life Expectancy VS Rural %", "width": 8 },
        "type": "CHART"
    },
    "CHART-97f4cb48": {
        "children": [],
        "id": "CHART-97f4cb48",
        "meta": { "chartId": 46, "height": 38, "sliceName": "Rural Breakdown", "width": 3 },
        "type": "CHART"
    },
    "CHART-b5e05d6f": {
        "children": [],
        "id": "CHART-b5e05d6f",
        "meta": { "chartId": 47, "height": 50, "sliceName": "World's Pop Growth", "width": 4 },
        "type": "CHART"
    },
    "CHART-e76e9f5f": {
        "children": [],
        "id": "CHART-e76e9f5f",
        "meta": { "chartId": 48, "height": 50, "sliceName": "Box plot", "width": 4 },
        "type": "CHART"
    },
    "CHART-a4808bba": {
        "children": [],
        "id": "CHART-a4808bba",
        "meta": { "chartId": 49, "height": 50, "sliceName": "Treemap", "width": 8 },
        "type": "CHART"
    },
    "COLUMN-071bbbad": {
        "children": [ "ROW-1e064e3c", "ROW-afdefba9" ],
        "id": "COLUMN-071bbbad",
        "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 9 },
        "type": "COLUMN"
    },
    "COLUMN-fe3914b8": {
        "children": [ "CHART-36bfc934", "CHART-37982887" ],
        "id": "COLUMN-fe3914b8",
        "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 2 },
        "type": "COLUMN"
    },
    "GRID_ID": {
        "children": [ "ROW-46632bc2", "ROW-3fa26c5d", "ROW-812b3f13" ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "HEADER_ID": {
        "id": "HEADER_ID",
        "meta": { "text": "World's Bank Data" },
        "type": "HEADER"
    },
    "ROOT_ID": {
        "children": [ "GRID_ID" ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-1e064e3c": {
        "children": [ "COLUMN-fe3914b8", "CHART-2d5b6871" ],
        "id": "ROW-1e064e3c",
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW"
    },
    "ROW-3fa26c5d": {
        "children": [ "CHART-b5e05d6f", "CHART-0fd0d252" ],
        "id": "ROW-3fa26c5d",
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW"
    },
    "ROW-46632bc2": {
        "children": [ "COLUMN-071bbbad", "CHART-17e0f8d8" ],
        "id": "ROW-46632bc2",
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW"
    },
    "ROW-812b3f13": {
        "children": [ "CHART-a4808bba", "CHART-e76e9f5f" ],
        "id": "ROW-812b3f13",
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW"
    },
    "ROW-afdefba9": {
        "children": [ "CHART-2ee52f30", "CHART-97f4cb48" ],
        "id": "ROW-afdefba9",
        "meta": { "background": "BACKGROUND_TRANSPARENT" },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
""")
    pos = json.loads(js)
    update_slice_ids(pos, slices)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug

    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
def load_energy(only_metadata: bool = False, force: bool = False, sample: bool = False) -> None:
    """Loads an energy related dataset to use with sankey and graphs

    :param only_metadata: skip the physical data load, only (re)create the
        Superset table reference and slices.
    :param force: reload the physical table even if it already exists.
    :param sample: only load the first 100 rows (for fast test setups).
    """
    tbl_name = "energy_usage"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("energy.json.gz")
        pdf = pd.read_json(data)
        pdf = pdf.head(100) if sample else pdf
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "source": String(255),
                "target": String(255),
                "value": Float()
            },
            index=False,
            method="multi",
        )

    # BUG FIX: this used to print "[wb_health_population]", copied from the
    # world-bank loader; report the table actually being created.
    print(f"Creating table [{tbl_name}] reference")
    table = get_table_connector_registry()
    tbl = db.session.query(table).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = table(table_name=tbl_name)
    tbl.description = "Energy consumption"
    tbl.database = database
    tbl.filter_select_enabled = True

    # Quote the column name via the dialect so the metric SQL is portable.
    if not any(col.metric_name == "sum__value" for col in tbl.metrics):
        col = str(column("value").compile(db.engine))
        tbl.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})"))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    slc = Slice(
        slice_name="Energy Sankey",
        viz_type="sankey",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Energy Sankey",
            "viz_type": "sankey"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Energy Force Layout",
        viz_type="graph_chart",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "source": "source",
            "target": "target",
            "edgeLength": 400,
            "repulsion": 1000,
            "layout": "force",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Force",
            "viz_type": "graph_chart"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Heatmap",
        viz_type="heatmap",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "all_columns_x": "source",
            "all_columns_y": "target",
            "canvas_image_rendering": "pixelated",
            "collapsed_fieldsets": "",
            "linear_color_scheme": "blue_white_yellow",
            "metric": "sum__value",
            "normalize_across": "heatmap",
            "slice_name": "Heatmap",
            "viz_type": "heatmap",
            "xscale_interval": "1",
            "yscale_interval": "1"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def test_create_virtual_sqlatable(mocker: MockFixture, app_context: None,
                                  session: Session) -> None:
    """
    Test shadow write when creating a new ``SqlaTable``.

    When a new virtual ``SqlaTable`` is created, new models should also be created
    for ``Dataset`` and ``Column``.
    """
    # patch session
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)

    # pylint: disable=import-outside-toplevel
    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    # create the ``Table`` that the virtual dataset points to
    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    table = Table(
        name="some_table",
        schema="my_schema",
        catalog=None,
        database=database,
        columns=[
            Column(name="ds", is_temporal=True, type="TIMESTAMP"),
            Column(name="user_id", type="INTEGER"),
            Column(name="revenue", type="INTEGER"),
            Column(name="expenses", type="INTEGER"),
        ],
    )
    session.add(table)
    session.commit()

    # create virtual dataset; "profit" is a virtual column on top of the
    # physical ones, and "cnt" a metric.
    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="user_id", type="INTEGER"),
        TableColumn(column_name="revenue", type="INTEGER"),
        TableColumn(column_name="expenses", type="INTEGER"),
        TableColumn(column_name="profit", type="INTEGER", expression="revenue-expenses"),
    ]
    metrics = [
        SqlMetric(metric_name="cnt", expression="COUNT(*)"),
    ]

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql="""
SELECT ds, user_id, revenue, expenses, revenue - expenses AS profit FROM some_table""",
        params=json.dumps({
            "remote_id": 64,
            "database_name": "examples",
            "import_time": 1606677834,
        }),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    session.flush()

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on", "uuid"}

    # check that columns were created
    column_schema = ColumnSchema()
    column_schemas = [{
        k: v
        for k, v in column_schema.dump(column).items() if k not in ignored_keys
    } for column in session.query(Column).all()]
    # ids 1-4 are the physical ``Table`` columns created above; ids 5-10 are
    # the shadow copies for the virtual dataset (``is_physical`` False),
    # including the virtual "profit" column and the "cnt" metric.
    assert column_schemas == [
        {
            "type": "TIMESTAMP",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": None,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "ds",
            "is_physical": True,
            "changed_by": None,
            "is_temporal": True,
            "id": 1,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": None,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "user_id",
            "is_physical": True,
            "changed_by": None,
            "is_temporal": False,
            "id": 2,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": None,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "revenue",
            "is_physical": True,
            "changed_by": None,
            "is_temporal": False,
            "id": 3,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": None,
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "expenses",
            "is_physical": True,
            "changed_by": None,
            "is_temporal": False,
            "id": 4,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "TIMESTAMP",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "ds",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "ds",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": True,
            "id": 5,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "user_id",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "user_id",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 6,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "revenue",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "revenue",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 7,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "expenses",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "expenses",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 8,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "INTEGER",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "revenue-expenses",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "profit",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 9,
            "is_aggregation": False,
            "external_url": None,
            "is_managed_externally": False,
        },
        {
            "type": "Unknown",
            "is_additive": False,
            "extra_json": "{}",
            "is_partition": False,
            "expression": "COUNT(*)",
            "unit": None,
            "warning_text": None,
            "created_by": None,
            "is_increase_desired": True,
            "description": None,
            "is_spatial": False,
            "name": "cnt",
            "is_physical": False,
            "changed_by": None,
            "is_temporal": False,
            "id": 10,
            "is_aggregation": True,
            "external_url": None,
            "is_managed_externally": False,
        },
    ]

    # check that dataset was created, and has a reference to the table
    dataset_schema = DatasetSchema()
    datasets = [{
        k: v
        for k, v in dataset_schema.dump(dataset).items() if k not in ignored_keys
    } for dataset in session.query(Dataset).all()]
    assert datasets == [{
        "id": 1,
        "sqlatable_id": 1,
        "name": "old_dataset",
        "changed_by": None,
        "created_by": None,
        "columns": [5, 6, 7, 8, 9, 10],
        "is_physical": False,
        "tables": [1],
        "extra_json": "{}",
        "external_url": None,
        "is_managed_externally": False,
        "expression": """
SELECT ds, user_id, revenue, expenses, revenue - expenses AS profit FROM some_table""",
    }]
def test_dataset_attributes(app_context: None, session: Session) -> None:
    """
    Snapshot the attribute set of a freshly inserted ``SqlaTable``.

    A failure here means attributes were added to (or removed from)
    ``SqlaTable``, in which case ``SqlaTable.after_insert`` must be
    revisited to handle the change!
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.models.core import Database

    bind = session.get_bind()
    SqlaTable.metadata.create_all(bind)  # pylint: disable=no-member

    # (column_name, extra TableColumn kwargs) specs — "profit" is the only
    # expression-backed (virtual) column.
    column_specs = [
        ("ds", {"is_dttm": 1, "type": "TIMESTAMP"}),
        ("user_id", {"type": "INTEGER"}),
        ("revenue", {"type": "INTEGER"}),
        ("expenses", {"type": "INTEGER"}),
        ("profit", {"type": "INTEGER", "expression": "revenue-expenses"}),
    ]
    table_columns = [
        TableColumn(column_name=name, **extra) for name, extra in column_specs
    ]
    table_metrics = [SqlMetric(metric_name="cnt", expression="COUNT(*)")]

    # Populate as many SqlaTable attributes as possible so the snapshot below
    # reflects the full column set.
    table = SqlaTable(
        table_name="old_dataset",
        columns=table_columns,
        metrics=table_metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=Database(database_name="my_database", sqlalchemy_uri="sqlite://"),
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql=None,
        params=json.dumps(
            {
                "remote_id": 64,
                "database_name": "examples",
                "import_time": 1606677834,
            }
        ),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(table)
    session.flush()

    stored = session.query(SqlaTable).one()

    # If this assertion fails because attributes changed, make sure to update
    # ``SqlaTable.after_insert`` accordingly.
    assert sorted(stored.__dict__.keys()) == [
        "_sa_instance_state",
        "cache_timeout",
        "changed_by_fk",
        "changed_on",
        "columns",
        "created_by_fk",
        "created_on",
        "database",
        "database_id",
        "default_endpoint",
        "description",
        "external_url",
        "extra",
        "fetch_values_predicate",
        "filter_select_enabled",
        "id",
        "is_featured",
        "is_managed_externally",
        "is_sqllab_view",
        "main_dttm_col",
        "metrics",
        "offset",
        "params",
        "perm",
        "schema",
        "schema_perm",
        "sql",
        "table_name",
        "template_params",
        "uuid",
    ]
def load_birth_names(
    only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
    """
    Load the birth-names example dataset and build its slices and dashboard.

    :param only_metadata: when True, skip (re)loading the physical data and
        only create/refresh the table, slice, and dashboard metadata.
    :param force: reload the data even if the physical table already exists.
    :param sample: load only a sample of the data (forwarded to ``load_data``).
    """
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        load_data(tbl_name, database, sample=sample)

    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True

    # Add the virtual column/metric only once; identifiers are run through the
    # engine's compiler so quoting is correct for the example database dialect.
    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            )
        )

    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    # Shared adhoc metric / defaults reused across the slice definitions below.
    metrics = [
        {
            "expressionType": "SIMPLE",
            "column": {"column_name": "num", "type": "BIGINT"},
            "aggregate": "SUM",
            "label": "Births",
            "optionName": "metric_11",
        }
    ]
    metric = "sum__num"

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "ds",
        "groupby": [],
        "row_limit": config["ROW_LIMIT"],
        "since": "100 years ago",
        "until": "now",
        "viz_type": "table",
        "markup_type": "markdown",
    }

    admin = security_manager.find_user("admin")

    print("Creating some slices")
    # Slices placed on the "births" dashboard.
    slices = [
        Slice(
            slice_name="Participants",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number",
                granularity_sqla="ds",
                compare_lag="5",
                compare_suffix="over 5Y",
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Genders",
            viz_type="pie",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults, viz_type="pie", groupby=["gender"], metric=metric
            ),
        ),
        Slice(
            slice_name="Trends",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                groupby=["name"],
                granularity_sqla="ds",
                rich_tooltip=True,
                show_legend=True,
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Genders by State",
            viz_type="dist_bar",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[
                    {
                        "clause": "WHERE",
                        "expressionType": "SIMPLE",
                        "filterOptionName": "2745eae5",
                        "comparator": ["other"],
                        "operator": "NOT IN",
                        "subject": "state",
                    }
                ],
                viz_type="dist_bar",
                metrics=[
                    {
                        "expressionType": "SIMPLE",
                        "column": {"column_name": "sum_boys", "type": "BIGINT(20)"},
                        "aggregate": "SUM",
                        "label": "Boys",
                        "optionName": "metric_11",
                    },
                    {
                        "expressionType": "SIMPLE",
                        "column": {"column_name": "sum_girls", "type": "BIGINT(20)"},
                        "aggregate": "SUM",
                        "label": "Girls",
                        "optionName": "metric_12",
                    },
                ],
                groupby=["state"],
            ),
        ),
        Slice(
            slice_name="Girls",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                adhoc_filters=[gen_filter("gender", "girl")],
                row_limit=50,
                timeseries_limit_metric="sum__num",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Girl Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
                adhoc_filters=[gen_filter("gender", "girl")],
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Boys",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                adhoc_filters=[gen_filter("gender", "boy")],
                row_limit=50,
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Boy Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
                adhoc_filters=[gen_filter("gender", "boy")],
                metric=metric,
            ),
        ),
        Slice(
            slice_name="Top 10 Girl Name Share",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[gen_filter("gender", "girl")],
                comparison_type="values",
                groupby=["name"],
                limit=10,
                stacked_style="expand",
                time_grain_sqla="P1D",
                viz_type="area",
                # fixed typo: was ``x_axis_forma``, which the chart ignored
                x_axis_format="smart_date",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Top 10 Boy Name Share",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[gen_filter("gender", "boy")],
                comparison_type="values",
                groupby=["name"],
                limit=10,
                stacked_style="expand",
                time_grain_sqla="P1D",
                viz_type="area",
                # fixed typo: was ``x_axis_forma``, which the chart ignored
                x_axis_format="smart_date",
                metrics=metrics,
            ),
        ),
    ]
    # Extra slices that are created but not placed on the dashboard; they are
    # registered in ``misc_dash_slices`` instead.
    misc_slices = [
        Slice(
            slice_name="Average and Sum Trends",
            viz_type="dual_line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="dual_line",
                metric={
                    "expressionType": "SIMPLE",
                    "column": {"column_name": "num", "type": "BIGINT(20)"},
                    "aggregate": "AVG",
                    "label": "AVG(num)",
                    "optionName": "metric_vgops097wej_g8uff99zhk7",
                },
                metric_2="sum__num",
                granularity_sqla="ds",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Num Births Trend",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="line", metrics=metrics),
        ),
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
                metrics=metrics,
            ),
        ),
        Slice(
            slice_name="Number of California Births",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                viz_type="big_number_total",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Top 10 California Names Timeseries",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=[
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "num_california",
                            "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                        },
                        "aggregate": "SUM",
                        "label": "SUM(num_california)",
                    }
                ],
                viz_type="line",
                granularity_sqla="ds",
                groupby=["name"],
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                limit="10",
            ),
        ),
        Slice(
            slice_name="Names Sorted by Num in California",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=metrics,
                groupby=["name"],
                row_limit=50,
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
            ),
        ),
        Slice(
            slice_name="Number of Girls",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric=metric,
                viz_type="big_number_total",
                granularity_sqla="ds",
                adhoc_filters=[gen_filter("gender", "girl")],
                subheader="total female participants",
            ),
        ),
        Slice(
            slice_name="Pivot Table",
            viz_type="pivot_table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="pivot_table",
                groupby=["name"],
                columns=["state"],
                metrics=metrics,
            ),
        ),
    ]
    for slc in slices:
        merge_slice(slc)

    for slc in misc_slices:
        merge_slice(slc)
        misc_dash_slices.add(slc.slice_name)

    print("Creating a dashboard")
    dash = db.session.query(Dashboard).filter_by(slug="births").first()

    if not dash:
        dash = Dashboard()
        db.session.add(dash)
    dash.published = True
    dash.json_metadata = textwrap.dedent(
        """\
    {
        "label_colors": {
            "Girls": "#FF69B4",
            "Boys": "#ADD8E6",
            "girl": "#FF69B4",
            "boy": "#ADD8E6"
        }
    }"""
    )
    # Dashboard v2 position JSON describing the grid layout; parsed below so
    # ``update_slice_ids`` can patch in the real chart ids.
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
{
    "CHART-6GdlekVise": {
        "children": [],
        "id": "CHART-6GdlekVise",
        "meta": {
            "chartId": 5547,
            "height": 50,
            "sliceName": "Top 10 Girl Name Share",
            "width": 5
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-eh0w37bWbR"
        ],
        "type": "CHART"
    },
    "CHART-6n9jxb30JG": {
        "children": [],
        "id": "CHART-6n9jxb30JG",
        "meta": {
            "chartId": 5540,
            "height": 36,
            "sliceName": "Genders by State",
            "width": 5
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW--EyBZQlDi"
        ],
        "type": "CHART"
    },
    "CHART-Jj9qh1ol-N": {
        "children": [],
        "id": "CHART-Jj9qh1ol-N",
        "meta": {
            "chartId": 5545,
            "height": 50,
            "sliceName": "Boy Name Cloud",
            "width": 4
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-kzWtcvo8R1"
        ],
        "type": "CHART"
    },
    "CHART-ODvantb_bF": {
        "children": [],
        "id": "CHART-ODvantb_bF",
        "meta": {
            "chartId": 5548,
            "height": 50,
            "sliceName": "Top 10 Boy Name Share",
            "width": 5
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-kzWtcvo8R1"
        ],
        "type": "CHART"
    },
    "CHART-PAXUUqwmX9": {
        "children": [],
        "id": "CHART-PAXUUqwmX9",
        "meta": {
            "chartId": 5538,
            "height": 34,
            "sliceName": "Genders",
            "width": 3
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-2n0XgiHDgs"
        ],
        "type": "CHART"
    },
    "CHART-_T6n_K9iQN": {
        "children": [],
        "id": "CHART-_T6n_K9iQN",
        "meta": {
            "chartId": 5539,
            "height": 36,
            "sliceName": "Trends",
            "width": 7
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW--EyBZQlDi"
        ],
        "type": "CHART"
    },
    "CHART-eNY0tcE_ic": {
        "children": [],
        "id": "CHART-eNY0tcE_ic",
        "meta": {
            "chartId": 5537,
            "height": 34,
            "sliceName": "Participants",
            "width": 3
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-2n0XgiHDgs"
        ],
        "type": "CHART"
    },
    "CHART-g075mMgyYb": {
        "children": [],
        "id": "CHART-g075mMgyYb",
        "meta": {
            "chartId": 5541,
            "height": 50,
            "sliceName": "Girls",
            "width": 3
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-eh0w37bWbR"
        ],
        "type": "CHART"
    },
    "CHART-n-zGGE6S1y": {
        "children": [],
        "id": "CHART-n-zGGE6S1y",
        "meta": {
            "chartId": 5542,
            "height": 50,
            "sliceName": "Girl Name Cloud",
            "width": 4
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-eh0w37bWbR"
        ],
        "type": "CHART"
    },
    "CHART-vJIPjmcbD3": {
        "children": [],
        "id": "CHART-vJIPjmcbD3",
        "meta": {
            "chartId": 5543,
            "height": 50,
            "sliceName": "Boys",
            "width": 3
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-kzWtcvo8R1"
        ],
        "type": "CHART"
    },
    "DASHBOARD_VERSION_KEY": "v2",
    "GRID_ID": {
        "children": [
            "ROW-2n0XgiHDgs",
            "ROW--EyBZQlDi",
            "ROW-eh0w37bWbR",
            "ROW-kzWtcvo8R1"
        ],
        "id": "GRID_ID",
        "parents": [
            "ROOT_ID"
        ],
        "type": "GRID"
    },
    "HEADER_ID": {
        "id": "HEADER_ID",
        "meta": {
            "text": "Births"
        },
        "type": "HEADER"
    },
    "MARKDOWN-zaflB60tbC": {
        "children": [],
        "id": "MARKDOWN-zaflB60tbC",
        "meta": {
            "code": "<div style=\\"text-align:center\\"> <h1>Birth Names Dashboard</h1> <img src=\\"/static/assets/images/babies.png\\" style=\\"width:50%;\\"></div>",
            "height": 34,
            "width": 6
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID",
            "ROW-2n0XgiHDgs"
        ],
        "type": "MARKDOWN"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW--EyBZQlDi": {
        "children": [
            "CHART-_T6n_K9iQN",
            "CHART-6n9jxb30JG"
        ],
        "id": "ROW--EyBZQlDi",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID"
        ],
        "type": "ROW"
    },
    "ROW-2n0XgiHDgs": {
        "children": [
            "CHART-eNY0tcE_ic",
            "MARKDOWN-zaflB60tbC",
            "CHART-PAXUUqwmX9"
        ],
        "id": "ROW-2n0XgiHDgs",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID"
        ],
        "type": "ROW"
    },
    "ROW-eh0w37bWbR": {
        "children": [
            "CHART-g075mMgyYb",
            "CHART-n-zGGE6S1y",
            "CHART-6GdlekVise"
        ],
        "id": "ROW-eh0w37bWbR",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID"
        ],
        "type": "ROW"
    },
    "ROW-kzWtcvo8R1": {
        "children": [
            "CHART-vJIPjmcbD3",
            "CHART-Jj9qh1ol-N",
            "CHART-ODvantb_bF"
        ],
        "id": "ROW-kzWtcvo8R1",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "parents": [
            "ROOT_ID",
            "GRID_ID"
        ],
        "type": "ROW"
    }
}
        """
        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = "USA Births Names"
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "births"
    db.session.commit()
def load_country_map_data():
    """Loading data for map with country map

    Loads the French births-by-department CSV into the examples database,
    registers/refreshes the table reference with an ``avg__2004`` metric,
    and creates a ``country_map`` slice on top of it.
    """
    # Fetch the raw CSV bytes bundled with the examples package.
    csv_bytes = get_example_data(
        'birth_france_data_for_country_map.csv', is_gzip=False, make_bytes=True)
    data = pd.read_csv(csv_bytes, encoding='utf-8')
    # Stamp every row with today's date so the slice has a time column.
    data['dttm'] = datetime.datetime.now().date()
    data.to_sql(  # pylint: disable=no-member
        'birth_france_by_region',
        db.engine,
        if_exists='replace',  # full reload on every run
        chunksize=500,
        dtype={
            'DEPT_ID': String(10),
            '2003': BigInteger,
            '2004': BigInteger,
            '2005': BigInteger,
            '2006': BigInteger,
            '2007': BigInteger,
            '2008': BigInteger,
            '2009': BigInteger,
            '2010': BigInteger,
            '2011': BigInteger,
            '2012': BigInteger,
            '2013': BigInteger,
            '2014': BigInteger,
            'dttm': Date(),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table reference')
    # Reuse an existing table reference when present; otherwise create one.
    obj = db.session.query(TBL).filter_by(table_name='birth_france_by_region').first()
    if not obj:
        obj = TBL(table_name='birth_france_by_region')
    obj.main_dttm_col = 'dttm'
    obj.database = utils.get_or_create_main_db()
    # Add the avg__2004 metric only once (idempotent across reruns); compile
    # the column through the engine so the identifier is quoted per dialect.
    if not any(col.metric_name == 'avg__2004' for col in obj.metrics):
        col = str(column('2004').compile(db.engine))
        obj.metrics.append(SqlMetric(
            metric_name='avg__2004',
            expression=f'AVG({col})',
        ))
    # merge() attaches the (possibly detached/new) object before committing;
    # fetch_metadata() then syncs columns from the freshly loaded table.
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': '',
        'since': '',
        'until': '',
        'where': '',
        'viz_type': 'country_map',
        'entity': 'DEPT_ID',
        'metric': {
            'expressionType': 'SIMPLE',
            'column': {
                'type': 'INT',
                'column_name': '2004',
            },
            'aggregate': 'AVG',
            # NOTE(review): 'Boys' looks like a copy-paste leftover from the
            # birth-names example — label should probably describe AVG(2004).
            'label': 'Boys',
            'optionName': 'metric_112342',
        },
        'row_limit': 500000,
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Birth in France by department in 2016',
        viz_type='country_map',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)