def sample_metrics() -> Dict["SqlMetric", Dict[str, Any]]:
    """
    Build two sample ``SqlMetric`` objects, each mapped to a dict of attributes.

    The returned dict is keyed by the metric instances themselves. Each value
    holds the metric name, its expression, a JSON string carrying the metric
    type, and a fixed set of flags.

    NOTE(review): the value dicts look like the representation a test expects
    each metric to be converted into; confirm against the callers.
    """
    # Imported inside the function, as the original did.
    from superset.connectors.sqla.models import SqlMetric

    count_metric = SqlMetric(
        metric_name="cnt", expression="COUNT(*)", metric_type="COUNT"
    )
    count_attributes = {
        "name": "cnt",
        "expression": "COUNT(*)",
        "extra_json": '{"metric_type": "COUNT"}',
        "type": "UNKNOWN",
        "is_additive": True,
        "is_aggregation": True,
        "is_filterable": False,
        "is_physical": False,
    }

    average_metric = SqlMetric(
        metric_name="avg revenue", expression="AVG(revenue)", metric_type="AVG"
    )
    average_attributes = {
        "name": "avg revenue",
        "expression": "AVG(revenue)",
        "extra_json": '{"metric_type": "AVG"}',
        "type": "UNKNOWN",
        # The only flag that differs from the COUNT metric above.
        "is_additive": False,
        "is_aggregation": True,
        "is_filterable": False,
        "is_physical": False,
    }

    return {
        count_metric: count_attributes,
        average_metric: average_attributes,
    }
def create_table(self, name, schema='', id=0, cols_names=[], metric_names=[]):
    """
    Build a ``SqlaTable`` together with a plain-dict description of it.

    The table name is ``NAME_PREFIX`` followed by ``name``. One
    ``TableColumn`` is attached per entry of ``cols_names`` and one
    ``SqlMetric`` per entry of ``metric_names``. Nothing is added to the
    session here.

    :param name: Table name suffix; ``NAME_PREFIX`` is prepended to it.
    :param schema: Schema stored on the table and in the dict.
    :param id: Used as the table id and as the ``DBREF`` entry of the params.
    :param cols_names: Column names to attach (only read, never mutated).
    :param metric_names: Metric names to attach (only read, never mutated).
    :returns: ``(table, dict_rep)`` tuple.
    """
    prefixed_name = '{0}{1}'.format(NAME_PREFIX, name)
    main_database_id = get_main_database(db.session).id
    # Both the dict and the model carry the same JSON-encoded params.
    serialized_params = json.dumps({DBREF: id, 'database_name': 'main'})

    dict_rep = {
        'database_id': main_database_id,
        'table_name': prefixed_name,
        'schema': schema,
        'id': id,
        'params': serialized_params,
        'columns': [{'column_name': column_name} for column_name in cols_names],
        'metrics': [{'metric_name': metric} for metric in metric_names],
    }

    table = SqlaTable(
        id=id,
        schema=schema,
        table_name=prefixed_name,
        params=serialized_params,
    )
    table.columns.extend(
        TableColumn(column_name=column_name) for column_name in cols_names
    )
    table.metrics.extend(
        SqlMetric(metric_name=metric) for metric in metric_names
    )
    return table, dict_rep
def create_table(
    self, name, schema=None, id=0, cols_names=[], cols_uuids=None, metric_names=[]
):
    """
    Build a ``SqlaTable`` (with optional column UUIDs) and its dict form.

    The table name is ``NAME_PREFIX`` followed by ``name``. Columns are paired
    with ``cols_uuids`` positionally via ``zip``, so a shorter UUID list
    truncates the columns that get created.

    :param name: Table name suffix; ``NAME_PREFIX`` is prepended to it.
    :param schema: Schema stored on the table and in the dict.
    :param id: Used as the table id and as the ``DBREF`` entry of the params.
    :param cols_names: Column names to attach (only read, never mutated).
    :param cols_uuids: UUIDs matching ``cols_names``; ``None`` means one
        ``None`` UUID per column.
    :param metric_names: Metric names to attach; each gets an empty expression.
    :returns: ``(table, dict_rep)`` tuple.
    """
    prefixed_name = "{0}{1}".format(NAME_PREFIX, name)
    if cols_uuids is None:
        cols_uuids = [None] * len(cols_names)

    example_database_id = get_example_database().id
    # Both the dict and the model carry the same JSON-encoded params.
    serialized_params = json.dumps({DBREF: id, "database_name": "main"})

    dict_rep = {
        "database_id": example_database_id,
        "table_name": prefixed_name,
        "schema": schema,
        "id": id,
        "params": serialized_params,
        "columns": [
            {"column_name": column_name, "uuid": column_uuid}
            for column_name, column_uuid in zip(cols_names, cols_uuids)
        ],
        "metrics": [
            {"metric_name": metric, "expression": ""} for metric in metric_names
        ],
    }

    table = SqlaTable(
        id=id, schema=schema, table_name=prefixed_name, params=serialized_params
    )
    table.columns.extend(
        TableColumn(column_name=column_name, uuid=column_uuid)
        for column_name, column_uuid in zip(cols_names, cols_uuids)
    )
    table.metrics.extend(
        SqlMetric(metric_name=metric, expression="") for metric in metric_names
    )
    return table, dict_rep
def test_query_cache_key_changes_when_metric_is_updated(self):
    """
    The query cache key must change after a datasource metric is edited.

    A metric is appended to the ``birth_names`` datasource, a baseline cache
    key is computed, the first metric's expression is changed, and a second
    key is computed from the same payload. The two keys must differ.
    """
    self.login(username="******")
    payload = get_query_context("birth_names")

    def current_cache_key():
        # Load a fresh QueryContext from the unchanged payload each time.
        context = ChartDataQueryContextSchema().load(payload)
        return context.query_cache_key(context.queries[0])

    # make temporary change and revert it to refresh the changed_on property
    datasource_ref = payload["datasource"]
    datasource = DatasourceDAO.get_datasource(
        session=db.session,
        datasource_type=DatasourceType(datasource_ref["type"]),
        datasource_id=datasource_ref["id"],
    )
    datasource.metrics.append(
        SqlMetric(metric_name="foo", expression="select 1;")
    )
    db.session.commit()

    # construct baseline query_cache_key
    cache_key_original = current_cache_key()

    # wait a second since mysql records timestamps in second granularity
    time.sleep(1)

    datasource.metrics[0].expression = "select 2;"
    db.session.commit()

    # unchanged payload, updated datasource: extract the new query_cache_key
    cache_key_new = current_cache_key()

    # Clear the metrics before asserting so cleanup runs even on failure.
    datasource.metrics = []
    db.session.commit()

    # the new cache_key should be different due to updated datasource
    self.assertNotEqual(cache_key_original, cache_key_new)
def _add_table_metrics(datasource: SqlaTable) -> None:
    """
    Ensure the datasource has the extra column, metric and dttm flag.

    Adds the ``num_california`` calculated column and the ``sum__num`` metric
    when they are missing, then flags the first column named ``ds`` as a
    datetime column.
    """
    # By accessing the attributes first, we make sure `datasource.columns` and
    # `datasource.metrics` are already loaded. Otherwise accessing them later
    # may trigger an unnecessary and unexpected `after_update` event.
    columns, metrics = datasource.columns, datasource.metrics

    if "num_california" not in [existing.column_name for existing in columns]:
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        california_column = TableColumn(
            column_name="num_california",
            expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
        )
        columns.append(california_column)

    if "sum__num" not in [existing.metric_name for existing in metrics]:
        col = str(column("num").compile(db.engine))
        metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    # Only the first column called "ds" is flagged.
    ds_column = next(
        (candidate for candidate in columns if candidate.column_name == "ds"),
        None,
    )
    if ds_column is not None:
        ds_column.is_dttm = True
def decode_dashboards(o):
    """
    Function to be passed into json.loads obj_hook parameter

    Recreates the dashboard object from a json representation: a dict holding
    one of the ``__<Model>__`` markers becomes an instance of that model, a
    ``__datetime__`` marker becomes a ``datetime``, and anything else is
    returned unchanged.
    """
    import superset.models.core as models
    from superset.connectors.sqla.models import (
        SqlaTable, SqlMetric, TableColumn,
    )

    # Checked in this order; the first marker present in ``o`` wins.
    tagged_models = (
        ('__Dashboard__', models.Dashboard),
        ('__Slice__', models.Slice),
        ('__TableColumn__', TableColumn),
        ('__SqlaTable__', SqlaTable),
        ('__SqlMetric__', SqlMetric),
    )
    for marker, model_cls in tagged_models:
        if marker in o:
            # Instantiate without arguments, then copy the stored attributes
            # straight into the instance ``__dict__``.
            restored = model_cls()
            restored.__dict__.update(o[marker])
            return restored

    if '__datetime__' in o:
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    return o
def decode_dashboards(o: Dict[str, Any]) -> Any:
    """
    Function to be passed into json.loads obj_hook parameter

    Recreates the dashboard object from a json representation: a dict holding
    one of the ``__<Model>__`` markers is turned into that model by passing
    the stored mapping as keyword arguments, a ``__datetime__`` marker becomes
    a ``datetime``, and anything else is returned unchanged.
    """
    from superset.connectors.druid.models import (
        DruidCluster,
        DruidColumn,
        DruidDatasource,
        DruidMetric,
    )

    # Checked in this order; the first marker present in ``o`` wins. The
    # lambdas defer the class lookup until a marker actually matches.
    model_builders = (
        ("__Dashboard__", lambda fields: Dashboard(**fields)),
        ("__Slice__", lambda fields: Slice(**fields)),
        ("__TableColumn__", lambda fields: TableColumn(**fields)),
        ("__SqlaTable__", lambda fields: SqlaTable(**fields)),
        ("__SqlMetric__", lambda fields: SqlMetric(**fields)),
        ("__DruidCluster__", lambda fields: DruidCluster(**fields)),
        ("__DruidColumn__", lambda fields: DruidColumn(**fields)),
        ("__DruidDatasource__", lambda fields: DruidDatasource(**fields)),
        ("__DruidMetric__", lambda fields: DruidMetric(**fields)),
    )
    for marker, build in model_builders:
        if marker in o:
            return build(o[marker])

    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
def _create_energy_table(df: DataFrame, table_name: str):
    """
    Create the energy table from ``df`` and the slices that chart it.

    The table is created through ``create_table_for_dashboard``, given a
    ``sum__value`` metric if it has none, merged and committed, and then one
    slice is created per entry returned by ``_get_energy_slices()``.

    :param df: Data passed on to ``create_table_for_dashboard``.
    :param table_name: Name passed on to ``create_table_for_dashboard``.
    """
    database = get_example_database()
    column_types = {
        "source": String(255),
        "target": String(255),
        "value": Float(),
    }
    table = create_table_for_dashboard(
        df, table_name, database, column_types, "Energy consumption"
    )
    table.fetch_metadata()

    known_metric_names = [metric.metric_name for metric in table.metrics]
    if "sum__value" not in known_metric_names:
        col = str(column("value").compile(db.engine))
        table.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})")
        )

    db.session.merge(table)
    db.session.commit()
    # Metadata is fetched a second time after the commit, as in the original.
    table.fetch_metadata()

    for spec in _get_energy_slices():
        _create_and_commit_energy_slice(
            table, spec["slice_title"], spec["viz_type"], spec["params"]
        )
def create_table(self, name, schema=None, id=0, cols_names=[], metric_names=[]):
    """
    Build a ``SqlaTable`` with the given columns and metrics.

    The table params record ``id`` as ``remote_id`` and ``"examples"`` as the
    database name. Nothing is added to the session here.

    :param name: Table name.
    :param schema: Schema stored on the table.
    :param id: Used as the table id and as the ``remote_id`` param.
    :param cols_names: Column names to attach (only read, never mutated).
    :param metric_names: Metric names to attach; each gets an empty expression.
    :returns: The constructed ``SqlaTable``.
    """
    serialized_params = json.dumps(
        {"remote_id": id, "database_name": "examples"}
    )
    table = SqlaTable(
        id=id, schema=schema, table_name=name, params=serialized_params
    )
    table.columns.extend(
        TableColumn(column_name=column_name) for column_name in cols_names
    )
    table.metrics.extend(
        SqlMetric(metric_name=metric, expression="") for metric in metric_names
    )
    return table
def physical_dataset():
    """
    Generator that sets up, yields and tears down the ``physical_dataset``.

    Setup creates the ``physical_dataset`` table in the example database (if
    it does not exist), inserts ten rows, and merges a matching ``SqlaTable``
    with five columns and a ``count`` metric into the session. After the
    ``yield`` the table is dropped and every ``SqlaTable`` row named
    ``physical_dataset`` is deleted.

    NOTE(review): the ``yield`` suggests this is used as a pytest fixture; the
    decorator is not visible here.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    example_database = get_example_database()
    engine = example_database.get_sqla_engine()

    # sqlite can only execute one statement at a time
    engine.execute(
        " CREATE TABLE IF NOT EXISTS physical_dataset( "
        "col1 INTEGER, "
        "col2 VARCHAR(255), "
        "col3 DECIMAL(4,2), "
        "col4 VARCHAR(255), "
        "col5 TIMESTAMP "
        "); "
    )
    engine.execute(
        " INSERT INTO physical_dataset values "
        "(0, 'a', 1.0, NULL, '2000-01-01 00:00:00'), "
        "(1, 'b', 1.1, NULL, '2000-01-02 00:00:00'), "
        "(2, 'c', 1.2, NULL, '2000-01-03 00:00:00'), "
        "(3, 'd', 1.3, NULL, '2000-01-04 00:00:00'), "
        "(4, 'e', 1.4, NULL, '2000-01-05 00:00:00'), "
        "(5, 'f', 1.5, NULL, '2000-01-06 00:00:00'), "
        "(6, 'g', 1.6, NULL, '2000-01-07 00:00:00'), "
        "(7, 'h', 1.7, NULL, '2000-01-08 00:00:00'), "
        "(8, 'i', 1.8, NULL, '2000-01-09 00:00:00'), "
        "(9, 'j', 1.9, NULL, '2000-01-10 00:00:00'); "
    )

    dataset = SqlaTable(
        table_name="physical_dataset",
        database=example_database,
    )
    # Passing ``table=dataset`` is what links each object to the dataset; the
    # constructed instances are intentionally not kept in local variables.
    plain_columns = (
        ("col1", "INTEGER"),
        ("col2", "VARCHAR(255)"),
        ("col3", "DECIMAL(4,2)"),
        ("col4", "VARCHAR(255)"),
    )
    for column_name, column_type in plain_columns:
        TableColumn(column_name=column_name, type=column_type, table=dataset)
    TableColumn(column_name="col5", type="TIMESTAMP", is_dttm=True, table=dataset)
    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)
    db.session.commit()

    yield dataset

    engine.execute(" DROP TABLE physical_dataset; ")
    stale_datasets = (
        db.session.query(SqlaTable).filter_by(table_name="physical_dataset").all()
    )
    for stale in stale_datasets:
        db.session.delete(stale)
    db.session.commit()
class SqlaConnectorTestCase(BaseConnectorTestCase):
    """
    Connector test case backed by a SQLite database.

    ``columns`` and ``metrics`` describe the datasource; ``setUp`` writes
    ``self.df`` into a table of that datasource's name.

    NOTE(review): ``self.df`` is not defined in this class; presumably it is
    provided by ``BaseConnectorTestCase``.
    """

    # Column definitions shared by every datasource built in ``setUp``.
    columns = [
        TableColumn(column_name='region', type='VARCHAR(20)'),
        TableColumn(column_name='district', type='VARCHAR(20)'),
        TableColumn(column_name='project', type='VARCHAR(20)'),
        TableColumn(column_name='received', type='DATE', is_dttm=True),
        TableColumn(column_name='value', type='BIGINT'),
    ]

    # Metric definitions shared by every datasource built in ``setUp``.
    metrics = [
        SqlMetric(
            metric_name='sum__value',
            metric_type='sum',
            expression='SUM(value)',
        ),
        SqlMetric(
            metric_name='avg__value',
            metric_type='avg',
            expression='AVG(value)',
        ),
        SqlMetric(
            metric_name='ratio',
            metric_type='avg',
            expression='AVG(value/value2)',
        ),
        SqlMetric(
            metric_name='value_percentage',
            metric_type='custom',
            expression='SUM(value)/SUM(value + value2)',
        ),
        SqlMetric(
            metric_name='category_percentage',
            metric_type='custom',
            expression=(
                "SUM(CASE WHEN category='CategoryA' THEN 1 ELSE 0 END)/"
                'CAST(COUNT(*) AS REAL)'
            ),
        ),
    ]

    def setUp(self):
        """Create the SQLite-backed datasource and load ``self.df`` into it."""
        super(SqlaConnectorTestCase, self).setUp()
        test_database = Database(
            database_name='test_database',
            sqlalchemy_uri='sqlite:////tmp/test.db',
        )
        self.connection = test_database.get_sqla_engine().connect()
        self.datasource = SqlaTable(
            table_name='test_datasource',
            database=test_database,
            columns=self.columns,
            metrics=self.metrics,
        )
        # A second engine is requested for the load, as in the original; the
        # write runs inside its own transaction.
        load_engine = test_database.get_sqla_engine()
        with load_engine.begin() as load_connection:
            self.df.to_sql(
                self.datasource.table_name,
                load_connection,
                if_exists='replace',
                index=False,
                dtype={'received': Date},
            )
def create_table(
        self, name, schema='', id=0, cols_names=[], metric_names=[]):
    """
    Build a ``SqlaTable`` with the given columns and metrics.

    The table params record ``id`` as ``remote_id`` and ``'main'`` as the
    database name. Nothing is added to the session here.

    :param name: Table name.
    :param schema: Schema stored on the table.
    :param id: Used as the table id and as the ``remote_id`` param.
    :param cols_names: Column names to attach (only read, never mutated).
    :param metric_names: Metric names to attach (only read, never mutated).
    :returns: The constructed ``SqlaTable``.
    """
    serialized_params = json.dumps({'remote_id': id, 'database_name': 'main'})
    table = SqlaTable(
        id=id,
        schema=schema,
        table_name=name,
        params=serialized_params,
    )
    table.columns.extend(
        TableColumn(column_name=column_name) for column_name in cols_names
    )
    table.metrics.extend(
        SqlMetric(metric_name=metric) for metric in metric_names
    )
    return table
def text_column_table():
    """
    Yield a virtual ``SqlaTable`` whose single text column has tricky values.

    The backing SQL is a UNION of one-row selects covering an ordinary
    string, an empty string, NULL, the literal text ``null`` and several
    strings with embedded single or double quotes. The table gets a ``foo``
    column and a ``count`` metric and is yielded inside the app context.
    """
    with app.app_context():
        selects = [
            "SELECT 'foo' as foo",
            "SELECT ''",
            "SELECT NULL",
            "SELECT 'null'",
            "SELECT '\"text in double quotes\"'",
            "SELECT '''text in single quotes'''",
            "SELECT 'double quotes \" in text'",
            "SELECT 'single quotes '' in text'",
        ]
        # The trailing space keeps the statement identical to the original.
        union_sql = " UNION ".join(selects) + " "
        table = SqlaTable(
            table_name="text_column_table",
            sql=union_sql,
            database=get_example_database(),
        )
        # ``table=table`` links the column and the metric to the table.
        TableColumn(column_name="foo", type="VARCHAR(255)", table=table)
        SqlMetric(metric_name="count", expression="count(*)", table=table)
        yield table
def test_update_sqlatable_metric(
    mocker: MockFixture, app_context: None, session: Session
) -> None:
    """
    Test that updating a ``SqlaTable`` also updates the corresponding ``Dataset``.

    For this test we check that updating the SQL expression in a metric belonging to a
    ``SqlaTable`` is reflected in the ``Dataset`` metric.
    """
    # patch session
    mocker.patch(
        "superset.security.SupersetSecurityManager.get_session",
        return_value=session,
    )

    # NOTE(review): ``Table`` is not referenced below; the import is kept
    # because importing presumably registers its model before ``create_all``.
    from superset.columns.models import Column
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.tables.models import Table

    Dataset.metadata.create_all(session.get_bind())  # pylint: disable=no-member

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=[TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP")],
        metrics=[SqlMetric(metric_name="cnt", expression="COUNT(*)")],
        database=Database(database_name="my_database", sqlalchemy_uri="sqlite://"),
    )
    session.add(sqla_table)
    session.flush()

    # check that the metric was created
    metric_column = session.query(Column).filter_by(is_physical=False).one()
    assert metric_column.expression == "COUNT(*)"

    # change the metric definition
    sqla_table.metrics[0].expression = "MAX(ds)"
    session.flush()

    # The object fetched earlier must reflect the new expression.
    assert metric_column.expression == "MAX(ds)"
def test_should_generate_closed_and_open_time_filter_range():
    """
    The time filter must include ``from_dttm`` and exclude ``to_dttm``.

    Runs only on PostgreSQL. Of the five timestamps in the virtual table,
    exactly two fall in ``[2022-01-01, 2023-01-01)``, so the count is 2.
    """
    with app.app_context():
        if backend() != "postgresql":
            pytest.skip(f"{backend()} has different dialect for datetime column")

        timestamp_selects = [
            "SELECT '2021-12-31'::timestamp as datetime_col",
            "SELECT '2022-01-01'::timestamp",
            "SELECT '2022-03-10'::timestamp",
            "SELECT '2023-01-01'::timestamp",
            "SELECT '2023-03-10'::timestamp",
        ]
        table = SqlaTable(
            table_name="temporal_column_table",
            # The trailing space keeps the statement identical to the original.
            sql=" UNION ".join(timestamp_selects) + " ",
            database=get_example_database(),
        )
        # ``table=table`` links the column and the metric to the table.
        TableColumn(
            column_name="datetime_col",
            type="TIMESTAMP",
            table=table,
            is_dttm=True,
        )
        SqlMetric(metric_name="count", expression="count(*)", table=table)

        query_obj = {
            "metrics": ["count"],
            "is_timeseries": False,
            "filter": [],
            "from_dttm": datetime(2022, 1, 1),
            "to_dttm": datetime(2023, 1, 1),
            "granularity": "datetime_col",
        }
        result_object = table.query(query_obj)

        # >>> result_object.query
        #     SELECT count(*) AS count
        #     FROM
        #       (SELECT '2021-12-31'::timestamp as datetime_col
        #        UNION SELECT '2022-01-01'::timestamp
        #        UNION SELECT '2022-03-10'::timestamp
        #        UNION SELECT '2023-01-01'::timestamp
        #        UNION SELECT '2023-03-10'::timestamp) AS virtual_table
        #     WHERE datetime_col >= TO_TIMESTAMP('2022-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
        #       AND datetime_col < TO_TIMESTAMP('2023-01-01 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
        assert result_object.df.iloc[0]["count"] == 2
def decode_dashboards(o: Dict[str, Any]) -> Any:
    """
    Function to be passed into json.loads obj_hook parameter

    Recreates the dashboard object from a json representation: a dict holding
    one of the ``__<Model>__`` markers is turned into that model by passing
    the stored mapping as keyword arguments, a ``__datetime__`` marker becomes
    a ``datetime``, and anything else is returned unchanged.
    """
    # Checked in this order; the first marker present in ``o`` wins. The
    # lambdas defer the class lookup until a marker actually matches.
    model_builders = (
        ("__Dashboard__", lambda fields: Dashboard(**fields)),
        ("__Slice__", lambda fields: Slice(**fields)),
        ("__TableColumn__", lambda fields: TableColumn(**fields)),
        ("__SqlaTable__", lambda fields: SqlaTable(**fields)),
        ("__SqlMetric__", lambda fields: SqlMetric(**fields)),
    )
    for marker, build in model_builders:
        if marker in o:
            return build(o[marker])

    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
def virtual_dataset():
    """
    Generator that yields a ten-row virtual ``SqlaTable`` and then removes it.

    The dataset is backed by a ``UNION ALL`` of literal selects with columns
    ``col1``..``col5``. It is merged into the session before the ``yield``;
    afterwards it is deleted and the session committed.

    NOTE(review): the ``yield`` suggests this is used as a pytest fixture; the
    decorator is not visible here.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    # Each select keeps its trailing space so the joined statement is
    # identical to the original literal.
    row_selects = [
        "SELECT 0 as col1, 'a' as col2, 1.0 as col3, NULL as col4, '2000-01-01 00:00:00' as col5 ",
        "SELECT 1, 'b', 1.1, NULL, '2000-01-02 00:00:00' ",
        "SELECT 2 as col1, 'c' as col2, 1.2, NULL, '2000-01-03 00:00:00' ",
        "SELECT 3 as col1, 'd' as col2, 1.3, NULL, '2000-01-04 00:00:00' ",
        "SELECT 4 as col1, 'e' as col2, 1.4, NULL, '2000-01-05 00:00:00' ",
        "SELECT 5 as col1, 'f' as col2, 1.5, NULL, '2000-01-06 00:00:00' ",
        "SELECT 6 as col1, 'g' as col2, 1.6, NULL, '2000-01-07 00:00:00' ",
        "SELECT 7 as col1, 'h' as col2, 1.7, NULL, '2000-01-08 00:00:00' ",
        "SELECT 8 as col1, 'i' as col2, 1.8, NULL, '2000-01-09 00:00:00' ",
        "SELECT 9 as col1, 'j' as col2, 1.9, NULL, '2000-01-10 00:00:00' ",
    ]
    dataset = SqlaTable(
        table_name="virtual_dataset",
        sql="UNION ALL ".join(row_selects),
        database=get_example_database(),
    )

    column_types = (
        ("col1", "INTEGER"),
        ("col2", "VARCHAR(255)"),
        ("col3", "DECIMAL(4,2)"),
        ("col4", "VARCHAR(255)"),
        # Different database dialect datetime type is not consistent, so
        # temporarily use varchar
        ("col5", "VARCHAR(255)"),
    )
    # ``table=dataset`` links each column and the metric to the dataset.
    for column_name, column_type in column_types:
        TableColumn(column_name=column_name, type=column_type, table=dataset)
    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)

    yield dataset

    db.session.delete(dataset)
    db.session.commit()
def load_birth_names(
    only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
    """
    Loading birth name dataset from a zip file in the repo

    :param only_metadata: When true, never load the data itself.
    :param force: Load the data even if the table already exists.
    :param sample: Passed through to ``load_data``.
    """
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    should_load_data = not only_metadata and (force or not table_exists)
    if should_load_data:
        load_data(tbl_name, database, sample=sample)

    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True
    obj.fetch_metadata()

    # Add the calculated column and the metric only when they are missing.
    if "num_california" not in [existing.column_name for existing in obj.columns]:
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        california_column = TableColumn(
            column_name="num_california",
            expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
        )
        obj.columns.append(california_column)

    if "sum__num" not in [existing.metric_name for existing in obj.metrics]:
        col = str(column("num").compile(db.engine))
        obj.metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()

    # Only the slices from ``create_slices`` feed the dashboard; the second
    # returned value is unused.
    slices, _ = create_slices(obj)
    create_dashboard(slices)
def decode_dashboards(o):
    """
    Function to be passed into json.loads obj_hook parameter

    Recreates the dashboard object from a json representation: a dict holding
    one of the ``__<Model>__`` markers is turned into that model by passing
    the stored mapping as keyword arguments, a ``__datetime__`` marker becomes
    a ``datetime``, and anything else is returned unchanged.

    The previous function-scope ``import superset.models.core as models`` was
    never referenced (the model classes are used by their bare names), yet it
    ran for every JSON object decoded; it has been removed.
    """
    # Checked in this order; the first marker present in ``o`` wins. The
    # lambdas defer the class lookup until a marker actually matches.
    model_builders = (
        ("__Dashboard__", lambda fields: Dashboard(**fields)),
        ("__Slice__", lambda fields: Slice(**fields)),
        ("__TableColumn__", lambda fields: TableColumn(**fields)),
        ("__SqlaTable__", lambda fields: SqlaTable(**fields)),
        ("__SqlMetric__", lambda fields: SqlMetric(**fields)),
    )
    for marker, build in model_builders:
        if marker in o:
            return build(o[marker])

    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")
    return o
def _add_table_metrics(datasource: "BaseDatasource") -> None:
    """
    Ensure the datasource has the extra column, metric and dttm flag.

    Adds the ``num_california`` calculated column and the ``sum__num`` metric
    when they are missing, then flags the first column named ``ds`` as a
    datetime column.
    """
    existing_column_names = [
        existing.column_name for existing in datasource.columns
    ]
    if "num_california" not in existing_column_names:
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        california_column = TableColumn(
            column_name="num_california",
            expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
        )
        datasource.columns.append(california_column)

    existing_metric_names = [
        existing.metric_name for existing in datasource.metrics
    ]
    if "sum__num" not in existing_metric_names:
        col = str(column("num").compile(db.engine))
        datasource.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})")
        )

    # Only the first column called "ds" is flagged.
    ds_column = next(
        (
            candidate
            for candidate in datasource.columns
            if candidate.column_name == "ds"
        ),
        None,
    )
    if ds_column is not None:
        ds_column.is_dttm = True  # type: ignore
def virtual_dataset_comma_in_column_value():
    """
    Generator yielding a virtual ``SqlaTable`` whose values contain commas.

    The dataset is backed by a three-row ``UNION ALL`` with two text columns.
    It is merged into the session before the ``yield``; afterwards it is
    deleted and the session committed.

    NOTE(review): the ``yield`` suggests this is used as a pytest fixture; the
    decorator is not visible here.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn

    # Each select keeps its trailing space so the joined statement is
    # identical to the original literal.
    row_selects = [
        "SELECT 'col1,row1' as col1, 'col2, row1' as col2 ",
        "SELECT 'col1,row2' as col1, 'col2, row2' as col2 ",
        "SELECT 'col1,row3' as col1, 'col2, row3' as col2 ",
    ]
    dataset = SqlaTable(
        table_name="virtual_dataset",
        sql="UNION ALL ".join(row_selects),
        database=get_example_database(),
    )

    # ``table=dataset`` links each column and the metric to the dataset.
    for column_name in ("col1", "col2"):
        TableColumn(column_name=column_name, type="VARCHAR(255)", table=dataset)
    SqlMetric(metric_name="count", expression="count(*)", table=dataset)
    db.session.merge(dataset)

    yield dataset

    db.session.delete(dataset)
    db.session.commit()
def create_table(self, name, schema="", id=0, cols_names=[], metric_names=[]):
    """
    Build a ``SqlaTable`` together with a plain-dict description of it.

    The table name is ``NAME_PREFIX`` followed by ``name``. One
    ``TableColumn`` is attached per entry of ``cols_names`` and one
    ``SqlMetric`` (with an empty expression) per entry of ``metric_names``.
    Nothing is added to the session here.

    :param name: Table name suffix; ``NAME_PREFIX`` is prepended to it.
    :param schema: Schema stored on the table and in the dict.
    :param id: Used as the table id and as the ``DBREF`` entry of the params.
    :param cols_names: Column names to attach (only read, never mutated).
    :param metric_names: Metric names to attach (only read, never mutated).
    :returns: ``(table, dict_rep)`` tuple.
    """
    prefixed_name = "{0}{1}".format(NAME_PREFIX, name)
    main_database_id = get_main_database().id
    # Both the dict and the model carry the same JSON-encoded params.
    serialized_params = json.dumps({DBREF: id, "database_name": "main"})

    dict_rep = {
        "database_id": main_database_id,
        "table_name": prefixed_name,
        "schema": schema,
        "id": id,
        "params": serialized_params,
        "columns": [{"column_name": column_name} for column_name in cols_names],
        "metrics": [
            {"metric_name": metric, "expression": ""} for metric in metric_names
        ],
    }

    table = SqlaTable(
        id=id, schema=schema, table_name=prefixed_name, params=serialized_params
    )
    table.columns.extend(
        TableColumn(column_name=column_name) for column_name in cols_names
    )
    table.metrics.extend(
        SqlMetric(metric_name=metric, expression="") for metric in metric_names
    )
    return table, dict_rep
def decode_dashboards(o):
    """
    Function to be passed into json.loads obj_hook parameter

    Recreates the dashboard object from a json representation: a dict holding
    one of the ``__<Model>__`` markers is turned into that model (core,
    SQLA-connector or security-manager model) by passing the stored mapping as
    keyword arguments, a ``__datetime__`` marker becomes a ``datetime``, and
    anything else is returned unchanged.
    """
    import superset.models.core as models
    from superset import security_manager
    from superset.connectors.sqla.models import (
        SqlaTable, SqlMetric, TableColumn,
    )

    # Checked in this order; the first marker present in ``o`` wins. The
    # lambdas keep the ``security_manager`` attribute lookups lazy, so a model
    # class is only resolved when its marker actually matches.
    model_builders = (
        ('__Dashboard__', lambda fields: models.Dashboard(**fields)),
        ('__Slice__', lambda fields: models.Slice(**fields)),
        ('__TableColumn__', lambda fields: TableColumn(**fields)),
        ('__SqlaTable__', lambda fields: SqlaTable(**fields)),
        ('__SqlMetric__', lambda fields: SqlMetric(**fields)),
        ('__Database__', lambda fields: models.Database(**fields)),
        ('__User__', lambda fields: security_manager.user_model(**fields)),
        ('__Role__', lambda fields: security_manager.role_model(**fields)),
        ('__ViewMenu__',
         lambda fields: security_manager.viewmenu_model(**fields)),
        ('__PermissionView__',
         lambda fields: security_manager.permissionview_model(**fields)),
    )
    for marker, build in model_builders:
        if marker in o:
            return build(o[marker])

    if '__datetime__' in o:
        return datetime.strptime(o['__datetime__'], '%Y-%m-%dT%H:%M:%S')
    return o
def _create_energy_table():
    """
    Create the energy table metadata and the slices that chart it.

    The table is created through ``create_table_metadata``, given a
    ``sum__value`` metric if it has none, merged and committed, and then one
    slice is created per entry returned by ``_get_energy_slices()``.
    """
    table = create_table_metadata(
        table_name=ENERGY_USAGE_TBL_NAME,
        database=get_example_database(),
        table_description="Energy consumption",
    )
    table.fetch_metadata()

    known_metric_names = [metric.metric_name for metric in table.metrics]
    if "sum__value" not in known_metric_names:
        col = str(column("value").compile(db.engine))
        table.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})")
        )

    db.session.merge(table)
    db.session.commit()
    # Metadata is fetched a second time after the commit, as in the original.
    table.fetch_metadata()

    for spec in _get_energy_slices():
        _create_and_commit_energy_slice(
            table, spec["slice_title"], spec["viz_type"], spec["params"]
        )
def load_country_map_data(only_metadata: bool = False, force: bool = False) -> None:
    """
    Loading data for map with country map

    Loads ``birth_france_data_for_country_map.csv`` into the
    ``birth_france_by_region`` table (unless skipped), registers the table
    with an ``avg__2004`` metric, and merges a ``country_map`` slice over it.

    :param only_metadata: When true, never load the data itself.
    :param force: Load the data even if the table already exists.
    """
    tbl_name = "birth_france_by_region"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    should_load_data = not only_metadata and (force or not table_exists)
    if should_load_data:
        csv_bytes = get_example_data(
            "birth_france_data_for_country_map.csv", make_bytes=False
        )
        data = pd.read_csv(csv_bytes, encoding="utf-8")
        # Every row gets today's date as its "dttm" value.
        data["dttm"] = datetime.datetime.now().date()

        # One BigInteger column per year, "2003" through "2014".
        year_columns = {str(year): BigInteger for year in range(2003, 2015)}
        column_types = {"DEPT_ID": String(10), **year_columns, "dttm": Date()}
        data.to_sql(  # pylint: disable=no-member
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype=column_types,
            index=False,
        )
        print("Done loading table!")
        print("-" * 80)

    print("Creating table reference")
    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        obj = TBL(table_name=tbl_name)
    obj.main_dttm_col = "dttm"
    obj.database = database

    if "avg__2004" not in [existing.metric_name for existing in obj.metrics]:
        col = str(column("2004").compile(db.engine))
        obj.metrics.append(SqlMetric(metric_name="avg__2004", expression=f"AVG({col})"))
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()

    boys_metric = {
        "expressionType": "SIMPLE",
        "column": {"type": "INT", "column_name": "2004"},
        "aggregate": "AVG",
        "label": "Boys",
        "optionName": "metric_112342",
    }
    slice_data = {
        "granularity_sqla": "",
        "since": "",
        "until": "",
        "viz_type": "country_map",
        "entity": "DEPT_ID",
        "metric": boys_metric,
        "row_limit": 500000,
    }

    print("Creating a slice")
    slc = Slice(
        slice_name="Birth in France by department in 2016",
        viz_type="country_map",
        datasource_type="table",
        datasource_id=obj.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def load_energy(
    only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
    """
    Loads an energy related dataset to use with sankey and graphs

    Loads ``energy.json.gz`` into the ``energy_usage`` table (unless skipped),
    registers the table with a ``sum__value`` metric, and merges three slices
    over it: a sankey, a force-layout graph and a heatmap.

    :param only_metadata: When true, never load the data itself.
    :param force: Load the data even if the table already exists.
    :param sample: When true, only the first 100 rows are loaded.
    """
    tbl_name = "energy_usage"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("energy.json.gz")
        pdf = pd.read_json(data)
        pdf = pdf.head(100) if sample else pdf
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={"source": String(255), "target": String(255), "value": Float()},
            index=False,
            method="multi",
        )

    # Fixed: the message used to name "wb_health_population" (copy-paste
    # leftover); report the table this function actually handles.
    print(f"Creating table [{tbl_name}] reference")
    table = get_table_connector_registry()
    tbl = db.session.query(table).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = table(table_name=tbl_name)
    tbl.description = "Energy consumption"
    tbl.database = database
    tbl.filter_select_enabled = True

    # Add the metric only when it is missing.
    if not any(col.metric_name == "sum__value" for col in tbl.metrics):
        col = str(column("value").compile(db.engine))
        tbl.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})")
        )

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    slc = Slice(
        slice_name="Energy Sankey",
        viz_type="sankey",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent(
            """\
        {
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Energy Sankey",
            "viz_type": "sankey"
        }
        """
        ),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Energy Force Layout",
        viz_type="graph_chart",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent(
            """\
        {
            "source": "source",
            "target": "target",
            "edgeLength": 400,
            "repulsion": 1000,
            "layout": "force",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Force",
            "viz_type": "graph_chart"
        }
        """
        ),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Heatmap",
        viz_type="heatmap",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent(
            """\
        {
            "all_columns_x": "source",
            "all_columns_y": "target",
            "canvas_image_rendering": "pixelated",
            "collapsed_fieldsets": "",
            "linear_color_scheme": "blue_white_yellow",
            "metric": "sum__value",
            "normalize_across": "heatmap",
            "slice_name": "Heatmap",
            "viz_type": "heatmap",
            "xscale_interval": "1",
            "yscale_interval": "1"
        }
        """
        ),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def load_birth_names(only_metadata=False, force=False):
    """Set up the ``birth_names`` example: data, datasource, charts, dashboard.

    The rows are written only when ``only_metadata`` is false and either the
    table is missing or ``force`` is true. The datasource record, its
    ``num_california`` column and ``sum__num`` metric, the charts, and the
    dashboard with slug ``births`` are created or refreshed on every call.
    """
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_missing = not database.has_table_by_name(tbl_name)

    if (table_missing or force) and not only_metadata:
        frame = pd.read_json(get_example_data("birth_names.json.gz"))
        frame.ds = pd.to_datetime(frame.ds, unit="ms")
        frame.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=500,
            dtype={
                "ds": DateTime,
                "gender": String(16),
                "state": String(10),
                "name": String(255),
            },
            index=False,
        )
        print("Done loading table!")
        print("-" * 80)

    datasource = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not datasource:
        print(f"Creating table [{tbl_name}] reference")
        datasource = TBL(table_name=tbl_name)
        db.session.add(datasource)
    datasource.main_dttm_col = "ds"
    datasource.database = database
    datasource.filter_select_enabled = True

    # Computed column and metric are appended only when not already present.
    known_columns = {existing.column_name for existing in datasource.columns}
    if "num_california" not in known_columns:
        state_sql = str(column("state").compile(db.engine))
        num_sql = str(column("num").compile(db.engine))
        datasource.columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {state_sql} = 'CA' THEN {num_sql} ELSE 0 END",
            )
        )

    known_metrics = {existing.metric_name for existing in datasource.metrics}
    if "sum__num" not in known_metrics:
        num_sql = str(column("num").compile(db.engine))
        datasource.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({num_sql})")
        )

    db.session.commit()
    datasource.fetch_metadata()

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "ds",
        "groupby": [],
        "metric": "sum__num",
        "metrics": [
            {
                "expressionType": "SIMPLE",
                "column": {"column_name": "num", "type": "BIGINT"},
                "aggregate": "SUM",
                "label": "Births",
                "optionName": "metric_11",
            }
        ],
        "row_limit": config.get("ROW_LIMIT"),
        "since": "100 years ago",
        "until": "now",
        "viz_type": "table",
        "where": "",
        "markup_type": "markdown",
    }

    admin = security_manager.find_user("admin")

    def _gender_filter(value):
        """Return a legacy ``filters`` entry selecting one gender value."""
        return {"col": "gender", "op": "in", "val": [value]}

    def _california_sum():
        """Return a fresh adhoc SUM metric over the ``num_california`` column."""
        return {
            "expressionType": "SIMPLE",
            "column": {
                "column_name": "num_california",
                "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
            },
            "aggregate": "SUM",
            "label": "SUM(num_california)",
        }

    def _chart(_title, _viz, **form_data):
        """Build a table-backed Slice whose params are defaults + form_data."""
        return Slice(
            slice_name=_title,
            viz_type=_viz,
            datasource_type="table",
            datasource_id=datasource.id,
            params=get_slice_json(defaults, **form_data),
        )

    print("Creating some slices")
    slices = [
        _chart(
            "Girls",
            "table",
            groupby=["name"],
            filters=[_gender_filter("girl")],
            row_limit=50,
            timeseries_limit_metric="sum__num",
        ),
        _chart(
            "Boys",
            "table",
            groupby=["name"],
            filters=[_gender_filter("boy")],
            row_limit=50,
        ),
        _chart(
            "Participants",
            "big_number",
            viz_type="big_number",
            granularity_sqla="ds",
            compare_lag="5",
            compare_suffix="over 5Y",
        ),
        _chart("Genders", "pie", viz_type="pie", groupby=["gender"]),
        _chart(
            "Genders by State",
            "dist_bar",
            adhoc_filters=[
                {
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": ["other"],
                    "operator": "not in",
                    "subject": "state",
                }
            ],
            viz_type="dist_bar",
            metrics=[
                {
                    "expressionType": "SIMPLE",
                    "column": {"column_name": "sum_boys", "type": "BIGINT(20)"},
                    "aggregate": "SUM",
                    "label": "Boys",
                    "optionName": "metric_11",
                },
                {
                    "expressionType": "SIMPLE",
                    "column": {"column_name": "sum_girls", "type": "BIGINT(20)"},
                    "aggregate": "SUM",
                    "label": "Girls",
                    "optionName": "metric_12",
                },
            ],
            groupby=["state"],
        ),
        _chart(
            "Trends",
            "line",
            viz_type="line",
            groupby=["name"],
            granularity_sqla="ds",
            rich_tooltip=True,
            show_legend=True,
        ),
        _chart(
            "Average and Sum Trends",
            "dual_line",
            viz_type="dual_line",
            metric={
                "expressionType": "SIMPLE",
                "column": {"column_name": "num", "type": "BIGINT(20)"},
                "aggregate": "AVG",
                "label": "AVG(num)",
                "optionName": "metric_vgops097wej_g8uff99zhk7",
            },
            metric_2="sum__num",
            granularity_sqla="ds",
        ),
        _chart(
            "Title",
            "markup",
            viz_type="markup",
            markup_type="html",
            code="""\
    <div style='text-align:center'>
        <h1>Birth Names Dashboard</h1>
        <p>
            The source dataset came from
            <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a>
        </p>
        <img src="/static/assets/images/babies.png">
    </div>
    """,
        ),
        _chart(
            "Name Cloud",
            "word_cloud",
            viz_type="word_cloud",
            size_from="10",
            series="name",
            size_to="70",
            rotation="square",
            limit="100",
        ),
        _chart(
            "Pivot Table",
            "pivot_table",
            viz_type="pivot_table",
            groupby=["name"],
            columns=["state"],
        ),
        _chart(
            "Number of Girls",
            "big_number_total",
            viz_type="big_number_total",
            granularity_sqla="ds",
            filters=[_gender_filter("girl")],
            subheader="total female participants",
        ),
        _chart(
            "Number of California Births",
            "big_number_total",
            metric=_california_sum(),
            viz_type="big_number_total",
            granularity_sqla="ds",
        ),
        _chart(
            "Top 10 California Names Timeseries",
            "line",
            metrics=[_california_sum()],
            viz_type="line",
            granularity_sqla="ds",
            groupby=["name"],
            timeseries_limit_metric=_california_sum(),
            limit="10",
        ),
        _chart(
            "Names Sorted by Num in California",
            "table",
            groupby=["name"],
            row_limit=50,
            timeseries_limit_metric=_california_sum(),
        ),
        _chart("Num Births Trend", "line", viz_type="line"),
        # Built explicitly: the only chart that also sets ``created_by``.
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=datasource.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
            ),
        ),
    ]
    for chart in slices:
        merge_slice(chart)

    print("Creating a dashboard")
    dash = db.session.query(Dash).filter_by(slug="births").first()
    if not dash:
        dash = Dash()
        db.session.add(dash)
    dash.published = True

    layout = json.loads(
        textwrap.dedent(
            # pylint: disable=line-too-long
            """\
        {
            "CHART-0dd270f0": { "meta": { "chartId": 51, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-0dd270f0", "children": [] },
            "CHART-a3c21bcc": { "meta": { "chartId": 52, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-a3c21bcc", "children": [] },
            "CHART-976960a5": { "meta": { "chartId": 53, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-976960a5", "children": [] },
            "CHART-58575537": { "meta": { "chartId": 54, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-58575537", "children": [] },
            "CHART-e9cd8f0b": { "meta": { "chartId": 55, "width": 8, "height": 38 }, "type": "CHART", "id": "CHART-e9cd8f0b", "children": [] },
            "CHART-e440d205": { "meta": { "chartId": 56, "width": 8, "height": 50 }, "type": "CHART", "id": "CHART-e440d205", "children": [] },
            "CHART-59444e0b": { "meta": { "chartId": 57, "width": 3, "height": 38 }, "type": "CHART", "id": "CHART-59444e0b", "children": [] },
            "CHART-e2cb4997": { "meta": { "chartId": 59, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-e2cb4997", "children": [] },
            "CHART-e8774b49": { "meta": { "chartId": 60, "width": 12, "height": 50 }, "type": "CHART", "id": "CHART-e8774b49", "children": [] },
            "CHART-985bfd1e": { "meta": { "chartId": 61, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-985bfd1e", "children": [] },
            "CHART-17f13246": { "meta": { "chartId": 62, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-17f13246", "children": [] },
            "CHART-729324f6": { "meta": { "chartId": 63, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-729324f6", "children": [] },
            "COLUMN-25a865d6": { "meta": { "width": 4, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-25a865d6", "children": [ "ROW-cc97c6ac", "CHART-e2cb4997" ] },
            "COLUMN-4557b6ba": { "meta": { "width": 8, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-4557b6ba", "children": [ "ROW-d2e78e59", "CHART-e9cd8f0b" ] },
            "GRID_ID": { "type": "GRID", "id": "GRID_ID", "children": [ "ROW-8515ace3", "ROW-1890385f", "ROW-f0b64094", "ROW-be9526b8" ] },
            "HEADER_ID": { "meta": { "text": "Births" }, "type": "HEADER", "id": "HEADER_ID" },
            "MARKDOWN-00178c27": { "meta": { "width": 5, "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babies.png\\" style=\\"width:55%;\\">\\n</div>\\n", "height": 38 }, "type": "MARKDOWN", "id": "MARKDOWN-00178c27", "children": [] },
            "ROOT_ID": { "type": "ROOT", "id": "ROOT_ID", "children": [ "GRID_ID" ] },
            "ROW-1890385f": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-1890385f", "children": [ "CHART-e440d205", "CHART-0dd270f0", "CHART-a3c21bcc" ] },
            "ROW-8515ace3": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-8515ace3", "children": [ "COLUMN-25a865d6", "COLUMN-4557b6ba" ] },
            "ROW-be9526b8": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-be9526b8", "children": [ "CHART-985bfd1e", "CHART-17f13246", "CHART-729324f6" ] },
            "ROW-cc97c6ac": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-cc97c6ac", "children": [ "CHART-976960a5", "CHART-58575537" ] },
            "ROW-d2e78e59": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-d2e78e59", "children": [ "MARKDOWN-00178c27", "CHART-59444e0b" ] },
            "ROW-f0b64094": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-f0b64094", "children": [ "CHART-e8774b49" ] },
            "DASHBOARD_VERSION_KEY": "v2"
        }
        """
            # pylint: enable=line-too-long
        )
    )
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [chart for chart in slices if chart.viz_type != "markup"]
    update_slice_ids(layout, dash.slices)
    dash.dashboard_title = "Births"
    dash.position_json = json.dumps(layout, indent=4)
    dash.slug = "births"
    db.session.commit()
def load_world_bank_health_n_pop(only_metadata=False, force=False):  # pylint: disable=too-many-locals
    """Loads the world bank health dataset, slices and a dashboard

    The ``wb_health_population`` rows are written only when ``only_metadata``
    is false and either the table is missing or ``force`` is true. The
    datasource record, its SUM metrics, the charts, and the dashboard with
    slug ``world_health`` are created or refreshed on every call.
    """
    tbl_name = "wb_health_population"
    database = utils.get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        data = get_example_data("countries.json.gz")
        pdf = pd.read_json(data)
        # Dots in the source column names are replaced with underscores.
        pdf.columns = [col.replace(".", "_") for col in pdf.columns]
        pdf.year = pd.to_datetime(pdf.year)
        pdf.to_sql(
            tbl_name,
            database.get_sqla_engine(),
            if_exists="replace",
            chunksize=50,
            dtype={
                "year": DateTime(),
                "country_code": String(3),
                "country_name": String(255),
                "region": String(255),
            },
            index=False,
        )

    print(f"Creating table [{tbl_name}] reference")
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = utils.readfile(os.path.join(EXAMPLES_FOLDER, "countries.md"))
    tbl.main_dttm_col = "year"
    tbl.database = database
    tbl.filter_select_enabled = True

    # "sum__SH_DYN_AIDS" used to be listed twice; the second pass was always a
    # no-op because of the existence check below, so it is listed once here.
    metrics = [
        "sum__SP_POP_TOTL",
        "sum__SH_DYN_AIDS",
        "sum__SP_RUR_TOTL_ZS",
        "sum__SP_DYN_LE00_IN",
        "sum__SP_RUR_TOTL",
    ]
    for metric in metrics:
        if not any(col.metric_name == metric for col in tbl.metrics):
            # Names look like "<agg>__<column>": the first three characters are
            # the aggregate and everything after the "__" is the column.
            aggr_func = metric[:3]
            col = str(column(metric[5:]).compile(db.engine))
            tbl.metrics.append(
                SqlMetric(metric_name=metric, expression=f"{aggr_func}({col})")
            )

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "year",
        "groupby": [],
        "metric": "sum__SP_POP_TOTL",
        "metrics": ["sum__SP_POP_TOTL"],
        "row_limit": config["ROW_LIMIT"],
        "since": "2014-01-01",
        "until": "2014-01-02",
        "time_range": "2014-01-01 : 2014-01-02",
        "markup_type": "markdown",
        "country_fieldtype": "cca3",
        "secondary_metric": {
            "aggregate": "SUM",
            "column": {
                "column_name": "SP_RUR_TOTL",
                "optionName": "_col_SP_RUR_TOTL",
                "type": "DOUBLE",
            },
            "expressionType": "SIMPLE",
            "hasCustomLabel": True,
            "label": "Rural Population",
        },
        "entity": "country_code",
        "show_bubbles": True,
    }

    def _chart(_title, _viz, **form_data):
        """Build a table-backed Slice whose params are defaults + form_data."""
        return Slice(
            slice_name=_title,
            viz_type=_viz,
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, **form_data),
        )

    print("Creating slices")
    slices = [
        _chart(
            "Region Filter",
            "filter_box",
            viz_type="filter_box",
            date_filter=False,
            filter_configs=[
                {
                    "asc": False,
                    "clearable": True,
                    "column": "region",
                    "key": "2s98dfu",
                    "metric": "sum__SP_POP_TOTL",
                    "multiple": True,
                },
                {
                    "asc": False,
                    "clearable": True,
                    "key": "li3j2lk",
                    "column": "country_name",
                    "metric": "sum__SP_POP_TOTL",
                    "multiple": True,
                },
            ],
        ),
        _chart(
            "World's Population",
            "big_number",
            since="2000",
            viz_type="big_number",
            compare_lag="10",
            metric="sum__SP_POP_TOTL",
            compare_suffix="over 10Y",
        ),
        _chart(
            "Most Populated Countries",
            "table",
            viz_type="table",
            metrics=["sum__SP_POP_TOTL"],
            groupby=["country_name"],
        ),
        _chart(
            "Growth Rate",
            "line",
            viz_type="line",
            since="1960-01-01",
            metrics=["sum__SP_POP_TOTL"],
            num_period_compare="10",
            groupby=["country_name"],
        ),
        _chart(
            "% Rural",
            "world_map",
            viz_type="world_map",
            metric="sum__SP_RUR_TOTL_ZS",
            num_period_compare="10",
        ),
        _chart(
            "Life Expectancy VS Rural %",
            "bubble",
            viz_type="bubble",
            since="2011-01-01",
            until="2011-01-02",
            series="region",
            limit=0,
            entity="country_name",
            x="sum__SP_RUR_TOTL_ZS",
            y="sum__SP_DYN_LE00_IN",
            size="sum__SP_POP_TOTL",
            max_bubble_size="50",
            adhoc_filters=[
                {
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": [
                        "TCA",
                        "MNP",
                        "DMA",
                        "MHL",
                        "MCO",
                        "SXM",
                        "CYM",
                        "TUV",
                        "IMY",
                        "KNA",
                        "ASM",
                        "ADO",
                        "AMA",
                        "PLW",
                    ],
                    "operator": "not in",
                    "subject": "country_code",
                }
            ],
        ),
        _chart(
            "Rural Breakdown",
            "sunburst",
            viz_type="sunburst",
            groupby=["region", "country_name"],
            since="2011-01-01",
            until="2011-01-01",
        ),
        _chart(
            "World's Pop Growth",
            "area",
            since="1960-01-01",
            until="now",
            viz_type="area",
            groupby=["region"],
        ),
        _chart(
            "Box plot",
            "box_plot",
            since="1960-01-01",
            until="now",
            whisker_options="Min/max (no outliers)",
            x_ticks_layout="staggered",
            viz_type="box_plot",
            groupby=["region"],
        ),
        _chart(
            "Treemap",
            "treemap",
            since="1960-01-01",
            until="now",
            viz_type="treemap",
            metrics=["sum__SP_POP_TOTL"],
            groupby=["region", "country_code"],
        ),
        _chart(
            "Parallel Coordinates",
            "para",
            since="2011-01-01",
            until="2011-01-01",
            viz_type="para",
            limit=100,
            metrics=["sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS", "sum__SH_DYN_AIDS"],
            secondary_metric="sum__SP_POP_TOTL",
            series="country_name",
        ),
    ]
    # The last chart is recorded in misc_dash_slices and left off this
    # dashboard (see ``dash.slices = slices[:-1]`` below).
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World Bank's Data"
    slug = "world_health"
    dash = db.session.query(Dashboard).filter_by(slug=slug).first()

    if not dash:
        dash = Dashboard()
    dash.published = True
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
    {
        "CHART-36bfc934": { "children": [], "id": "CHART-36bfc934", "meta": { "chartId": 40, "height": 25, "sliceName": "Region Filter", "width": 2 }, "type": "CHART" },
        "CHART-37982887": { "children": [], "id": "CHART-37982887", "meta": { "chartId": 41, "height": 25, "sliceName": "World's Population", "width": 2 }, "type": "CHART" },
        "CHART-17e0f8d8": { "children": [], "id": "CHART-17e0f8d8", "meta": { "chartId": 42, "height": 92, "sliceName": "Most Populated Countries", "width": 3 }, "type": "CHART" },
        "CHART-2ee52f30": { "children": [], "id": "CHART-2ee52f30", "meta": { "chartId": 43, "height": 38, "sliceName": "Growth Rate", "width": 6 }, "type": "CHART" },
        "CHART-2d5b6871": { "children": [], "id": "CHART-2d5b6871", "meta": { "chartId": 44, "height": 52, "sliceName": "% Rural", "width": 7 }, "type": "CHART" },
        "CHART-0fd0d252": { "children": [], "id": "CHART-0fd0d252", "meta": { "chartId": 45, "height": 50, "sliceName": "Life Expectancy VS Rural %", "width": 8 }, "type": "CHART" },
        "CHART-97f4cb48": { "children": [], "id": "CHART-97f4cb48", "meta": { "chartId": 46, "height": 38, "sliceName": "Rural Breakdown", "width": 3 }, "type": "CHART" },
        "CHART-b5e05d6f": { "children": [], "id": "CHART-b5e05d6f", "meta": { "chartId": 47, "height": 50, "sliceName": "World's Pop Growth", "width": 4 }, "type": "CHART" },
        "CHART-e76e9f5f": { "children": [], "id": "CHART-e76e9f5f", "meta": { "chartId": 48, "height": 50, "sliceName": "Box plot", "width": 4 }, "type": "CHART" },
        "CHART-a4808bba": { "children": [], "id": "CHART-a4808bba", "meta": { "chartId": 49, "height": 50, "sliceName": "Treemap", "width": 8 }, "type": "CHART" },
        "COLUMN-071bbbad": { "children": [ "ROW-1e064e3c", "ROW-afdefba9" ], "id": "COLUMN-071bbbad", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 9 }, "type": "COLUMN" },
        "COLUMN-fe3914b8": { "children": [ "CHART-36bfc934", "CHART-37982887" ], "id": "COLUMN-fe3914b8", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 2 }, "type": "COLUMN" },
        "GRID_ID": { "children": [ "ROW-46632bc2", "ROW-3fa26c5d", "ROW-812b3f13" ], "id": "GRID_ID", "type": "GRID" },
        "HEADER_ID": { "id": "HEADER_ID", "meta": { "text": "World's Bank Data" }, "type": "HEADER" },
        "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" },
        "ROW-1e064e3c": { "children": [ "COLUMN-fe3914b8", "CHART-2d5b6871" ], "id": "ROW-1e064e3c", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" },
        "ROW-3fa26c5d": { "children": [ "CHART-b5e05d6f", "CHART-0fd0d252" ], "id": "ROW-3fa26c5d", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" },
        "ROW-46632bc2": { "children": [ "COLUMN-071bbbad", "CHART-17e0f8d8" ], "id": "ROW-46632bc2", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" },
        "ROW-812b3f13": { "children": [ "CHART-a4808bba", "CHART-e76e9f5f" ], "id": "ROW-812b3f13", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" },
        "ROW-afdefba9": { "children": [ "CHART-2ee52f30", "CHART-97f4cb48" ], "id": "ROW-afdefba9", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" },
        "DASHBOARD_VERSION_KEY": "v2"
    }
    """
        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    update_slice_ids(pos, slices)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug

    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
def load_birth_names(
    only_metadata: bool = False, force: bool = False, sample: bool = False
) -> None:
    """Loading birth name dataset from a zip file in the repo

    Args:
        only_metadata: When true, ``load_data`` is never called; only the
            datasource record, charts and dashboard are handled.
        force: When true, ``load_data`` is called even if the table exists.
        sample: Forwarded to ``load_data`` as its ``sample`` keyword.
    """
    # pylint: disable=too-many-locals
    tbl_name = "birth_names"
    database = get_example_database()
    table_exists = database.has_table_by_name(tbl_name)

    if not only_metadata and (not table_exists or force):
        load_data(tbl_name, database, sample=sample)

    obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not obj:
        print(f"Creating table [{tbl_name}] reference")
        obj = TBL(table_name=tbl_name)
        db.session.add(obj)
    obj.main_dttm_col = "ds"
    obj.database = database
    obj.filter_select_enabled = True

    # Computed column and metric are appended only when not already present.
    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            )
        )

    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    metrics = [
        {
            "expressionType": "SIMPLE",
            "column": {"column_name": "num", "type": "BIGINT"},
            "aggregate": "SUM",
            "label": "Births",
            "optionName": "metric_11",
        }
    ]
    metric = "sum__num"

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "ds",
        "groupby": [],
        "row_limit": config["ROW_LIMIT"],
        "since": "100 years ago",
        "until": "now",
        "viz_type": "table",
        "markup_type": "markdown",
    }

    admin = security_manager.find_user("admin")

    def _california_sum():
        """Return a fresh adhoc SUM metric over the ``num_california`` column."""
        return {
            "expressionType": "SIMPLE",
            "column": {
                "column_name": "num_california",
                "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
            },
            "aggregate": "SUM",
            "label": "SUM(num_california)",
        }

    def _chart(_title, _viz, **form_data):
        """Build a table-backed Slice whose params are defaults + form_data."""
        return Slice(
            slice_name=_title,
            viz_type=_viz,
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, **form_data),
        )

    print("Creating some slices")
    # Charts that are attached to the "births" dashboard below.
    slices = [
        _chart(
            "Participants",
            "big_number",
            viz_type="big_number",
            granularity_sqla="ds",
            compare_lag="5",
            compare_suffix="over 5Y",
            metric=metric,
        ),
        _chart("Genders", "pie", viz_type="pie", groupby=["gender"], metric=metric),
        _chart(
            "Trends",
            "line",
            viz_type="line",
            groupby=["name"],
            granularity_sqla="ds",
            rich_tooltip=True,
            show_legend=True,
            metrics=metrics,
        ),
        _chart(
            "Genders by State",
            "dist_bar",
            adhoc_filters=[
                {
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": ["other"],
                    "operator": "NOT IN",
                    "subject": "state",
                }
            ],
            viz_type="dist_bar",
            metrics=[
                {
                    "expressionType": "SIMPLE",
                    "column": {"column_name": "sum_boys", "type": "BIGINT(20)"},
                    "aggregate": "SUM",
                    "label": "Boys",
                    "optionName": "metric_11",
                },
                {
                    "expressionType": "SIMPLE",
                    "column": {"column_name": "sum_girls", "type": "BIGINT(20)"},
                    "aggregate": "SUM",
                    "label": "Girls",
                    "optionName": "metric_12",
                },
            ],
            groupby=["state"],
        ),
        _chart(
            "Girls",
            "table",
            groupby=["name"],
            adhoc_filters=[gen_filter("gender", "girl")],
            row_limit=50,
            timeseries_limit_metric="sum__num",
            metrics=metrics,
        ),
        _chart(
            "Girl Name Cloud",
            "word_cloud",
            viz_type="word_cloud",
            size_from="10",
            series="name",
            size_to="70",
            rotation="square",
            limit="100",
            adhoc_filters=[gen_filter("gender", "girl")],
            metric=metric,
        ),
        _chart(
            "Boys",
            "table",
            groupby=["name"],
            adhoc_filters=[gen_filter("gender", "boy")],
            row_limit=50,
            metrics=metrics,
        ),
        _chart(
            "Boy Name Cloud",
            "word_cloud",
            viz_type="word_cloud",
            size_from="10",
            series="name",
            size_to="70",
            rotation="square",
            limit="100",
            adhoc_filters=[gen_filter("gender", "boy")],
            metric=metric,
        ),
        _chart(
            "Top 10 Girl Name Share",
            "area",
            adhoc_filters=[gen_filter("gender", "girl")],
            comparison_type="values",
            groupby=["name"],
            limit=10,
            stacked_style="expand",
            time_grain_sqla="P1D",
            viz_type="area",
            # Was misspelled ``x_axis_forma``; presumably meant to set the
            # chart's x-axis format control.
            x_axis_format="smart_date",
            metrics=metrics,
        ),
        _chart(
            "Top 10 Boy Name Share",
            "area",
            adhoc_filters=[gen_filter("gender", "boy")],
            comparison_type="values",
            groupby=["name"],
            limit=10,
            stacked_style="expand",
            time_grain_sqla="P1D",
            viz_type="area",
            # Same ``x_axis_forma`` misspelling corrected here.
            x_axis_format="smart_date",
            metrics=metrics,
        ),
    ]
    # Charts that are merged and recorded in misc_dash_slices instead of being
    # attached to the "births" dashboard.
    misc_slices = [
        _chart(
            "Average and Sum Trends",
            "dual_line",
            viz_type="dual_line",
            metric={
                "expressionType": "SIMPLE",
                "column": {"column_name": "num", "type": "BIGINT(20)"},
                "aggregate": "AVG",
                "label": "AVG(num)",
                "optionName": "metric_vgops097wej_g8uff99zhk7",
            },
            metric_2="sum__num",
            granularity_sqla="ds",
            metrics=metrics,
        ),
        _chart("Num Births Trend", "line", viz_type="line", metrics=metrics),
        # Built explicitly: the only chart that also sets ``created_by``.
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
                metrics=metrics,
            ),
        ),
        _chart(
            "Number of California Births",
            "big_number_total",
            metric=_california_sum(),
            viz_type="big_number_total",
            granularity_sqla="ds",
        ),
        _chart(
            "Top 10 California Names Timeseries",
            "line",
            metrics=[_california_sum()],
            viz_type="line",
            granularity_sqla="ds",
            groupby=["name"],
            timeseries_limit_metric=_california_sum(),
            limit="10",
        ),
        _chart(
            "Names Sorted by Num in California",
            "table",
            metrics=metrics,
            groupby=["name"],
            row_limit=50,
            timeseries_limit_metric=_california_sum(),
        ),
        _chart(
            "Number of Girls",
            "big_number_total",
            metric=metric,
            viz_type="big_number_total",
            granularity_sqla="ds",
            adhoc_filters=[gen_filter("gender", "girl")],
            subheader="total female participants",
        ),
        _chart(
            "Pivot Table",
            "pivot_table",
            viz_type="pivot_table",
            groupby=["name"],
            columns=["state"],
            metrics=metrics,
        ),
    ]
    for slc in slices:
        merge_slice(slc)

    for slc in misc_slices:
        merge_slice(slc)
        misc_dash_slices.add(slc.slice_name)

    print("Creating a dashboard")
    dash = db.session.query(Dashboard).filter_by(slug="births").first()

    if not dash:
        dash = Dashboard()
        db.session.add(dash)
    dash.published = True
    dash.json_metadata = textwrap.dedent(
        """\
    {
        "label_colors": {
            "Girls": "#FF69B4",
            "Boys": "#ADD8E6",
            "girl": "#FF69B4",
            "boy": "#ADD8E6"
        }
    }"""
    )
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
    {
        "CHART-6GdlekVise": { "children": [], "id": "CHART-6GdlekVise", "meta": { "chartId": 5547, "height": 50, "sliceName": "Top 10 Girl Name Share", "width": 5 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-eh0w37bWbR" ], "type": "CHART" },
        "CHART-6n9jxb30JG": { "children": [], "id": "CHART-6n9jxb30JG", "meta": { "chartId": 5540, "height": 36, "sliceName": "Genders by State", "width": 5 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW--EyBZQlDi" ], "type": "CHART" },
        "CHART-Jj9qh1ol-N": { "children": [], "id": "CHART-Jj9qh1ol-N", "meta": { "chartId": 5545, "height": 50, "sliceName": "Boy Name Cloud", "width": 4 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-kzWtcvo8R1" ], "type": "CHART" },
        "CHART-ODvantb_bF": { "children": [], "id": "CHART-ODvantb_bF", "meta": { "chartId": 5548, "height": 50, "sliceName": "Top 10 Boy Name Share", "width": 5 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-kzWtcvo8R1" ], "type": "CHART" },
        "CHART-PAXUUqwmX9": { "children": [], "id": "CHART-PAXUUqwmX9", "meta": { "chartId": 5538, "height": 34, "sliceName": "Genders", "width": 3 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-2n0XgiHDgs" ], "type": "CHART" },
        "CHART-_T6n_K9iQN": { "children": [], "id": "CHART-_T6n_K9iQN", "meta": { "chartId": 5539, "height": 36, "sliceName": "Trends", "width": 7 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW--EyBZQlDi" ], "type": "CHART" },
        "CHART-eNY0tcE_ic": { "children": [], "id": "CHART-eNY0tcE_ic", "meta": { "chartId": 5537, "height": 34, "sliceName": "Participants", "width": 3 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-2n0XgiHDgs" ], "type": "CHART" },
        "CHART-g075mMgyYb": { "children": [], "id": "CHART-g075mMgyYb", "meta": { "chartId": 5541, "height": 50, "sliceName": "Girls", "width": 3 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-eh0w37bWbR" ], "type": "CHART" },
        "CHART-n-zGGE6S1y": { "children": [], "id": "CHART-n-zGGE6S1y", "meta": { "chartId": 5542, "height": 50, "sliceName": "Girl Name Cloud", "width": 4 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-eh0w37bWbR" ], "type": "CHART" },
        "CHART-vJIPjmcbD3": { "children": [], "id": "CHART-vJIPjmcbD3", "meta": { "chartId": 5543, "height": 50, "sliceName": "Boys", "width": 3 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-kzWtcvo8R1" ], "type": "CHART" },
        "DASHBOARD_VERSION_KEY": "v2",
        "GRID_ID": { "children": [ "ROW-2n0XgiHDgs", "ROW--EyBZQlDi", "ROW-eh0w37bWbR", "ROW-kzWtcvo8R1" ], "id": "GRID_ID", "parents": [ "ROOT_ID" ], "type": "GRID" },
        "HEADER_ID": { "id": "HEADER_ID", "meta": { "text": "Births" }, "type": "HEADER" },
        "MARKDOWN-zaflB60tbC": { "children": [], "id": "MARKDOWN-zaflB60tbC", "meta": { "code": "<div style=\\"text-align:center\\"> <h1>Birth Names Dashboard</h1> <img src=\\"/static/assets/images/babies.png\\" style=\\"width:50%;\\"></div>", "height": 34, "width": 6 }, "parents": [ "ROOT_ID", "GRID_ID", "ROW-2n0XgiHDgs" ], "type": "MARKDOWN" },
        "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" },
        "ROW--EyBZQlDi": { "children": [ "CHART-_T6n_K9iQN", "CHART-6n9jxb30JG" ], "id": "ROW--EyBZQlDi", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "parents": [ "ROOT_ID", "GRID_ID" ], "type": "ROW" },
        "ROW-2n0XgiHDgs": { "children": [ "CHART-eNY0tcE_ic", "MARKDOWN-zaflB60tbC", "CHART-PAXUUqwmX9" ], "id": "ROW-2n0XgiHDgs", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "parents": [ "ROOT_ID", "GRID_ID" ], "type": "ROW" },
        "ROW-eh0w37bWbR": { "children": [ "CHART-g075mMgyYb", "CHART-n-zGGE6S1y", "CHART-6GdlekVise" ], "id": "ROW-eh0w37bWbR", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "parents": [ "ROOT_ID", "GRID_ID" ], "type": "ROW" },
        "ROW-kzWtcvo8R1": { "children": [ "CHART-vJIPjmcbD3", "CHART-Jj9qh1ol-N", "CHART-ODvantb_bF" ], "id": "ROW-kzWtcvo8R1", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "parents": [ "ROOT_ID", "GRID_ID" ], "type": "ROW" }
    }
    """
        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = "USA Births Names"
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "births"
    db.session.commit()
def test_create_virtual_sqlatable(
    mocker: MockFixture, app_context: None, session: Session
) -> None:
    """
    Test shadow write when creating a new virtual ``SqlaTable``.

    When a new virtual ``SqlaTable`` is created, new models should also be
    created for ``Dataset`` and ``Column``.

    :param mocker: mock fixture used to make
        ``SupersetSecurityManager.get_session`` return the test session.
    :param app_context: not read in the body; presumably requested so that an
        application context is active -- confirm against the fixture.
    :param session: SQLAlchemy session the models are written to and read
        back from.
    """
    # patch session
    mocker.patch(
        "superset.security.SupersetSecurityManager.get_session",
        return_value=session,
    )

    # NOTE(review): these imports are intentionally kept function-local and
    # after the patch above; presumably importing the models requires the app
    # context -- confirm before hoisting them to module level.
    from superset.columns.models import Column
    from superset.columns.schemas import ColumnSchema
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.models import Dataset
    from superset.datasets.schemas import DatasetSchema
    from superset.models.core import Database
    from superset.tables.models import Table

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    # create the ``Table`` that the virtual dataset points to
    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    table = Table(
        name="some_table",
        schema="my_schema",
        catalog=None,
        database=database,
        columns=[
            Column(name="ds", is_temporal=True, type="TIMESTAMP"),
            Column(name="user_id", type="INTEGER"),
            Column(name="revenue", type="INTEGER"),
            Column(name="expenses", type="INTEGER"),
        ],
    )
    session.add(table)
    session.commit()

    # The query is defined exactly once: the dataset's ``expression`` is
    # asserted to equal the ``sql`` passed to ``SqlaTable``, so keeping two
    # hand-copied literals invites whitespace drift between them.
    virtual_sql = """
SELECT
  ds,
  user_id,
  revenue,
  expenses,
  revenue - expenses AS profit
FROM
  some_table"""

    # create virtual dataset
    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="user_id", type="INTEGER"),
        TableColumn(column_name="revenue", type="INTEGER"),
        TableColumn(column_name="expenses", type="INTEGER"),
        TableColumn(
            column_name="profit", type="INTEGER", expression="revenue-expenses"
        ),
    ]
    metrics = [
        SqlMetric(metric_name="cnt", expression="COUNT(*)"),
    ]
    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        default_endpoint="https://www.youtube.com/watch?v=dQw4w9WgXcQ",  # not used
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql=virtual_sql,
        params=json.dumps(
            {
                "remote_id": 64,
                "database_name": "examples",
                "import_time": 1606677834,
            }
        ),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )
    session.add(sqla_table)
    session.flush()

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on", "uuid"}

    # Values shared by every expected column below; each entry only spells
    # out what differs from these.
    column_defaults = {
        "is_additive": False,
        "extra_json": "{}",
        "is_partition": False,
        "expression": None,
        "unit": None,
        "warning_text": None,
        "created_by": None,
        "is_increase_desired": True,
        "description": None,
        "is_spatial": False,
        "is_physical": False,
        "changed_by": None,
        "is_temporal": False,
        "is_aggregation": False,
        "external_url": None,
        "is_managed_externally": False,
    }

    def expected_column(**overrides):
        """Return the expected serialized column: defaults plus ``overrides``."""
        return {**column_defaults, **overrides}

    # check that columns were created
    column_schema = ColumnSchema()
    # Explicit ordering: the comparison below is against an ordered list, and
    # a query without ORDER BY leaves the row order up to the backend.
    column_schemas = [
        {k: v for k, v in column_schema.dump(column).items() if k not in ignored_keys}
        for column in session.query(Column).order_by(Column.id).all()
    ]
    assert column_schemas == [
        # columns of the ``Table`` created above (ids 1-4)
        expected_column(
            id=1, name="ds", type="TIMESTAMP", is_physical=True, is_temporal=True
        ),
        expected_column(id=2, name="user_id", type="INTEGER", is_physical=True),
        expected_column(id=3, name="revenue", type="INTEGER", is_physical=True),
        expected_column(id=4, name="expenses", type="INTEGER", is_physical=True),
        # columns of the virtual dataset (ids 5-9)
        expected_column(
            id=5, name="ds", type="TIMESTAMP", expression="ds", is_temporal=True
        ),
        expected_column(id=6, name="user_id", type="INTEGER", expression="user_id"),
        expected_column(id=7, name="revenue", type="INTEGER", expression="revenue"),
        expected_column(id=8, name="expenses", type="INTEGER", expression="expenses"),
        expected_column(
            id=9, name="profit", type="INTEGER", expression="revenue-expenses"
        ),
        # the ``cnt`` metric is stored as an aggregation column (id 10)
        expected_column(
            id=10,
            name="cnt",
            type="Unknown",
            expression="COUNT(*)",
            is_aggregation=True,
        ),
    ]

    # check that dataset was created, and has a reference to the table
    dataset_schema = DatasetSchema()
    datasets = [
        {
            k: v
            for k, v in dataset_schema.dump(dataset).items()
            if k not in ignored_keys
        }
        for dataset in session.query(Dataset).order_by(Dataset.id).all()
    ]
    assert datasets == [
        {
            "id": 1,
            "sqlatable_id": 1,
            "name": "old_dataset",
            "changed_by": None,
            "created_by": None,
            "columns": [5, 6, 7, 8, 9, 10],
            "is_physical": False,
            "tables": [1],
            "extra_json": "{}",
            "external_url": None,
            "is_managed_externally": False,
            "expression": virtual_sql,
        }
    ]