def load_test_users_run(): """ Loads admin, alpha, and gamma user for testing purposes Syncs permissions for those users/roles """ if config.get('TESTING'): security_manager.sync_role_definitions() gamma_sqllab_role = security_manager.add_role('gamma_sqllab') for perm in security_manager.find_role('Gamma').permissions: security_manager.add_permission_role(gamma_sqllab_role, perm) utils.get_or_create_main_db() db_perm = utils.get_main_database(security_manager.get_session).perm security_manager.merge_perm('database_access', db_perm) db_pvm = security_manager.find_permission_view_menu( view_menu_name=db_perm, permission_name='database_access') gamma_sqllab_role.permissions.append(db_pvm) for perm in security_manager.find_role('sql_lab').permissions: security_manager.add_permission_role(gamma_sqllab_role, perm) admin = security_manager.find_user('admin') if not admin: security_manager.add_user( 'admin', 'admin', ' user', '*****@*****.**', security_manager.find_role('Admin'), password='******') gamma = security_manager.find_user('gamma') if not gamma: security_manager.add_user( 'gamma', 'gamma', 'user', '*****@*****.**', security_manager.find_role('Gamma'), password='******') gamma2 = security_manager.find_user('gamma2') if not gamma2: security_manager.add_user( 'gamma2', 'gamma2', 'user', '*****@*****.**', security_manager.find_role('Gamma'), password='******') gamma_sqllab_user = security_manager.find_user('gamma_sqllab') if not gamma_sqllab_user: security_manager.add_user( 'gamma_sqllab', 'gamma_sqllab', 'user', '*****@*****.**', gamma_sqllab_role, password='******') alpha = security_manager.find_user('alpha') if not alpha: security_manager.add_user( 'alpha', 'alpha', 'user', '*****@*****.**', security_manager.find_role('Alpha'), password='******') security_manager.get_session.commit()
def load_bart_lines(): tbl_name = 'bart_lines' content = get_example_data('bart-lines.json.gz') df = pd.read_json(content, encoding='latin-1') df['path_json'] = df.path.map(json.dumps) df['polyline'] = df.path.map(polyline.encode) del df['path'] df.to_sql( tbl_name, db.engine, if_exists='replace', chunksize=500, dtype={ 'color': String(255), 'name': String(255), 'polyline': Text, 'path_json': Text, }, index=False) print('Creating table {} reference'.format(tbl_name)) tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'BART lines' tbl.database = get_or_create_main_db() db.session.merge(tbl) db.session.commit() tbl.fetch_metadata()
def load_sf_population_polygons(): tbl_name = 'sf_population_polygons' with gzip.open(os.path.join(DATA_FOLDER, 'sf_population.json.gz')) as f: df = pd.read_json(f) df['contour'] = df.contour.map(json.dumps) df.to_sql( tbl_name, db.engine, if_exists='replace', chunksize=500, dtype={ 'zipcode': BigInteger, 'population': BigInteger, 'contour': Text, 'area': BigInteger, }, index=False) print('Creating table {} reference'.format(tbl_name)) tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Population density of San Francisco' tbl.database = utils.get_or_create_main_db() db.session.merge(tbl) db.session.commit() tbl.fetch_metadata()
def load_paris_iris_geojson(): tbl_name = 'paris_iris_mapping' with gzip.open(os.path.join(DATA_FOLDER, 'paris_iris.json.gz')) as f: df = pd.read_json(f) df['features'] = df.features.map(json.dumps) df.to_sql( tbl_name, db.engine, if_exists='replace', chunksize=500, dtype={ 'color': String(255), 'name': String(255), 'features': Text, 'type': Text, }, index=False) print('Creating table {} reference'.format(tbl_name)) tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Map of Paris' tbl.database = utils.get_or_create_main_db() db.session.merge(tbl) db.session.commit() tbl.fetch_metadata()
def load_random_time_series_data(): """Loading random time series data from a zip file in the repo""" with gzip.open(os.path.join(DATA_FOLDER, 'random_time_series.json.gz')) as f: pdf = pd.read_json(f) pdf.ds = pd.to_datetime(pdf.ds, unit='s') pdf.to_sql( 'random_time_series', db.engine, if_exists='replace', chunksize=500, dtype={ 'ds': DateTime, }, index=False) print('Done loading table!') print('-' * 80) print('Creating table [random_time_series] reference') obj = db.session.query(TBL).filter_by(table_name='random_time_series').first() if not obj: obj = TBL(table_name='random_time_series') obj.main_dttm_col = 'ds' obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': 'day', 'row_limit': config.get('ROW_LIMIT'), 'since': '1 year ago', 'until': 'now', 'metric': 'count', 'where': '', 'viz_type': 'cal_heatmap', 'domain_granularity': 'month', 'subdomain_granularity': 'day', } print('Creating a slice') slc = Slice( slice_name='Calendar Heatmap', viz_type='cal_heatmap', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc)
def load_flights(): """Loading random time series data from a zip file in the repo""" tbl_name = 'flights' with gzip.open(os.path.join(DATA_FOLDER, 'flight_data.csv.gz')) as f: pdf = pd.read_csv(f, encoding='latin-1') # Loading airports info to join and get lat/long with gzip.open(os.path.join(DATA_FOLDER, 'airports.csv.gz')) as f: airports = pd.read_csv(f, encoding='latin-1') airports = airports.set_index('IATA_CODE') pdf['ds'] = pdf.YEAR.map(str) + '-0' + pdf.MONTH.map(str) + '-0' + pdf.DAY.map(str) pdf.ds = pd.to_datetime(pdf.ds) del pdf['YEAR'] del pdf['MONTH'] del pdf['DAY'] pdf = pdf.join(airports, on='ORIGIN_AIRPORT', rsuffix='_ORIG') pdf = pdf.join(airports, on='DESTINATION_AIRPORT', rsuffix='_DEST') pdf.to_sql( tbl_name, db.engine, if_exists='replace', chunksize=500, dtype={ 'ds': DateTime, }, index=False) tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Random set of flights in the US' tbl.database = utils.get_or_create_main_db() db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() print('Done loading table!')
def load_country_map_data(): """Loading data for map with country map""" csv_bytes = get_example_data( 'birth_france_data_for_country_map.csv', is_gzip=False, make_bytes=True) data = pd.read_csv(csv_bytes, encoding='utf-8') data['dttm'] = datetime.datetime.now().date() data.to_sql( # pylint: disable=no-member 'birth_france_by_region', db.engine, if_exists='replace', chunksize=500, dtype={ 'DEPT_ID': String(10), '2003': BigInteger, '2004': BigInteger, '2005': BigInteger, '2006': BigInteger, '2007': BigInteger, '2008': BigInteger, '2009': BigInteger, '2010': BigInteger, '2011': BigInteger, '2012': BigInteger, '2013': BigInteger, '2014': BigInteger, 'dttm': Date(), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table reference') obj = db.session.query(TBL).filter_by(table_name='birth_france_by_region').first() if not obj: obj = TBL(table_name='birth_france_by_region') obj.main_dttm_col = 'dttm' obj.database = utils.get_or_create_main_db() if not any(col.metric_name == 'avg__2004' for col in obj.metrics): col = str(column('2004').compile(db.engine)) obj.metrics.append(SqlMetric( metric_name='avg__2004', expression=f'AVG({col})', )) db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': '', 'since': '', 'until': '', 'where': '', 'viz_type': 'country_map', 'entity': 'DEPT_ID', 'metric': { 'expressionType': 'SIMPLE', 'column': { 'type': 'INT', 'column_name': '2004', }, 'aggregate': 'AVG', 'label': 'Boys', 'optionName': 'metric_112342', }, 'row_limit': 500000, } print('Creating a slice') slc = Slice( slice_name='Birth in France by department in 2016', viz_type='country_map', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_long_lat_data(): """Loading lat/long data from a csv file in the repo""" data = get_example_data("san_francisco.csv.gz", make_bytes=True) pdf = pd.read_csv(data, encoding="utf-8") start = datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) pdf["datetime"] = [ start + datetime.timedelta(hours=i * 24 / (len(pdf) - 1)) for i in range(len(pdf)) ] pdf["occupancy"] = [random.randint(1, 6) for _ in range(len(pdf))] pdf["radius_miles"] = [random.uniform(1, 3) for _ in range(len(pdf))] pdf["geohash"] = pdf[["LAT", "LON"]].apply(lambda x: geohash.encode(*x), axis=1) pdf["delimited"] = pdf["LAT"].map(str).str.cat(pdf["LON"].map(str), sep=",") pdf.to_sql( # pylint: disable=no-member "long_lat", db.engine, if_exists="replace", chunksize=500, dtype={ "longitude": Float(), "latitude": Float(), "number": Float(), "street": String(100), "unit": String(10), "city": String(50), "district": String(50), "region": String(50), "postcode": Float(), "id": String(100), "datetime": DateTime(), "occupancy": Float(), "radius_miles": Float(), "geohash": String(12), "delimited": String(60), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table reference") obj = db.session.query(TBL).filter_by(table_name="long_lat").first() if not obj: obj = TBL(table_name="long_lat") obj.main_dttm_col = "datetime" obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { "granularity_sqla": "day", "since": "2014-01-01", "until": "now", "where": "", "viz_type": "mapbox", "all_columns_x": "LON", "all_columns_y": "LAT", "mapbox_style": "mapbox://styles/mapbox/light-v9", "all_columns": ["occupancy"], "row_limit": 500000, } print("Creating a slice") slc = Slice( slice_name="Mapbox Long/Lat", viz_type="mapbox", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_unicode_test_data(): """Loading unicode test dataset from a csv file in the repo""" data = get_example_data( 'unicode_utf8_unixnl_test.csv', is_gzip=False, make_bytes=True) df = pd.read_csv(data, encoding='utf-8') # generate date/numeric data df['dttm'] = datetime.datetime.now().date() df['value'] = [random.randint(1, 100) for _ in range(len(df))] df.to_sql( # pylint: disable=no-member 'unicode_test', db.engine, if_exists='replace', chunksize=500, dtype={ 'phrase': String(500), 'short_phrase': String(10), 'with_missing': String(100), 'dttm': Date(), 'value': Float(), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table [unicode_test] reference') obj = db.session.query(TBL).filter_by(table_name='unicode_test').first() if not obj: obj = TBL(table_name='unicode_test') obj.main_dttm_col = 'dttm' obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': 'dttm', 'groupby': [], 'metric': 'sum__value', 'row_limit': config.get('ROW_LIMIT'), 'since': '100 years ago', 'until': 'now', 'where': '', 'viz_type': 'word_cloud', 'size_from': '10', 'series': 'short_phrase', 'size_to': '70', 'rotation': 'square', 'limit': '100', } print('Creating a slice') slc = Slice( slice_name='Unicode Cloud', viz_type='word_cloud', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) print('Creating a dashboard') dash = ( db.session.query(Dash) .filter_by(dashboard_title='Unicode Test') .first() ) if not dash: dash = Dash() js = """\ { "CHART-Hkx6154FEm": { "children": [], "id": "CHART-Hkx6154FEm", "meta": { "chartId": 2225, "height": 30, "sliceName": "slice 1", "width": 4 }, "type": "CHART" }, "GRID_ID": { "children": [ "ROW-SyT19EFEQ" ], "id": "GRID_ID", "type": "GRID" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW-SyT19EFEQ": { "children": [ "CHART-Hkx6154FEm" ], "id": "ROW-SyT19EFEQ", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "DASHBOARD_VERSION_KEY": "v2" } """ dash.dashboard_title = 'Unicode Test' pos = json.loads(js) update_slice_ids(pos, [slc]) dash.position_json = json.dumps(pos, indent=4) dash.slug = 'unicode-test' dash.slices = [slc] db.session.merge(dash) db.session.commit()
def load_test_users_run(): """ Loads admin, alpha, and gamma user for testing purposes Syncs permissions for those users/roles """ if config.get("TESTING"): security_manager.sync_role_definitions() gamma_sqllab_role = security_manager.add_role("gamma_sqllab") for perm in security_manager.find_role("Gamma").permissions: security_manager.add_permission_role(gamma_sqllab_role, perm) utils.get_or_create_main_db() db_perm = utils.get_main_database().perm security_manager.add_permission_view_menu("database_access", db_perm) db_pvm = security_manager.find_permission_view_menu( view_menu_name=db_perm, permission_name="database_access" ) gamma_sqllab_role.permissions.append(db_pvm) for perm in security_manager.find_role("sql_lab").permissions: security_manager.add_permission_role(gamma_sqllab_role, perm) admin = security_manager.find_user("admin") if not admin: security_manager.add_user( "admin", "admin", " user", "*****@*****.**", security_manager.find_role("Admin"), password="******", ) gamma = security_manager.find_user("gamma") if not gamma: security_manager.add_user( "gamma", "gamma", "user", "*****@*****.**", security_manager.find_role("Gamma"), password="******", ) gamma2 = security_manager.find_user("gamma2") if not gamma2: security_manager.add_user( "gamma2", "gamma2", "user", "*****@*****.**", security_manager.find_role("Gamma"), password="******", ) gamma_sqllab_user = security_manager.find_user("gamma_sqllab") if not gamma_sqllab_user: security_manager.add_user( "gamma_sqllab", "gamma_sqllab", "user", "*****@*****.**", gamma_sqllab_role, password="******", ) alpha = security_manager.find_user("alpha") if not alpha: security_manager.add_user( "alpha", "alpha", "user", "*****@*****.**", security_manager.find_role("Alpha"), password="******", ) security_manager.get_session.commit()
def load_test_users_run(): """ Loads admin, alpha, and gamma user for testing purposes Syncs permissions for those users/roles """ if config.get('TESTING'): security_manager.sync_role_definitions() gamma_sqllab_role = security_manager.add_role('gamma_sqllab') for perm in security_manager.find_role('Gamma').permissions: security_manager.add_permission_role(gamma_sqllab_role, perm) utils.get_or_create_main_db() db_perm = utils.get_main_database(security_manager.get_session).perm security_manager.merge_perm('database_access', db_perm) db_pvm = security_manager.find_permission_view_menu( view_menu_name=db_perm, permission_name='database_access') gamma_sqllab_role.permissions.append(db_pvm) for perm in security_manager.find_role('sql_lab').permissions: security_manager.add_permission_role(gamma_sqllab_role, perm) admin = security_manager.find_user('admin') if not admin: security_manager.add_user('admin', 'admin', ' user', '*****@*****.**', security_manager.find_role('Admin'), password='******') gamma = security_manager.find_user('gamma') if not gamma: security_manager.add_user('gamma', 'gamma', 'user', '*****@*****.**', security_manager.find_role('Gamma'), password='******') gamma2 = security_manager.find_user('gamma2') if not gamma2: security_manager.add_user('gamma2', 'gamma2', 'user', '*****@*****.**', security_manager.find_role('Gamma'), password='******') gamma_sqllab_user = security_manager.find_user('gamma_sqllab') if not gamma_sqllab_user: security_manager.add_user('gamma_sqllab', 'gamma_sqllab', 'user', '*****@*****.**', gamma_sqllab_role, password='******') alpha = security_manager.find_user('alpha') if not alpha: security_manager.add_user('alpha', 'alpha', 'user', '*****@*****.**', security_manager.find_role('Alpha'), password='******') security_manager.get_session.commit()
def load_world_bank_health_n_pop(): """Loads the world bank health dataset, slices and a dashboard""" tbl_name = "wb_health_population" data = get_example_data("countries.json.gz") pdf = pd.read_json(data) pdf.columns = [col.replace(".", "_") for col in pdf.columns] pdf.year = pd.to_datetime(pdf.year) pdf.to_sql( tbl_name, db.engine, if_exists="replace", chunksize=50, dtype={ "year": DateTime(), "country_code": String(3), "country_name": String(255), "region": String(255), }, index=False, ) print("Creating table [wb_health_population] reference") tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = utils.readfile(os.path.join(DATA_FOLDER, "countries.md")) tbl.main_dttm_col = "year" tbl.database = utils.get_or_create_main_db() tbl.filter_select_enabled = True metrics = [ "sum__SP_POP_TOTL", "sum__SH_DYN_AIDS", "sum__SH_DYN_AIDS", "sum__SP_RUR_TOTL_ZS", "sum__SP_DYN_LE00_IN", "sum__SP_RUR_TOTL", ] for m in metrics: if not any(col.metric_name == m for col in tbl.metrics): aggr_func = m[:3] col = str(column(m[5:]).compile(db.engine)) tbl.metrics.append( SqlMetric(metric_name=m, expression=f"{aggr_func}({col})")) db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() defaults = { "compare_lag": "10", "compare_suffix": "o10Y", "limit": "25", "granularity_sqla": "year", "groupby": [], "metric": "sum__SP_POP_TOTL", "metrics": ["sum__SP_POP_TOTL"], "row_limit": config.get("ROW_LIMIT"), "since": "2014-01-01", "until": "2014-01-02", "time_range": "2014-01-01 : 2014-01-02", "where": "", "markup_type": "markdown", "country_fieldtype": "cca3", "secondary_metric": "sum__SP_POP_TOTL", "entity": "country_code", "show_bubbles": True, } print("Creating slices") slices = [ Slice( slice_name="Region Filter", viz_type="filter_box", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="filter_box", date_filter=False, filter_configs=[ { "asc": False, "clearable": True, "column": "region", "key": "2s98dfu", "metric": "sum__SP_POP_TOTL", "multiple": True, }, { "asc": False, "clearable": True, "key": "li3j2lk", "column": "country_name", "metric": "sum__SP_POP_TOTL", "multiple": True, }, ], ), ), Slice( slice_name="World's Population", viz_type="big_number", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="2000", viz_type="big_number", compare_lag="10", metric="sum__SP_POP_TOTL", compare_suffix="over 10Y", ), ), Slice( slice_name="Most Populated Countries", viz_type="table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="table", metrics=["sum__SP_POP_TOTL"], groupby=["country_name"], ), ), Slice( slice_name="Growth Rate", viz_type="line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="line", since="1960-01-01", metrics=["sum__SP_POP_TOTL"], num_period_compare="10", groupby=["country_name"], ), ), Slice( slice_name="% Rural", viz_type="world_map", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="world_map", metric="sum__SP_RUR_TOTL_ZS", num_period_compare="10", ), ), Slice( slice_name="Life Expectancy VS Rural %", viz_type="bubble", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="bubble", since="2011-01-01", until="2011-01-02", series="region", limit=0, entity="country_name", x="sum__SP_RUR_TOTL_ZS", y="sum__SP_DYN_LE00_IN", size="sum__SP_POP_TOTL", max_bubble_size="50", filters=[{ "col": "country_code", "val": [ "TCA", "MNP", "DMA", "MHL", "MCO", "SXM", "CYM", "TUV", "IMY", "KNA", "ASM", "ADO", "AMA", "PLW", ], "op": "not in", }], ), ), Slice( slice_name="Rural Breakdown", viz_type="sunburst", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="sunburst", groupby=["region", "country_name"], secondary_metric="sum__SP_RUR_TOTL", since="2011-01-01", until="2011-01-01", ), ), Slice( slice_name="World's Pop Growth", viz_type="area", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="1960-01-01", until="now", viz_type="area", groupby=["region"], ), ), Slice( slice_name="Box plot", viz_type="box_plot", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="1960-01-01", until="now", whisker_options="Min/max (no outliers)", x_ticks_layout="staggered", viz_type="box_plot", groupby=["region"], ), ), Slice( slice_name="Treemap", viz_type="treemap", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="1960-01-01", until="now", viz_type="treemap", metrics=["sum__SP_POP_TOTL"], groupby=["region", "country_code"], ), ), Slice( slice_name="Parallel Coordinates", viz_type="para", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, since="2011-01-01", until="2011-01-01", viz_type="para", limit=100, metrics=[ "sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS", "sum__SH_DYN_AIDS" ], secondary_metric="sum__SP_POP_TOTL", series="country_name", ), ), ] misc_dash_slices.add(slices[-1].slice_name) for slc in slices: merge_slice(slc) print("Creating a World's Health Bank dashboard") dash_name = "World's Bank Data" slug = "world_health" dash = db.session.query(Dash).filter_by(slug=slug).first() if not dash: dash = Dash() js = textwrap.dedent("""\ { "CHART-36bfc934": { "children": [], "id": "CHART-36bfc934", "meta": { "chartId": 40, "height": 25, "sliceName": "Region Filter", "width": 2 }, "type": "CHART" }, "CHART-37982887": { "children": [], "id": "CHART-37982887", "meta": { "chartId": 41, "height": 25, "sliceName": "World's Population", "width": 2 }, "type": "CHART" }, "CHART-17e0f8d8": { "children": [], "id": "CHART-17e0f8d8", "meta": { "chartId": 42, "height": 92, "sliceName": "Most Populated Countries", "width": 3 }, "type": "CHART" }, "CHART-2ee52f30": { "children": [], "id": "CHART-2ee52f30", "meta": { "chartId": 43, "height": 38, "sliceName": "Growth Rate", "width": 6 }, "type": "CHART" }, "CHART-2d5b6871": { "children": [], "id": "CHART-2d5b6871", "meta": { "chartId": 44, "height": 52, "sliceName": "% Rural", "width": 7 }, "type": "CHART" }, "CHART-0fd0d252": { "children": [], "id": "CHART-0fd0d252", "meta": { "chartId": 45, "height": 50, "sliceName": "Life Expectancy VS Rural %", "width": 8 }, "type": "CHART" }, "CHART-97f4cb48": { "children": [], "id": "CHART-97f4cb48", "meta": { "chartId": 46, "height": 38, "sliceName": "Rural Breakdown", "width": 3 }, "type": "CHART" }, "CHART-b5e05d6f": { "children": [], "id": "CHART-b5e05d6f", "meta": { "chartId": 47, "height": 50, "sliceName": "World's Pop Growth", "width": 4 }, "type": "CHART" }, "CHART-e76e9f5f": { "children": [], "id": "CHART-e76e9f5f", "meta": { "chartId": 48, "height": 50, "sliceName": "Box plot", "width": 4 }, "type": "CHART" }, "CHART-a4808bba": { "children": [], "id": "CHART-a4808bba", "meta": { "chartId": 49, "height": 50, "sliceName": "Treemap", "width": 8 }, "type": "CHART" }, "COLUMN-071bbbad": { "children": [ "ROW-1e064e3c", "ROW-afdefba9" ], "id": "COLUMN-071bbbad", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 9 }, "type": "COLUMN" }, "COLUMN-fe3914b8": { "children": [ "CHART-36bfc934", "CHART-37982887" ], "id": "COLUMN-fe3914b8", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 2 }, "type": "COLUMN" }, "GRID_ID": { "children": [ "ROW-46632bc2", "ROW-3fa26c5d", "ROW-812b3f13" ], "id": "GRID_ID", "type": "GRID" }, "HEADER_ID": { "id": "HEADER_ID", "meta": { "text": "World's Bank Data" }, "type": "HEADER" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW-1e064e3c": { "children": [ "COLUMN-fe3914b8", "CHART-2d5b6871" ], "id": "ROW-1e064e3c", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-3fa26c5d": { "children": [ "CHART-b5e05d6f", "CHART-0fd0d252" ], "id": "ROW-3fa26c5d", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-46632bc2": { "children": [ "COLUMN-071bbbad", "CHART-17e0f8d8" ], "id": "ROW-46632bc2", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-812b3f13": { "children": [ "CHART-a4808bba", "CHART-e76e9f5f" ], "id": "ROW-812b3f13", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-afdefba9": { "children": [ "CHART-2ee52f30", "CHART-97f4cb48" ], "id": "ROW-afdefba9", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "DASHBOARD_VERSION_KEY": "v2" } """) pos = json.loads(js) update_slice_ids(pos, slices) dash.dashboard_title = dash_name dash.position_json = json.dumps(pos, indent=4) dash.slug = slug dash.slices = slices[:-1] db.session.merge(dash) db.session.commit()
def load_multiformat_time_series(): """Loading time series data from a zip file in the repo""" data = get_example_data('multiformat_time_series.json.gz') pdf = pd.read_json(data) pdf.ds = pd.to_datetime(pdf.ds, unit='s') pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s') pdf.to_sql( 'multiformat_time_series', db.engine, if_exists='replace', chunksize=500, dtype={ 'ds': Date, 'ds2': DateTime, 'epoch_s': BigInteger, 'epoch_ms': BigInteger, 'string0': String(100), 'string1': String(100), 'string2': String(100), 'string3': String(100), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table [multiformat_time_series] reference') obj = db.session.query(TBL).filter_by(table_name='multiformat_time_series').first() if not obj: obj = TBL(table_name='multiformat_time_series') obj.main_dttm_col = 'ds' obj.database = utils.get_or_create_main_db() dttm_and_expr_dict = { 'ds': [None, None], 'ds2': [None, None], 'epoch_s': ['epoch_s', None], 'epoch_ms': ['epoch_ms', None], 'string2': ['%Y%m%d-%H%M%S', None], 'string1': ['%Y-%m-%d^%H:%M:%S', None], 'string0': ['%Y-%m-%d %H:%M:%S.%f', None], 'string3': ['%Y/%m/%d%H:%M:%S.%f', None], } for col in obj.columns: dttm_and_expr = dttm_and_expr_dict[col.column_name] col.python_date_format = dttm_and_expr[0] col.dbatabase_expr = dttm_and_expr[1] col.is_dttm = True db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj print('Creating Heatmap charts') for i, col in enumerate(tbl.columns): slice_data = { 'metrics': ['count'], 'granularity_sqla': col.column_name, 'row_limit': config.get('ROW_LIMIT'), 'since': '2015', 'until': '2016', 'where': '', 'viz_type': 'cal_heatmap', 'domain_granularity': 'month', 'subdomain_granularity': 'day', } slc = Slice( slice_name=f'Calendar Heatmap multiformat {i}', viz_type='cal_heatmap', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) misc_dash_slices.add('Calendar Heatmap multiformat 0')
def load_energy(): """Loads an energy related dataset to use with sankey and graphs""" tbl_name = 'energy_usage' with gzip.open(os.path.join(DATA_FOLDER, 'energy.json.gz')) as f: pdf = pd.read_json(f) pdf.to_sql(tbl_name, db.engine, if_exists='replace', chunksize=500, dtype={ 'source': String(255), 'target': String(255), 'value': Float(), }, index=False) print('Creating table [wb_health_population] reference') tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Energy consumption' tbl.database = utils.get_or_create_main_db() if not any(col.metric_name == 'sum__value' for col in tbl.metrics): tbl.metrics.append( SqlMetric( metric_name='sum__value', expression='SUM(value)', )) db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() slc = Slice( slice_name='Energy Sankey', viz_type='sankey', datasource_type='table', datasource_id=tbl.id, params=textwrap.dedent("""\ { "collapsed_fieldsets": "", "groupby": [ "source", "target" ], "having": "", "metric": "sum__value", "row_limit": "5000", "slice_name": "Energy Sankey", "viz_type": "sankey", "where": "" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc) slc = Slice( slice_name='Energy Force Layout', viz_type='directed_force', datasource_type='table', datasource_id=tbl.id, params=textwrap.dedent("""\ { "charge": "-500", "collapsed_fieldsets": "", "groupby": [ "source", "target" ], "having": "", "link_length": "200", "metric": "sum__value", "row_limit": "5000", "slice_name": "Force", "viz_type": "directed_force", "where": "" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc) slc = Slice( slice_name='Heatmap', viz_type='heatmap', datasource_type='table', datasource_id=tbl.id, params=textwrap.dedent("""\ { "all_columns_x": "source", "all_columns_y": "target", "canvas_image_rendering": "pixelated", "collapsed_fieldsets": "", "having": "", "linear_color_scheme": "blue_white_yellow", "metric": "sum__value", "normalize_across": "heatmap", "slice_name": "Heatmap", "viz_type": "heatmap", "where": "", "xscale_interval": "1", "yscale_interval": "1" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_energy(): """Loads an energy related dataset to use with sankey and graphs""" tbl_name = 'energy_usage' with gzip.open(os.path.join(DATA_FOLDER, 'energy.json.gz')) as f: pdf = pd.read_json(f) pdf.to_sql( tbl_name, db.engine, if_exists='replace', chunksize=500, dtype={ 'source': String(255), 'target': String(255), 'value': Float(), }, index=False) print('Creating table [wb_health_population] reference') tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = 'Energy consumption' tbl.database = utils.get_or_create_main_db() db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() slc = Slice( slice_name='Energy Sankey', viz_type='sankey', datasource_type='table', datasource_id=tbl.id, params=textwrap.dedent("""\ { "collapsed_fieldsets": "", "groupby": [ "source", "target" ], "having": "", "metric": "sum__value", "row_limit": "5000", "slice_name": "Energy Sankey", "viz_type": "sankey", "where": "" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc) slc = Slice( slice_name='Energy Force Layout', viz_type='directed_force', datasource_type='table', datasource_id=tbl.id, params=textwrap.dedent("""\ { "charge": "-500", "collapsed_fieldsets": "", "groupby": [ "source", "target" ], "having": "", "link_length": "200", "metric": "sum__value", "row_limit": "5000", "slice_name": "Force", "viz_type": "directed_force", "where": "" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc) slc = Slice( slice_name='Heatmap', viz_type='heatmap', datasource_type='table', datasource_id=tbl.id, params=textwrap.dedent("""\ { "all_columns_x": "source", "all_columns_y": "target", "canvas_image_rendering": "pixelated", "collapsed_fieldsets": "", "having": "", "linear_color_scheme": "blue_white_yellow", "metric": "sum__value", "normalize_across": "heatmap", "slice_name": "Heatmap", "viz_type": "heatmap", "where": "", "xscale_interval": "1", "yscale_interval": "1" } """), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_country_map_data(): """Loading data for map with country map""" csv_bytes = get_example_data( 'birth_france_data_for_country_map.csv', is_gzip=False, make_bytes=True) data = pd.read_csv(csv_bytes, encoding='utf-8') data['dttm'] = datetime.datetime.now().date() data.to_sql( # pylint: disable=no-member 'birth_france_by_region', db.engine, if_exists='replace', chunksize=500, dtype={ 'DEPT_ID': String(10), '2003': BigInteger, '2004': BigInteger, '2005': BigInteger, '2006': BigInteger, '2007': BigInteger, '2008': BigInteger, '2009': BigInteger, '2010': BigInteger, '2011': BigInteger, '2012': BigInteger, '2013': BigInteger, '2014': BigInteger, 'dttm': Date(), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table reference') obj = db.session.query(TBL).filter_by(table_name='birth_france_by_region').first() if not obj: obj = TBL(table_name='birth_france_by_region') obj.main_dttm_col = 'dttm' obj.database = utils.get_or_create_main_db() if not any(col.metric_name == 'avg__2004' for col in obj.metrics): obj.metrics.append(SqlMetric( metric_name='avg__2004', expression='AVG(2004)', )) db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': '', 'since': '', 'until': '', 'where': '', 'viz_type': 'country_map', 'entity': 'DEPT_ID', 'metric': { 'expressionType': 'SIMPLE', 'column': { 'type': 'INT', 'column_name': '2004', }, 'aggregate': 'AVG', 'label': 'Boys', 'optionName': 'metric_112342', }, 'row_limit': 500000, } print('Creating a slice') slc = Slice( slice_name='Birth in France by department in 2016', viz_type='country_map', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_multiformat_time_series(): """Loading time series data from a zip file in the repo""" data = get_example_data("multiformat_time_series.json.gz") pdf = pd.read_json(data) pdf.ds = pd.to_datetime(pdf.ds, unit="s") pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s") pdf.to_sql( "multiformat_time_series", db.engine, if_exists="replace", chunksize=500, dtype={ "ds": Date, "ds2": DateTime, "epoch_s": BigInteger, "epoch_ms": BigInteger, "string0": String(100), "string1": String(100), "string2": String(100), "string3": String(100), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table [multiformat_time_series] reference") obj = db.session.query(TBL).filter_by( table_name="multiformat_time_series").first() if not obj: obj = TBL(table_name="multiformat_time_series") obj.main_dttm_col = "ds" obj.database = utils.get_or_create_main_db() dttm_and_expr_dict = { "ds": [None, None], "ds2": [None, None], "epoch_s": ["epoch_s", None], "epoch_ms": ["epoch_ms", None], "string2": ["%Y%m%d-%H%M%S", None], "string1": ["%Y-%m-%d^%H:%M:%S", None], "string0": ["%Y-%m-%d %H:%M:%S.%f", None], "string3": ["%Y/%m/%d%H:%M:%S.%f", None], } for col in obj.columns: dttm_and_expr = dttm_and_expr_dict[col.column_name] col.python_date_format = dttm_and_expr[0] col.dbatabase_expr = dttm_and_expr[1] col.is_dttm = True db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj print("Creating Heatmap charts") for i, col in enumerate(tbl.columns): slice_data = { "metrics": ["count"], "granularity_sqla": col.column_name, "row_limit": config.get("ROW_LIMIT"), "since": "2015", "until": "2016", "where": "", "viz_type": "cal_heatmap", "domain_granularity": "month", "subdomain_granularity": "day", } slc = Slice( slice_name=f"Calendar Heatmap multiformat {i}", viz_type="cal_heatmap", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) misc_dash_slices.add("Calendar Heatmap multiformat 0")
def init(): """Inits the Superset application""" utils.get_or_create_main_db() security_manager.sync_role_definitions()
def load_unicode_test_data(): """Loading unicode test dataset from a csv file in the repo""" data = get_example_data('unicode_utf8_unixnl_test.csv', is_gzip=False, make_bytes=True) df = pd.read_csv(data, encoding='utf-8') # generate date/numeric data df['dttm'] = datetime.datetime.now().date() df['value'] = [random.randint(1, 100) for _ in range(len(df))] df.to_sql( # pylint: disable=no-member 'unicode_test', db.engine, if_exists='replace', chunksize=500, dtype={ 'phrase': String(500), 'short_phrase': String(10), 'with_missing': String(100), 'dttm': Date(), 'value': Float(), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table [unicode_test] reference') obj = db.session.query(TBL).filter_by(table_name='unicode_test').first() if not obj: obj = TBL(table_name='unicode_test') obj.main_dttm_col = 'dttm' obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': 'dttm', 'groupby': [], 'metric': 'sum__value', 'row_limit': config.get('ROW_LIMIT'), 'since': '100 years ago', 'until': 'now', 'where': '', 'viz_type': 'word_cloud', 'size_from': '10', 'series': 'short_phrase', 'size_to': '70', 'rotation': 'square', 'limit': '100', } print('Creating a slice') slc = Slice( slice_name='Unicode Cloud', viz_type='word_cloud', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) print('Creating a dashboard') dash = (db.session.query(Dash).filter_by( dashboard_title='Unicode Test').first()) if not dash: dash = Dash() js = """\ { "CHART-Hkx6154FEm": { "children": [], "id": "CHART-Hkx6154FEm", "meta": { "chartId": 2225, "height": 30, "sliceName": "slice 1", "width": 4 }, "type": "CHART" }, "GRID_ID": { "children": [ "ROW-SyT19EFEQ" ], "id": "GRID_ID", "type": "GRID" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW-SyT19EFEQ": { "children": [ "CHART-Hkx6154FEm" ], "id": "ROW-SyT19EFEQ", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "DASHBOARD_VERSION_KEY": "v2" } """ dash.dashboard_title = 'Unicode Test' pos = json.loads(js) update_slice_ids(pos, [slc]) dash.position_json = json.dumps(pos, indent=4) dash.slug = 'unicode-test' dash.slices = [slc] db.session.merge(dash) db.session.commit()
def load_birth_names(): """Loading birth name dataset from a zip file in the repo""" with gzip.open(os.path.join(DATA_FOLDER, 'birth_names.json.gz')) as f: pdf = pd.read_json(f) pdf.ds = pd.to_datetime(pdf.ds, unit='ms') pdf.to_sql( 'birth_names', db.engine, if_exists='replace', chunksize=500, dtype={ 'ds': DateTime, 'gender': String(16), 'state': String(10), 'name': String(255), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table [birth_names] reference') obj = db.session.query(TBL).filter_by(table_name='birth_names').first() if not obj: obj = TBL(table_name='birth_names') obj.main_dttm_col = 'ds' obj.database = get_or_create_main_db() obj.filter_select_enabled = True if not any(col.column_name == 'num_california' for col in obj.columns): obj.columns.append(TableColumn( column_name='num_california', expression="CASE WHEN state = 'CA' THEN num ELSE 0 END", )) db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj defaults = { 'compare_lag': '10', 'compare_suffix': 'o10Y', 'limit': '25', 'granularity_sqla': 'ds', 'groupby': [], 'metric': 'sum__num', 'metrics': ['sum__num'], 'row_limit': config.get('ROW_LIMIT'), 'since': '100 years ago', 'until': 'now', 'viz_type': 'table', 'where': '', 'markup_type': 'markdown', } admin = security_manager.find_user('admin') print('Creating some slices') slices = [ Slice( slice_name='Girls', viz_type='table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, groupby=['name'], filters=[{ 'col': 'gender', 'op': 'in', 'val': ['girl'], }], row_limit=50, timeseries_limit_metric='sum__num')), Slice( slice_name='Boys', viz_type='table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, groupby=['name'], filters=[{ 'col': 'gender', 'op': 'in', 'val': ['boy'], }], row_limit=50)), Slice( slice_name='Participants', viz_type='big_number', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='big_number', granularity_sqla='ds', compare_lag='5', compare_suffix='over 5Y')), Slice( slice_name='Genders', viz_type='pie', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='pie', groupby=['gender'])), Slice( slice_name='Genders by State', viz_type='dist_bar', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, adhoc_filters=[ { 'clause': 'WHERE', 'expressionType': 'SIMPLE', 'filterOptionName': '2745eae5', 'comparator': ['other'], 'operator': 'not in', 'subject': 'state', }, ], viz_type='dist_bar', metrics=[ { 'expressionType': 'SIMPLE', 'column': { 'column_name': 'sum_boys', 'type': 'BIGINT(20)', }, 'aggregate': 'SUM', 'label': 'Boys', 'optionName': 'metric_11', }, { 'expressionType': 'SIMPLE', 'column': { 'column_name': 'sum_girls', 'type': 'BIGINT(20)', }, 'aggregate': 'SUM', 'label': 'Girls', 'optionName': 'metric_12', }, ], groupby=['state'])), Slice( slice_name='Trends', viz_type='line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='line', groupby=['name'], granularity_sqla='ds', rich_tooltip=True, show_legend=True)), Slice( slice_name='Average and Sum Trends', viz_type='dual_line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='dual_line', metric={ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num', 'type': 'BIGINT(20)', }, 'aggregate': 'AVG', 'label': 'AVG(num)', 'optionName': 'metric_vgops097wej_g8uff99zhk7', }, metric_2='sum__num', granularity_sqla='ds')), Slice( slice_name='Title', viz_type='markup', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='markup', markup_type='html', code="""\ <div style='text-align:center'> <h1>Birth Names Dashboard</h1> <p> The source dataset came from <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a> </p> <img src='/static/assets/images/babytux.jpg'> </div> """)), Slice( slice_name='Name Cloud', viz_type='word_cloud', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='word_cloud', size_from='10', series='name', size_to='70', rotation='square', limit='100')), Slice( slice_name='Pivot Table', viz_type='pivot_table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='pivot_table', metrics=['sum__num'], groupby=['name'], columns=['state'])), Slice( slice_name='Number of Girls', viz_type='big_number_total', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='big_number_total', granularity_sqla='ds', filters=[{ 'col': 'gender', 'op': 'in', 'val': ['girl'], }], subheader='total female participants')), Slice( slice_name='Number of California Births', viz_type='big_number_total', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, metric={ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num_california', 'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, 'aggregate': 'SUM', 'label': 'SUM(num_california)', }, viz_type='big_number_total', granularity_sqla='ds')), Slice( slice_name='Top 10 California Names Timeseries', viz_type='line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, metrics=[{ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num_california', 'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, 'aggregate': 'SUM', 'label': 'SUM(num_california)', }], viz_type='line', granularity_sqla='ds', groupby=['name'], timeseries_limit_metric={ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num_california', 'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, 'aggregate': 'SUM', 'label': 'SUM(num_california)', }, limit='10')), Slice( slice_name='Names Sorted by Num in California', viz_type='table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, groupby=['name'], row_limit=50, timeseries_limit_metric={ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num_california', 'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, 'aggregate': 'SUM', 'label': 'SUM(num_california)', })), Slice( slice_name='Num Births Trend', viz_type='line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='line')), Slice( slice_name='Daily Totals', viz_type='table', datasource_type='table', datasource_id=tbl.id, created_by=admin, params=get_slice_json( defaults, groupby=['ds'], since='40 years ago', until='now', viz_type='table')), ] for slc in slices: merge_slice(slc) print('Creating a dashboard') dash = db.session.query(Dash).filter_by(dashboard_title='Births').first() if not dash: dash = Dash() js = textwrap.dedent("""\ { "CHART-0dd270f0": { "meta": { "chartId": 51, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-0dd270f0", "children": [] }, "CHART-a3c21bcc": { "meta": { "chartId": 52, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-a3c21bcc", "children": [] }, "CHART-976960a5": { "meta": { "chartId": 53, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-976960a5", "children": [] }, "CHART-58575537": { "meta": { "chartId": 54, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-58575537", "children": [] }, "CHART-e9cd8f0b": { "meta": { "chartId": 55, "width": 8, "height": 38 }, "type": "CHART", "id": "CHART-e9cd8f0b", "children": [] }, "CHART-e440d205": { "meta": { "chartId": 56, "width": 8, "height": 50 }, "type": "CHART", "id": "CHART-e440d205", "children": [] }, "CHART-59444e0b": { "meta": { "chartId": 57, "width": 3, "height": 38 }, "type": "CHART", "id": "CHART-59444e0b", "children": [] }, "CHART-e2cb4997": { "meta": { "chartId": 59, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-e2cb4997", "children": [] }, "CHART-e8774b49": { "meta": { "chartId": 60, "width": 12, "height": 50 }, "type": "CHART", "id": "CHART-e8774b49", "children": [] }, "CHART-985bfd1e": { "meta": { "chartId": 61, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-985bfd1e", "children": [] }, "CHART-17f13246": { "meta": { "chartId": 62, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-17f13246", "children": [] }, "CHART-729324f6": { "meta": { "chartId": 63, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-729324f6", "children": [] }, "COLUMN-25a865d6": { "meta": { "width": 4, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-25a865d6", "children": [ "ROW-cc97c6ac", "CHART-e2cb4997" ] }, "COLUMN-4557b6ba": { "meta": { "width": 8, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-4557b6ba", "children": [ "ROW-d2e78e59", "CHART-e9cd8f0b" ] }, "GRID_ID": { "type": "GRID", "id": "GRID_ID", "children": [ "ROW-8515ace3", "ROW-1890385f", "ROW-f0b64094", "ROW-be9526b8" ] }, "HEADER_ID": { "meta": { "text": "Births" }, "type": "HEADER", "id": "HEADER_ID" }, "MARKDOWN-00178c27": { "meta": { "width": 5, "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babytux.jpg\\">\\n</div>\\n", "height": 38 }, "type": "MARKDOWN", "id": "MARKDOWN-00178c27", "children": [] }, "ROOT_ID": { "type": "ROOT", "id": "ROOT_ID", "children": [ "GRID_ID" ] }, "ROW-1890385f": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-1890385f", "children": [ "CHART-e440d205", "CHART-0dd270f0", "CHART-a3c21bcc" ] }, "ROW-8515ace3": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-8515ace3", "children": [ "COLUMN-25a865d6", "COLUMN-4557b6ba" ] }, "ROW-be9526b8": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-be9526b8", "children": [ "CHART-985bfd1e", "CHART-17f13246", "CHART-729324f6" ] }, "ROW-cc97c6ac": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-cc97c6ac", "children": [ "CHART-976960a5", "CHART-58575537" ] }, "ROW-d2e78e59": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-d2e78e59", "children": [ "MARKDOWN-00178c27", "CHART-59444e0b" ] }, "ROW-f0b64094": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-f0b64094", "children": [ "CHART-e8774b49" ] }, "DASHBOARD_VERSION_KEY": "v2" } """) pos = json.loads(js) # dashboard v2 doesn't allow add markup slice dash.slices = [slc for slc in slices if slc.viz_type != 'markup'] update_slice_ids(pos, dash.slices) dash.dashboard_title = 'Births' dash.position_json = json.dumps(pos, indent=4) dash.slug = 'births' db.session.merge(dash) db.session.commit()
def load_multiformat_time_series(): """Loading time series data from a zip file in the repo""" with gzip.open(os.path.join(DATA_FOLDER, 'multiformat_time_series.json.gz')) as f: pdf = pd.read_json(f) pdf.ds = pd.to_datetime(pdf.ds, unit='s') pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s') pdf.to_sql('multiformat_time_series', db.engine, if_exists='replace', chunksize=500, dtype={ 'ds': Date, 'ds2': DateTime, 'epoch_s': BigInteger, 'epoch_ms': BigInteger, 'string0': String(100), 'string1': String(100), 'string2': String(100), 'string3': String(100), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table [multiformat_time_series] reference') obj = db.session.query(TBL).filter_by( table_name='multiformat_time_series').first() if not obj: obj = TBL(table_name='multiformat_time_series') obj.main_dttm_col = 'ds' obj.database = utils.get_or_create_main_db() dttm_and_expr_dict = { 'ds': [None, None], 'ds2': [None, None], 'epoch_s': ['epoch_s', None], 'epoch_ms': ['epoch_ms', None], 'string2': ['%Y%m%d-%H%M%S', None], 'string1': ['%Y-%m-%d^%H:%M:%S', None], 'string0': ['%Y-%m-%d %H:%M:%S.%f', None], 'string3': ['%Y/%m/%d%H:%M:%S.%f', None], } for col in obj.columns: dttm_and_expr = dttm_and_expr_dict[col.column_name] col.python_date_format = dttm_and_expr[0] col.dbatabase_expr = dttm_and_expr[1] col.is_dttm = True db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj print('Creating Heatmap charts') for i, col in enumerate(tbl.columns): slice_data = { 'metrics': ['count'], 'granularity_sqla': col.column_name, 'row_limit': config.get('ROW_LIMIT'), 'since': '1 year ago', 'until': 'now', 'where': '', 'viz_type': 'cal_heatmap', 'domain_granularity': 'month', 'subdomain_granularity': 'day', } slc = Slice( slice_name='Calendar Heatmap multiformat ' + str(i), viz_type='cal_heatmap', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) misc_dash_slices.add(slc.slice_name)
def load_country_map_data(): """Loading data for map with country map""" csv_bytes = get_example_data( "birth_france_data_for_country_map.csv", is_gzip=False, make_bytes=True ) data = pd.read_csv(csv_bytes, encoding="utf-8") data["dttm"] = datetime.datetime.now().date() data.to_sql( # pylint: disable=no-member "birth_france_by_region", db.engine, if_exists="replace", chunksize=500, dtype={ "DEPT_ID": String(10), "2003": BigInteger, "2004": BigInteger, "2005": BigInteger, "2006": BigInteger, "2007": BigInteger, "2008": BigInteger, "2009": BigInteger, "2010": BigInteger, "2011": BigInteger, "2012": BigInteger, "2013": BigInteger, "2014": BigInteger, "dttm": Date(), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table reference") obj = db.session.query(TBL).filter_by(table_name="birth_france_by_region").first() if not obj: obj = TBL(table_name="birth_france_by_region") obj.main_dttm_col = "dttm" obj.database = utils.get_or_create_main_db() if not any(col.metric_name == "avg__2004" for col in obj.metrics): col = str(column("2004").compile(db.engine)) obj.metrics.append(SqlMetric(metric_name="avg__2004", expression=f"AVG({col})")) db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { "granularity_sqla": "", "since": "", "until": "", "where": "", "viz_type": "country_map", "entity": "DEPT_ID", "metric": { "expressionType": "SIMPLE", "column": {"type": "INT", "column_name": "2004"}, "aggregate": "AVG", "label": "Boys", "optionName": "metric_112342", }, "row_limit": 500000, } print("Creating a slice") slc = Slice( slice_name="Birth in France by department in 2016", viz_type="country_map", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_world_bank_health_n_pop(): """Loads the world bank health dataset, slices and a dashboard""" tbl_name = 'wb_health_population' data = get_example_data('countries.json.gz') pdf = pd.read_json(data) pdf.columns = [col.replace('.', '_') for col in pdf.columns] pdf.year = pd.to_datetime(pdf.year) pdf.to_sql( tbl_name, db.engine, if_exists='replace', chunksize=50, dtype={ 'year': DateTime(), 'country_code': String(3), 'country_name': String(255), 'region': String(255), }, index=False) print('Creating table [wb_health_population] reference') tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = utils.readfile(os.path.join(DATA_FOLDER, 'countries.md')) tbl.main_dttm_col = 'year' tbl.database = utils.get_or_create_main_db() tbl.filter_select_enabled = True metrics = [ 'sum__SP_POP_TOTL', 'sum__SH_DYN_AIDS', 'sum__SH_DYN_AIDS', 'sum__SP_RUR_TOTL_ZS', 'sum__SP_DYN_LE00_IN', ] for m in metrics: if not any(col.metric_name == m for col in tbl.metrics): tbl.metrics.append(SqlMetric( metric_name=m, expression=f'{m[:3]}({m[5:]})', )) db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() defaults = { 'compare_lag': '10', 'compare_suffix': 'o10Y', 'limit': '25', 'granularity_sqla': 'year', 'groupby': [], 'metric': 'sum__SP_POP_TOTL', 'metrics': ['sum__SP_POP_TOTL'], 'row_limit': config.get('ROW_LIMIT'), 'since': '2014-01-01', 'until': '2014-01-02', 'time_range': '2014-01-01 : 2014-01-02', 'where': '', 'markup_type': 'markdown', 'country_fieldtype': 'cca3', 'secondary_metric': 'sum__SP_POP_TOTL', 'entity': 'country_code', 'show_bubbles': True, } print('Creating slices') slices = [ Slice( slice_name='Region Filter', viz_type='filter_box', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='filter_box', date_filter=False, filter_configs=[ { 'asc': False, 'clearable': True, 'column': 'region', 'key': '2s98dfu', 'metric': 'sum__SP_POP_TOTL', 'multiple': True, }, { 'asc': False, 'clearable': True, 'key': 'li3j2lk', 'column': 'country_name', 'metric': 'sum__SP_POP_TOTL', 'multiple': True, }, ])), Slice( slice_name="World's Population", viz_type='big_number', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, since='2000', viz_type='big_number', compare_lag='10', metric='sum__SP_POP_TOTL', compare_suffix='over 10Y')), Slice( slice_name='Most Populated Countries', viz_type='table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='table', metrics=['sum__SP_POP_TOTL'], groupby=['country_name'])), Slice( slice_name='Growth Rate', viz_type='line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='line', since='1960-01-01', metrics=['sum__SP_POP_TOTL'], num_period_compare='10', groupby=['country_name'])), Slice( slice_name='% Rural', viz_type='world_map', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='world_map', metric='sum__SP_RUR_TOTL_ZS', num_period_compare='10')), Slice( slice_name='Life Expectancy VS Rural %', viz_type='bubble', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='bubble', since='2011-01-01', until='2011-01-02', series='region', limit=0, entity='country_name', x='sum__SP_RUR_TOTL_ZS', y='sum__SP_DYN_LE00_IN', size='sum__SP_POP_TOTL', max_bubble_size='50', filters=[{ 'col': 'country_code', 'val': [ 'TCA', 'MNP', 'DMA', 'MHL', 'MCO', 'SXM', 'CYM', 'TUV', 'IMY', 'KNA', 'ASM', 'ADO', 'AMA', 'PLW', ], 'op': 'not in'}], )), Slice( slice_name='Rural Breakdown', viz_type='sunburst', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='sunburst', groupby=['region', 'country_name'], secondary_metric='sum__SP_RUR_TOTL', since='2011-01-01', until='2011-01-01')), Slice( slice_name="World's Pop Growth", viz_type='area', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, since='1960-01-01', until='now', viz_type='area', groupby=['region'])), Slice( slice_name='Box plot', viz_type='box_plot', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, since='1960-01-01', until='now', whisker_options='Min/max (no outliers)', x_ticks_layout='staggered', viz_type='box_plot', groupby=['region'])), Slice( slice_name='Treemap', viz_type='treemap', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, since='1960-01-01', until='now', viz_type='treemap', metrics=['sum__SP_POP_TOTL'], groupby=['region', 'country_code'])), Slice( slice_name='Parallel Coordinates', viz_type='para', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, since='2011-01-01', until='2011-01-01', viz_type='para', limit=100, metrics=[ 'sum__SP_POP_TOTL', 'sum__SP_RUR_TOTL_ZS', 'sum__SH_DYN_AIDS'], secondary_metric='sum__SP_POP_TOTL', series='country_name')), ] misc_dash_slices.add(slices[-1].slice_name) for slc in slices: merge_slice(slc) print("Creating a World's Health Bank dashboard") dash_name = "World's Bank Data" slug = 'world_health' dash = db.session.query(Dash).filter_by(slug=slug).first() if not dash: dash = Dash() js = textwrap.dedent("""\ { "CHART-36bfc934": { "children": [], "id": "CHART-36bfc934", "meta": { "chartId": 40, "height": 25, "sliceName": "Region Filter", "width": 2 }, "type": "CHART" }, "CHART-37982887": { "children": [], "id": "CHART-37982887", "meta": { "chartId": 41, "height": 25, "sliceName": "World's Population", "width": 2 }, "type": "CHART" }, "CHART-17e0f8d8": { "children": [], "id": "CHART-17e0f8d8", "meta": { "chartId": 42, "height": 92, "sliceName": "Most Populated Countries", "width": 3 }, "type": "CHART" }, "CHART-2ee52f30": { "children": [], "id": "CHART-2ee52f30", "meta": { "chartId": 43, "height": 38, "sliceName": "Growth Rate", "width": 6 }, "type": "CHART" }, "CHART-2d5b6871": { "children": [], "id": "CHART-2d5b6871", "meta": { "chartId": 44, "height": 52, "sliceName": "% Rural", "width": 7 }, "type": "CHART" }, "CHART-0fd0d252": { "children": [], "id": "CHART-0fd0d252", "meta": { "chartId": 45, "height": 50, "sliceName": "Life Expectancy VS Rural %", "width": 8 }, "type": "CHART" }, "CHART-97f4cb48": { "children": [], "id": "CHART-97f4cb48", "meta": { "chartId": 46, "height": 38, "sliceName": "Rural Breakdown", "width": 3 }, "type": "CHART" }, "CHART-b5e05d6f": { "children": [], "id": "CHART-b5e05d6f", "meta": { "chartId": 47, "height": 50, "sliceName": "World's Pop Growth", "width": 4 }, "type": "CHART" }, "CHART-e76e9f5f": { "children": [], "id": "CHART-e76e9f5f", "meta": { "chartId": 48, "height": 50, "sliceName": "Box plot", "width": 4 }, "type": "CHART" }, "CHART-a4808bba": { "children": [], "id": "CHART-a4808bba", "meta": { "chartId": 49, "height": 50, "sliceName": "Treemap", "width": 8 }, "type": "CHART" }, "COLUMN-071bbbad": { "children": [ "ROW-1e064e3c", "ROW-afdefba9" ], "id": "COLUMN-071bbbad", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 9 }, "type": "COLUMN" }, "COLUMN-fe3914b8": { "children": [ "CHART-36bfc934", "CHART-37982887" ], "id": "COLUMN-fe3914b8", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 2 }, "type": "COLUMN" }, "GRID_ID": { "children": [ "ROW-46632bc2", "ROW-3fa26c5d", "ROW-812b3f13" ], "id": "GRID_ID", "type": "GRID" }, "HEADER_ID": { "id": "HEADER_ID", "meta": { "text": "World's Bank Data" }, "type": "HEADER" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW-1e064e3c": { "children": [ "COLUMN-fe3914b8", "CHART-2d5b6871" ], "id": "ROW-1e064e3c", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-3fa26c5d": { "children": [ "CHART-b5e05d6f", "CHART-0fd0d252" ], "id": "ROW-3fa26c5d", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-46632bc2": { "children": [ "COLUMN-071bbbad", "CHART-17e0f8d8" ], "id": "ROW-46632bc2", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-812b3f13": { "children": [ "CHART-a4808bba", "CHART-e76e9f5f" ], "id": "ROW-812b3f13", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-afdefba9": { "children": [ "CHART-2ee52f30", "CHART-97f4cb48" ], "id": "ROW-afdefba9", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "DASHBOARD_VERSION_KEY": "v2" } """) pos = json.loads(js) update_slice_ids(pos, slices) dash.dashboard_title = dash_name dash.position_json = json.dumps(pos, indent=4) dash.slug = slug dash.slices = slices[:-1] db.session.merge(dash) db.session.commit()
def load_long_lat_data(): """Loading lat/long data from a csv file in the repo""" with gzip.open(os.path.join(DATA_FOLDER, 'san_francisco.csv.gz')) as f: pdf = pd.read_csv(f, encoding='utf-8') start = datetime.datetime.now().replace( hour=0, minute=0, second=0, microsecond=0) pdf['datetime'] = [ start + datetime.timedelta(hours=i * 24 / (len(pdf) - 1)) for i in range(len(pdf)) ] pdf['occupancy'] = [random.randint(1, 6) for _ in range(len(pdf))] pdf['radius_miles'] = [random.uniform(1, 3) for _ in range(len(pdf))] pdf['geohash'] = pdf[['LAT', 'LON']].apply( lambda x: geohash.encode(*x), axis=1) pdf['delimited'] = pdf['LAT'].map(str).str.cat(pdf['LON'].map(str), sep=',') pdf.to_sql( # pylint: disable=no-member 'long_lat', db.engine, if_exists='replace', chunksize=500, dtype={ 'longitude': Float(), 'latitude': Float(), 'number': Float(), 'street': String(100), 'unit': String(10), 'city': String(50), 'district': String(50), 'region': String(50), 'postcode': Float(), 'id': String(100), 'datetime': DateTime(), 'occupancy': Float(), 'radius_miles': Float(), 'geohash': String(12), 'delimited': String(60), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table reference') obj = db.session.query(TBL).filter_by(table_name='long_lat').first() if not obj: obj = TBL(table_name='long_lat') obj.main_dttm_col = 'datetime' obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': 'day', 'since': '2014-01-01', 'until': 'now', 'where': '', 'viz_type': 'mapbox', 'all_columns_x': 'LON', 'all_columns_y': 'LAT', 'mapbox_style': 'mapbox://styles/mapbox/light-v9', 'all_columns': ['occupancy'], 'row_limit': 500000, } print('Creating a slice') slc = Slice( slice_name='Mapbox Long/Lat', viz_type='mapbox', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_country_map_data(): """Loading data for map with country map""" csv_path = os.path.join(DATA_FOLDER, 'birth_france_data_for_country_map.csv') data = pd.read_csv(csv_path, encoding='utf-8') data['dttm'] = datetime.datetime.now().date() data.to_sql( # pylint: disable=no-member 'birth_france_by_region', db.engine, if_exists='replace', chunksize=500, dtype={ 'DEPT_ID': String(10), '2003': BigInteger, '2004': BigInteger, '2005': BigInteger, '2006': BigInteger, '2007': BigInteger, '2008': BigInteger, '2009': BigInteger, '2010': BigInteger, '2011': BigInteger, '2012': BigInteger, '2013': BigInteger, '2014': BigInteger, 'dttm': Date(), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table reference') obj = db.session.query(TBL).filter_by( table_name='birth_france_by_region').first() if not obj: obj = TBL(table_name='birth_france_by_region') obj.main_dttm_col = 'dttm' obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': '', 'since': '', 'until': '', 'where': '', 'viz_type': 'country_map', 'entity': 'DEPT_ID', 'metric': 'avg__2004', 'row_limit': 500000, } print('Creating a slice') slc = Slice( slice_name='Birth in France by department in 2016', viz_type='country_map', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_country_map_data(): """Loading data for map with country map""" csv_path = os.path.join(DATA_FOLDER, 'birth_france_data_for_country_map.csv') data = pd.read_csv(csv_path, encoding='utf-8') data['dttm'] = datetime.datetime.now().date() data.to_sql( # pylint: disable=no-member 'birth_france_by_region', db.engine, if_exists='replace', chunksize=500, dtype={ 'DEPT_ID': String(10), '2003': BigInteger, '2004': BigInteger, '2005': BigInteger, '2006': BigInteger, '2007': BigInteger, '2008': BigInteger, '2009': BigInteger, '2010': BigInteger, '2011': BigInteger, '2012': BigInteger, '2013': BigInteger, '2014': BigInteger, 'dttm': Date(), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table reference') obj = db.session.query(TBL).filter_by(table_name='birth_france_by_region').first() if not obj: obj = TBL(table_name='birth_france_by_region') obj.main_dttm_col = 'dttm' obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': '', 'since': '', 'until': '', 'where': '', 'viz_type': 'country_map', 'entity': 'DEPT_ID', 'metric': 'avg__2004', 'row_limit': 500000, } print('Creating a slice') slc = Slice( slice_name='Birth in France by department in 2016', viz_type='country_map', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_world_bank_health_n_pop(): """Loads the world bank health dataset, slices and a dashboard""" tbl_name = 'wb_health_population' with gzip.open(os.path.join(DATA_FOLDER, 'countries.json.gz')) as f: pdf = pd.read_json(f) pdf.columns = [col.replace('.', '_') for col in pdf.columns] pdf.year = pd.to_datetime(pdf.year) pdf.to_sql(tbl_name, db.engine, if_exists='replace', chunksize=50, dtype={ 'year': DateTime(), 'country_code': String(3), 'country_name': String(255), 'region': String(255), }, index=False) print('Creating table [wb_health_population] reference') tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = utils.readfile(os.path.join(DATA_FOLDER, 'countries.md')) tbl.main_dttm_col = 'year' tbl.database = utils.get_or_create_main_db() tbl.filter_select_enabled = True db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() defaults = { 'compare_lag': '10', 'compare_suffix': 'o10Y', 'limit': '25', 'granularity_sqla': 'year', 'groupby': [], 'metric': 'sum__SP_POP_TOTL', 'metrics': ['sum__SP_POP_TOTL'], 'row_limit': config.get('ROW_LIMIT'), 'since': '2014-01-01', 'until': '2014-01-02', 'time_range': '2014-01-01 : 2014-01-02', 'where': '', 'markup_type': 'markdown', 'country_fieldtype': 'cca3', 'secondary_metric': 'sum__SP_POP_TOTL', 'entity': 'country_code', 'show_bubbles': True, } print('Creating slices') slices = [ Slice(slice_name='Region Filter', viz_type='filter_box', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='filter_box', date_filter=False, filter_configs=[ { 'asc': False, 'clearable': True, 'column': 'region', 'key': '2s98dfu', 'metric': 'sum__SP_POP_TOTL', 'multiple': True, }, { 'asc': False, 'clearable': True, 'key': 'li3j2lk', 'column': 'country_name', 'metric': 'sum__SP_POP_TOTL', 'multiple': True, }, ])), Slice(slice_name="World's Population", viz_type='big_number', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, since='2000', viz_type='big_number', compare_lag='10', metric='sum__SP_POP_TOTL', compare_suffix='over 10Y')), Slice(slice_name='Most Populated Countries', viz_type='table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='table', metrics=['sum__SP_POP_TOTL'], groupby=['country_name'])), Slice(slice_name='Growth Rate', viz_type='line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='line', since='1960-01-01', metrics=['sum__SP_POP_TOTL'], num_period_compare='10', groupby=['country_name'])), Slice(slice_name='% Rural', viz_type='world_map', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='world_map', metric='sum__SP_RUR_TOTL_ZS', num_period_compare='10')), Slice(slice_name='Life Expectancy VS Rural %', viz_type='bubble', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, viz_type='bubble', since='2011-01-01', until='2011-01-02', series='region', limit=0, entity='country_name', x='sum__SP_RUR_TOTL_ZS', y='sum__SP_DYN_LE00_IN', size='sum__SP_POP_TOTL', max_bubble_size='50', filters=[{ 'col': 'country_code', 'val': [ 'TCA', 'MNP', 'DMA', 'MHL', 'MCO', 'SXM', 'CYM', 'TUV', 'IMY', 'KNA', 'ASM', 'ADO', 'AMA', 'PLW', ], 'op': 'not in' }], )), Slice(slice_name='Rural Breakdown', viz_type='sunburst', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='sunburst', groupby=['region', 'country_name'], secondary_metric='sum__SP_RUR_TOTL', since='2011-01-01', until='2011-01-01')), Slice(slice_name="World's Pop Growth", viz_type='area', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, since='1960-01-01', until='now', viz_type='area', groupby=['region'])), Slice(slice_name='Box plot', viz_type='box_plot', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, since='1960-01-01', until='now', whisker_options='Min/max (no outliers)', viz_type='box_plot', groupby=['region'])), Slice(slice_name='Treemap', viz_type='treemap', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, since='1960-01-01', until='now', viz_type='treemap', metrics=['sum__SP_POP_TOTL'], groupby=['region', 'country_code'])), Slice(slice_name='Parallel Coordinates', viz_type='para', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, since='2011-01-01', until='2011-01-01', viz_type='para', limit=100, metrics=[ 'sum__SP_POP_TOTL', 'sum__SP_RUR_TOTL_ZS', 'sum__SH_DYN_AIDS' ], secondary_metric='sum__SP_POP_TOTL', series='country_name')), ] misc_dash_slices.add(slices[-1].slice_name) for slc in slices: merge_slice(slc) print("Creating a World's Health Bank dashboard") dash_name = "World's Bank Data" slug = 'world_health' dash = db.session.query(Dash).filter_by(slug=slug).first() if not dash: dash = Dash() js = textwrap.dedent("""\ { "CHART-36bfc934": { "children": [], "id": "CHART-36bfc934", "meta": { "chartId": 40, "height": 25, "sliceName": "Region Filter", "width": 2 }, "type": "CHART" }, "CHART-37982887": { "children": [], "id": "CHART-37982887", "meta": { "chartId": 41, "height": 25, "sliceName": "World's Population", "width": 2 }, "type": "CHART" }, "CHART-17e0f8d8": { "children": [], "id": "CHART-17e0f8d8", "meta": { "chartId": 42, "height": 92, "sliceName": "Most Populated Countries", "width": 3 }, "type": "CHART" }, "CHART-2ee52f30": { "children": [], "id": "CHART-2ee52f30", "meta": { "chartId": 43, "height": 38, "sliceName": "Growth Rate", "width": 6 }, "type": "CHART" }, "CHART-2d5b6871": { "children": [], "id": "CHART-2d5b6871", "meta": { "chartId": 44, "height": 52, "sliceName": "% Rural", "width": 7 }, "type": "CHART" }, "CHART-0fd0d252": { "children": [], "id": "CHART-0fd0d252", "meta": { "chartId": 45, "height": 50, "sliceName": "Life Expectancy VS Rural %", "width": 8 }, "type": "CHART" }, "CHART-97f4cb48": { "children": [], "id": "CHART-97f4cb48", "meta": { "chartId": 46, "height": 38, "sliceName": "Rural Breakdown", "width": 3 }, "type": "CHART" }, "CHART-b5e05d6f": { "children": [], "id": "CHART-b5e05d6f", "meta": { "chartId": 47, "height": 50, "sliceName": "World's Pop Growth", "width": 4 }, "type": "CHART" }, "CHART-e76e9f5f": { "children": [], "id": "CHART-e76e9f5f", "meta": { "chartId": 48, "height": 50, "sliceName": "Box plot", "width": 4 }, "type": "CHART" }, "CHART-a4808bba": { "children": [], "id": "CHART-a4808bba", "meta": { "chartId": 49, "height": 50, "sliceName": "Treemap", "width": 8 }, "type": "CHART" }, "COLUMN-071bbbad": { "children": [ "ROW-1e064e3c", "ROW-afdefba9" ], "id": "COLUMN-071bbbad", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 9 }, "type": "COLUMN" }, "COLUMN-fe3914b8": { "children": [ "CHART-36bfc934", "CHART-37982887" ], "id": "COLUMN-fe3914b8", "meta": { "background": "BACKGROUND_TRANSPARENT", "width": 2 }, "type": "COLUMN" }, "GRID_ID": { "children": [ "ROW-46632bc2", "ROW-3fa26c5d", "ROW-812b3f13" ], "id": "GRID_ID", "type": "GRID" }, "HEADER_ID": { "id": "HEADER_ID", "meta": { "text": "World's Bank Data" }, "type": "HEADER" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW-1e064e3c": { "children": [ "COLUMN-fe3914b8", "CHART-2d5b6871" ], "id": "ROW-1e064e3c", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-3fa26c5d": { "children": [ "CHART-b5e05d6f", "CHART-0fd0d252" ], "id": "ROW-3fa26c5d", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-46632bc2": { "children": [ "COLUMN-071bbbad", "CHART-17e0f8d8" ], "id": "ROW-46632bc2", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-812b3f13": { "children": [ "CHART-a4808bba", "CHART-e76e9f5f" ], "id": "ROW-812b3f13", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "ROW-afdefba9": { "children": [ "CHART-2ee52f30", "CHART-97f4cb48" ], "id": "ROW-afdefba9", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "DASHBOARD_VERSION_KEY": "v2" } """) pos = json.loads(js) update_slice_ids(pos, slices) dash.dashboard_title = dash_name dash.position_json = json.dumps(pos, indent=4) dash.slug = slug dash.slices = slices[:-1] db.session.merge(dash) db.session.commit()
def load_birth_names(): """Loading birth name dataset from a zip file in the repo""" data = get_example_data("birth_names.json.gz") pdf = pd.read_json(data) pdf.ds = pd.to_datetime(pdf.ds, unit="ms") pdf.to_sql( "birth_names", db.engine, if_exists="replace", chunksize=500, dtype={ "ds": DateTime, "gender": String(16), "state": String(10), "name": String(255), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table [birth_names] reference") obj = db.session.query(TBL).filter_by(table_name="birth_names").first() if not obj: obj = TBL(table_name="birth_names") obj.main_dttm_col = "ds" obj.database = get_or_create_main_db() obj.filter_select_enabled = True if not any(col.column_name == "num_california" for col in obj.columns): col_state = str(column("state").compile(db.engine)) col_num = str(column("num").compile(db.engine)) obj.columns.append( TableColumn( column_name="num_california", expression= f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END", )) if not any(col.metric_name == "sum__num" for col in obj.metrics): col = str(column("num").compile(db.engine)) obj.metrics.append( SqlMetric(metric_name="sum__num", expression=f"SUM({col})")) db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj defaults = { "compare_lag": "10", "compare_suffix": "o10Y", "limit": "25", "granularity_sqla": "ds", "groupby": [], "metric": "sum__num", "metrics": ["sum__num"], "row_limit": config.get("ROW_LIMIT"), "since": "100 years ago", "until": "now", "viz_type": "table", "where": "", "markup_type": "markdown", } admin = security_manager.find_user("admin") print("Creating some slices") slices = [ Slice( slice_name="Girls", viz_type="table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, groupby=["name"], filters=[{ "col": "gender", "op": "in", "val": ["girl"] }], row_limit=50, timeseries_limit_metric="sum__num", ), ), Slice( slice_name="Boys", viz_type="table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, groupby=["name"], filters=[{ "col": "gender", "op": "in", "val": ["boy"] }], row_limit=50, ), ), Slice( slice_name="Participants", viz_type="big_number", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="big_number", granularity_sqla="ds", compare_lag="5", compare_suffix="over 5Y", ), ), Slice( slice_name="Genders", viz_type="pie", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(defaults, viz_type="pie", groupby=["gender"]), ), Slice( slice_name="Genders by State", viz_type="dist_bar", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, adhoc_filters=[{ "clause": "WHERE", "expressionType": "SIMPLE", "filterOptionName": "2745eae5", "comparator": ["other"], "operator": "not in", "subject": "state", }], viz_type="dist_bar", metrics=[ { "expressionType": "SIMPLE", "column": { "column_name": "sum_boys", "type": "BIGINT(20)" }, "aggregate": "SUM", "label": "Boys", "optionName": "metric_11", }, { "expressionType": "SIMPLE", "column": { "column_name": "sum_girls", "type": "BIGINT(20)" }, "aggregate": "SUM", "label": "Girls", "optionName": "metric_12", }, ], groupby=["state"], ), ), Slice( slice_name="Trends", viz_type="line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="line", groupby=["name"], granularity_sqla="ds", rich_tooltip=True, show_legend=True, ), ), Slice( slice_name="Average and Sum Trends", viz_type="dual_line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="dual_line", metric={ "expressionType": "SIMPLE", "column": { "column_name": "num", "type": "BIGINT(20)" }, "aggregate": "AVG", "label": "AVG(num)", "optionName": "metric_vgops097wej_g8uff99zhk7", }, metric_2="sum__num", granularity_sqla="ds", ), ), Slice( slice_name="Title", viz_type="markup", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="markup", markup_type="html", code="""\ <div style='text-align:center'> <h1>Birth Names Dashboard</h1> <p> The source dataset came from <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a> </p> <img src='/static/assets/images/babytux.jpg'> </div> """, ), ), Slice( slice_name="Name Cloud", viz_type="word_cloud", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="word_cloud", size_from="10", series="name", size_to="70", rotation="square", limit="100", ), ), Slice( slice_name="Pivot Table", viz_type="pivot_table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="pivot_table", metrics=["sum__num"], groupby=["name"], columns=["state"], ), ), Slice( slice_name="Number of Girls", viz_type="big_number_total", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, viz_type="big_number_total", granularity_sqla="ds", filters=[{ "col": "gender", "op": "in", "val": ["girl"] }], subheader="total female participants", ), ), Slice( slice_name="Number of California Births", viz_type="big_number_total", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, metric={ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, viz_type="big_number_total", granularity_sqla="ds", ), ), Slice( slice_name="Top 10 California Names Timeseries", viz_type="line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, metrics=[{ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }], viz_type="line", granularity_sqla="ds", groupby=["name"], timeseries_limit_metric={ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, limit="10", ), ), Slice( slice_name="Names Sorted by Num in California", viz_type="table", datasource_type="table", datasource_id=tbl.id, params=get_slice_json( defaults, groupby=["name"], row_limit=50, timeseries_limit_metric={ "expressionType": "SIMPLE", "column": { "column_name": "num_california", "expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, "aggregate": "SUM", "label": "SUM(num_california)", }, ), ), Slice( slice_name="Num Births Trend", viz_type="line", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(defaults, viz_type="line"), ), Slice( slice_name="Daily Totals", viz_type="table", datasource_type="table", datasource_id=tbl.id, created_by=admin, params=get_slice_json( defaults, groupby=["ds"], since="40 years ago", until="now", viz_type="table", ), ), ] for slc in slices: merge_slice(slc) print("Creating a dashboard") dash = db.session.query(Dash).filter_by(dashboard_title="Births").first() if not dash: dash = Dash() js = textwrap.dedent( # pylint: disable=line-too-long """\ { "CHART-0dd270f0": { "meta": { "chartId": 51, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-0dd270f0", "children": [] }, "CHART-a3c21bcc": { "meta": { "chartId": 52, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-a3c21bcc", "children": [] }, "CHART-976960a5": { "meta": { "chartId": 53, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-976960a5", "children": [] }, "CHART-58575537": { "meta": { "chartId": 54, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-58575537", "children": [] }, "CHART-e9cd8f0b": { "meta": { "chartId": 55, "width": 8, "height": 38 }, "type": "CHART", "id": "CHART-e9cd8f0b", "children": [] }, "CHART-e440d205": { "meta": { "chartId": 56, "width": 8, "height": 50 }, "type": "CHART", "id": "CHART-e440d205", "children": [] }, "CHART-59444e0b": { "meta": { "chartId": 57, "width": 3, "height": 38 }, "type": "CHART", "id": "CHART-59444e0b", "children": [] }, "CHART-e2cb4997": { "meta": { "chartId": 59, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-e2cb4997", "children": [] }, "CHART-e8774b49": { "meta": { "chartId": 60, "width": 12, "height": 50 }, "type": "CHART", "id": "CHART-e8774b49", "children": [] }, "CHART-985bfd1e": { "meta": { "chartId": 61, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-985bfd1e", "children": [] }, "CHART-17f13246": { "meta": { "chartId": 62, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-17f13246", "children": [] }, "CHART-729324f6": { "meta": { "chartId": 63, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-729324f6", "children": [] }, "COLUMN-25a865d6": { "meta": { "width": 4, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-25a865d6", "children": [ "ROW-cc97c6ac", "CHART-e2cb4997" ] }, "COLUMN-4557b6ba": { "meta": { "width": 8, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-4557b6ba", "children": [ "ROW-d2e78e59", "CHART-e9cd8f0b" ] }, "GRID_ID": { "type": "GRID", "id": "GRID_ID", "children": [ "ROW-8515ace3", "ROW-1890385f", "ROW-f0b64094", "ROW-be9526b8" ] }, "HEADER_ID": { "meta": { "text": "Births" }, "type": "HEADER", "id": "HEADER_ID" }, "MARKDOWN-00178c27": { "meta": { "width": 5, "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babytux.jpg\\">\\n</div>\\n", "height": 38 }, "type": "MARKDOWN", "id": "MARKDOWN-00178c27", "children": [] }, "ROOT_ID": { "type": "ROOT", "id": "ROOT_ID", "children": [ "GRID_ID" ] }, "ROW-1890385f": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-1890385f", "children": [ "CHART-e440d205", "CHART-0dd270f0", "CHART-a3c21bcc" ] }, "ROW-8515ace3": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-8515ace3", "children": [ "COLUMN-25a865d6", "COLUMN-4557b6ba" ] }, "ROW-be9526b8": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-be9526b8", "children": [ "CHART-985bfd1e", "CHART-17f13246", "CHART-729324f6" ] }, "ROW-cc97c6ac": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-cc97c6ac", "children": [ "CHART-976960a5", "CHART-58575537" ] }, "ROW-d2e78e59": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-d2e78e59", "children": [ "MARKDOWN-00178c27", "CHART-59444e0b" ] }, "ROW-f0b64094": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-f0b64094", "children": [ "CHART-e8774b49" ] }, "DASHBOARD_VERSION_KEY": "v2" } """ # pylint: enable=line-too-long ) pos = json.loads(js) # dashboard v2 doesn't allow add markup slice dash.slices = [slc for slc in slices if slc.viz_type != "markup"] update_slice_ids(pos, dash.slices) dash.dashboard_title = "Births" dash.position_json = json.dumps(pos, indent=4) dash.slug = "births" db.session.merge(dash) db.session.commit()
def init(): """Inits the Superset application""" utils.get_or_create_main_db() security_manager.sync_role_definitions()
def load_long_lat_data(): """Loading lat/long data from a csv file in the repo""" with gzip.open(os.path.join(DATA_FOLDER, 'san_francisco.csv.gz')) as f: pdf = pd.read_csv(f, encoding='utf-8') start = datetime.datetime.now().replace( hour=0, minute=0, second=0, microsecond=0) pdf['datetime'] = [ start + datetime.timedelta(hours=i * 24 / (len(pdf) - 1)) for i in range(len(pdf)) ] pdf['occupancy'] = [random.randint(1, 6) for _ in range(len(pdf))] pdf['radius_miles'] = [random.uniform(1, 3) for _ in range(len(pdf))] pdf['geohash'] = pdf[['LAT', 'LON']].apply( lambda x: geohash.encode(*x), axis=1) pdf['delimited'] = pdf['LAT'].map(str).str.cat(pdf['LON'].map(str), sep=',') pdf.to_sql( # pylint: disable=no-member 'long_lat', db.engine, if_exists='replace', chunksize=500, dtype={ 'longitude': Float(), 'latitude': Float(), 'number': Float(), 'street': String(100), 'unit': String(10), 'city': String(50), 'district': String(50), 'region': String(50), 'postcode': Float(), 'id': String(100), 'datetime': DateTime(), 'occupancy': Float(), 'radius_miles': Float(), 'geohash': String(12), 'delimited': String(60), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table reference') obj = db.session.query(TBL).filter_by(table_name='long_lat').first() if not obj: obj = TBL(table_name='long_lat') obj.main_dttm_col = 'datetime' obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { 'granularity_sqla': 'day', 'since': '2014-01-01', 'until': 'now', 'where': '', 'viz_type': 'mapbox', 'all_columns_x': 'LON', 'all_columns_y': 'LAT', 'mapbox_style': 'mapbox://styles/mapbox/light-v9', 'all_columns': ['occupancy'], 'row_limit': 500000, } print('Creating a slice') slc = Slice( slice_name='Mapbox Long/Lat', viz_type='mapbox', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(slice_data), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)
def load_unicode_test_data(): """Loading unicode test dataset from a csv file in the repo""" data = get_example_data("unicode_utf8_unixnl_test.csv", is_gzip=False, make_bytes=True) df = pd.read_csv(data, encoding="utf-8") # generate date/numeric data df["dttm"] = datetime.datetime.now().date() df["value"] = [random.randint(1, 100) for _ in range(len(df))] df.to_sql( # pylint: disable=no-member "unicode_test", db.engine, if_exists="replace", chunksize=500, dtype={ "phrase": String(500), "short_phrase": String(10), "with_missing": String(100), "dttm": Date(), "value": Float(), }, index=False, ) print("Done loading table!") print("-" * 80) print("Creating table [unicode_test] reference") obj = db.session.query(TBL).filter_by(table_name="unicode_test").first() if not obj: obj = TBL(table_name="unicode_test") obj.main_dttm_col = "dttm" obj.database = utils.get_or_create_main_db() db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj slice_data = { "granularity_sqla": "dttm", "groupby": [], "metric": { "expressionType": "SIMPLE", "column": { "column_name": "value", "type": "INT" }, "aggregate": "SUM", "label": "value", "optionName": "metric_11", }, "row_limit": config.get("ROW_LIMIT"), "since": "100 years ago", "until": "now", "where": "", "viz_type": "word_cloud", "size_from": "10", "series": "short_phrase", "size_to": "70", "rotation": "square", "limit": "100", } print("Creating a slice") slc = Slice( slice_name="Unicode Cloud", viz_type="word_cloud", datasource_type="table", datasource_id=tbl.id, params=get_slice_json(slice_data), ) merge_slice(slc) print("Creating a dashboard") dash = db.session.query(Dash).filter_by( dashboard_title="Unicode Test").first() if not dash: dash = Dash() js = """\ { "CHART-Hkx6154FEm": { "children": [], "id": "CHART-Hkx6154FEm", "meta": { "chartId": 2225, "height": 30, "sliceName": "slice 1", "width": 4 }, "type": "CHART" }, "GRID_ID": { "children": [ "ROW-SyT19EFEQ" ], "id": "GRID_ID", "type": "GRID" }, "ROOT_ID": { "children": [ "GRID_ID" ], "id": "ROOT_ID", "type": "ROOT" }, "ROW-SyT19EFEQ": { "children": [ "CHART-Hkx6154FEm" ], "id": "ROW-SyT19EFEQ", "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW" }, "DASHBOARD_VERSION_KEY": "v2" } """ dash.dashboard_title = "Unicode Test" pos = json.loads(js) update_slice_ids(pos, [slc]) dash.position_json = json.dumps(pos, indent=4) dash.slug = "unicode-test" dash.slices = [slc] db.session.merge(dash) db.session.commit()
def load_birth_names(): """Loading birth name dataset from a zip file in the repo""" with gzip.open(os.path.join(DATA_FOLDER, 'birth_names.json.gz')) as f: pdf = pd.read_json(f) pdf.ds = pd.to_datetime(pdf.ds, unit='ms') pdf.to_sql('birth_names', db.engine, if_exists='replace', chunksize=500, dtype={ 'ds': DateTime, 'gender': String(16), 'state': String(10), 'name': String(255), }, index=False) print('Done loading table!') print('-' * 80) print('Creating table [birth_names] reference') obj = db.session.query(TBL).filter_by(table_name='birth_names').first() if not obj: obj = TBL(table_name='birth_names') obj.main_dttm_col = 'ds' obj.database = get_or_create_main_db() obj.filter_select_enabled = True if not any(col.column_name == 'num_california' for col in obj.columns): obj.columns.append( TableColumn( column_name='num_california', expression="CASE WHEN state = 'CA' THEN num ELSE 0 END", )) db.session.merge(obj) db.session.commit() obj.fetch_metadata() tbl = obj defaults = { 'compare_lag': '10', 'compare_suffix': 'o10Y', 'limit': '25', 'granularity_sqla': 'ds', 'groupby': [], 'metric': 'sum__num', 'metrics': ['sum__num'], 'row_limit': config.get('ROW_LIMIT'), 'since': '100 years ago', 'until': 'now', 'viz_type': 'table', 'where': '', 'markup_type': 'markdown', } admin = security_manager.find_user('admin') print('Creating some slices') slices = [ Slice(slice_name='Girls', viz_type='table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, groupby=['name'], filters=[{ 'col': 'gender', 'op': 'in', 'val': ['girl'], }], row_limit=50, timeseries_limit_metric='sum__num')), Slice(slice_name='Boys', viz_type='table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, groupby=['name'], filters=[{ 'col': 'gender', 'op': 'in', 'val': ['boy'], }], row_limit=50)), Slice(slice_name='Participants', viz_type='big_number', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='big_number', granularity_sqla='ds', compare_lag='5', compare_suffix='over 5Y')), Slice(slice_name='Genders', viz_type='pie', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='pie', groupby=['gender'])), Slice(slice_name='Genders by State', viz_type='dist_bar', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, adhoc_filters=[ { 'clause': 'WHERE', 'expressionType': 'SIMPLE', 'filterOptionName': '2745eae5', 'comparator': ['other'], 'operator': 'not in', 'subject': 'state', }, ], viz_type='dist_bar', metrics=[ { 'expressionType': 'SIMPLE', 'column': { 'column_name': 'sum_boys', 'type': 'BIGINT(20)', }, 'aggregate': 'SUM', 'label': 'Boys', 'optionName': 'metric_11', }, { 'expressionType': 'SIMPLE', 'column': { 'column_name': 'sum_girls', 'type': 'BIGINT(20)', }, 'aggregate': 'SUM', 'label': 'Girls', 'optionName': 'metric_12', }, ], groupby=['state'])), Slice(slice_name='Trends', viz_type='line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='line', groupby=['name'], granularity_sqla='ds', rich_tooltip=True, show_legend=True)), Slice(slice_name='Average and Sum Trends', viz_type='dual_line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='dual_line', metric={ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num', 'type': 'BIGINT(20)', }, 'aggregate': 'AVG', 'label': 'AVG(num)', 'optionName': 'metric_vgops097wej_g8uff99zhk7', }, metric_2='sum__num', granularity_sqla='ds')), Slice(slice_name='Title', viz_type='markup', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='markup', markup_type='html', code="""\ <div style='text-align:center'> <h1>Birth Names Dashboard</h1> <p> The source dataset came from <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a> </p> <img src='/static/assets/images/babytux.jpg'> </div> """)), Slice(slice_name='Name Cloud', viz_type='word_cloud', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='word_cloud', size_from='10', series='name', size_to='70', rotation='square', limit='100')), Slice(slice_name='Pivot Table', viz_type='pivot_table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='pivot_table', metrics=['sum__num'], groupby=['name'], columns=['state'])), Slice(slice_name='Number of Girls', viz_type='big_number_total', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='big_number_total', granularity_sqla='ds', filters=[{ 'col': 'gender', 'op': 'in', 'val': ['girl'], }], subheader='total female participants')), Slice(slice_name='Number of California Births', viz_type='big_number_total', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, metric={ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num_california', 'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, 'aggregate': 'SUM', 'label': 'SUM(num_california)', }, viz_type='big_number_total', granularity_sqla='ds')), Slice(slice_name='Top 10 California Names Timeseries', viz_type='line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, metrics=[{ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num_california', 'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, 'aggregate': 'SUM', 'label': 'SUM(num_california)', }], viz_type='line', granularity_sqla='ds', groupby=['name'], timeseries_limit_metric={ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num_california', 'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, 'aggregate': 'SUM', 'label': 'SUM(num_california)', }, limit='10')), Slice(slice_name='Names Sorted by Num in California', viz_type='table', datasource_type='table', datasource_id=tbl.id, params=get_slice_json( defaults, groupby=['name'], row_limit=50, timeseries_limit_metric={ 'expressionType': 'SIMPLE', 'column': { 'column_name': 'num_california', 'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END", }, 'aggregate': 'SUM', 'label': 'SUM(num_california)', })), Slice(slice_name='Num Births Trend', viz_type='line', datasource_type='table', datasource_id=tbl.id, params=get_slice_json(defaults, viz_type='line')), Slice(slice_name='Daily Totals', viz_type='table', datasource_type='table', datasource_id=tbl.id, created_by=admin, params=get_slice_json(defaults, groupby=['ds'], since='40 years ago', until='now', viz_type='table')), ] for slc in slices: merge_slice(slc) print('Creating a dashboard') dash = db.session.query(Dash).filter_by(dashboard_title='Births').first() if not dash: dash = Dash() js = textwrap.dedent("""\ { "CHART-0dd270f0": { "meta": { "chartId": 51, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-0dd270f0", "children": [] }, "CHART-a3c21bcc": { "meta": { "chartId": 52, "width": 2, "height": 50 }, "type": "CHART", "id": "CHART-a3c21bcc", "children": [] }, "CHART-976960a5": { "meta": { "chartId": 53, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-976960a5", "children": [] }, "CHART-58575537": { "meta": { "chartId": 54, "width": 2, "height": 25 }, "type": "CHART", "id": "CHART-58575537", "children": [] }, "CHART-e9cd8f0b": { "meta": { "chartId": 55, "width": 8, "height": 38 }, "type": "CHART", "id": "CHART-e9cd8f0b", "children": [] }, "CHART-e440d205": { "meta": { "chartId": 56, "width": 8, "height": 50 }, "type": "CHART", "id": "CHART-e440d205", "children": [] }, "CHART-59444e0b": { "meta": { "chartId": 57, "width": 3, "height": 38 }, "type": "CHART", "id": "CHART-59444e0b", "children": [] }, "CHART-e2cb4997": { "meta": { "chartId": 59, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-e2cb4997", "children": [] }, "CHART-e8774b49": { "meta": { "chartId": 60, "width": 12, "height": 50 }, "type": "CHART", "id": "CHART-e8774b49", "children": [] }, "CHART-985bfd1e": { "meta": { "chartId": 61, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-985bfd1e", "children": [] }, "CHART-17f13246": { "meta": { "chartId": 62, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-17f13246", "children": [] }, "CHART-729324f6": { "meta": { "chartId": 63, "width": 4, "height": 50 }, "type": "CHART", "id": "CHART-729324f6", "children": [] }, "COLUMN-25a865d6": { "meta": { "width": 4, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-25a865d6", "children": [ "ROW-cc97c6ac", "CHART-e2cb4997" ] }, "COLUMN-4557b6ba": { "meta": { "width": 8, "background": "BACKGROUND_TRANSPARENT" }, "type": "COLUMN", "id": "COLUMN-4557b6ba", "children": [ "ROW-d2e78e59", "CHART-e9cd8f0b" ] }, "GRID_ID": { "type": "GRID", "id": "GRID_ID", "children": [ "ROW-8515ace3", "ROW-1890385f", "ROW-f0b64094", "ROW-be9526b8" ] }, "HEADER_ID": { "meta": { "text": "Births" }, "type": "HEADER", "id": "HEADER_ID" }, "MARKDOWN-00178c27": { "meta": { "width": 5, "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babytux.jpg\\">\\n</div>\\n", "height": 38 }, "type": "MARKDOWN", "id": "MARKDOWN-00178c27", "children": [] }, "ROOT_ID": { "type": "ROOT", "id": "ROOT_ID", "children": [ "GRID_ID" ] }, "ROW-1890385f": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-1890385f", "children": [ "CHART-e440d205", "CHART-0dd270f0", "CHART-a3c21bcc" ] }, "ROW-8515ace3": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-8515ace3", "children": [ "COLUMN-25a865d6", "COLUMN-4557b6ba" ] }, "ROW-be9526b8": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-be9526b8", "children": [ "CHART-985bfd1e", "CHART-17f13246", "CHART-729324f6" ] }, "ROW-cc97c6ac": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-cc97c6ac", "children": [ "CHART-976960a5", "CHART-58575537" ] }, "ROW-d2e78e59": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-d2e78e59", "children": [ "MARKDOWN-00178c27", "CHART-59444e0b" ] }, "ROW-f0b64094": { "meta": { "background": "BACKGROUND_TRANSPARENT" }, "type": "ROW", "id": "ROW-f0b64094", "children": [ "CHART-e8774b49" ] }, "DASHBOARD_VERSION_KEY": "v2" } """) pos = json.loads(js) # dashboard v2 doesn't allow add markup slice dash.slices = [slc for slc in slices if slc.viz_type != 'markup'] update_slice_ids(pos, dash.slices) dash.dashboard_title = 'Births' dash.position_json = json.dumps(pos, indent=4) dash.slug = 'births' db.session.merge(dash) db.session.commit()
def init(): """Inits the Superset application""" utils.get_or_create_main_db() utils.get_example_database() appbuilder.add_permissions(update_perms=True) security_manager.sync_role_definitions()
def load_energy(): """Loads an energy related dataset to use with sankey and graphs""" tbl_name = "energy_usage" data = get_example_data("energy.json.gz") pdf = pd.read_json(data) pdf.to_sql( tbl_name, db.engine, if_exists="replace", chunksize=500, dtype={"source": String(255), "target": String(255), "value": Float()}, index=False, ) print("Creating table [wb_health_population] reference") tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first() if not tbl: tbl = TBL(table_name=tbl_name) tbl.description = "Energy consumption" tbl.database = utils.get_or_create_main_db() if not any(col.metric_name == "sum__value" for col in tbl.metrics): col = str(column("value").compile(db.engine)) tbl.metrics.append( SqlMetric(metric_name="sum__value", expression=f"SUM({col})") ) db.session.merge(tbl) db.session.commit() tbl.fetch_metadata() slc = Slice( slice_name="Energy Sankey", viz_type="sankey", datasource_type="table", datasource_id=tbl.id, params=textwrap.dedent( """\ { "collapsed_fieldsets": "", "groupby": [ "source", "target" ], "having": "", "metric": "sum__value", "row_limit": "5000", "slice_name": "Energy Sankey", "viz_type": "sankey", "where": "" } """ ), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc) slc = Slice( slice_name="Energy Force Layout", viz_type="directed_force", datasource_type="table", datasource_id=tbl.id, params=textwrap.dedent( """\ { "charge": "-500", "collapsed_fieldsets": "", "groupby": [ "source", "target" ], "having": "", "link_length": "200", "metric": "sum__value", "row_limit": "5000", "slice_name": "Force", "viz_type": "directed_force", "where": "" } """ ), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc) slc = Slice( slice_name="Heatmap", viz_type="heatmap", datasource_type="table", datasource_id=tbl.id, params=textwrap.dedent( """\ { "all_columns_x": "source", "all_columns_y": "target", "canvas_image_rendering": "pixelated", "collapsed_fieldsets": "", "having": "", "linear_color_scheme": "blue_white_yellow", "metric": "sum__value", "normalize_across": "heatmap", "slice_name": "Heatmap", "viz_type": "heatmap", "where": "", "xscale_interval": "1", "yscale_interval": "1" } """ ), ) misc_dash_slices.add(slc.slice_name) merge_slice(slc)