Exemple #1
0
def load_test_users_run():
    """
    Loads admin, alpha, and gamma user for testing purposes

    Syncs permissions for those users/roles
    """
    if config.get('TESTING'):
        security_manager.sync_role_definitions()
        gamma_sqllab_role = security_manager.add_role('gamma_sqllab')
        for perm in security_manager.find_role('Gamma').permissions:
            security_manager.add_permission_role(gamma_sqllab_role, perm)
        utils.get_or_create_main_db()
        db_perm = utils.get_main_database(security_manager.get_session).perm
        security_manager.merge_perm('database_access', db_perm)
        db_pvm = security_manager.find_permission_view_menu(
            view_menu_name=db_perm, permission_name='database_access')
        gamma_sqllab_role.permissions.append(db_pvm)
        for perm in security_manager.find_role('sql_lab').permissions:
            security_manager.add_permission_role(gamma_sqllab_role, perm)

        admin = security_manager.find_user('admin')
        if not admin:
            security_manager.add_user(
                'admin', 'admin', ' user', '*****@*****.**',
                security_manager.find_role('Admin'),
                password='******')

        gamma = security_manager.find_user('gamma')
        if not gamma:
            security_manager.add_user(
                'gamma', 'gamma', 'user', '*****@*****.**',
                security_manager.find_role('Gamma'),
                password='******')

        gamma2 = security_manager.find_user('gamma2')
        if not gamma2:
            security_manager.add_user(
                'gamma2', 'gamma2', 'user', '*****@*****.**',
                security_manager.find_role('Gamma'),
                password='******')

        gamma_sqllab_user = security_manager.find_user('gamma_sqllab')
        if not gamma_sqllab_user:
            security_manager.add_user(
                'gamma_sqllab', 'gamma_sqllab', 'user', '*****@*****.**',
                gamma_sqllab_role, password='******')

        alpha = security_manager.find_user('alpha')
        if not alpha:
            security_manager.add_user(
                'alpha', 'alpha', 'user', '*****@*****.**',
                security_manager.find_role('Alpha'),
                password='******')
        security_manager.get_session.commit()
Exemple #2
0
def load_bart_lines():
    tbl_name = 'bart_lines'
    content = get_example_data('bart-lines.json.gz')
    df = pd.read_json(content, encoding='latin-1')
    df['path_json'] = df.path.map(json.dumps)
    df['polyline'] = df.path.map(polyline.encode)
    del df['path']

    df.to_sql(
        tbl_name,
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'color': String(255),
            'name': String(255),
            'polyline': Text,
            'path_json': Text,
        },
        index=False)
    print('Creating table {} reference'.format(tbl_name))
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = 'BART lines'
    tbl.database = get_or_create_main_db()
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()
def load_sf_population_polygons():
    tbl_name = 'sf_population_polygons'

    with gzip.open(os.path.join(DATA_FOLDER, 'sf_population.json.gz')) as f:
        df = pd.read_json(f)
        df['contour'] = df.contour.map(json.dumps)

    df.to_sql(
        tbl_name,
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'zipcode': BigInteger,
            'population': BigInteger,
            'contour': Text,
            'area': BigInteger,
        },
        index=False)
    print('Creating table {} reference'.format(tbl_name))
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = 'Population density of San Francisco'
    tbl.database = utils.get_or_create_main_db()
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()
Exemple #4
0
def load_paris_iris_geojson():
    tbl_name = 'paris_iris_mapping'

    with gzip.open(os.path.join(DATA_FOLDER, 'paris_iris.json.gz')) as f:
        df = pd.read_json(f)
        df['features'] = df.features.map(json.dumps)

    df.to_sql(
        tbl_name,
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'color': String(255),
            'name': String(255),
            'features': Text,
            'type': Text,
        },
        index=False)
    print('Creating table {} reference'.format(tbl_name))
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = 'Map of Paris'
    tbl.database = utils.get_or_create_main_db()
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()
def load_random_time_series_data():
    """Loading random time series data from a zip file in the repo"""
    with gzip.open(os.path.join(DATA_FOLDER, 'random_time_series.json.gz')) as f:
        pdf = pd.read_json(f)
    pdf.ds = pd.to_datetime(pdf.ds, unit='s')
    pdf.to_sql(
        'random_time_series',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'ds': DateTime,
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table [random_time_series] reference')
    obj = db.session.query(TBL).filter_by(table_name='random_time_series').first()
    if not obj:
        obj = TBL(table_name='random_time_series')
    obj.main_dttm_col = 'ds'
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': 'day',
        'row_limit': config.get('ROW_LIMIT'),
        'since': '1 year ago',
        'until': 'now',
        'metric': 'count',
        'where': '',
        'viz_type': 'cal_heatmap',
        'domain_granularity': 'month',
        'subdomain_granularity': 'day',
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Calendar Heatmap',
        viz_type='cal_heatmap',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    merge_slice(slc)
Exemple #6
0
def load_flights():
    """Loading random time series data from a zip file in the repo"""
    tbl_name = 'flights'
    with gzip.open(os.path.join(DATA_FOLDER, 'flight_data.csv.gz')) as f:
        pdf = pd.read_csv(f, encoding='latin-1')

    # Loading airports info to join and get lat/long
    with gzip.open(os.path.join(DATA_FOLDER, 'airports.csv.gz')) as f:
        airports = pd.read_csv(f, encoding='latin-1')
    airports = airports.set_index('IATA_CODE')

    pdf['ds'] = pdf.YEAR.map(str) + '-0' + pdf.MONTH.map(str) + '-0' + pdf.DAY.map(str)
    pdf.ds = pd.to_datetime(pdf.ds)
    del pdf['YEAR']
    del pdf['MONTH']
    del pdf['DAY']

    pdf = pdf.join(airports, on='ORIGIN_AIRPORT', rsuffix='_ORIG')
    pdf = pdf.join(airports, on='DESTINATION_AIRPORT', rsuffix='_DEST')
    pdf.to_sql(
        tbl_name,
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'ds': DateTime,
        },
        index=False)
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = 'Random set of flights in the US'
    tbl.database = utils.get_or_create_main_db()
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()
    print('Done loading table!')
Exemple #7
0
def load_country_map_data():
    """Loading data for map with country map"""
    csv_bytes = get_example_data(
        'birth_france_data_for_country_map.csv', is_gzip=False, make_bytes=True)
    data = pd.read_csv(csv_bytes, encoding='utf-8')
    data['dttm'] = datetime.datetime.now().date()
    data.to_sql(  # pylint: disable=no-member
        'birth_france_by_region',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'DEPT_ID': String(10),
            '2003': BigInteger,
            '2004': BigInteger,
            '2005': BigInteger,
            '2006': BigInteger,
            '2007': BigInteger,
            '2008': BigInteger,
            '2009': BigInteger,
            '2010': BigInteger,
            '2011': BigInteger,
            '2012': BigInteger,
            '2013': BigInteger,
            '2014': BigInteger,
            'dttm': Date(),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)
    print('Creating table reference')
    obj = db.session.query(TBL).filter_by(table_name='birth_france_by_region').first()
    if not obj:
        obj = TBL(table_name='birth_france_by_region')
    obj.main_dttm_col = 'dttm'
    obj.database = utils.get_or_create_main_db()
    if not any(col.metric_name == 'avg__2004' for col in obj.metrics):
        col = str(column('2004').compile(db.engine))
        obj.metrics.append(SqlMetric(
            metric_name='avg__2004',
            expression=f'AVG({col})',
        ))
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': '',
        'since': '',
        'until': '',
        'where': '',
        'viz_type': 'country_map',
        'entity': 'DEPT_ID',
        'metric': {
            'expressionType': 'SIMPLE',
            'column': {
                'type': 'INT',
                'column_name': '2004',
            },
            'aggregate': 'AVG',
            'label': 'Boys',
            'optionName': 'metric_112342',
        },
        'row_limit': 500000,
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Birth in France by department in 2016',
        viz_type='country_map',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def load_long_lat_data():
    """Loading lat/long data from a csv file in the repo"""
    data = get_example_data("san_francisco.csv.gz", make_bytes=True)
    pdf = pd.read_csv(data, encoding="utf-8")
    start = datetime.datetime.now().replace(hour=0,
                                            minute=0,
                                            second=0,
                                            microsecond=0)
    pdf["datetime"] = [
        start + datetime.timedelta(hours=i * 24 / (len(pdf) - 1))
        for i in range(len(pdf))
    ]
    pdf["occupancy"] = [random.randint(1, 6) for _ in range(len(pdf))]
    pdf["radius_miles"] = [random.uniform(1, 3) for _ in range(len(pdf))]
    pdf["geohash"] = pdf[["LAT", "LON"]].apply(lambda x: geohash.encode(*x),
                                               axis=1)
    pdf["delimited"] = pdf["LAT"].map(str).str.cat(pdf["LON"].map(str),
                                                   sep=",")
    pdf.to_sql(  # pylint: disable=no-member
        "long_lat",
        db.engine,
        if_exists="replace",
        chunksize=500,
        dtype={
            "longitude": Float(),
            "latitude": Float(),
            "number": Float(),
            "street": String(100),
            "unit": String(10),
            "city": String(50),
            "district": String(50),
            "region": String(50),
            "postcode": Float(),
            "id": String(100),
            "datetime": DateTime(),
            "occupancy": Float(),
            "radius_miles": Float(),
            "geohash": String(12),
            "delimited": String(60),
        },
        index=False,
    )
    print("Done loading table!")
    print("-" * 80)

    print("Creating table reference")
    obj = db.session.query(TBL).filter_by(table_name="long_lat").first()
    if not obj:
        obj = TBL(table_name="long_lat")
    obj.main_dttm_col = "datetime"
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        "granularity_sqla": "day",
        "since": "2014-01-01",
        "until": "now",
        "where": "",
        "viz_type": "mapbox",
        "all_columns_x": "LON",
        "all_columns_y": "LAT",
        "mapbox_style": "mapbox://styles/mapbox/light-v9",
        "all_columns": ["occupancy"],
        "row_limit": 500000,
    }

    print("Creating a slice")
    slc = Slice(
        slice_name="Mapbox Long/Lat",
        viz_type="mapbox",
        datasource_type="table",
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def load_unicode_test_data():
    """Loading unicode test dataset from a csv file in the repo"""
    data = get_example_data(
        'unicode_utf8_unixnl_test.csv', is_gzip=False, make_bytes=True)
    df = pd.read_csv(data, encoding='utf-8')
    # generate date/numeric data
    df['dttm'] = datetime.datetime.now().date()
    df['value'] = [random.randint(1, 100) for _ in range(len(df))]
    df.to_sql(  # pylint: disable=no-member
        'unicode_test',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'phrase': String(500),
            'short_phrase': String(10),
            'with_missing': String(100),
            'dttm': Date(),
            'value': Float(),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table [unicode_test] reference')
    obj = db.session.query(TBL).filter_by(table_name='unicode_test').first()
    if not obj:
        obj = TBL(table_name='unicode_test')
    obj.main_dttm_col = 'dttm'
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': 'dttm',
        'groupby': [],
        'metric': 'sum__value',
        'row_limit': config.get('ROW_LIMIT'),
        'since': '100 years ago',
        'until': 'now',
        'where': '',
        'viz_type': 'word_cloud',
        'size_from': '10',
        'series': 'short_phrase',
        'size_to': '70',
        'rotation': 'square',
        'limit': '100',
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Unicode Cloud',
        viz_type='word_cloud',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    merge_slice(slc)

    print('Creating a dashboard')
    dash = (
        db.session.query(Dash)
        .filter_by(dashboard_title='Unicode Test')
        .first()
    )

    if not dash:
        dash = Dash()
    js = """\
{
    "CHART-Hkx6154FEm": {
        "children": [],
        "id": "CHART-Hkx6154FEm",
        "meta": {
            "chartId": 2225,
            "height": 30,
            "sliceName": "slice 1",
            "width": 4
        },
        "type": "CHART"
    },
    "GRID_ID": {
        "children": [
            "ROW-SyT19EFEQ"
        ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-SyT19EFEQ": {
        "children": [
            "CHART-Hkx6154FEm"
        ],
        "id": "ROW-SyT19EFEQ",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
    """
    dash.dashboard_title = 'Unicode Test'
    pos = json.loads(js)
    update_slice_ids(pos, [slc])
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = 'unicode-test'
    dash.slices = [slc]
    db.session.merge(dash)
    db.session.commit()
Exemple #10
0
def load_test_users_run():
    """
    Loads admin, alpha, and gamma user for testing purposes

    Syncs permissions for those users/roles
    """
    if config.get("TESTING"):
        security_manager.sync_role_definitions()
        gamma_sqllab_role = security_manager.add_role("gamma_sqllab")
        for perm in security_manager.find_role("Gamma").permissions:
            security_manager.add_permission_role(gamma_sqllab_role, perm)
        utils.get_or_create_main_db()
        db_perm = utils.get_main_database().perm
        security_manager.add_permission_view_menu("database_access", db_perm)
        db_pvm = security_manager.find_permission_view_menu(
            view_menu_name=db_perm, permission_name="database_access"
        )
        gamma_sqllab_role.permissions.append(db_pvm)
        for perm in security_manager.find_role("sql_lab").permissions:
            security_manager.add_permission_role(gamma_sqllab_role, perm)

        admin = security_manager.find_user("admin")
        if not admin:
            security_manager.add_user(
                "admin",
                "admin",
                " user",
                "*****@*****.**",
                security_manager.find_role("Admin"),
                password="******",
            )

        gamma = security_manager.find_user("gamma")
        if not gamma:
            security_manager.add_user(
                "gamma",
                "gamma",
                "user",
                "*****@*****.**",
                security_manager.find_role("Gamma"),
                password="******",
            )

        gamma2 = security_manager.find_user("gamma2")
        if not gamma2:
            security_manager.add_user(
                "gamma2",
                "gamma2",
                "user",
                "*****@*****.**",
                security_manager.find_role("Gamma"),
                password="******",
            )

        gamma_sqllab_user = security_manager.find_user("gamma_sqllab")
        if not gamma_sqllab_user:
            security_manager.add_user(
                "gamma_sqllab",
                "gamma_sqllab",
                "user",
                "*****@*****.**",
                gamma_sqllab_role,
                password="******",
            )

        alpha = security_manager.find_user("alpha")
        if not alpha:
            security_manager.add_user(
                "alpha",
                "alpha",
                "user",
                "*****@*****.**",
                security_manager.find_role("Alpha"),
                password="******",
            )
        security_manager.get_session.commit()
Exemple #11
0
def load_test_users_run():
    """
    Loads admin, alpha, and gamma user for testing purposes

    Syncs permissions for those users/roles
    """
    if config.get('TESTING'):
        security_manager.sync_role_definitions()
        gamma_sqllab_role = security_manager.add_role('gamma_sqllab')
        for perm in security_manager.find_role('Gamma').permissions:
            security_manager.add_permission_role(gamma_sqllab_role, perm)
        utils.get_or_create_main_db()
        db_perm = utils.get_main_database(security_manager.get_session).perm
        security_manager.merge_perm('database_access', db_perm)
        db_pvm = security_manager.find_permission_view_menu(
            view_menu_name=db_perm, permission_name='database_access')
        gamma_sqllab_role.permissions.append(db_pvm)
        for perm in security_manager.find_role('sql_lab').permissions:
            security_manager.add_permission_role(gamma_sqllab_role, perm)

        admin = security_manager.find_user('admin')
        if not admin:
            security_manager.add_user('admin',
                                      'admin',
                                      ' user',
                                      '*****@*****.**',
                                      security_manager.find_role('Admin'),
                                      password='******')

        gamma = security_manager.find_user('gamma')
        if not gamma:
            security_manager.add_user('gamma',
                                      'gamma',
                                      'user',
                                      '*****@*****.**',
                                      security_manager.find_role('Gamma'),
                                      password='******')

        gamma2 = security_manager.find_user('gamma2')
        if not gamma2:
            security_manager.add_user('gamma2',
                                      'gamma2',
                                      'user',
                                      '*****@*****.**',
                                      security_manager.find_role('Gamma'),
                                      password='******')

        gamma_sqllab_user = security_manager.find_user('gamma_sqllab')
        if not gamma_sqllab_user:
            security_manager.add_user('gamma_sqllab',
                                      'gamma_sqllab',
                                      'user',
                                      '*****@*****.**',
                                      gamma_sqllab_role,
                                      password='******')

        alpha = security_manager.find_user('alpha')
        if not alpha:
            security_manager.add_user('alpha',
                                      'alpha',
                                      'user',
                                      '*****@*****.**',
                                      security_manager.find_role('Alpha'),
                                      password='******')
        security_manager.get_session.commit()
Exemple #12
0
def load_world_bank_health_n_pop():
    """Loads the world bank health dataset, slices and a dashboard"""
    tbl_name = "wb_health_population"
    data = get_example_data("countries.json.gz")
    pdf = pd.read_json(data)
    pdf.columns = [col.replace(".", "_") for col in pdf.columns]
    pdf.year = pd.to_datetime(pdf.year)
    pdf.to_sql(
        tbl_name,
        db.engine,
        if_exists="replace",
        chunksize=50,
        dtype={
            "year": DateTime(),
            "country_code": String(3),
            "country_name": String(255),
            "region": String(255),
        },
        index=False,
    )

    print("Creating table [wb_health_population] reference")
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = utils.readfile(os.path.join(DATA_FOLDER, "countries.md"))
    tbl.main_dttm_col = "year"
    tbl.database = utils.get_or_create_main_db()
    tbl.filter_select_enabled = True

    metrics = [
        "sum__SP_POP_TOTL",
        "sum__SH_DYN_AIDS",
        "sum__SH_DYN_AIDS",
        "sum__SP_RUR_TOTL_ZS",
        "sum__SP_DYN_LE00_IN",
        "sum__SP_RUR_TOTL",
    ]
    for m in metrics:
        if not any(col.metric_name == m for col in tbl.metrics):
            aggr_func = m[:3]
            col = str(column(m[5:]).compile(db.engine))
            tbl.metrics.append(
                SqlMetric(metric_name=m, expression=f"{aggr_func}({col})"))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "year",
        "groupby": [],
        "metric": "sum__SP_POP_TOTL",
        "metrics": ["sum__SP_POP_TOTL"],
        "row_limit": config.get("ROW_LIMIT"),
        "since": "2014-01-01",
        "until": "2014-01-02",
        "time_range": "2014-01-01 : 2014-01-02",
        "where": "",
        "markup_type": "markdown",
        "country_fieldtype": "cca3",
        "secondary_metric": "sum__SP_POP_TOTL",
        "entity": "country_code",
        "show_bubbles": True,
    }

    print("Creating slices")
    slices = [
        Slice(
            slice_name="Region Filter",
            viz_type="filter_box",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="filter_box",
                date_filter=False,
                filter_configs=[
                    {
                        "asc": False,
                        "clearable": True,
                        "column": "region",
                        "key": "2s98dfu",
                        "metric": "sum__SP_POP_TOTL",
                        "multiple": True,
                    },
                    {
                        "asc": False,
                        "clearable": True,
                        "key": "li3j2lk",
                        "column": "country_name",
                        "metric": "sum__SP_POP_TOTL",
                        "multiple": True,
                    },
                ],
            ),
        ),
        Slice(
            slice_name="World's Population",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="2000",
                viz_type="big_number",
                compare_lag="10",
                metric="sum__SP_POP_TOTL",
                compare_suffix="over 10Y",
            ),
        ),
        Slice(
            slice_name="Most Populated Countries",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="table",
                metrics=["sum__SP_POP_TOTL"],
                groupby=["country_name"],
            ),
        ),
        Slice(
            slice_name="Growth Rate",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                since="1960-01-01",
                metrics=["sum__SP_POP_TOTL"],
                num_period_compare="10",
                groupby=["country_name"],
            ),
        ),
        Slice(
            slice_name="% Rural",
            viz_type="world_map",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="world_map",
                metric="sum__SP_RUR_TOTL_ZS",
                num_period_compare="10",
            ),
        ),
        Slice(
            slice_name="Life Expectancy VS Rural %",
            viz_type="bubble",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="bubble",
                since="2011-01-01",
                until="2011-01-02",
                series="region",
                limit=0,
                entity="country_name",
                x="sum__SP_RUR_TOTL_ZS",
                y="sum__SP_DYN_LE00_IN",
                size="sum__SP_POP_TOTL",
                max_bubble_size="50",
                filters=[{
                    "col":
                    "country_code",
                    "val": [
                        "TCA",
                        "MNP",
                        "DMA",
                        "MHL",
                        "MCO",
                        "SXM",
                        "CYM",
                        "TUV",
                        "IMY",
                        "KNA",
                        "ASM",
                        "ADO",
                        "AMA",
                        "PLW",
                    ],
                    "op":
                    "not in",
                }],
            ),
        ),
        Slice(
            slice_name="Rural Breakdown",
            viz_type="sunburst",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="sunburst",
                groupby=["region", "country_name"],
                secondary_metric="sum__SP_RUR_TOTL",
                since="2011-01-01",
                until="2011-01-01",
            ),
        ),
        Slice(
            slice_name="World's Pop Growth",
            viz_type="area",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                viz_type="area",
                groupby=["region"],
            ),
        ),
        Slice(
            slice_name="Box plot",
            viz_type="box_plot",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                whisker_options="Min/max (no outliers)",
                x_ticks_layout="staggered",
                viz_type="box_plot",
                groupby=["region"],
            ),
        ),
        Slice(
            slice_name="Treemap",
            viz_type="treemap",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="1960-01-01",
                until="now",
                viz_type="treemap",
                metrics=["sum__SP_POP_TOTL"],
                groupby=["region", "country_code"],
            ),
        ),
        Slice(
            slice_name="Parallel Coordinates",
            viz_type="para",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since="2011-01-01",
                until="2011-01-01",
                viz_type="para",
                limit=100,
                metrics=[
                    "sum__SP_POP_TOTL", "sum__SP_RUR_TOTL_ZS",
                    "sum__SH_DYN_AIDS"
                ],
                secondary_metric="sum__SP_POP_TOTL",
                series="country_name",
            ),
        ),
    ]
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World's Bank Data"
    slug = "world_health"
    dash = db.session.query(Dash).filter_by(slug=slug).first()

    if not dash:
        dash = Dash()
    js = textwrap.dedent("""\
{
    "CHART-36bfc934": {
        "children": [],
        "id": "CHART-36bfc934",
        "meta": {
            "chartId": 40,
            "height": 25,
            "sliceName": "Region Filter",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-37982887": {
        "children": [],
        "id": "CHART-37982887",
        "meta": {
            "chartId": 41,
            "height": 25,
            "sliceName": "World's Population",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-17e0f8d8": {
        "children": [],
        "id": "CHART-17e0f8d8",
        "meta": {
            "chartId": 42,
            "height": 92,
            "sliceName": "Most Populated Countries",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-2ee52f30": {
        "children": [],
        "id": "CHART-2ee52f30",
        "meta": {
            "chartId": 43,
            "height": 38,
            "sliceName": "Growth Rate",
            "width": 6
        },
        "type": "CHART"
    },
    "CHART-2d5b6871": {
        "children": [],
        "id": "CHART-2d5b6871",
        "meta": {
            "chartId": 44,
            "height": 52,
            "sliceName": "% Rural",
            "width": 7
        },
        "type": "CHART"
    },
    "CHART-0fd0d252": {
        "children": [],
        "id": "CHART-0fd0d252",
        "meta": {
            "chartId": 45,
            "height": 50,
            "sliceName": "Life Expectancy VS Rural %",
            "width": 8
        },
        "type": "CHART"
    },
    "CHART-97f4cb48": {
        "children": [],
        "id": "CHART-97f4cb48",
        "meta": {
            "chartId": 46,
            "height": 38,
            "sliceName": "Rural Breakdown",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-b5e05d6f": {
        "children": [],
        "id": "CHART-b5e05d6f",
        "meta": {
            "chartId": 47,
            "height": 50,
            "sliceName": "World's Pop Growth",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-e76e9f5f": {
        "children": [],
        "id": "CHART-e76e9f5f",
        "meta": {
            "chartId": 48,
            "height": 50,
            "sliceName": "Box plot",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-a4808bba": {
        "children": [],
        "id": "CHART-a4808bba",
        "meta": {
            "chartId": 49,
            "height": 50,
            "sliceName": "Treemap",
            "width": 8
        },
        "type": "CHART"
    },
    "COLUMN-071bbbad": {
        "children": [
            "ROW-1e064e3c",
            "ROW-afdefba9"
        ],
        "id": "COLUMN-071bbbad",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 9
        },
        "type": "COLUMN"
    },
    "COLUMN-fe3914b8": {
        "children": [
            "CHART-36bfc934",
            "CHART-37982887"
        ],
        "id": "COLUMN-fe3914b8",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 2
        },
        "type": "COLUMN"
    },
    "GRID_ID": {
        "children": [
            "ROW-46632bc2",
            "ROW-3fa26c5d",
            "ROW-812b3f13"
        ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "HEADER_ID": {
        "id": "HEADER_ID",
        "meta": {
            "text": "World's Bank Data"
        },
        "type": "HEADER"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-1e064e3c": {
        "children": [
            "COLUMN-fe3914b8",
            "CHART-2d5b6871"
        ],
        "id": "ROW-1e064e3c",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-3fa26c5d": {
        "children": [
            "CHART-b5e05d6f",
            "CHART-0fd0d252"
        ],
        "id": "ROW-3fa26c5d",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-46632bc2": {
        "children": [
            "COLUMN-071bbbad",
            "CHART-17e0f8d8"
        ],
        "id": "ROW-46632bc2",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-812b3f13": {
        "children": [
            "CHART-a4808bba",
            "CHART-e76e9f5f"
        ],
        "id": "ROW-812b3f13",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-afdefba9": {
        "children": [
            "CHART-2ee52f30",
            "CHART-97f4cb48"
        ],
        "id": "ROW-afdefba9",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
    """)
    pos = json.loads(js)
    update_slice_ids(pos, slices)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug

    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
def load_multiformat_time_series():
    """Loading time series data from a zip file in the repo"""
    data = get_example_data('multiformat_time_series.json.gz')
    pdf = pd.read_json(data)

    pdf.ds = pd.to_datetime(pdf.ds, unit='s')
    pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s')
    pdf.to_sql(
        'multiformat_time_series',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'ds': Date,
            'ds2': DateTime,
            'epoch_s': BigInteger,
            'epoch_ms': BigInteger,
            'string0': String(100),
            'string1': String(100),
            'string2': String(100),
            'string3': String(100),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)
    print('Creating table [multiformat_time_series] reference')
    obj = db.session.query(TBL).filter_by(table_name='multiformat_time_series').first()
    if not obj:
        obj = TBL(table_name='multiformat_time_series')
    obj.main_dttm_col = 'ds'
    obj.database = utils.get_or_create_main_db()
    dttm_and_expr_dict = {
        'ds': [None, None],
        'ds2': [None, None],
        'epoch_s': ['epoch_s', None],
        'epoch_ms': ['epoch_ms', None],
        'string2': ['%Y%m%d-%H%M%S', None],
        'string1': ['%Y-%m-%d^%H:%M:%S', None],
        'string0': ['%Y-%m-%d %H:%M:%S.%f', None],
        'string3': ['%Y/%m/%d%H:%M:%S.%f', None],
    }
    for col in obj.columns:
        dttm_and_expr = dttm_and_expr_dict[col.column_name]
        col.python_date_format = dttm_and_expr[0]
        col.dbatabase_expr = dttm_and_expr[1]
        col.is_dttm = True
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    print('Creating Heatmap charts')
    for i, col in enumerate(tbl.columns):
        slice_data = {
            'metrics': ['count'],
            'granularity_sqla': col.column_name,
            'row_limit': config.get('ROW_LIMIT'),
            'since': '2015',
            'until': '2016',
            'where': '',
            'viz_type': 'cal_heatmap',
            'domain_granularity': 'month',
            'subdomain_granularity': 'day',
        }

        slc = Slice(
            slice_name=f'Calendar Heatmap multiformat {i}',
            viz_type='cal_heatmap',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(slice_data),
        )
        merge_slice(slc)
    misc_dash_slices.add('Calendar Heatmap multiformat 0')
def load_energy():
    """Loads an energy related dataset to use with sankey and graphs"""
    tbl_name = 'energy_usage'
    with gzip.open(os.path.join(DATA_FOLDER, 'energy.json.gz')) as f:
        pdf = pd.read_json(f)
    pdf.to_sql(tbl_name,
               db.engine,
               if_exists='replace',
               chunksize=500,
               dtype={
                   'source': String(255),
                   'target': String(255),
                   'value': Float(),
               },
               index=False)

    print('Creating table [wb_health_population] reference')
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = 'Energy consumption'
    tbl.database = utils.get_or_create_main_db()

    if not any(col.metric_name == 'sum__value' for col in tbl.metrics):
        tbl.metrics.append(
            SqlMetric(
                metric_name='sum__value',
                expression='SUM(value)',
            ))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    slc = Slice(
        slice_name='Energy Sankey',
        viz_type='sankey',
        datasource_type='table',
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "having": "",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Energy Sankey",
            "viz_type": "sankey",
            "where": ""
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name='Energy Force Layout',
        viz_type='directed_force',
        datasource_type='table',
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "charge": "-500",
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "having": "",
            "link_length": "200",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Force",
            "viz_type": "directed_force",
            "where": ""
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name='Heatmap',
        viz_type='heatmap',
        datasource_type='table',
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "all_columns_x": "source",
            "all_columns_y": "target",
            "canvas_image_rendering": "pixelated",
            "collapsed_fieldsets": "",
            "having": "",
            "linear_color_scheme": "blue_white_yellow",
            "metric": "sum__value",
            "normalize_across": "heatmap",
            "slice_name": "Heatmap",
            "viz_type": "heatmap",
            "where": "",
            "xscale_interval": "1",
            "yscale_interval": "1"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
Exemple #15
0
def load_energy():
    """Loads an energy related dataset to use with sankey and graphs"""
    tbl_name = 'energy_usage'
    with gzip.open(os.path.join(DATA_FOLDER, 'energy.json.gz')) as f:
        pdf = pd.read_json(f)
    pdf.to_sql(
        tbl_name,
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'source': String(255),
            'target': String(255),
            'value': Float(),
        },
        index=False)

    print('Creating table [wb_health_population] reference')
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = 'Energy consumption'
    tbl.database = utils.get_or_create_main_db()
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    slc = Slice(
        slice_name='Energy Sankey',
        viz_type='sankey',
        datasource_type='table',
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "having": "",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Energy Sankey",
            "viz_type": "sankey",
            "where": ""
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name='Energy Force Layout',
        viz_type='directed_force',
        datasource_type='table',
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "charge": "-500",
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "having": "",
            "link_length": "200",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Force",
            "viz_type": "directed_force",
            "where": ""
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name='Heatmap',
        viz_type='heatmap',
        datasource_type='table',
        datasource_id=tbl.id,
        params=textwrap.dedent("""\
        {
            "all_columns_x": "source",
            "all_columns_y": "target",
            "canvas_image_rendering": "pixelated",
            "collapsed_fieldsets": "",
            "having": "",
            "linear_color_scheme": "blue_white_yellow",
            "metric": "sum__value",
            "normalize_across": "heatmap",
            "slice_name": "Heatmap",
            "viz_type": "heatmap",
            "where": "",
            "xscale_interval": "1",
            "yscale_interval": "1"
        }
        """),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
Exemple #16
0
def load_country_map_data():
    """Loading data for map with country map"""
    csv_bytes = get_example_data(
        'birth_france_data_for_country_map.csv', is_gzip=False, make_bytes=True)
    data = pd.read_csv(csv_bytes, encoding='utf-8')
    data['dttm'] = datetime.datetime.now().date()
    data.to_sql(  # pylint: disable=no-member
        'birth_france_by_region',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'DEPT_ID': String(10),
            '2003': BigInteger,
            '2004': BigInteger,
            '2005': BigInteger,
            '2006': BigInteger,
            '2007': BigInteger,
            '2008': BigInteger,
            '2009': BigInteger,
            '2010': BigInteger,
            '2011': BigInteger,
            '2012': BigInteger,
            '2013': BigInteger,
            '2014': BigInteger,
            'dttm': Date(),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)
    print('Creating table reference')
    obj = db.session.query(TBL).filter_by(table_name='birth_france_by_region').first()
    if not obj:
        obj = TBL(table_name='birth_france_by_region')
    obj.main_dttm_col = 'dttm'
    obj.database = utils.get_or_create_main_db()
    if not any(col.metric_name == 'avg__2004' for col in obj.metrics):
        obj.metrics.append(SqlMetric(
            metric_name='avg__2004',
            expression='AVG(2004)',
        ))
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': '',
        'since': '',
        'until': '',
        'where': '',
        'viz_type': 'country_map',
        'entity': 'DEPT_ID',
        'metric': {
            'expressionType': 'SIMPLE',
            'column': {
                'type': 'INT',
                'column_name': '2004',
            },
            'aggregate': 'AVG',
            'label': 'Boys',
            'optionName': 'metric_112342',
        },
        'row_limit': 500000,
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Birth in France by department in 2016',
        viz_type='country_map',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def load_multiformat_time_series():
    """Loading time series data from a zip file in the repo"""
    data = get_example_data("multiformat_time_series.json.gz")
    pdf = pd.read_json(data)

    pdf.ds = pd.to_datetime(pdf.ds, unit="s")
    pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")
    pdf.to_sql(
        "multiformat_time_series",
        db.engine,
        if_exists="replace",
        chunksize=500,
        dtype={
            "ds": Date,
            "ds2": DateTime,
            "epoch_s": BigInteger,
            "epoch_ms": BigInteger,
            "string0": String(100),
            "string1": String(100),
            "string2": String(100),
            "string3": String(100),
        },
        index=False,
    )
    print("Done loading table!")
    print("-" * 80)
    print("Creating table [multiformat_time_series] reference")
    obj = db.session.query(TBL).filter_by(
        table_name="multiformat_time_series").first()
    if not obj:
        obj = TBL(table_name="multiformat_time_series")
    obj.main_dttm_col = "ds"
    obj.database = utils.get_or_create_main_db()
    dttm_and_expr_dict = {
        "ds": [None, None],
        "ds2": [None, None],
        "epoch_s": ["epoch_s", None],
        "epoch_ms": ["epoch_ms", None],
        "string2": ["%Y%m%d-%H%M%S", None],
        "string1": ["%Y-%m-%d^%H:%M:%S", None],
        "string0": ["%Y-%m-%d %H:%M:%S.%f", None],
        "string3": ["%Y/%m/%d%H:%M:%S.%f", None],
    }
    for col in obj.columns:
        dttm_and_expr = dttm_and_expr_dict[col.column_name]
        col.python_date_format = dttm_and_expr[0]
        col.dbatabase_expr = dttm_and_expr[1]
        col.is_dttm = True
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    print("Creating Heatmap charts")
    for i, col in enumerate(tbl.columns):
        slice_data = {
            "metrics": ["count"],
            "granularity_sqla": col.column_name,
            "row_limit": config.get("ROW_LIMIT"),
            "since": "2015",
            "until": "2016",
            "where": "",
            "viz_type": "cal_heatmap",
            "domain_granularity": "month",
            "subdomain_granularity": "day",
        }

        slc = Slice(
            slice_name=f"Calendar Heatmap multiformat {i}",
            viz_type="cal_heatmap",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(slice_data),
        )
        merge_slice(slc)
    misc_dash_slices.add("Calendar Heatmap multiformat 0")
Exemple #18
0
def init():
    """Inits the Superset application"""
    utils.get_or_create_main_db()
    security_manager.sync_role_definitions()
def load_unicode_test_data():
    """Loading unicode test dataset from a csv file in the repo"""
    data = get_example_data('unicode_utf8_unixnl_test.csv',
                            is_gzip=False,
                            make_bytes=True)
    df = pd.read_csv(data, encoding='utf-8')
    # generate date/numeric data
    df['dttm'] = datetime.datetime.now().date()
    df['value'] = [random.randint(1, 100) for _ in range(len(df))]
    df.to_sql(  # pylint: disable=no-member
        'unicode_test',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'phrase': String(500),
            'short_phrase': String(10),
            'with_missing': String(100),
            'dttm': Date(),
            'value': Float(),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table [unicode_test] reference')
    obj = db.session.query(TBL).filter_by(table_name='unicode_test').first()
    if not obj:
        obj = TBL(table_name='unicode_test')
    obj.main_dttm_col = 'dttm'
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': 'dttm',
        'groupby': [],
        'metric': 'sum__value',
        'row_limit': config.get('ROW_LIMIT'),
        'since': '100 years ago',
        'until': 'now',
        'where': '',
        'viz_type': 'word_cloud',
        'size_from': '10',
        'series': 'short_phrase',
        'size_to': '70',
        'rotation': 'square',
        'limit': '100',
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Unicode Cloud',
        viz_type='word_cloud',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    merge_slice(slc)

    print('Creating a dashboard')
    dash = (db.session.query(Dash).filter_by(
        dashboard_title='Unicode Test').first())

    if not dash:
        dash = Dash()
    js = """\
{
    "CHART-Hkx6154FEm": {
        "children": [],
        "id": "CHART-Hkx6154FEm",
        "meta": {
            "chartId": 2225,
            "height": 30,
            "sliceName": "slice 1",
            "width": 4
        },
        "type": "CHART"
    },
    "GRID_ID": {
        "children": [
            "ROW-SyT19EFEQ"
        ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-SyT19EFEQ": {
        "children": [
            "CHART-Hkx6154FEm"
        ],
        "id": "ROW-SyT19EFEQ",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
    """
    dash.dashboard_title = 'Unicode Test'
    pos = json.loads(js)
    update_slice_ids(pos, [slc])
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = 'unicode-test'
    dash.slices = [slc]
    db.session.merge(dash)
    db.session.commit()
Exemple #20
0
def load_birth_names():
    """Loading birth name dataset from a zip file in the repo"""
    with gzip.open(os.path.join(DATA_FOLDER, 'birth_names.json.gz')) as f:
        pdf = pd.read_json(f)
    pdf.ds = pd.to_datetime(pdf.ds, unit='ms')
    pdf.to_sql(
        'birth_names',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'ds': DateTime,
            'gender': String(16),
            'state': String(10),
            'name': String(255),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table [birth_names] reference')
    obj = db.session.query(TBL).filter_by(table_name='birth_names').first()
    if not obj:
        obj = TBL(table_name='birth_names')
    obj.main_dttm_col = 'ds'
    obj.database = get_or_create_main_db()
    obj.filter_select_enabled = True

    if not any(col.column_name == 'num_california' for col in obj.columns):
        obj.columns.append(TableColumn(
            column_name='num_california',
            expression="CASE WHEN state = 'CA' THEN num ELSE 0 END",
        ))

    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    defaults = {
        'compare_lag': '10',
        'compare_suffix': 'o10Y',
        'limit': '25',
        'granularity_sqla': 'ds',
        'groupby': [],
        'metric': 'sum__num',
        'metrics': ['sum__num'],
        'row_limit': config.get('ROW_LIMIT'),
        'since': '100 years ago',
        'until': 'now',
        'viz_type': 'table',
        'where': '',
        'markup_type': 'markdown',
    }

    admin = security_manager.find_user('admin')

    print('Creating some slices')
    slices = [
        Slice(
            slice_name='Girls',
            viz_type='table',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=['name'],
                filters=[{
                    'col': 'gender',
                    'op': 'in',
                    'val': ['girl'],
                }],
                row_limit=50,
                timeseries_limit_metric='sum__num')),
        Slice(
            slice_name='Boys',
            viz_type='table',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=['name'],
                filters=[{
                    'col': 'gender',
                    'op': 'in',
                    'val': ['boy'],
                }],
                row_limit=50)),
        Slice(
            slice_name='Participants',
            viz_type='big_number',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='big_number', granularity_sqla='ds',
                compare_lag='5', compare_suffix='over 5Y')),
        Slice(
            slice_name='Genders',
            viz_type='pie',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='pie', groupby=['gender'])),
        Slice(
            slice_name='Genders by State',
            viz_type='dist_bar',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[
                    {
                        'clause': 'WHERE',
                        'expressionType': 'SIMPLE',
                        'filterOptionName': '2745eae5',
                        'comparator': ['other'],
                        'operator': 'not in',
                        'subject': 'state',
                    },
                ],
                viz_type='dist_bar',
                metrics=[
                    {
                        'expressionType': 'SIMPLE',
                        'column': {
                            'column_name': 'sum_boys',
                            'type': 'BIGINT(20)',
                        },
                        'aggregate': 'SUM',
                        'label': 'Boys',
                        'optionName': 'metric_11',
                    },
                    {
                        'expressionType': 'SIMPLE',
                        'column': {
                            'column_name': 'sum_girls',
                            'type': 'BIGINT(20)',
                        },
                        'aggregate': 'SUM',
                        'label': 'Girls',
                        'optionName': 'metric_12',
                    },
                ],
                groupby=['state'])),
        Slice(
            slice_name='Trends',
            viz_type='line',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='line', groupby=['name'],
                granularity_sqla='ds', rich_tooltip=True, show_legend=True)),
        Slice(
            slice_name='Average and Sum Trends',
            viz_type='dual_line',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='dual_line',
                metric={
                    'expressionType': 'SIMPLE',
                    'column': {
                        'column_name': 'num',
                        'type': 'BIGINT(20)',
                    },
                    'aggregate': 'AVG',
                    'label': 'AVG(num)',
                    'optionName': 'metric_vgops097wej_g8uff99zhk7',
                },
                metric_2='sum__num',
                granularity_sqla='ds')),
        Slice(
            slice_name='Title',
            viz_type='markup',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='markup', markup_type='html',
                code="""\
    <div style='text-align:center'>
        <h1>Birth Names Dashboard</h1>
        <p>
            The source dataset came from
            <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a>
        </p>
        <img src='/static/assets/images/babytux.jpg'>
    </div>
    """)),
        Slice(
            slice_name='Name Cloud',
            viz_type='word_cloud',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='word_cloud', size_from='10',
                series='name', size_to='70', rotation='square',
                limit='100')),
        Slice(
            slice_name='Pivot Table',
            viz_type='pivot_table',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='pivot_table', metrics=['sum__num'],
                groupby=['name'], columns=['state'])),
        Slice(
            slice_name='Number of Girls',
            viz_type='big_number_total',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='big_number_total', granularity_sqla='ds',
                filters=[{
                    'col': 'gender',
                    'op': 'in',
                    'val': ['girl'],
                }],
                subheader='total female participants')),
        Slice(
            slice_name='Number of California Births',
            viz_type='big_number_total',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric={
                    'expressionType': 'SIMPLE',
                    'column': {
                        'column_name': 'num_california',
                        'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    'aggregate': 'SUM',
                    'label': 'SUM(num_california)',
                },
                viz_type='big_number_total',
                granularity_sqla='ds')),
        Slice(
            slice_name='Top 10 California Names Timeseries',
            viz_type='line',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=[{
                    'expressionType': 'SIMPLE',
                    'column': {
                        'column_name': 'num_california',
                        'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    'aggregate': 'SUM',
                    'label': 'SUM(num_california)',
                }],
                viz_type='line',
                granularity_sqla='ds',
                groupby=['name'],
                timeseries_limit_metric={
                    'expressionType': 'SIMPLE',
                    'column': {
                        'column_name': 'num_california',
                        'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    'aggregate': 'SUM',
                    'label': 'SUM(num_california)',
                },
                limit='10')),
        Slice(
            slice_name='Names Sorted by Num in California',
            viz_type='table',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=['name'],
                row_limit=50,
                timeseries_limit_metric={
                    'expressionType': 'SIMPLE',
                    'column': {
                        'column_name': 'num_california',
                        'expression': "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    'aggregate': 'SUM',
                    'label': 'SUM(num_california)',
                })),
        Slice(
            slice_name='Num Births Trend',
            viz_type='line',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='line')),
        Slice(
            slice_name='Daily Totals',
            viz_type='table',
            datasource_type='table',
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=['ds'],
                since='40 years ago',
                until='now',
                viz_type='table')),
    ]
    for slc in slices:
        merge_slice(slc)

    print('Creating a dashboard')
    dash = db.session.query(Dash).filter_by(dashboard_title='Births').first()

    if not dash:
        dash = Dash()
    js = textwrap.dedent("""\
{
    "CHART-0dd270f0": {
        "meta": {
            "chartId": 51,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-0dd270f0",
        "children": []
    },
    "CHART-a3c21bcc": {
        "meta": {
            "chartId": 52,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-a3c21bcc",
        "children": []
    },
    "CHART-976960a5": {
        "meta": {
            "chartId": 53,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-976960a5",
        "children": []
    },
    "CHART-58575537": {
        "meta": {
            "chartId": 54,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-58575537",
        "children": []
    },
    "CHART-e9cd8f0b": {
        "meta": {
            "chartId": 55,
            "width": 8,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-e9cd8f0b",
        "children": []
    },
    "CHART-e440d205": {
        "meta": {
            "chartId": 56,
            "width": 8,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e440d205",
        "children": []
    },
    "CHART-59444e0b": {
        "meta": {
            "chartId": 57,
            "width": 3,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-59444e0b",
        "children": []
    },
    "CHART-e2cb4997": {
        "meta": {
            "chartId": 59,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e2cb4997",
        "children": []
    },
    "CHART-e8774b49": {
        "meta": {
            "chartId": 60,
            "width": 12,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e8774b49",
        "children": []
    },
    "CHART-985bfd1e": {
        "meta": {
            "chartId": 61,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-985bfd1e",
        "children": []
    },
    "CHART-17f13246": {
        "meta": {
            "chartId": 62,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-17f13246",
        "children": []
    },
    "CHART-729324f6": {
        "meta": {
            "chartId": 63,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-729324f6",
        "children": []
    },
    "COLUMN-25a865d6": {
        "meta": {
            "width": 4,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-25a865d6",
        "children": [
            "ROW-cc97c6ac",
            "CHART-e2cb4997"
        ]
    },
    "COLUMN-4557b6ba": {
        "meta": {
            "width": 8,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-4557b6ba",
        "children": [
            "ROW-d2e78e59",
            "CHART-e9cd8f0b"
        ]
    },
    "GRID_ID": {
        "type": "GRID",
        "id": "GRID_ID",
        "children": [
            "ROW-8515ace3",
            "ROW-1890385f",
            "ROW-f0b64094",
            "ROW-be9526b8"
        ]
    },
    "HEADER_ID": {
        "meta": {
            "text": "Births"
        },
        "type": "HEADER",
        "id": "HEADER_ID"
    },
    "MARKDOWN-00178c27": {
        "meta": {
            "width": 5,
            "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babytux.jpg\\">\\n</div>\\n",
            "height": 38
        },
        "type": "MARKDOWN",
        "id": "MARKDOWN-00178c27",
        "children": []
    },
    "ROOT_ID": {
        "type": "ROOT",
        "id": "ROOT_ID",
        "children": [
            "GRID_ID"
        ]
    },
    "ROW-1890385f": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-1890385f",
        "children": [
            "CHART-e440d205",
            "CHART-0dd270f0",
            "CHART-a3c21bcc"
        ]
    },
    "ROW-8515ace3": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-8515ace3",
        "children": [
            "COLUMN-25a865d6",
            "COLUMN-4557b6ba"
        ]
    },
    "ROW-be9526b8": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-be9526b8",
        "children": [
            "CHART-985bfd1e",
            "CHART-17f13246",
            "CHART-729324f6"
        ]
    },
    "ROW-cc97c6ac": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-cc97c6ac",
        "children": [
            "CHART-976960a5",
            "CHART-58575537"
        ]
    },
    "ROW-d2e78e59": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-d2e78e59",
        "children": [
            "MARKDOWN-00178c27",
            "CHART-59444e0b"
        ]
    },
    "ROW-f0b64094": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-f0b64094",
        "children": [
            "CHART-e8774b49"
        ]
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
        """)
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != 'markup']
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = 'Births'
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = 'births'
    db.session.merge(dash)
    db.session.commit()
Exemple #21
0
def load_multiformat_time_series():
    """Loading time series data from a zip file in the repo"""
    with gzip.open(os.path.join(DATA_FOLDER,
                                'multiformat_time_series.json.gz')) as f:
        pdf = pd.read_json(f)
    pdf.ds = pd.to_datetime(pdf.ds, unit='s')
    pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s')
    pdf.to_sql('multiformat_time_series',
               db.engine,
               if_exists='replace',
               chunksize=500,
               dtype={
                   'ds': Date,
                   'ds2': DateTime,
                   'epoch_s': BigInteger,
                   'epoch_ms': BigInteger,
                   'string0': String(100),
                   'string1': String(100),
                   'string2': String(100),
                   'string3': String(100),
               },
               index=False)
    print('Done loading table!')
    print('-' * 80)
    print('Creating table [multiformat_time_series] reference')
    obj = db.session.query(TBL).filter_by(
        table_name='multiformat_time_series').first()
    if not obj:
        obj = TBL(table_name='multiformat_time_series')
    obj.main_dttm_col = 'ds'
    obj.database = utils.get_or_create_main_db()
    dttm_and_expr_dict = {
        'ds': [None, None],
        'ds2': [None, None],
        'epoch_s': ['epoch_s', None],
        'epoch_ms': ['epoch_ms', None],
        'string2': ['%Y%m%d-%H%M%S', None],
        'string1': ['%Y-%m-%d^%H:%M:%S', None],
        'string0': ['%Y-%m-%d %H:%M:%S.%f', None],
        'string3': ['%Y/%m/%d%H:%M:%S.%f', None],
    }
    for col in obj.columns:
        dttm_and_expr = dttm_and_expr_dict[col.column_name]
        col.python_date_format = dttm_and_expr[0]
        col.dbatabase_expr = dttm_and_expr[1]
        col.is_dttm = True
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    print('Creating Heatmap charts')
    for i, col in enumerate(tbl.columns):
        slice_data = {
            'metrics': ['count'],
            'granularity_sqla': col.column_name,
            'row_limit': config.get('ROW_LIMIT'),
            'since': '1 year ago',
            'until': 'now',
            'where': '',
            'viz_type': 'cal_heatmap',
            'domain_granularity': 'month',
            'subdomain_granularity': 'day',
        }

        slc = Slice(
            slice_name='Calendar Heatmap multiformat ' + str(i),
            viz_type='cal_heatmap',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(slice_data),
        )
        merge_slice(slc)
    misc_dash_slices.add(slc.slice_name)
def load_country_map_data():
    """Loading data for map with country map"""
    csv_bytes = get_example_data(
        "birth_france_data_for_country_map.csv", is_gzip=False, make_bytes=True
    )
    data = pd.read_csv(csv_bytes, encoding="utf-8")
    data["dttm"] = datetime.datetime.now().date()
    data.to_sql(  # pylint: disable=no-member
        "birth_france_by_region",
        db.engine,
        if_exists="replace",
        chunksize=500,
        dtype={
            "DEPT_ID": String(10),
            "2003": BigInteger,
            "2004": BigInteger,
            "2005": BigInteger,
            "2006": BigInteger,
            "2007": BigInteger,
            "2008": BigInteger,
            "2009": BigInteger,
            "2010": BigInteger,
            "2011": BigInteger,
            "2012": BigInteger,
            "2013": BigInteger,
            "2014": BigInteger,
            "dttm": Date(),
        },
        index=False,
    )
    print("Done loading table!")
    print("-" * 80)
    print("Creating table reference")
    obj = db.session.query(TBL).filter_by(table_name="birth_france_by_region").first()
    if not obj:
        obj = TBL(table_name="birth_france_by_region")
    obj.main_dttm_col = "dttm"
    obj.database = utils.get_or_create_main_db()
    if not any(col.metric_name == "avg__2004" for col in obj.metrics):
        col = str(column("2004").compile(db.engine))
        obj.metrics.append(SqlMetric(metric_name="avg__2004", expression=f"AVG({col})"))
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        "granularity_sqla": "",
        "since": "",
        "until": "",
        "where": "",
        "viz_type": "country_map",
        "entity": "DEPT_ID",
        "metric": {
            "expressionType": "SIMPLE",
            "column": {"type": "INT", "column_name": "2004"},
            "aggregate": "AVG",
            "label": "Boys",
            "optionName": "metric_112342",
        },
        "row_limit": 500000,
    }

    print("Creating a slice")
    slc = Slice(
        slice_name="Birth in France by department in 2016",
        viz_type="country_map",
        datasource_type="table",
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
Exemple #23
0
def load_world_bank_health_n_pop():
    """Loads the world bank health dataset, slices and a dashboard"""
    tbl_name = 'wb_health_population'
    data = get_example_data('countries.json.gz')
    pdf = pd.read_json(data)
    pdf.columns = [col.replace('.', '_') for col in pdf.columns]
    pdf.year = pd.to_datetime(pdf.year)
    pdf.to_sql(
        tbl_name,
        db.engine,
        if_exists='replace',
        chunksize=50,
        dtype={
            'year': DateTime(),
            'country_code': String(3),
            'country_name': String(255),
            'region': String(255),
        },
        index=False)

    print('Creating table [wb_health_population] reference')
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = utils.readfile(os.path.join(DATA_FOLDER, 'countries.md'))
    tbl.main_dttm_col = 'year'
    tbl.database = utils.get_or_create_main_db()
    tbl.filter_select_enabled = True

    metrics = [
        'sum__SP_POP_TOTL', 'sum__SH_DYN_AIDS', 'sum__SH_DYN_AIDS',
        'sum__SP_RUR_TOTL_ZS', 'sum__SP_DYN_LE00_IN',
    ]
    for m in metrics:
        if not any(col.metric_name == m for col in tbl.metrics):
            tbl.metrics.append(SqlMetric(
                metric_name=m,
                expression=f'{m[:3]}({m[5:]})',
            ))

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    defaults = {
        'compare_lag': '10',
        'compare_suffix': 'o10Y',
        'limit': '25',
        'granularity_sqla': 'year',
        'groupby': [],
        'metric': 'sum__SP_POP_TOTL',
        'metrics': ['sum__SP_POP_TOTL'],
        'row_limit': config.get('ROW_LIMIT'),
        'since': '2014-01-01',
        'until': '2014-01-02',
        'time_range': '2014-01-01 : 2014-01-02',
        'where': '',
        'markup_type': 'markdown',
        'country_fieldtype': 'cca3',
        'secondary_metric': 'sum__SP_POP_TOTL',
        'entity': 'country_code',
        'show_bubbles': True,
    }

    print('Creating slices')
    slices = [
        Slice(
            slice_name='Region Filter',
            viz_type='filter_box',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='filter_box',
                date_filter=False,
                filter_configs=[
                    {
                        'asc': False,
                        'clearable': True,
                        'column': 'region',
                        'key': '2s98dfu',
                        'metric': 'sum__SP_POP_TOTL',
                        'multiple': True,
                    }, {
                        'asc': False,
                        'clearable': True,
                        'key': 'li3j2lk',
                        'column': 'country_name',
                        'metric': 'sum__SP_POP_TOTL',
                        'multiple': True,
                    },
                ])),
        Slice(
            slice_name="World's Population",
            viz_type='big_number',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since='2000',
                viz_type='big_number',
                compare_lag='10',
                metric='sum__SP_POP_TOTL',
                compare_suffix='over 10Y')),
        Slice(
            slice_name='Most Populated Countries',
            viz_type='table',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='table',
                metrics=['sum__SP_POP_TOTL'],
                groupby=['country_name'])),
        Slice(
            slice_name='Growth Rate',
            viz_type='line',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='line',
                since='1960-01-01',
                metrics=['sum__SP_POP_TOTL'],
                num_period_compare='10',
                groupby=['country_name'])),
        Slice(
            slice_name='% Rural',
            viz_type='world_map',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='world_map',
                metric='sum__SP_RUR_TOTL_ZS',
                num_period_compare='10')),
        Slice(
            slice_name='Life Expectancy VS Rural %',
            viz_type='bubble',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='bubble',
                since='2011-01-01',
                until='2011-01-02',
                series='region',
                limit=0,
                entity='country_name',
                x='sum__SP_RUR_TOTL_ZS',
                y='sum__SP_DYN_LE00_IN',
                size='sum__SP_POP_TOTL',
                max_bubble_size='50',
                filters=[{
                    'col': 'country_code',
                    'val': [
                        'TCA', 'MNP', 'DMA', 'MHL', 'MCO', 'SXM', 'CYM',
                        'TUV', 'IMY', 'KNA', 'ASM', 'ADO', 'AMA', 'PLW',
                    ],
                    'op': 'not in'}],
            )),
        Slice(
            slice_name='Rural Breakdown',
            viz_type='sunburst',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type='sunburst',
                groupby=['region', 'country_name'],
                secondary_metric='sum__SP_RUR_TOTL',
                since='2011-01-01',
                until='2011-01-01')),
        Slice(
            slice_name="World's Pop Growth",
            viz_type='area',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since='1960-01-01',
                until='now',
                viz_type='area',
                groupby=['region'])),
        Slice(
            slice_name='Box plot',
            viz_type='box_plot',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since='1960-01-01',
                until='now',
                whisker_options='Min/max (no outliers)',
                x_ticks_layout='staggered',
                viz_type='box_plot',
                groupby=['region'])),
        Slice(
            slice_name='Treemap',
            viz_type='treemap',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since='1960-01-01',
                until='now',
                viz_type='treemap',
                metrics=['sum__SP_POP_TOTL'],
                groupby=['region', 'country_code'])),
        Slice(
            slice_name='Parallel Coordinates',
            viz_type='para',
            datasource_type='table',
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                since='2011-01-01',
                until='2011-01-01',
                viz_type='para',
                limit=100,
                metrics=[
                    'sum__SP_POP_TOTL',
                    'sum__SP_RUR_TOTL_ZS',
                    'sum__SH_DYN_AIDS'],
                secondary_metric='sum__SP_POP_TOTL',
                series='country_name')),
    ]
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World's Bank Data"
    slug = 'world_health'
    dash = db.session.query(Dash).filter_by(slug=slug).first()

    if not dash:
        dash = Dash()
    js = textwrap.dedent("""\
{
    "CHART-36bfc934": {
        "children": [],
        "id": "CHART-36bfc934",
        "meta": {
            "chartId": 40,
            "height": 25,
            "sliceName": "Region Filter",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-37982887": {
        "children": [],
        "id": "CHART-37982887",
        "meta": {
            "chartId": 41,
            "height": 25,
            "sliceName": "World's Population",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-17e0f8d8": {
        "children": [],
        "id": "CHART-17e0f8d8",
        "meta": {
            "chartId": 42,
            "height": 92,
            "sliceName": "Most Populated Countries",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-2ee52f30": {
        "children": [],
        "id": "CHART-2ee52f30",
        "meta": {
            "chartId": 43,
            "height": 38,
            "sliceName": "Growth Rate",
            "width": 6
        },
        "type": "CHART"
    },
    "CHART-2d5b6871": {
        "children": [],
        "id": "CHART-2d5b6871",
        "meta": {
            "chartId": 44,
            "height": 52,
            "sliceName": "% Rural",
            "width": 7
        },
        "type": "CHART"
    },
    "CHART-0fd0d252": {
        "children": [],
        "id": "CHART-0fd0d252",
        "meta": {
            "chartId": 45,
            "height": 50,
            "sliceName": "Life Expectancy VS Rural %",
            "width": 8
        },
        "type": "CHART"
    },
    "CHART-97f4cb48": {
        "children": [],
        "id": "CHART-97f4cb48",
        "meta": {
            "chartId": 46,
            "height": 38,
            "sliceName": "Rural Breakdown",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-b5e05d6f": {
        "children": [],
        "id": "CHART-b5e05d6f",
        "meta": {
            "chartId": 47,
            "height": 50,
            "sliceName": "World's Pop Growth",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-e76e9f5f": {
        "children": [],
        "id": "CHART-e76e9f5f",
        "meta": {
            "chartId": 48,
            "height": 50,
            "sliceName": "Box plot",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-a4808bba": {
        "children": [],
        "id": "CHART-a4808bba",
        "meta": {
            "chartId": 49,
            "height": 50,
            "sliceName": "Treemap",
            "width": 8
        },
        "type": "CHART"
    },
    "COLUMN-071bbbad": {
        "children": [
            "ROW-1e064e3c",
            "ROW-afdefba9"
        ],
        "id": "COLUMN-071bbbad",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 9
        },
        "type": "COLUMN"
    },
    "COLUMN-fe3914b8": {
        "children": [
            "CHART-36bfc934",
            "CHART-37982887"
        ],
        "id": "COLUMN-fe3914b8",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 2
        },
        "type": "COLUMN"
    },
    "GRID_ID": {
        "children": [
            "ROW-46632bc2",
            "ROW-3fa26c5d",
            "ROW-812b3f13"
        ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "HEADER_ID": {
        "id": "HEADER_ID",
        "meta": {
            "text": "World's Bank Data"
        },
        "type": "HEADER"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-1e064e3c": {
        "children": [
            "COLUMN-fe3914b8",
            "CHART-2d5b6871"
        ],
        "id": "ROW-1e064e3c",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-3fa26c5d": {
        "children": [
            "CHART-b5e05d6f",
            "CHART-0fd0d252"
        ],
        "id": "ROW-3fa26c5d",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-46632bc2": {
        "children": [
            "COLUMN-071bbbad",
            "CHART-17e0f8d8"
        ],
        "id": "ROW-46632bc2",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-812b3f13": {
        "children": [
            "CHART-a4808bba",
            "CHART-e76e9f5f"
        ],
        "id": "ROW-812b3f13",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-afdefba9": {
        "children": [
            "CHART-2ee52f30",
            "CHART-97f4cb48"
        ],
        "id": "ROW-afdefba9",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
    """)
    pos = json.loads(js)
    update_slice_ids(pos, slices)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug

    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
Exemple #24
0
def load_long_lat_data():
    """Loading lat/long data from a csv file in the repo"""
    with gzip.open(os.path.join(DATA_FOLDER, 'san_francisco.csv.gz')) as f:
        pdf = pd.read_csv(f, encoding='utf-8')
    start = datetime.datetime.now().replace(
        hour=0, minute=0, second=0, microsecond=0)
    pdf['datetime'] = [
        start + datetime.timedelta(hours=i * 24 / (len(pdf) - 1))
        for i in range(len(pdf))
    ]
    pdf['occupancy'] = [random.randint(1, 6) for _ in range(len(pdf))]
    pdf['radius_miles'] = [random.uniform(1, 3) for _ in range(len(pdf))]
    pdf['geohash'] = pdf[['LAT', 'LON']].apply(
        lambda x: geohash.encode(*x), axis=1)
    pdf['delimited'] = pdf['LAT'].map(str).str.cat(pdf['LON'].map(str), sep=',')
    pdf.to_sql(  # pylint: disable=no-member
        'long_lat',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'longitude': Float(),
            'latitude': Float(),
            'number': Float(),
            'street': String(100),
            'unit': String(10),
            'city': String(50),
            'district': String(50),
            'region': String(50),
            'postcode': Float(),
            'id': String(100),
            'datetime': DateTime(),
            'occupancy': Float(),
            'radius_miles': Float(),
            'geohash': String(12),
            'delimited': String(60),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table reference')
    obj = db.session.query(TBL).filter_by(table_name='long_lat').first()
    if not obj:
        obj = TBL(table_name='long_lat')
    obj.main_dttm_col = 'datetime'
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': 'day',
        'since': '2014-01-01',
        'until': 'now',
        'where': '',
        'viz_type': 'mapbox',
        'all_columns_x': 'LON',
        'all_columns_y': 'LAT',
        'mapbox_style': 'mapbox://styles/mapbox/light-v9',
        'all_columns': ['occupancy'],
        'row_limit': 500000,
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Mapbox Long/Lat',
        viz_type='mapbox',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def load_country_map_data():
    """Loading data for map with country map"""
    csv_path = os.path.join(DATA_FOLDER,
                            'birth_france_data_for_country_map.csv')
    data = pd.read_csv(csv_path, encoding='utf-8')
    data['dttm'] = datetime.datetime.now().date()
    data.to_sql(  # pylint: disable=no-member
        'birth_france_by_region',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'DEPT_ID': String(10),
            '2003': BigInteger,
            '2004': BigInteger,
            '2005': BigInteger,
            '2006': BigInteger,
            '2007': BigInteger,
            '2008': BigInteger,
            '2009': BigInteger,
            '2010': BigInteger,
            '2011': BigInteger,
            '2012': BigInteger,
            '2013': BigInteger,
            '2014': BigInteger,
            'dttm': Date(),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)
    print('Creating table reference')
    obj = db.session.query(TBL).filter_by(
        table_name='birth_france_by_region').first()
    if not obj:
        obj = TBL(table_name='birth_france_by_region')
    obj.main_dttm_col = 'dttm'
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': '',
        'since': '',
        'until': '',
        'where': '',
        'viz_type': 'country_map',
        'entity': 'DEPT_ID',
        'metric': 'avg__2004',
        'row_limit': 500000,
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Birth in France by department in 2016',
        viz_type='country_map',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
Exemple #26
0
def load_country_map_data():
    """Loading data for map with country map"""
    csv_path = os.path.join(DATA_FOLDER, 'birth_france_data_for_country_map.csv')
    data = pd.read_csv(csv_path, encoding='utf-8')
    data['dttm'] = datetime.datetime.now().date()
    data.to_sql(  # pylint: disable=no-member
        'birth_france_by_region',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'DEPT_ID': String(10),
            '2003': BigInteger,
            '2004': BigInteger,
            '2005': BigInteger,
            '2006': BigInteger,
            '2007': BigInteger,
            '2008': BigInteger,
            '2009': BigInteger,
            '2010': BigInteger,
            '2011': BigInteger,
            '2012': BigInteger,
            '2013': BigInteger,
            '2014': BigInteger,
            'dttm': Date(),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)
    print('Creating table reference')
    obj = db.session.query(TBL).filter_by(table_name='birth_france_by_region').first()
    if not obj:
        obj = TBL(table_name='birth_france_by_region')
    obj.main_dttm_col = 'dttm'
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': '',
        'since': '',
        'until': '',
        'where': '',
        'viz_type': 'country_map',
        'entity': 'DEPT_ID',
        'metric': 'avg__2004',
        'row_limit': 500000,
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Birth in France by department in 2016',
        viz_type='country_map',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
def load_world_bank_health_n_pop():
    """Loads the world bank health dataset, slices and a dashboard"""
    tbl_name = 'wb_health_population'
    with gzip.open(os.path.join(DATA_FOLDER, 'countries.json.gz')) as f:
        pdf = pd.read_json(f)
    pdf.columns = [col.replace('.', '_') for col in pdf.columns]
    pdf.year = pd.to_datetime(pdf.year)
    pdf.to_sql(tbl_name,
               db.engine,
               if_exists='replace',
               chunksize=50,
               dtype={
                   'year': DateTime(),
                   'country_code': String(3),
                   'country_name': String(255),
                   'region': String(255),
               },
               index=False)

    print('Creating table [wb_health_population] reference')
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = utils.readfile(os.path.join(DATA_FOLDER, 'countries.md'))
    tbl.main_dttm_col = 'year'
    tbl.database = utils.get_or_create_main_db()
    tbl.filter_select_enabled = True
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    defaults = {
        'compare_lag': '10',
        'compare_suffix': 'o10Y',
        'limit': '25',
        'granularity_sqla': 'year',
        'groupby': [],
        'metric': 'sum__SP_POP_TOTL',
        'metrics': ['sum__SP_POP_TOTL'],
        'row_limit': config.get('ROW_LIMIT'),
        'since': '2014-01-01',
        'until': '2014-01-02',
        'time_range': '2014-01-01 : 2014-01-02',
        'where': '',
        'markup_type': 'markdown',
        'country_fieldtype': 'cca3',
        'secondary_metric': 'sum__SP_POP_TOTL',
        'entity': 'country_code',
        'show_bubbles': True,
    }

    print('Creating slices')
    slices = [
        Slice(slice_name='Region Filter',
              viz_type='filter_box',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='filter_box',
                                    date_filter=False,
                                    filter_configs=[
                                        {
                                            'asc': False,
                                            'clearable': True,
                                            'column': 'region',
                                            'key': '2s98dfu',
                                            'metric': 'sum__SP_POP_TOTL',
                                            'multiple': True,
                                        },
                                        {
                                            'asc': False,
                                            'clearable': True,
                                            'key': 'li3j2lk',
                                            'column': 'country_name',
                                            'metric': 'sum__SP_POP_TOTL',
                                            'multiple': True,
                                        },
                                    ])),
        Slice(slice_name="World's Population",
              viz_type='big_number',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    since='2000',
                                    viz_type='big_number',
                                    compare_lag='10',
                                    metric='sum__SP_POP_TOTL',
                                    compare_suffix='over 10Y')),
        Slice(slice_name='Most Populated Countries',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='table',
                                    metrics=['sum__SP_POP_TOTL'],
                                    groupby=['country_name'])),
        Slice(slice_name='Growth Rate',
              viz_type='line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='line',
                                    since='1960-01-01',
                                    metrics=['sum__SP_POP_TOTL'],
                                    num_period_compare='10',
                                    groupby=['country_name'])),
        Slice(slice_name='% Rural',
              viz_type='world_map',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='world_map',
                                    metric='sum__SP_RUR_TOTL_ZS',
                                    num_period_compare='10')),
        Slice(slice_name='Life Expectancy VS Rural %',
              viz_type='bubble',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(
                  defaults,
                  viz_type='bubble',
                  since='2011-01-01',
                  until='2011-01-02',
                  series='region',
                  limit=0,
                  entity='country_name',
                  x='sum__SP_RUR_TOTL_ZS',
                  y='sum__SP_DYN_LE00_IN',
                  size='sum__SP_POP_TOTL',
                  max_bubble_size='50',
                  filters=[{
                      'col':
                      'country_code',
                      'val': [
                          'TCA',
                          'MNP',
                          'DMA',
                          'MHL',
                          'MCO',
                          'SXM',
                          'CYM',
                          'TUV',
                          'IMY',
                          'KNA',
                          'ASM',
                          'ADO',
                          'AMA',
                          'PLW',
                      ],
                      'op':
                      'not in'
                  }],
              )),
        Slice(slice_name='Rural Breakdown',
              viz_type='sunburst',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='sunburst',
                                    groupby=['region', 'country_name'],
                                    secondary_metric='sum__SP_RUR_TOTL',
                                    since='2011-01-01',
                                    until='2011-01-01')),
        Slice(slice_name="World's Pop Growth",
              viz_type='area',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    since='1960-01-01',
                                    until='now',
                                    viz_type='area',
                                    groupby=['region'])),
        Slice(slice_name='Box plot',
              viz_type='box_plot',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    since='1960-01-01',
                                    until='now',
                                    whisker_options='Min/max (no outliers)',
                                    viz_type='box_plot',
                                    groupby=['region'])),
        Slice(slice_name='Treemap',
              viz_type='treemap',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    since='1960-01-01',
                                    until='now',
                                    viz_type='treemap',
                                    metrics=['sum__SP_POP_TOTL'],
                                    groupby=['region', 'country_code'])),
        Slice(slice_name='Parallel Coordinates',
              viz_type='para',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    since='2011-01-01',
                                    until='2011-01-01',
                                    viz_type='para',
                                    limit=100,
                                    metrics=[
                                        'sum__SP_POP_TOTL',
                                        'sum__SP_RUR_TOTL_ZS',
                                        'sum__SH_DYN_AIDS'
                                    ],
                                    secondary_metric='sum__SP_POP_TOTL',
                                    series='country_name')),
    ]
    misc_dash_slices.add(slices[-1].slice_name)
    for slc in slices:
        merge_slice(slc)

    print("Creating a World's Health Bank dashboard")
    dash_name = "World's Bank Data"
    slug = 'world_health'
    dash = db.session.query(Dash).filter_by(slug=slug).first()

    if not dash:
        dash = Dash()
    js = textwrap.dedent("""\
{
    "CHART-36bfc934": {
        "children": [],
        "id": "CHART-36bfc934",
        "meta": {
            "chartId": 40,
            "height": 25,
            "sliceName": "Region Filter",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-37982887": {
        "children": [],
        "id": "CHART-37982887",
        "meta": {
            "chartId": 41,
            "height": 25,
            "sliceName": "World's Population",
            "width": 2
        },
        "type": "CHART"
    },
    "CHART-17e0f8d8": {
        "children": [],
        "id": "CHART-17e0f8d8",
        "meta": {
            "chartId": 42,
            "height": 92,
            "sliceName": "Most Populated Countries",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-2ee52f30": {
        "children": [],
        "id": "CHART-2ee52f30",
        "meta": {
            "chartId": 43,
            "height": 38,
            "sliceName": "Growth Rate",
            "width": 6
        },
        "type": "CHART"
    },
    "CHART-2d5b6871": {
        "children": [],
        "id": "CHART-2d5b6871",
        "meta": {
            "chartId": 44,
            "height": 52,
            "sliceName": "% Rural",
            "width": 7
        },
        "type": "CHART"
    },
    "CHART-0fd0d252": {
        "children": [],
        "id": "CHART-0fd0d252",
        "meta": {
            "chartId": 45,
            "height": 50,
            "sliceName": "Life Expectancy VS Rural %",
            "width": 8
        },
        "type": "CHART"
    },
    "CHART-97f4cb48": {
        "children": [],
        "id": "CHART-97f4cb48",
        "meta": {
            "chartId": 46,
            "height": 38,
            "sliceName": "Rural Breakdown",
            "width": 3
        },
        "type": "CHART"
    },
    "CHART-b5e05d6f": {
        "children": [],
        "id": "CHART-b5e05d6f",
        "meta": {
            "chartId": 47,
            "height": 50,
            "sliceName": "World's Pop Growth",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-e76e9f5f": {
        "children": [],
        "id": "CHART-e76e9f5f",
        "meta": {
            "chartId": 48,
            "height": 50,
            "sliceName": "Box plot",
            "width": 4
        },
        "type": "CHART"
    },
    "CHART-a4808bba": {
        "children": [],
        "id": "CHART-a4808bba",
        "meta": {
            "chartId": 49,
            "height": 50,
            "sliceName": "Treemap",
            "width": 8
        },
        "type": "CHART"
    },
    "COLUMN-071bbbad": {
        "children": [
            "ROW-1e064e3c",
            "ROW-afdefba9"
        ],
        "id": "COLUMN-071bbbad",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 9
        },
        "type": "COLUMN"
    },
    "COLUMN-fe3914b8": {
        "children": [
            "CHART-36bfc934",
            "CHART-37982887"
        ],
        "id": "COLUMN-fe3914b8",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT",
            "width": 2
        },
        "type": "COLUMN"
    },
    "GRID_ID": {
        "children": [
            "ROW-46632bc2",
            "ROW-3fa26c5d",
            "ROW-812b3f13"
        ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "HEADER_ID": {
        "id": "HEADER_ID",
        "meta": {
            "text": "World's Bank Data"
        },
        "type": "HEADER"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-1e064e3c": {
        "children": [
            "COLUMN-fe3914b8",
            "CHART-2d5b6871"
        ],
        "id": "ROW-1e064e3c",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-3fa26c5d": {
        "children": [
            "CHART-b5e05d6f",
            "CHART-0fd0d252"
        ],
        "id": "ROW-3fa26c5d",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-46632bc2": {
        "children": [
            "COLUMN-071bbbad",
            "CHART-17e0f8d8"
        ],
        "id": "ROW-46632bc2",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-812b3f13": {
        "children": [
            "CHART-a4808bba",
            "CHART-e76e9f5f"
        ],
        "id": "ROW-812b3f13",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "ROW-afdefba9": {
        "children": [
            "CHART-2ee52f30",
            "CHART-97f4cb48"
        ],
        "id": "ROW-afdefba9",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
    """)
    pos = json.loads(js)
    update_slice_ids(pos, slices)

    dash.dashboard_title = dash_name
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = slug

    dash.slices = slices[:-1]
    db.session.merge(dash)
    db.session.commit()
def load_birth_names():
    """Loading birth name dataset from a zip file in the repo"""
    data = get_example_data("birth_names.json.gz")
    pdf = pd.read_json(data)
    pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
    pdf.to_sql(
        "birth_names",
        db.engine,
        if_exists="replace",
        chunksize=500,
        dtype={
            "ds": DateTime,
            "gender": String(16),
            "state": String(10),
            "name": String(255),
        },
        index=False,
    )
    print("Done loading table!")
    print("-" * 80)

    print("Creating table [birth_names] reference")
    obj = db.session.query(TBL).filter_by(table_name="birth_names").first()
    if not obj:
        obj = TBL(table_name="birth_names")
    obj.main_dttm_col = "ds"
    obj.database = get_or_create_main_db()
    obj.filter_select_enabled = True

    if not any(col.column_name == "num_california" for col in obj.columns):
        col_state = str(column("state").compile(db.engine))
        col_num = str(column("num").compile(db.engine))
        obj.columns.append(
            TableColumn(
                column_name="num_california",
                expression=
                f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
            ))

    if not any(col.metric_name == "sum__num" for col in obj.metrics):
        col = str(column("num").compile(db.engine))
        obj.metrics.append(
            SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))

    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    defaults = {
        "compare_lag": "10",
        "compare_suffix": "o10Y",
        "limit": "25",
        "granularity_sqla": "ds",
        "groupby": [],
        "metric": "sum__num",
        "metrics": ["sum__num"],
        "row_limit": config.get("ROW_LIMIT"),
        "since": "100 years ago",
        "until": "now",
        "viz_type": "table",
        "where": "",
        "markup_type": "markdown",
    }

    admin = security_manager.find_user("admin")

    print("Creating some slices")
    slices = [
        Slice(
            slice_name="Girls",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["girl"]
                }],
                row_limit=50,
                timeseries_limit_metric="sum__num",
            ),
        ),
        Slice(
            slice_name="Boys",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["boy"]
                }],
                row_limit=50,
            ),
        ),
        Slice(
            slice_name="Participants",
            viz_type="big_number",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number",
                granularity_sqla="ds",
                compare_lag="5",
                compare_suffix="over 5Y",
            ),
        ),
        Slice(
            slice_name="Genders",
            viz_type="pie",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="pie",
                                  groupby=["gender"]),
        ),
        Slice(
            slice_name="Genders by State",
            viz_type="dist_bar",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                adhoc_filters=[{
                    "clause": "WHERE",
                    "expressionType": "SIMPLE",
                    "filterOptionName": "2745eae5",
                    "comparator": ["other"],
                    "operator": "not in",
                    "subject": "state",
                }],
                viz_type="dist_bar",
                metrics=[
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_boys",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Boys",
                        "optionName": "metric_11",
                    },
                    {
                        "expressionType": "SIMPLE",
                        "column": {
                            "column_name": "sum_girls",
                            "type": "BIGINT(20)"
                        },
                        "aggregate": "SUM",
                        "label": "Girls",
                        "optionName": "metric_12",
                    },
                ],
                groupby=["state"],
            ),
        ),
        Slice(
            slice_name="Trends",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="line",
                groupby=["name"],
                granularity_sqla="ds",
                rich_tooltip=True,
                show_legend=True,
            ),
        ),
        Slice(
            slice_name="Average and Sum Trends",
            viz_type="dual_line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="dual_line",
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num",
                        "type": "BIGINT(20)"
                    },
                    "aggregate": "AVG",
                    "label": "AVG(num)",
                    "optionName": "metric_vgops097wej_g8uff99zhk7",
                },
                metric_2="sum__num",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Title",
            viz_type="markup",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="markup",
                markup_type="html",
                code="""\
    <div style='text-align:center'>
        <h1>Birth Names Dashboard</h1>
        <p>
            The source dataset came from
            <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a>
        </p>
        <img src='/static/assets/images/babytux.jpg'>
    </div>
    """,
            ),
        ),
        Slice(
            slice_name="Name Cloud",
            viz_type="word_cloud",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="word_cloud",
                size_from="10",
                series="name",
                size_to="70",
                rotation="square",
                limit="100",
            ),
        ),
        Slice(
            slice_name="Pivot Table",
            viz_type="pivot_table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="pivot_table",
                metrics=["sum__num"],
                groupby=["name"],
                columns=["state"],
            ),
        ),
        Slice(
            slice_name="Number of Girls",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                viz_type="big_number_total",
                granularity_sqla="ds",
                filters=[{
                    "col": "gender",
                    "op": "in",
                    "val": ["girl"]
                }],
                subheader="total female participants",
            ),
        ),
        Slice(
            slice_name="Number of California Births",
            viz_type="big_number_total",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                viz_type="big_number_total",
                granularity_sqla="ds",
            ),
        ),
        Slice(
            slice_name="Top 10 California Names Timeseries",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                metrics=[{
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                }],
                viz_type="line",
                granularity_sqla="ds",
                groupby=["name"],
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
                limit="10",
            ),
        ),
        Slice(
            slice_name="Names Sorted by Num in California",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(
                defaults,
                groupby=["name"],
                row_limit=50,
                timeseries_limit_metric={
                    "expressionType": "SIMPLE",
                    "column": {
                        "column_name": "num_california",
                        "expression":
                        "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                    },
                    "aggregate": "SUM",
                    "label": "SUM(num_california)",
                },
            ),
        ),
        Slice(
            slice_name="Num Births Trend",
            viz_type="line",
            datasource_type="table",
            datasource_id=tbl.id,
            params=get_slice_json(defaults, viz_type="line"),
        ),
        Slice(
            slice_name="Daily Totals",
            viz_type="table",
            datasource_type="table",
            datasource_id=tbl.id,
            created_by=admin,
            params=get_slice_json(
                defaults,
                groupby=["ds"],
                since="40 years ago",
                until="now",
                viz_type="table",
            ),
        ),
    ]
    for slc in slices:
        merge_slice(slc)

    print("Creating a dashboard")
    dash = db.session.query(Dash).filter_by(dashboard_title="Births").first()

    if not dash:
        dash = Dash()
    js = textwrap.dedent(
        # pylint: disable=line-too-long
        """\
{
    "CHART-0dd270f0": {
        "meta": {
            "chartId": 51,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-0dd270f0",
        "children": []
    },
    "CHART-a3c21bcc": {
        "meta": {
            "chartId": 52,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-a3c21bcc",
        "children": []
    },
    "CHART-976960a5": {
        "meta": {
            "chartId": 53,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-976960a5",
        "children": []
    },
    "CHART-58575537": {
        "meta": {
            "chartId": 54,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-58575537",
        "children": []
    },
    "CHART-e9cd8f0b": {
        "meta": {
            "chartId": 55,
            "width": 8,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-e9cd8f0b",
        "children": []
    },
    "CHART-e440d205": {
        "meta": {
            "chartId": 56,
            "width": 8,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e440d205",
        "children": []
    },
    "CHART-59444e0b": {
        "meta": {
            "chartId": 57,
            "width": 3,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-59444e0b",
        "children": []
    },
    "CHART-e2cb4997": {
        "meta": {
            "chartId": 59,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e2cb4997",
        "children": []
    },
    "CHART-e8774b49": {
        "meta": {
            "chartId": 60,
            "width": 12,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e8774b49",
        "children": []
    },
    "CHART-985bfd1e": {
        "meta": {
            "chartId": 61,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-985bfd1e",
        "children": []
    },
    "CHART-17f13246": {
        "meta": {
            "chartId": 62,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-17f13246",
        "children": []
    },
    "CHART-729324f6": {
        "meta": {
            "chartId": 63,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-729324f6",
        "children": []
    },
    "COLUMN-25a865d6": {
        "meta": {
            "width": 4,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-25a865d6",
        "children": [
            "ROW-cc97c6ac",
            "CHART-e2cb4997"
        ]
    },
    "COLUMN-4557b6ba": {
        "meta": {
            "width": 8,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-4557b6ba",
        "children": [
            "ROW-d2e78e59",
            "CHART-e9cd8f0b"
        ]
    },
    "GRID_ID": {
        "type": "GRID",
        "id": "GRID_ID",
        "children": [
            "ROW-8515ace3",
            "ROW-1890385f",
            "ROW-f0b64094",
            "ROW-be9526b8"
        ]
    },
    "HEADER_ID": {
        "meta": {
            "text": "Births"
        },
        "type": "HEADER",
        "id": "HEADER_ID"
    },
    "MARKDOWN-00178c27": {
        "meta": {
            "width": 5,
            "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babytux.jpg\\">\\n</div>\\n",
            "height": 38
        },
        "type": "MARKDOWN",
        "id": "MARKDOWN-00178c27",
        "children": []
    },
    "ROOT_ID": {
        "type": "ROOT",
        "id": "ROOT_ID",
        "children": [
            "GRID_ID"
        ]
    },
    "ROW-1890385f": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-1890385f",
        "children": [
            "CHART-e440d205",
            "CHART-0dd270f0",
            "CHART-a3c21bcc"
        ]
    },
    "ROW-8515ace3": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-8515ace3",
        "children": [
            "COLUMN-25a865d6",
            "COLUMN-4557b6ba"
        ]
    },
    "ROW-be9526b8": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-be9526b8",
        "children": [
            "CHART-985bfd1e",
            "CHART-17f13246",
            "CHART-729324f6"
        ]
    },
    "ROW-cc97c6ac": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-cc97c6ac",
        "children": [
            "CHART-976960a5",
            "CHART-58575537"
        ]
    },
    "ROW-d2e78e59": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-d2e78e59",
        "children": [
            "MARKDOWN-00178c27",
            "CHART-59444e0b"
        ]
    },
    "ROW-f0b64094": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-f0b64094",
        "children": [
            "CHART-e8774b49"
        ]
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
        """

        # pylint: enable=line-too-long
    )
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = "Births"
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "births"
    db.session.merge(dash)
    db.session.commit()
Exemple #29
0
def init():
    """Inits the Superset application"""
    utils.get_or_create_main_db()
    security_manager.sync_role_definitions()
Exemple #30
0
def load_long_lat_data():
    """Loading lat/long data from a csv file in the repo"""
    with gzip.open(os.path.join(DATA_FOLDER, 'san_francisco.csv.gz')) as f:
        pdf = pd.read_csv(f, encoding='utf-8')
    start = datetime.datetime.now().replace(
        hour=0, minute=0, second=0, microsecond=0)
    pdf['datetime'] = [
        start + datetime.timedelta(hours=i * 24 / (len(pdf) - 1))
        for i in range(len(pdf))
    ]
    pdf['occupancy'] = [random.randint(1, 6) for _ in range(len(pdf))]
    pdf['radius_miles'] = [random.uniform(1, 3) for _ in range(len(pdf))]
    pdf['geohash'] = pdf[['LAT', 'LON']].apply(
        lambda x: geohash.encode(*x), axis=1)
    pdf['delimited'] = pdf['LAT'].map(str).str.cat(pdf['LON'].map(str), sep=',')
    pdf.to_sql(  # pylint: disable=no-member
        'long_lat',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype={
            'longitude': Float(),
            'latitude': Float(),
            'number': Float(),
            'street': String(100),
            'unit': String(10),
            'city': String(50),
            'district': String(50),
            'region': String(50),
            'postcode': Float(),
            'id': String(100),
            'datetime': DateTime(),
            'occupancy': Float(),
            'radius_miles': Float(),
            'geohash': String(12),
            'delimited': String(60),
        },
        index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table reference')
    obj = db.session.query(TBL).filter_by(table_name='long_lat').first()
    if not obj:
        obj = TBL(table_name='long_lat')
    obj.main_dttm_col = 'datetime'
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        'granularity_sqla': 'day',
        'since': '2014-01-01',
        'until': 'now',
        'where': '',
        'viz_type': 'mapbox',
        'all_columns_x': 'LON',
        'all_columns_y': 'LAT',
        'mapbox_style': 'mapbox://styles/mapbox/light-v9',
        'all_columns': ['occupancy'],
        'row_limit': 500000,
    }

    print('Creating a slice')
    slc = Slice(
        slice_name='Mapbox Long/Lat',
        viz_type='mapbox',
        datasource_type='table',
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)
Exemple #31
0
def load_unicode_test_data():
    """Loading unicode test dataset from a csv file in the repo"""
    data = get_example_data("unicode_utf8_unixnl_test.csv",
                            is_gzip=False,
                            make_bytes=True)
    df = pd.read_csv(data, encoding="utf-8")
    # generate date/numeric data
    df["dttm"] = datetime.datetime.now().date()
    df["value"] = [random.randint(1, 100) for _ in range(len(df))]
    df.to_sql(  # pylint: disable=no-member
        "unicode_test",
        db.engine,
        if_exists="replace",
        chunksize=500,
        dtype={
            "phrase": String(500),
            "short_phrase": String(10),
            "with_missing": String(100),
            "dttm": Date(),
            "value": Float(),
        },
        index=False,
    )
    print("Done loading table!")
    print("-" * 80)

    print("Creating table [unicode_test] reference")
    obj = db.session.query(TBL).filter_by(table_name="unicode_test").first()
    if not obj:
        obj = TBL(table_name="unicode_test")
    obj.main_dttm_col = "dttm"
    obj.database = utils.get_or_create_main_db()
    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    slice_data = {
        "granularity_sqla": "dttm",
        "groupby": [],
        "metric": {
            "expressionType": "SIMPLE",
            "column": {
                "column_name": "value",
                "type": "INT"
            },
            "aggregate": "SUM",
            "label": "value",
            "optionName": "metric_11",
        },
        "row_limit": config.get("ROW_LIMIT"),
        "since": "100 years ago",
        "until": "now",
        "where": "",
        "viz_type": "word_cloud",
        "size_from": "10",
        "series": "short_phrase",
        "size_to": "70",
        "rotation": "square",
        "limit": "100",
    }

    print("Creating a slice")
    slc = Slice(
        slice_name="Unicode Cloud",
        viz_type="word_cloud",
        datasource_type="table",
        datasource_id=tbl.id,
        params=get_slice_json(slice_data),
    )
    merge_slice(slc)

    print("Creating a dashboard")
    dash = db.session.query(Dash).filter_by(
        dashboard_title="Unicode Test").first()

    if not dash:
        dash = Dash()
    js = """\
{
    "CHART-Hkx6154FEm": {
        "children": [],
        "id": "CHART-Hkx6154FEm",
        "meta": {
            "chartId": 2225,
            "height": 30,
            "sliceName": "slice 1",
            "width": 4
        },
        "type": "CHART"
    },
    "GRID_ID": {
        "children": [
            "ROW-SyT19EFEQ"
        ],
        "id": "GRID_ID",
        "type": "GRID"
    },
    "ROOT_ID": {
        "children": [
            "GRID_ID"
        ],
        "id": "ROOT_ID",
        "type": "ROOT"
    },
    "ROW-SyT19EFEQ": {
        "children": [
            "CHART-Hkx6154FEm"
        ],
        "id": "ROW-SyT19EFEQ",
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW"
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
    """
    dash.dashboard_title = "Unicode Test"
    pos = json.loads(js)
    update_slice_ids(pos, [slc])
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = "unicode-test"
    dash.slices = [slc]
    db.session.merge(dash)
    db.session.commit()
Exemple #32
0
def load_birth_names():
    """Loading birth name dataset from a zip file in the repo"""
    with gzip.open(os.path.join(DATA_FOLDER, 'birth_names.json.gz')) as f:
        pdf = pd.read_json(f)
    pdf.ds = pd.to_datetime(pdf.ds, unit='ms')
    pdf.to_sql('birth_names',
               db.engine,
               if_exists='replace',
               chunksize=500,
               dtype={
                   'ds': DateTime,
                   'gender': String(16),
                   'state': String(10),
                   'name': String(255),
               },
               index=False)
    print('Done loading table!')
    print('-' * 80)

    print('Creating table [birth_names] reference')
    obj = db.session.query(TBL).filter_by(table_name='birth_names').first()
    if not obj:
        obj = TBL(table_name='birth_names')
    obj.main_dttm_col = 'ds'
    obj.database = get_or_create_main_db()
    obj.filter_select_enabled = True

    if not any(col.column_name == 'num_california' for col in obj.columns):
        obj.columns.append(
            TableColumn(
                column_name='num_california',
                expression="CASE WHEN state = 'CA' THEN num ELSE 0 END",
            ))

    db.session.merge(obj)
    db.session.commit()
    obj.fetch_metadata()
    tbl = obj

    defaults = {
        'compare_lag': '10',
        'compare_suffix': 'o10Y',
        'limit': '25',
        'granularity_sqla': 'ds',
        'groupby': [],
        'metric': 'sum__num',
        'metrics': ['sum__num'],
        'row_limit': config.get('ROW_LIMIT'),
        'since': '100 years ago',
        'until': 'now',
        'viz_type': 'table',
        'where': '',
        'markup_type': 'markdown',
    }

    admin = security_manager.find_user('admin')

    print('Creating some slices')
    slices = [
        Slice(slice_name='Girls',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    groupby=['name'],
                                    filters=[{
                                        'col': 'gender',
                                        'op': 'in',
                                        'val': ['girl'],
                                    }],
                                    row_limit=50,
                                    timeseries_limit_metric='sum__num')),
        Slice(slice_name='Boys',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    groupby=['name'],
                                    filters=[{
                                        'col': 'gender',
                                        'op': 'in',
                                        'val': ['boy'],
                                    }],
                                    row_limit=50)),
        Slice(slice_name='Participants',
              viz_type='big_number',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='big_number',
                                    granularity_sqla='ds',
                                    compare_lag='5',
                                    compare_suffix='over 5Y')),
        Slice(slice_name='Genders',
              viz_type='pie',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='pie',
                                    groupby=['gender'])),
        Slice(slice_name='Genders by State',
              viz_type='dist_bar',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    adhoc_filters=[
                                        {
                                            'clause': 'WHERE',
                                            'expressionType': 'SIMPLE',
                                            'filterOptionName': '2745eae5',
                                            'comparator': ['other'],
                                            'operator': 'not in',
                                            'subject': 'state',
                                        },
                                    ],
                                    viz_type='dist_bar',
                                    metrics=[
                                        {
                                            'expressionType': 'SIMPLE',
                                            'column': {
                                                'column_name': 'sum_boys',
                                                'type': 'BIGINT(20)',
                                            },
                                            'aggregate': 'SUM',
                                            'label': 'Boys',
                                            'optionName': 'metric_11',
                                        },
                                        {
                                            'expressionType': 'SIMPLE',
                                            'column': {
                                                'column_name': 'sum_girls',
                                                'type': 'BIGINT(20)',
                                            },
                                            'aggregate': 'SUM',
                                            'label': 'Girls',
                                            'optionName': 'metric_12',
                                        },
                                    ],
                                    groupby=['state'])),
        Slice(slice_name='Trends',
              viz_type='line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='line',
                                    groupby=['name'],
                                    granularity_sqla='ds',
                                    rich_tooltip=True,
                                    show_legend=True)),
        Slice(slice_name='Average and Sum Trends',
              viz_type='dual_line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='dual_line',
                                    metric={
                                        'expressionType':
                                        'SIMPLE',
                                        'column': {
                                            'column_name': 'num',
                                            'type': 'BIGINT(20)',
                                        },
                                        'aggregate':
                                        'AVG',
                                        'label':
                                        'AVG(num)',
                                        'optionName':
                                        'metric_vgops097wej_g8uff99zhk7',
                                    },
                                    metric_2='sum__num',
                                    granularity_sqla='ds')),
        Slice(slice_name='Title',
              viz_type='markup',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='markup',
                                    markup_type='html',
                                    code="""\
    <div style='text-align:center'>
        <h1>Birth Names Dashboard</h1>
        <p>
            The source dataset came from
            <a href='https://github.com/hadley/babynames' target='_blank'>[here]</a>
        </p>
        <img src='/static/assets/images/babytux.jpg'>
    </div>
    """)),
        Slice(slice_name='Name Cloud',
              viz_type='word_cloud',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='word_cloud',
                                    size_from='10',
                                    series='name',
                                    size_to='70',
                                    rotation='square',
                                    limit='100')),
        Slice(slice_name='Pivot Table',
              viz_type='pivot_table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='pivot_table',
                                    metrics=['sum__num'],
                                    groupby=['name'],
                                    columns=['state'])),
        Slice(slice_name='Number of Girls',
              viz_type='big_number_total',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults,
                                    viz_type='big_number_total',
                                    granularity_sqla='ds',
                                    filters=[{
                                        'col': 'gender',
                                        'op': 'in',
                                        'val': ['girl'],
                                    }],
                                    subheader='total female participants')),
        Slice(slice_name='Number of California Births',
              viz_type='big_number_total',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(
                  defaults,
                  metric={
                      'expressionType': 'SIMPLE',
                      'column': {
                          'column_name':
                          'num_california',
                          'expression':
                          "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                      },
                      'aggregate': 'SUM',
                      'label': 'SUM(num_california)',
                  },
                  viz_type='big_number_total',
                  granularity_sqla='ds')),
        Slice(slice_name='Top 10 California Names Timeseries',
              viz_type='line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(
                  defaults,
                  metrics=[{
                      'expressionType':
                      'SIMPLE',
                      'column': {
                          'column_name':
                          'num_california',
                          'expression':
                          "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                      },
                      'aggregate': 'SUM',
                      'label': 'SUM(num_california)',
                  }],
                  viz_type='line',
                  granularity_sqla='ds',
                  groupby=['name'],
                  timeseries_limit_metric={
                      'expressionType': 'SIMPLE',
                      'column': {
                          'column_name':
                          'num_california',
                          'expression':
                          "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                      },
                      'aggregate': 'SUM',
                      'label': 'SUM(num_california)',
                  },
                  limit='10')),
        Slice(slice_name='Names Sorted by Num in California',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(
                  defaults,
                  groupby=['name'],
                  row_limit=50,
                  timeseries_limit_metric={
                      'expressionType':
                      'SIMPLE',
                      'column': {
                          'column_name':
                          'num_california',
                          'expression':
                          "CASE WHEN state = 'CA' THEN num ELSE 0 END",
                      },
                      'aggregate': 'SUM',
                      'label': 'SUM(num_california)',
                  })),
        Slice(slice_name='Num Births Trend',
              viz_type='line',
              datasource_type='table',
              datasource_id=tbl.id,
              params=get_slice_json(defaults, viz_type='line')),
        Slice(slice_name='Daily Totals',
              viz_type='table',
              datasource_type='table',
              datasource_id=tbl.id,
              created_by=admin,
              params=get_slice_json(defaults,
                                    groupby=['ds'],
                                    since='40 years ago',
                                    until='now',
                                    viz_type='table')),
    ]
    for slc in slices:
        merge_slice(slc)

    print('Creating a dashboard')
    dash = db.session.query(Dash).filter_by(dashboard_title='Births').first()

    if not dash:
        dash = Dash()
    js = textwrap.dedent("""\
{
    "CHART-0dd270f0": {
        "meta": {
            "chartId": 51,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-0dd270f0",
        "children": []
    },
    "CHART-a3c21bcc": {
        "meta": {
            "chartId": 52,
            "width": 2,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-a3c21bcc",
        "children": []
    },
    "CHART-976960a5": {
        "meta": {
            "chartId": 53,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-976960a5",
        "children": []
    },
    "CHART-58575537": {
        "meta": {
            "chartId": 54,
            "width": 2,
            "height": 25
        },
        "type": "CHART",
        "id": "CHART-58575537",
        "children": []
    },
    "CHART-e9cd8f0b": {
        "meta": {
            "chartId": 55,
            "width": 8,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-e9cd8f0b",
        "children": []
    },
    "CHART-e440d205": {
        "meta": {
            "chartId": 56,
            "width": 8,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e440d205",
        "children": []
    },
    "CHART-59444e0b": {
        "meta": {
            "chartId": 57,
            "width": 3,
            "height": 38
        },
        "type": "CHART",
        "id": "CHART-59444e0b",
        "children": []
    },
    "CHART-e2cb4997": {
        "meta": {
            "chartId": 59,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e2cb4997",
        "children": []
    },
    "CHART-e8774b49": {
        "meta": {
            "chartId": 60,
            "width": 12,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-e8774b49",
        "children": []
    },
    "CHART-985bfd1e": {
        "meta": {
            "chartId": 61,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-985bfd1e",
        "children": []
    },
    "CHART-17f13246": {
        "meta": {
            "chartId": 62,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-17f13246",
        "children": []
    },
    "CHART-729324f6": {
        "meta": {
            "chartId": 63,
            "width": 4,
            "height": 50
        },
        "type": "CHART",
        "id": "CHART-729324f6",
        "children": []
    },
    "COLUMN-25a865d6": {
        "meta": {
            "width": 4,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-25a865d6",
        "children": [
            "ROW-cc97c6ac",
            "CHART-e2cb4997"
        ]
    },
    "COLUMN-4557b6ba": {
        "meta": {
            "width": 8,
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "COLUMN",
        "id": "COLUMN-4557b6ba",
        "children": [
            "ROW-d2e78e59",
            "CHART-e9cd8f0b"
        ]
    },
    "GRID_ID": {
        "type": "GRID",
        "id": "GRID_ID",
        "children": [
            "ROW-8515ace3",
            "ROW-1890385f",
            "ROW-f0b64094",
            "ROW-be9526b8"
        ]
    },
    "HEADER_ID": {
        "meta": {
            "text": "Births"
        },
        "type": "HEADER",
        "id": "HEADER_ID"
    },
    "MARKDOWN-00178c27": {
        "meta": {
            "width": 5,
            "code": "<div style=\\"text-align:center\\">\\n <h1>Birth Names Dashboard</h1>\\n <p>\\n The source dataset came from\\n <a href=\\"https://github.com/hadley/babynames\\" target=\\"_blank\\">[here]</a>\\n </p>\\n <img src=\\"/static/assets/images/babytux.jpg\\">\\n</div>\\n",
            "height": 38
        },
        "type": "MARKDOWN",
        "id": "MARKDOWN-00178c27",
        "children": []
    },
    "ROOT_ID": {
        "type": "ROOT",
        "id": "ROOT_ID",
        "children": [
            "GRID_ID"
        ]
    },
    "ROW-1890385f": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-1890385f",
        "children": [
            "CHART-e440d205",
            "CHART-0dd270f0",
            "CHART-a3c21bcc"
        ]
    },
    "ROW-8515ace3": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-8515ace3",
        "children": [
            "COLUMN-25a865d6",
            "COLUMN-4557b6ba"
        ]
    },
    "ROW-be9526b8": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-be9526b8",
        "children": [
            "CHART-985bfd1e",
            "CHART-17f13246",
            "CHART-729324f6"
        ]
    },
    "ROW-cc97c6ac": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-cc97c6ac",
        "children": [
            "CHART-976960a5",
            "CHART-58575537"
        ]
    },
    "ROW-d2e78e59": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-d2e78e59",
        "children": [
            "MARKDOWN-00178c27",
            "CHART-59444e0b"
        ]
    },
    "ROW-f0b64094": {
        "meta": {
            "background": "BACKGROUND_TRANSPARENT"
        },
        "type": "ROW",
        "id": "ROW-f0b64094",
        "children": [
            "CHART-e8774b49"
        ]
    },
    "DASHBOARD_VERSION_KEY": "v2"
}
        """)
    pos = json.loads(js)
    # dashboard v2 doesn't allow add markup slice
    dash.slices = [slc for slc in slices if slc.viz_type != 'markup']
    update_slice_ids(pos, dash.slices)
    dash.dashboard_title = 'Births'
    dash.position_json = json.dumps(pos, indent=4)
    dash.slug = 'births'
    db.session.merge(dash)
    db.session.commit()
Exemple #33
0
def init():
    """Inits the Superset application"""
    utils.get_or_create_main_db()
    utils.get_example_database()
    appbuilder.add_permissions(update_perms=True)
    security_manager.sync_role_definitions()
Exemple #34
0
def load_energy():
    """Loads an energy related dataset to use with sankey and graphs"""
    tbl_name = "energy_usage"
    data = get_example_data("energy.json.gz")
    pdf = pd.read_json(data)
    pdf.to_sql(
        tbl_name,
        db.engine,
        if_exists="replace",
        chunksize=500,
        dtype={"source": String(255), "target": String(255), "value": Float()},
        index=False,
    )

    print("Creating table [wb_health_population] reference")
    tbl = db.session.query(TBL).filter_by(table_name=tbl_name).first()
    if not tbl:
        tbl = TBL(table_name=tbl_name)
    tbl.description = "Energy consumption"
    tbl.database = utils.get_or_create_main_db()

    if not any(col.metric_name == "sum__value" for col in tbl.metrics):
        col = str(column("value").compile(db.engine))
        tbl.metrics.append(
            SqlMetric(metric_name="sum__value", expression=f"SUM({col})")
        )

    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    slc = Slice(
        slice_name="Energy Sankey",
        viz_type="sankey",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent(
            """\
        {
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "having": "",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Energy Sankey",
            "viz_type": "sankey",
            "where": ""
        }
        """
        ),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Energy Force Layout",
        viz_type="directed_force",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent(
            """\
        {
            "charge": "-500",
            "collapsed_fieldsets": "",
            "groupby": [
                "source",
                "target"
            ],
            "having": "",
            "link_length": "200",
            "metric": "sum__value",
            "row_limit": "5000",
            "slice_name": "Force",
            "viz_type": "directed_force",
            "where": ""
        }
        """
        ),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)

    slc = Slice(
        slice_name="Heatmap",
        viz_type="heatmap",
        datasource_type="table",
        datasource_id=tbl.id,
        params=textwrap.dedent(
            """\
        {
            "all_columns_x": "source",
            "all_columns_y": "target",
            "canvas_image_rendering": "pixelated",
            "collapsed_fieldsets": "",
            "having": "",
            "linear_color_scheme": "blue_white_yellow",
            "metric": "sum__value",
            "normalize_across": "heatmap",
            "slice_name": "Heatmap",
            "viz_type": "heatmap",
            "where": "",
            "xscale_interval": "1",
            "yscale_interval": "1"
        }
        """
        ),
    )
    misc_dash_slices.add(slc.slice_name)
    merge_slice(slc)