Beispiel #1
0
    def plot_diff_correlations(data):
        """
        Plot the correlation between the daily price difference and
        several Twitter variables, and save the chart as an HTML file.

        A dropdown lets the viewer choose which dependent variable is shown.
        The chart is written to ``resources/diff_correlations.html``;
        nothing is returned.

        :param data: Combined data DataFrame. Its index is copied into a
            ``date`` column, and it must contain the columns
            ``daily_diff_usd``, ``number_of_tweets``, ``likes``,
            ``retweets`` and ``polarity``.
        """
        variables = ['number_of_tweets', 'likes', 'retweets', 'polarity']

        # assign() works on a copy, so the caller's DataFrame is not mutated.
        source = data.assign(date=data.index)[['date', 'daily_diff_usd'] + variables]
        # Unpivot: one row per (date, dependent variable) pair. melt() already
        # returns a fresh RangeIndex; 'date' has to stay a regular column
        # because Altair does not serialize the index and the tooltip below
        # reads the 'date' field.
        source = source.melt(['date', 'daily_diff_usd'],
                             var_name='dependent_variable',
                             value_name='measure')

        input_dropdown = alt.binding_select(options=variables)
        selection = alt.selection_single(fields=['dependent_variable'], bind=input_dropdown, name='Choose')

        chart = alt.Chart(source).mark_point().encode(
            x=alt.X('measure:Q', title='Wert der ausgewählten abh. Variable'),
            y=alt.Y('daily_diff_usd:Q', title='Kursdifferenz (USD)'),
            color='dependent_variable:N',
            tooltip='date:T'
        ).add_selection(
            selection
        ).transform_filter(
            selection
        ).properties(title="Korrelation Kursdifferenz/abhängige Variable")

        # Workaround for interactive charts in presentation mode
        chart.save("resources/diff_correlations.html")
Beispiel #2
0
def altair_sir_plot(df_alt, default_country):
    """Build a layered Altair chart of the SIR curves with a country dropdown.

    The chart layers the 'Infected' and 'Removed' lines, a translucent
    min/max band per curve, a per-country title text, and a vertical rule
    at day 0.

    :param df_alt: DataFrame with at least the columns ``country``,
        ``title``, ``day``, the curve columns in ``line_cols`` and, for
        each curve, ``<curve>.max`` / ``<curve>.min`` columns.
    :param default_country: Country initially selected in the dropdown.
    :return: The configured layered chart, filtered by the selected country.
    """
    import functools

    alt.data_transformers.disable_max_rows()

    select_country = alt.selection_single(
        name='Select',
        fields=['country'],
        init={'country': default_country},
        bind=alt.binding_select(options=sorted(df_alt['country'].unique()))
    )

    title = (alt.Chart(df_alt[['country', 'title']].drop_duplicates())
             .mark_text(dy=-180, dx=0, size=16)
             .encode(text='title:N')
             .transform_filter(select_country))

    base = alt.Chart(df_alt).encode(x='day:Q')

    line_cols = ['Infected', 'Removed']  # 'Susceptible'
    colors = ['red', 'green']
    lines = (base.mark_line()
             .transform_fold(line_cols)
             .encode(x=alt.X('day:Q', title=f'days relative to today ({CovidData.cur_date})'),
                     y=alt.Y('value:Q',
                             axis=alt.Axis(format='%', title='Percentage of Population')),
                     color=alt.Color('key:N',
                                     scale=alt.Scale(domain=line_cols, range=colors))))

    # The field names contain a literal dot, which is escaped with a
    # backslash in the shorthand. The backslash is written as '\\' so the
    # string value is unchanged but Python no longer sees an invalid '\.'
    # escape sequence.
    bands = functools.reduce(alt.Chart.__add__,
                             [base.mark_area(opacity=0.1, color=color)
                              .encode(y=f'{col}\\.max:Q', y2=f'{col}\\.min:Q')
                              for col, color in zip(line_cols, colors)])

    today_line = (alt.Chart(pd.DataFrame({'x': [0]}))
                  .mark_rule(color='orange')
                  .encode(x='x', size=alt.value(1)))

    return ((lines + bands + title + today_line)
            .add_selection(select_country)
            .transform_filter(select_country)
            .configure_title(fontSize=20)
            .configure_axis(labelFontSize=15, titleFontSize=18, grid=True)
            .properties(width=550, height=340))
Beispiel #3
0
def visualize_mod_time(directory, bus_num, module_num):
    """Return a line chart of counts versus voltage, one line per retrieval date.

    The frame from ``build_module_average_df`` is melted so that every
    column other than ``DateRetrieved`` becomes a ``voltage``/``counts``
    pair. A dropdown of retrieval dates drives the line colour, and an
    interval selection bound to the scales allows pan and zoom.
    """
    averages = build_module_average_df(directory, bus_num, module_num).reset_index()
    data = averages.melt('DateRetrieved', var_name='voltage', value_name='counts')

    # Pan/zoom selection bound to the axes.
    brush = alt.selection_interval(bind='scales')

    # Dropdown listing every retrieval date found in the data.
    date_picker = alt.selection_single(
        fields=['DateRetrieved'],
        bind=alt.binding_select(options=list(data['DateRetrieved'].unique())),
        name=' ',
    )

    # Lines of the chosen date keep their colour; all others turn gray.
    line_color = alt.condition(
        date_picker,
        alt.Color('DateRetrieved:N'),
        alt.value('lightgray'),
    )

    chart = alt.Chart(data.reset_index()).mark_line()
    chart = chart.encode(
        x='voltage:Q',
        y='counts:Q',
        color=line_color,
        tooltip='Name:N',
    )
    return chart.add_selection(brush, date_picker)
def show_yearly_sale(df):
    """Render the 'Yearly Sales Trend' section in Streamlit.

    Writes a heading and intro text, then two charts: a line chart of
    summed sales per year and region (a region is highlighted by clicking
    its line) and a bar chart of yearly sales for the region chosen in a
    dropdown.
    """
    st.write('## Yearly Sales Trend')
    st.write(
        "**The dataset contains yearly sales data for various games in different regions. Let's explore the sales trend in different regions through some interactions!** 👇🏻"
    )

    columns = [
        'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'
    ]

    # Dropdown-driven selection used by the bar chart further down.
    region_dropdown = alt.selection_single(
        fields=['Region Sale'],
        bind=alt.binding_select(options=columns, name='Select a Region:'),
        init={'Region Sale': 'Global_Sales'},
    )

    # line chart
    st.write("💡 *You can select specific region by clicking on the lines*")

    yearly = get_melt_year_df(df, columns)
    line_pick = alt.selection_single(encodings=['color'])
    line_color = alt.condition(line_pick,
                               "Region:N",
                               alt.value('lightgrey'),
                               scale=alt.Scale(scheme='tableau20'))
    line_chart = alt.Chart(yearly).mark_line(opacity=0.75, thickness=10).encode(
        x='Year:N',
        y=alt.Y('value:Q', title='Sale (in millions)', aggregate='sum'),
        color=line_color,
    ).add_selection(line_pick)
    st.write(line_chart)

    # bar chart
    st.write("💡 *You can select specific region with the dropdown menu below*")

    # Fold the region columns into key/value rows, then keep only the
    # rows of the region picked in the dropdown.
    folded = alt.Chart(df).transform_fold(columns, as_=['Region Sale', 'Sale'])
    bar_chart = folded.transform_filter(region_dropdown).mark_bar().encode(
        x=alt.X('Year:N'),
        y=alt.Y('Sale:Q', title='Sale (in millions)'),
    ).add_selection(region_dropdown).properties(height=500)
    st.write(bar_chart)
Beispiel #5
0
def create_viz_region_confirmed_and_rate(charts_path: str,
                                         dfs: Dict[str, pd.DataFrame]) -> None:
    """Save a two-panel chart of confirmed cases and growth rate per region.

    Reads the ``"region-agg-confirmed"`` and ``"region-agg-growth-rate"``
    frames from ``dfs``, stacks one line chart for each vertically, and
    writes the result to ``region-agg-chart.html`` inside ``charts_path``.
    Both panels share one dropdown that filters by region.
    """
    confirmed_df = dfs["region-agg-confirmed"]
    growth_df = dfs["region-agg-growth-rate"]
    output_file = os.path.join(charts_path, "region-agg-chart.html")

    region_field = typedef.Columns.REGION
    locations = sorted(confirmed_df["location"].unique())

    # One dropdown shared by both panels, preselecting the first location.
    region_picker = alt.selection_single(
        fields=[region_field],
        bind=alt.binding_select(options=locations),
        name="Location",
        init={region_field: locations[0]},
    )
    region_color = alt.condition(
        region_picker,
        alt.Color("%s:N" % region_field, legend=None),
        alt.value("lightgray"),
    )

    def _region_line(frame, value_column, y_title):
        # Line chart with point markers for one metric, filtered by region.
        chart = alt.Chart(frame).mark_line(point=True).encode(
            alt.X(typedef.Columns.DATE, title="Date"),
            alt.Y(value_column, title=y_title),
            color=region_color,
            tooltip=value_column,
        )
        return chart.add_selection(region_picker).transform_filter(region_picker)

    confirmed_chart = _region_line(confirmed_df, typedef.Columns.CONFIRMED,
                                   "# New Cases")
    growth_chart = _region_line(growth_df, typedef.Columns.GROWTH_RATE,
                                "Rate of New Cases")

    stacked = alt.vconcat(confirmed_chart, growth_chart)
    stacked.save(output_file, embed_options={"renderer": "svg"})
Beispiel #6
0
def get_line_chart(multi=False):
    """Build the flu-cases line chart in one of two interaction modes.

    The frame from ``get_df()`` is melted into ``week`` / ``country`` /
    ``flu_cases`` rows; every column after the first is treated as a
    country.

    :param multi: When true, return the line chart next to a clickable
        country list driven by a multi-selection. Otherwise return the
        line chart with a country dropdown and two week sliders that
        bound the displayed week range.
    :return: The composed Altair chart.
    """
    flunet_df = get_df()
    countries = flunet_df.columns.tolist()[1:]
    transformed_df = pd.melt(
        flunet_df,
        id_vars=["week"],
        value_vars=countries,
        var_name="country",
        value_name="flu_cases",
    )
    transformed_df["index"] = transformed_df["week"] % 53

    if multi:
        selection = alt.selection_multi(fields=["country"])
        line, color = plot(selection, transformed_df)
        # A column of rectangles, one per country, acting as the picker.
        make_selector = (alt.Chart(pd.DataFrame({
            "country": countries
        })).mark_rect().encode(y="country",
                               color=color).add_selection(selection))
        return line | make_selector

    # The week sliders are only needed in single-country mode, so they
    # are built here instead of before the branch.
    select_week1 = alt.selection_single(
        name="week1",
        fields=["week"],
        bind=alt.binding_range(min=1, max=52, step=1))
    select_week2 = alt.selection_single(
        name="week2",
        fields=["week"],
        bind=alt.binding_range(min=1, max=52, step=1))

    selector = alt.selection(
        type="single",
        fields=["country"],
        bind=alt.binding_select(options=countries),
        name="Select",
    )

    line, _ = plot(selector, transformed_df)
    # week1_week / week2_week are the slider values referenced inside the
    # filter expressions; only weeks strictly between them are kept.
    return (line.add_selection(select_week2).add_selection(select_week1).
            transform_filter("datum.week > week1_week").transform_filter(
                "datum.week < week2_week"))
    def get_skill_count():
        """Return a bar chart of the 50 highest skill counts with a category dropdown.

        Reads ``skills_count``, a mapping from category to a sequence of
        entries whose first two items are used as skill and count.
        """
        rows = [[entry[0], entry[1], group]
                for group, entries in skills_count.items()
                for entry in entries]

        ranked = pd.DataFrame(rows, columns=['Skill', 'Count', 'Category'])
        ranked = ranked.sort_values('Count', ascending=False)
        source = ranked.nlargest(50, columns='Count')

        # Dropdown options: 'all' first, then every category in the top 50.
        present_categories = np.array(source['Category'].unique())
        dropdown_options = np.insert(present_categories, 0, 'all')

        # Dropdown box
        category_picker = alt.selection_single(
            fields=['Category'],
            bind=alt.binding_select(options=dropdown_options),
            name='Skill ')

        bars = alt.Chart(source).mark_bar().encode(
            x='Count:Q',
            y=alt.Y('Skill', sort='-x'))
        c_skills_count = bars.add_selection(category_picker).transform_filter(
            category_picker)
        return c_skills_count
Beispiel #8
0
def countWordPlot(source, x_val, y_val, category):
    """Return a bubble chart of occurrence counts of ``x_val``.

    Circles are positioned and sized by ``count(x_val)`` and coloured by
    ``category``. A dropdown filters the rows to one value of ``y_val``.
    """
    dropdown_values = list(np.unique(source[y_val]))
    category_values = list(np.unique(source[category]))
    count_expr = f'count({x_val})'

    bubbles = alt.Chart(source).mark_circle().encode(
        x=x_val,
        y=count_expr,
        size=count_expr,
        color=alt.Color(f'{category}:N',
                        scale=alt.Scale(domain=category_values)),
        tooltip=[x_val],
    ).interactive()

    # A dropdown filter
    picker = alt.selection_single(
        fields=[y_val],
        bind=alt.binding_select(options=dropdown_values),
        name="Test",
    )

    filtered = bubbles.add_selection(picker).transform_filter(picker)
    return filtered.properties(
        title="Word Distribution of Predicted Captions",
        width=800,
        height=300,
    )
def close_for_how_long(yelp_join):
    """Render the planned-reopening section in Streamlit.

    Shows intro text, then two stacked charts of the 'Temporary Closed
    Until' values of ``yelp_join``: a small area chart used for brushing
    a time range, and a bar chart whose x-domain follows the brush and
    whose colour highlights the category chosen in a dropdown.
    """
    st.markdown("From now on, we retrive the original information in **Temporary Closed Until**, **Covid Banner**, and **highlights**.")

    # NOTE(review): this count reads the module-level yelp_covid_df, not the
    # yelp_join argument used everywhere else - confirm that is intended.
    notified = sum(yelp_covid_df['Temporary Closed Until'] != 'FALSE')
    st.write("First, let's see when did these temporarly closed businesses plan to reopen in June 10. There are {} businesses uploading closure notification.".format(notified))

    st.write("You may select certain category you are interested in from the bottom box, and brush certain time sub-interval from the upper figure.")

    # Rows that carry a closure date (anything other than the 'FALSE' marker).
    closed_rows = yelp_join[yelp_join['Temporary Closed Until'] != 'FALSE']

    df = pd.DataFrame()
    # Drop the last five characters of every closure timestamp string.
    df['Close Until'] = [stamp[:-5] for stamp in closed_rows['Temporary Closed Until']]
    df['Category'] = list(closed_rows['categories'].fillna('').apply(find_category))

    brush = alt.selection_interval()
    category_dropdown = alt.binding_select(options=cate_list, name="Category of ")
    picked = alt.selection_single(encodings=["color"], bind=category_dropdown)

    overview_rows = df[df['Close Until'] < '2021-01-01']
    overview = alt.Chart(overview_rows).mark_area().encode(
        alt.X("Close Until:T"),
        alt.Y("count()"),
    ).properties(height=50, width=500).add_selection(brush)

    detail_rows = df[df['Close Until'] < '2021-01-01T00:00:00']
    detail = alt.Chart(detail_rows).mark_bar(size=20).encode(
        alt.X("Close Until:T", scale=alt.Scale(domain=brush)),
        alt.Y("count()", title='Business number'),
        alt.Tooltip(["Close Until:T", "Category:N", "count()"]),
        color=alt.condition(picked, "Category:N", alt.value("lightgray")),
    ).add_selection(picked).properties(height=300, width=500)

    chart = overview & detail

    st.write(chart)
    st.write("It is interesting that the planned reopen time is quite concentrated, most during June and July, and on the start or end of a certain month.")
Beispiel #10
0
def make_plot_bot(data=df_t4):
    """Return a bar chart and a map of reports per district, filtered by crime.

    The left panel counts reports per ``PdDistrict``; the right panel
    plots each report at its ``X``/``Y`` coordinates. A dropdown of the
    distinct ``Category`` values filters both panels.

    :param data: Report data with ``X``, ``Y``, ``PdDistrict`` and
        ``Category`` columns; defaults to the module-level ``df_t4``.
    :return: The horizontally concatenated, filtered chart.
    """
    chart_1 = alt.Chart(data).mark_circle(size=3, opacity=0.8).encode(
        longitude='X:Q',
        latitude='Y:Q',
        color=alt.Color('PdDistrict:N', legend=alt.Legend(title="District")),
        tooltip='PdDistrict'
    ).project(
        type='albersUsa'
    ).properties(
        width=450,
        height=350
    )

    chart_2 = alt.Chart(data).mark_bar().encode(
        x=alt.X('PdDistrict:N', axis=None, title="District"),
        y=alt.Y('count()', title="Count of reports"),
        color=alt.Color('PdDistrict:N', legend=alt.Legend(title="District")),
        tooltip=['PdDistrict', 'count()']
    ).properties(
        width=450,
        height=350
    )

    # A dropdown filter. The selection name contains a literal backslash;
    # it is written as '\\' so the value is unchanged but Python no longer
    # sees an invalid '\ ' escape sequence.
    crimes_dropdown = alt.binding_select(options=list(data['Category'].unique()))
    crimes_select = alt.selection_single(fields=['Category'], bind=crimes_dropdown,
                                         name="Pick\\ Crime")

    combine_chart = (chart_2 | chart_1)

    filter_crimes = combine_chart.add_selection(
        crimes_select
    ).transform_filter(
        crimes_select
    )

    return filter_crimes
Beispiel #11
0
    def display_img(self, args):
        """Redraw the output area with one line chart per newly selected CSV.

        Clears ``self.output`` and, for every path in ``args["new"]``,
        reads the CSV, melts it into ``variable``/``value`` rows and
        displays a line chart with a column dropdown and x-axis zoom.

        :param args: Change notification whose ``"new"`` entry is an
            iterable of CSV paths.
        """
        self.output.clear_output()
        with self.output:
            for csv_path in args["new"]:
                df = pd.read_csv(csv_path, sep=",")
                # regex=False replaces literal dots only. As a regex, "."
                # matches any character, which on pandas versions where
                # regex defaults to True turns every column name into
                # underscores.
                df.columns = df.columns.str.replace(".", "_", regex=False)

                dropdown = alt.binding_select(options=list(df.columns))
                selection = alt.selection_single(
                    fields=["variable"],
                    bind=dropdown,
                    name="Selection of",  # empty=df.columns[1]
                )

                color = alt.condition(selection, alt.Color("variable:N"),
                                      alt.value("lightgray"))
                # Interval selection bound to the x scale for pan/zoom.
                scales = alt.selection_interval(encodings=["x"], bind="scales")
                chart = (alt.Chart(df.melt().reset_index()).mark_line().encode(
                    x="index", y="value", color=color).add_selection(
                        selection).transform_filter(selection).properties(
                            width=400, height=300).add_selection(scales))

                # Lift Altair's row limit just for this display call.
                with alt.data_transformers.enable("default", max_rows=None):
                    display(chart)
Beispiel #12
0
def catCountWordPlot(source, x_val, y_val, category):
    """Return a bubble chart of summed ``y_val`` per ``x_val``.

    Circles are positioned and sized by ``sum(y_val)`` and coloured by
    ``category``. A dropdown filters the rows to one category.
    """
    category_values = list(np.unique(source[category]))
    sum_expr = f'sum({y_val})'

    x_axis = alt.X(x_val,
                   stack=None,
                   scale=alt.Scale(),
                   axis=alt.Axis(labelOverlap=True))
    bubbles = alt.Chart(source).mark_circle().encode(
        x=x_axis,
        y=sum_expr,
        size=sum_expr,
        color=alt.Color(f'{category}:N',
                        scale=alt.Scale(domain=category_values)),
        tooltip=[x_val],
    ).interactive()

    # A dropdown filter
    picker = alt.selection_single(
        fields=[category],
        bind=alt.binding_select(options=category_values),
        name="Category",
    )

    filtered = bubbles.add_selection(picker).transform_filter(picker)
    return filtered.properties(
        title="Word Distribution of Predicted Captions",
        width=800,
        height=300,
    )
    st.write(data_barvis_PA)

# Distinct geo_value codes present in the bar-visit data.
county_data = pandasql.sqldf("select distinct geo_value from data_barvis_PA")
county_details = {}
print(county_data.shape[0])
l = county_data["geo_value"].tolist()
print(str(covidcast.fips_to_name(county_data.iloc[1])))

# Map each county name to its geo_value. The name is taken from the string
# form of the lookup result with its first two and last two characters cut
# off (presumably the brackets and quotes of a one-element list - confirm).
for i, geo_value in enumerate(l):
    name_repr = str(covidcast.fips_to_name(county_data.iloc[i]))
    county_details[name_repr[2:-2]] = geo_value

input_drop = alt.binding_select(
    options=list(county_details.values()),
    name="Select County to Highlight data for Bar visits")
picked = alt.selection_single(encodings=["color"], bind=input_drop)

# One line per county; lines outside the selection are grayed and faded.
scatter = alt.Chart(data_barvis_PA).mark_line().encode(
    x=alt.X("monthdate(time_value):O"),
    y=alt.Y("value:Q",
            axis=alt.Axis(title='Average number of daily bar visits')),
    tooltip=['geo_value', 'monthdate(time_value)', 'value'],
    color=alt.condition(picked, 'geo_value', alt.value('lightgray')),
    opacity=alt.condition(picked, alt.value(1), alt.value(0.05)),
).add_selection(picked).properties(width=800, height=400).interactive()

data = fetch(4)

# The query names the variable `data` directly, so it must keep that name.
data3_6hr = pandasql.sqldf("select * from data where geo_value like '42%'")
Beispiel #14
0
def _fit_best_kmeans(X_pca):
    """Choose k in 2..9 by mean silhouette score and fit the final KMeans.

    Returns the model fitted with the best k (random_state=11) and its
    predicted cluster for every row of X_pca.
    """
    silhouettes = {}
    for k in range(2, 10):
        trial = KMeans(n_clusters=k)
        trial.fit(X_pca)
        trial_preds = trial.predict(X_pca)
        score = silhouette_score(X_pca, trial_preds, metric='euclidean')
        silhouettes[k] = score.round(5)
    # max() keeps the first best entry, i.e. the smallest k on a tie.
    best_k = max(silhouettes, key=silhouettes.get)
    km = KMeans(n_clusters=best_k, random_state=11).fit(X_pca)
    return km, km.predict(X_pca)


def _keyword_dropdown(options):
    """Build the dropdown-bound single selection that filters on 'keyword'."""
    return alt.selection_single(fields=['keyword'],
                                on='doubleclick',
                                clear=False,
                                bind=alt.binding_select(options=options),
                                name='search',
                                init={'keyword': 'balanc'})


def _cluster_scatter_json(X_pca, km, preds, companies, height, width,
                          **extra_encodings):
    """Serialize the PCA scatter of companies with the cluster centers on top."""
    df_km = pd.DataFrame(
        data={
            'pca1': X_pca[:, 0],
            'pca2': X_pca[:, 1],
            'cluster': preds,
            'company': companies
        })
    centers = km.cluster_centers_
    center_frame = pd.DataFrame({"x": centers[:, 0], "y": centers[:, 1]})
    center_marks = alt.Chart(center_frame).mark_point(
        color='black', size=100).encode(x='x', y='y')
    points = alt.Chart(df_km).mark_circle(size=60).encode(
        x='pca1:Q',
        y='pca2:Q',
        color='cluster:N',
        tooltip=['company:N'],
        **extra_encodings).properties(height=height, width=width).interactive()
    return (points + center_marks).to_json()


def _cluster_average_json(labelled, keyvalues, dropdown_options):
    """Serialize the bar chart of mean keyword scores per cluster."""
    means = labelled.groupby('cluster').mean().reset_index()
    m_long = means.melt(id_vars='cluster', value_vars=keyvalues)
    m_long = m_long.rename(columns={"value": "score", "variable": "keyword"})
    brush = alt.selection(type='interval')
    bars = alt.Chart(m_long).mark_bar().encode(
        alt.X('cluster:N'),
        alt.Y('score:Q', title='Score % '),
        color=alt.Color('score:Q', scale=alt.Scale(scheme='darkred')),
        opacity=alt.value(.7),
        tooltip=['score:Q'
                 ]).properties(height=200,
                               width=200).add_selection(brush).interactive()
    column_select = _keyword_dropdown(dropdown_options)
    return bars.add_selection(column_select).transform_filter(
        column_select).to_json()


def _company_profile_json(frame, comp, height, width):
    """Serialize a bar chart of one company's score for each column of frame."""
    tra = frame.T.reset_index(drop=False)
    tra_long = tra.melt(id_vars='index', value_vars=comp)
    tra_long = tra_long.rename(columns={
        "value": "score",
        "variable": "keyword"
    })
    bars = alt.Chart(tra_long).mark_bar().encode(
        alt.X('index:N', title=" "),
        alt.Y('score:Q', title='Score % '),
        color=alt.Color('score:Q', scale=alt.Scale(scheme='darkgreen')),
        opacity=alt.value(.7),
        tooltip=['score:Q']).properties(height=height,
                                        width=width).interactive()
    return bars.to_json()


def _cluster_members(labelled, comp):
    """Return (cluster of comp, companies in that cluster, companies outside it)."""
    number = labelled.loc[[comp], ['cluster']].values[0][0]
    members = list(labelled.loc[labelled['cluster'] == number].index.values)
    member_set = set(members)
    others = [item for item in labelled.index if item not in member_set]
    return number, members, others


def largealtair_plot():
    """Render the plot page for the company given in the request.

    Reads ``company``, ``keyword1``..``keyword3`` and the ``features``
    list from the request arguments, loads ``clusters.csv`` and
    ``li_samplesize.csv``, scales the keyword counts by sample size to
    percentages and clusters the companies twice with PCA + KMeans: once
    on all columns and once on the selected features only. Eight Altair
    charts are serialized to JSON and passed to ``plot.html`` together
    with the cluster membership lists.

    Returns:
        The rendered ``plot.html``; ``error.html`` when the company is
        not in the data; ``error2.html`` when fewer than two features
        were selected.

    Raises:
        RuntimeError: when the ``company`` argument or the ``features``
            list is missing.
    """
    keyvalues = [
        'balanc', 'discount', 'divers', 'food', 'free coff', 'free lunch',
        'gym', 'health', 'ice cream', 'incent', 'pay', 'perk', 'rais',
        'sign bonus', 'stock option', 'stress', 'student loan pay', 'surf',
        'vacat', 'women', 'work environ', 'work home'
    ]
    if not request.args.get("company"):
        raise RuntimeError("Missing company name, go back and select company")

    comp = request.args.get('company')  # company name from the index page
    c = pd.read_csv('clusters.csv')

    # --- Normalize the data before running k-means -------------------
    sample_size = pd.read_csv('li_samplesize.csv', header=None)
    sample_size = sample_size.rename(columns={0: "company", 1: "sample_size"})
    t = pd.merge(c, sample_size, how="left", on='company')
    t = t.drop(t.columns[0], axis=1)  # drop the first (unnamed) column
    t = t.set_index('company')  # leave only numbers
    t = t.div(t.sample_size, axis=0)
    t = round(t * 100, 3)
    t = t.drop(labels='sample_size', axis=1)  # no longer needed
    # Fill missing values BEFORE the float conversion. The original
    # discarded the fillna() result, so NaNs reached PCA.
    d = t.fillna(0).astype(float)

    # --- Dimensionality reduction and k-means on all columns ---------
    X_pca = PCA(n_components=2).fit_transform(d)
    km, predsi = _fit_best_kmeans(X_pca)

    # NOTE(review): 'comp' is not a column of the scatter data; the shape
    # encoding is kept as in the original - confirm whether it is wanted.
    json3 = _cluster_scatter_json(X_pca, km, predsi, list(d.index),
                                  height=350, width=600, shape='comp:N')

    # Cluster averages, to compare clusters keyword by keyword.
    d['cluster'] = km.labels_
    json4 = _cluster_average_json(d, keyvalues, keyvalues)

    # --- Companies inside the requested company's cluster ------------
    try:
        number, ri, not_ri = _cluster_members(d, comp)
    except (KeyError, IndexError):
        # The requested company is not present in the data.
        return render_template('error.html')

    n = d.reset_index()
    filter_c = n[n.company.isin(ri)]
    c_num1 = len(n.groupby('cluster'))

    # Chart height grows with the number of companies in the cluster.
    # (The original if/if/else overwrote the small-cluster height.)
    num = len(ri)
    if num < 30:
        height = 200
    elif num < 200:
        height = 2200
    else:
        height = 3200

    # --- Compare all keyvalues of all companies within the cluster ---
    c_long = filter_c.melt(id_vars='company', value_vars=keyvalues)
    c_long = c_long.rename(columns={"value": "score", "variable": "keyword"})
    member_bars = alt.Chart(c_long).mark_bar().encode(
        alt.Y('company:N', title=' '),
        alt.X('score:Q', title='Score %'),
        color=alt.Color('score:Q', scale=alt.Scale(scheme='cividis')),
        opacity=alt.value(.7),
        tooltip=['score:Q']).properties(height=height)
    column_select = _keyword_dropdown(list(c.columns.values[2:]))
    json1 = member_bars.add_selection(column_select).transform_filter(
        column_select).interactive().to_json()

    # --- Company profile (first graph on the plot page) --------------
    json2 = _company_profile_json(d.drop(labels='cluster', axis=1), comp,
                                  height=300, width=550)

    # --- Three most important keywords to explore --------------------
    brush = alt.selection(type='interval')
    keyword1 = request.args.get("keyword1")
    keyword2 = request.args.get("keyword2")
    keyword3 = request.args.get("keyword3")

    points = alt.Chart(n).mark_point().encode(
        x=alt.X(keyword1),
        y=alt.Y(keyword2),
        color=alt.Color('company:N', legend=None),
        size=keyword3,
        tooltip=[keyword1, keyword2, keyword3, 'pay', 'company'
                 ]).properties(height=300,
                               width=500).add_selection(brush).interactive()
    json5 = points.to_json()

    # --- Re-cluster on the selected features only --------------------
    if not request.args.getlist("features"):
        raise RuntimeError(
            "Must check at least two boxes, go back and select two boxes. Also make triplesure your company field is not blank."
        )
    features = request.args.getlist("features")
    if len(features) < 2:
        return render_template('error2.html')

    # 1. New frame holding only the company names and chosen features.
    make_new = pd.DataFrame(n['company'])
    for key in features:
        make_new[key] = n[key]
    d3 = make_new.set_index('company')

    # 2. Company profile restricted to the chosen features.
    json7 = _company_profile_json(d3, comp, height=200, width=300)

    # 3. PCA + KMeans on the reduced frame.
    X_pca = PCA(n_components=2).fit_transform(d3)
    km, predsi = _fit_best_kmeans(X_pca)
    json6 = _cluster_scatter_json(X_pca, km, predsi, list(d.index),
                                  height=250, width=400)

    # Relabel with the new clusters; everything below uses these labels.
    d['cluster'] = km.labels_
    number2, ri2, not_ri2 = _cluster_members(d, comp)
    c_num2 = len(d.groupby('cluster'))

    # Cluster averages under the new labels; dropdown lists the features.
    json8 = _cluster_average_json(d, keyvalues, features)

    return render_template('plot.html',
                           json=json1,
                           json2=json2,
                           json3=json3,
                           json4=json4,
                           json5=json5,
                           json6=json6,
                           lila=ri,
                           lila2=ri2,
                           not_ri2=not_ri2,
                           not_ri=not_ri,
                           company=comp,
                           json7=json7,
                           json8=json8,
                           number=number,
                           number2=number2,
                           c_num2=c_num2,
                           c_num1=c_num1)
Beispiel #15
0
    # altair code
    #
    chart = alt.Chart(aln)

    brush = alt.selection(type='interval', encodings=["x"])
    color = alt.Color('cutid:Q',
                      sort="descending",
                      scale=alt.Scale(scheme='spectral'),
                      legend=None)
    mycolor = alt.condition(brush, color, alt.value('lightgray'))
    single_nearest = alt.selection_single(on='mouseover',
                                          nearest=True,
                                          empty='none')
    scales = alt.selection_interval(bind='scales')

    t_name = alt.binding_select(options=contigs)
    q_name = alt.binding_select(options=contigs)
    select_t = alt.selection_single(fields=['t_name'],
                                    bind=t_name,
                                    name="Target",
                                    init={"t_name": contigs[0]})
    select_q = alt.selection_single(fields=['q_name'],
                                    bind=q_name,
                                    name="Query",
                                    init={"q_name": contigs[0]})

    segs = chart.mark_line().encode(x=alt.X('x1:Q',
                                            title='Target position (bp)'),
                                    x2="x2:Q",
                                    y=alt.Y("y1:Q",
                                            title='Query position (bp)'),
Beispiel #16
0
                                     scale=alt.Scale(domain=(100000, 10**9),
                                                     clamp=True)),
                                                        y='IMDB_Rating:Q',
                                                        tooltip="Title:N")

# A slider filter: a range input (1969-2018, step 1) bound to a single-value
# selection on the 'Release_Year' field.
year_slider = alt.binding_range(min=1969, max=2018, step=1)
slider_selection = alt.selection_single(bind=year_slider,
                                        fields=['Release_Year'],
                                        name="Release Year_")

# Copy of `base` that shows only the rows matching the slider value.
filter_year = base.add_selection(slider_selection).transform_filter(
    slider_selection).properties(title="Slider Filtering")

# A dropdown filter: one option per entry of `genres`, matched against the
# 'Major_Genre' field.
genre_dropdown = alt.binding_select(options=genres)
genre_select = alt.selection_single(fields=['Major_Genre'],
                                    bind=genre_dropdown,
                                    name="Genre")

# Copy of `base` that shows only the rows of the chosen genre.
filter_genres = base.add_selection(genre_select).transform_filter(
    genre_select).properties(title="Dropdown Filtering")

# Color-changing marks: radio buttons (one per entry of `ratings`) bound to a
# selection on 'MPAA_Rating'; the selection is used for conditional coloring
# rather than filtering.
rating_radio = alt.binding_radio(options=ratings)

rating_select = alt.selection_single(fields=['MPAA_Rating'],
                                     bind=rating_radio,
                                     name="Rating")
rating_color_condition = alt.condition(rating_select,
                                       alt.Color('MPAA_Rating:N', legend=None),
# Scatter plot of search trend against confirmed cases, colored by country.
base = alt.Chart(source).mark_point(size=60).encode(
    x='Confirmed',
    y='SearchTrend',
    color='Country',
    tooltip=[
        'Country', 'Day', 'Confirmed', 'Death', 'Recovered', 'SearchTrend'
    ]).transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        # NOTE(review): no encoding in this snippet references `jitter`, so
        # the calculated field appears to be unused - confirm.
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())')

# Unfiltered version of the scatter plot at full size.
chart = base.properties(width=960, height=720)

# Options for the country dropdown come from the 'Code' column.
country_code = country_table['Code'].to_list()

country_dropdown = alt.binding_select(options=country_code)
country_select = alt.selection_single(fields=['Country'],
                                      bind=country_dropdown,
                                      name="Country")

# Same scatter plot restricted to the country picked in the dropdown.
filtered = base.add_selection(country_select).transform_filter(
    country_select).properties(title="Search Trend vs Confirmed Cases",
                               width=960,
                               height=720)

# Write both variants to standalone HTML files.
chart.save('temp/viz/test5.html')
filtered.save('temp/viz/test6.html')

# line chart
base2 = alt.Chart(source).mark_line().encode(x='Day',
                                             y='SearchTrend',
Beispiel #18
0
#--------------------------------------------------
#--Time versus Value charts (Line)
#--------------------------------------------------
# Page heading and usage instructions rendered by Streamlit.
st.title("Compare the individual metrics over time")
st.markdown("**1) Select a PA County from the dropdown (below charts)**")
st.markdown(
    "**2) Highlight any of the first two graph (emotional charts) to compare with last two (behavior)**"
)
st.markdown(
    "Note: The counties shown are the only counties in which there was data for all four datasets."
)
# st.markdown("Note: Not all counties have data for each metric for every time period (Try large counties such as Alleghany, Philadelphia, York, etc.)")

#---Select County Dropdown--
# Dropdown with one option per entry of `countyList`, bound to a single-value
# selection on the 'name' field.
county_dropdown = alt.binding_select(options=list(countyList))
selectedCounty = alt.selection_single(fields=['name'],
                                      bind=county_dropdown,
                                      name='PA County:')

#---brush select area to focus on
# Interval selection restricted to the x encoding (horizontal drag only).
brush = alt.selection(type='interval', encodings=['x'])

#---commWorry chart
commWorryChart = alt.Chart(commWorrydf).mark_area(color='red').encode(
    alt.X("monthdate(time_value):T", axis=alt.Axis(title='Date')),
    alt.Y("value:Q", axis=alt.Axis(title='Percentage of people')),
    tooltip=[
        alt.Tooltip('geo_value', title='FIPS'),
        alt.Tooltip('monthdate(time_value)', title='date'),
        alt.Tooltip('value', title='Value')
# df_5 for HDI trends: long-format table (Country, year, value) restricted to
# the five countries listed below.
country_5 = ['Norway', 'United States', 'Netherlands', 'Germany', 'Canada']
data = pd.read_csv('HDI.csv')
# Keep only the selected countries and drop the first column of the CSV.
data_5 = data[data['Country'].isin(country_5)].iloc[:, 1:]
# `var_name` must be a scalar for ordinary (non-MultiIndex) columns; passing a
# one-element list is rejected by recent pandas releases.
df_5 = data_5.melt('Country', var_name='year')

################# Altair Charts ################
# altair_vis 1 (history)
# Unique country names, in order of first appearance in df_point.
country_options = list(df_point['Country'].unique())

# radio
# widget = alt.binding_radio(options=country_options,name='Select Country: ')

# drop down
widget = alt.binding_select(options=country_options, name='Select Country: ')

# Single-country selection driven by the dropdown; starts on the sixth entry
# of country_options (index 5), so the list must have at least six items.
# NOTE(review): name='Counry' looks like a typo for 'Country'; left unchanged
# because the selection name is part of the generated chart spec.
selectionEmoji = alt.selection_single(fields=['Country'],
                                      init={'Country': country_options[5]},
                                      bind=widget,
                                      name='Counry')
# Selected country keeps its 'Country' encoding; everything else is white.
colorCondition = alt.condition(selectionEmoji, 'Country', alt.value('white'))

# Legend-bound hover selection; empty='none' means nothing is selected until
# the pointer is over an item.
selection1 = alt.selection_single(empty='none',
                                  on="mouseover",
                                  fields=['Country'],
                                  bind='legend')
# Value 1 for the hovered country, 0.00001 for all others.
condition1 = alt.condition(selection1, alt.value(1), alt.value(0.00001))

# selection_zoom=alt.selection_interval(bind='scales',encodings=['y']) #,encodings=['x']
def make_chart(df, world_topo):
    """Build a linked time-series / world-map view of mobility changes.

    The left panel is a line chart of the mean value per date for the
    selected category and countries. The right panel is a world map colored
    by the value of the selected category. A dropdown chooses the category;
    a multi-selection on the ``country`` field chooses the countries.

    :param df: long-format DataFrame. The code reads the columns ``country``,
        ``country_code``, ``category``, ``value`` and ``date``. Values are
        divided by 100 before plotting (presumably they are percentages -
        confirm against the caller).
    :param world_topo: geographic data passed to ``alt.Chart`` for both map
        layers; features are joined to ``df`` on ``id == country_code``.
    :return: horizontally concatenated Altair chart (time series | map).
    """
    key_columns = ["country", "country_code", "category"]
    categories = list(df.category.unique())

    # Wide table for the map: one row per country, one column per category,
    # holding the last value seen for that pair, scaled by 1/100.
    scaled_values = df.set_index(key_columns)["value"].div(100)
    last_values = scaled_values.groupby(key_columns).last()
    map_data = last_values.unstack().reset_index()

    # Dropdown-driven category selection, starting on "workplaces".
    category_picker = alt.selection_single(
        fields=["category"],
        bind=alt.binding_select(options=categories),
        name="Mobility",
        init={"category": "workplaces"},
    )
    # Multi-country selection; an empty selection matches every country.
    country_picker = alt.selection_multi(
        fields=["country"],
        name="Country of",
        empty="all",
    )

    # Gray base layer of all shapes except the feature with id 10.
    land = alt.Chart(world_topo).mark_geoshape(
        fill="lightgray", stroke="white", strokeWidth=0.5)
    background = land.transform_filter("datum.id != 10")

    # Colored layer: look up the per-country values, fold the category
    # columns into (category, value) rows and keep the chosen category.
    value_color = alt.Color(
        "value:Q",
        scale=alt.Scale(scheme="blueorange", domainMid=0),
        legend=alt.Legend(format=".0%"),
    )
    shapes = alt.Chart(world_topo).mark_geoshape(
        stroke="white", strokeWidth=0.5)
    shapes = shapes.encode(
        color=alt.condition(country_picker, value_color,
                            alt.value("lightgray")),
        tooltip=[alt.Tooltip("value:Q", format=".0%"), "country:N"],
    )
    shapes = shapes.transform_lookup(
        lookup="id",
        from_=alt.LookupData(data=map_data,
                             key="country_code",
                             fields=["country"] + categories),
    )
    shapes = shapes.transform_fold(fold=categories,
                                   as_=["category", "value"])
    shapes = shapes.add_selection(category_picker).transform_filter(
        category_picker)
    foreground = shapes.add_selection(country_picker)

    map_chart = (background + foreground).properties(width=700, height=500)

    # Time series: same 1/100 scaling, filtered by both selections and then
    # averaged per (category, date).
    ts_data = df.assign(value=df["value"].div(100))
    lines = alt.Chart(ts_data).mark_line(point=True).encode(
        x="date:T",
        y=alt.Y("value:Q", axis=alt.Axis(format="%")),
        tooltip=[
            alt.Tooltip("date:T", format="%a, %b %e"),
            alt.Tooltip("value:Q", format=".1%"),
        ],
    )
    lines = lines.add_selection(category_picker).add_selection(country_picker)
    lines = lines.transform_filter(category_picker).transform_filter(
        country_picker)
    ts_chart = lines.transform_aggregate(value="mean(value)",
                                         groupby=["category", "date"])

    combined = ts_chart | map_chart
    return combined.properties(
        title=
        "Mobility change by geography, across different categories of places (Variation to baseline)"
    )
Beispiel #21
0
def chart_altair(df, system_ini='peninsular'):
    '''
    Build a layered Altair chart of electricity generation over time.

    Four area layers are drawn by date ("fecha"): total generation, renewable,
    wind and solar photovoltaic. A dropdown filters by electric system and a
    radio group filters by year. A vertical rule plus text labels show the
    values nearest to the mouse pointer.

    :param df: pandas DataFrame; the code reads the columns 'system', 'year',
        'fecha', 'Generacion_Mwh', 'Tecnologia' and 'Renov_norenov'.
    :param system_ini: system selected initially, 'peninsular' by default.
    :return: interactive Altair layered chart.
    '''

    # X-axis tick labels: the first day of every month from 2016-01 to
    # 2021-12, plus December 31st of each year.
    x_labels = []
    for year in range(2016, 2022):
        x_labels.extend(f'{year}-{month:02d}-01' for month in range(1, 13))
        x_labels.append(f'{year}-12-31')

    # Color assigned to each entry of the color legend.
    palette = {
        'Generación total': '#85C1E9',
        'Renovable': '#239B56',
        'Solar fotovoltaica': '#D35400',
        'Eólica': '#F7DC6F',
    }
    domain = list(palette)
    range_ = list(palette.values())

    # Dropdown choosing the electric system to display.
    system_options = list(df['system'].unique())
    selection_sys = alt.selection_single(
        name='REE',
        fields=['system'],
        bind=alt.binding_select(options=system_options),
        init={'system': system_ini})

    # Radio buttons choosing the year; starts on the largest year in df.
    year_options = list(df['year'].unique())
    selection_year = alt.selection_single(
        name='Choose',
        fields=['year'],
        bind=alt.binding_radio(options=year_options),
        init={'year': max(df['year'])})

    # Selection that tracks the 'fecha' value nearest to the mouse pointer.
    nearest = alt.selection(type='single',
                            nearest=True,
                            on='mouseover',
                            fields=['fecha'],
                            empty='none')

    def only_selected(chart):
        # Restrict a layer to the chosen system, then to the chosen year.
        return chart.transform_filter(selection_sys).transform_filter(
            selection_year)

    def technology_rows(name):
        # Rows of df belonging to one technology.
        return df[df['Tecnologia'] == name]

    def value_label(area, dx, dy):
        # Text layer showing the generation value at the hovered date only.
        label = area.mark_text(align='left', dx=dx, dy=dy, color='#212F3C')
        return label.encode(
            text=alt.condition(nearest, 'Generacion_Mwh', alt.value(' ')))

    # Invisible points that carry the hover selection.
    hover_points = alt.Chart(df).mark_point().encode(
        alt.X('fecha'), opacity=alt.value(0)).add_selection(nearest)
    selectors = only_selected(hover_points)

    # Main layer: total generation. It also owns the two filter widgets.
    total_rows = df[df['Renov_norenov'] == 'Generación total']
    total_area = alt.Chart(total_rows).mark_area(color='#85C1E9').encode(
        alt.X('fecha', axis=alt.Axis(values=x_labels, labelAngle=0)),
        alt.Y('Generacion_Mwh:Q'))
    total_area = total_area.add_selection(selection_sys, selection_year)
    bar = only_selected(total_area).properties(width=1400, height=450)

    # Renewable layer; its color encoding supplies the legend for all layers.
    renov_area = alt.Chart(technology_rows('Renovable')).mark_area().encode(
        alt.X('fecha'),
        alt.Y('Generacion_Mwh:Q'),
        color=alt.Color('Tecnologia',
                        scale=alt.Scale(domain=domain, range=range_)))
    bar_renov = only_selected(renov_area)
    text_renov = value_label(bar_renov, dx=3, dy=-20)

    # Vertical rule at the hovered date.
    rules = alt.Chart(df).mark_rule(color='gray').encode(
        x='fecha', ).transform_filter(nearest)

    # Solar photovoltaic layer and its value label.
    solar_area = alt.Chart(technology_rows('Solar fotovoltaica')).mark_area(
        opacity=.8, color='#D35400').encode(alt.X('fecha'),
                                            alt.Y('Generacion_Mwh:Q'))
    bar_solar = only_selected(solar_area)
    text_solar = value_label(bar_solar, dx=5, dy=-5)

    # Wind layer and its value label.
    wind_area = alt.Chart(technology_rows('Eólica')).mark_area(
        color='#F7DC6F').encode(alt.X('fecha'), alt.Y('Generacion_Mwh:Q'))
    bar_eolica = only_selected(wind_area)
    text_eolica = value_label(bar_eolica, dx=5, dy=-5)

    # Stack every layer, then apply the shared axis/text/legend styling.
    layered = alt.layer(bar, bar_renov, bar_eolica, bar_solar, selectors,
                        rules, text_renov, text_eolica, text_solar)
    styled = layered.configure_axis(labelFontSize=13, titleFontSize=14)
    styled = styled.configure_text(fill='#212F3C', fontSize=13)
    styled = styled.configure_legend(labelFontSize=14)
    return styled.interactive()
def other_viz(result):
    """Build two fire-type charts from the fire records in ``result``.

    The first chart is a bar chart of each fire type's share of all records,
    filterable by year and state through two dropdowns. The second is a
    bubble chart of the number of fires per year and fire type.

    :param result: DataFrame; the code reads the columns 'YEAR_', 'STATE'
        and 'FIRETYPE' (numeric codes 0-5 are replaced by text labels).

    NOTE(review): the combined chart on the last line is built as a bare
    expression and is not returned, so the function returns None. Confirm
    whether the caller relies on notebook/Streamlit display of bare
    expressions before changing this.
    """
    # Text label for each numeric FIRETYPE code.
    firetype_labels = {
        0: 'Action Fires/Supressed Fires',
        1: 'Natural Out',
        2: 'Support Action/Assist Fire',
        3: 'Fire Management/Perscribed',
        4: 'False Alarm',
        5: 'Severe',
    }

    def with_labels(codes):
        # Replace one code at a time, in ascending code order.
        for code, label in firetype_labels.items():
            codes = codes.replace(code, label)
        return codes

    years = list(result['YEAR_'].sort_values().unique())
    states = list(result['STATE'].sort_values().unique())

    # Record count per (year, state, fire type) and its share of all records.
    by_year_state = result.groupby(['YEAR_', 'STATE', 'FIRETYPE']).size()
    firetype_df = by_year_state.reset_index(name="firetype count")
    total_count = firetype_df['firetype count'].sum()
    firetype_df['Average'] = firetype_df['firetype count'] / total_count
    firetype_df['FIRETYPE'] = with_labels(firetype_df['FIRETYPE'])

    # Record count per (year, fire type) across all states.
    by_year = result.groupby(['YEAR_', 'FIRETYPE']).size()
    firetype_df2 = by_year.reset_index(name="Firetype Count")
    firetype_df2['FIRETYPE'] = with_labels(firetype_df2['FIRETYPE'])

    # Dropdown-bound selections for year and state.
    year_select = alt.selection_single(
        fields=['YEAR_'], bind=alt.binding_select(options=years))
    state_select = alt.selection_single(
        fields=['STATE'], bind=alt.binding_select(options=states))

    # Share of each fire type for the chosen year and state.
    share_bars = alt.Chart(firetype_df).mark_bar(color='firebrick').encode(
        x=alt.X('Average:Q', axis=alt.Axis(format='.0%')),
        y='FIRETYPE:N',
    )
    share_bars = share_bars.add_selection(year_select, state_select)
    share_bars = share_bars.transform_filter(year_select).transform_filter(
        state_select)
    fire_type = share_bars.properties(width=300, height=200, title='')

    # Bubble per (year, fire type), sized by the number of fires.
    year_axis = alt.Axis(
        labelAngle=360,
        values=[1980, 1985, 1990, 1995, 2000, 2005, 2010, 2016])
    bubbles = alt.Chart(firetype_df2).mark_circle(
        opacity=0.8, stroke='black', strokeWidth=1)
    bubbles = bubbles.encode(
        alt.X('YEAR_:O', axis=year_axis, title='Year'),
        alt.Y('FIRETYPE:N', title='Types of Fire'),
        alt.Size('Firetype Count:Q',
                 scale=alt.Scale(range=[0, 1000]),
                 legend=None),
        alt.Color('FIRETYPE:N', legend=None),
        alt.Tooltip(['Firetype Count:Q', 'YEAR_']),
    )
    fire_total = bubbles.properties(width=400,
                                    height=200,
                                    title='National Count of Fires by Type')

    fire_type | fire_total
Beispiel #23
0
                            scale=alt.Scale(scheme='reds')))

    compare_map = alt.layer(basemap, compare_dotmap).properties(
        width=698, height=900).configure_view(stroke=None)

    st.altair_chart(compare_map)

elif menu == 'Monitor probability of failure':
    # Make monitor map with Altair date slider
    hazard_stacked = hazard.melt(
        id_vars=['reference', 'longitude', 'latitude'],
        value_vars=timestamps,
        var_name='timestamp',
        value_name='probability of failure')

    timestamp_dropdown = alt.binding_select(options=timestamps)
    timestamp_select = alt.selection_single(fields=['timestamp'],
                                            bind=timestamp_dropdown,
                                            name="Select")

    monitor_dotmap = alt.Chart(
        hazard_stacked,
        title='Probability of failure by timestamp').mark_circle(
            stroke='#aaa', strokeWidth=0.5).add_selection(
                timestamp_select).transform_filter(timestamp_select).encode(
                    latitude='latitude:Q',
                    longitude='longitude:Q',
                    # Added percentage formatting to tooltip
                    tooltip=[
                        alt.Tooltip('reference:N'),
                        alt.Tooltip('probability of failure:Q', format=".2%"),
        'instrumentalness':'Predicts whether a track contains no vocals. “Ooh” and “aah” sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly “vocal”. The closer the instrumentalness value is to 1.0, the greater likelihood the track contains no vocal content. Values above 0.5 are intended to represent instrumental tracks, but confidence is higher as the value approaches 1.0.',
        'speechiness':'Speechiness detects the presence of spoken words in a track. The more exclusively speech-like the recording (e.g. talk show, audio book, poetry), the closer to 1.0 the attribute value. Values above 0.66 describe tracks that are probably made entirely of spoken words. Values between 0.33 and 0.66 describe tracks that may contain both music and speech, either in sections or layered, including such cases as rap music. Values below 0.33 most likely represent music and other non-speech-like tracks. ',
        'acousticness':'A confidence measure from 0.0 to 1.0 of whether the track is acoustic. 1.0 represents high confidence the track is acoustic.',
        }

# Usage instructions shown above the charts.
st.subheader("Use the music metric dropdown (above the charts) to select the metric that will be presented in the charts."
    + " Use the broad genre dropdown (below the charts) to view only the data of that genre."
    + " Click and drag to select a subset of points in the scatter plot and view their music metric distribution in the histogram."
    + " Use tooltip to see the artist name and track (song) name for a particular data point.")

# Every minute of the day (00:00 .. 23:59) in chronological order; used as an
# explicit domain so the ordinal x axis is sorted by time.
hoursMinutesOrdered = []
for hour in range(0, 24):
    for minute in range(0, 60):
        hoursMinutesOrdered.append(alt.DateTime(hours = hour, minutes = minute))

# Genre dropdown bound to a single-value selection on 'broad_genres'.
input_dropdown = alt.binding_select(options=broad_genres, name = "Broad Genre: ")
selection = alt.selection_single(fields=['broad_genres'], bind=input_dropdown)
# Selected genre keeps its color; other points get '#00000000' (alpha 00).
color = alt.condition(selection, alt.Color('broad_genres:N'), alt.value('#00000000'))

# Rectangular brush for picking a subset of scatter points.
scatter_brush = alt.selection(type='interval')

# The metric is chosen with a Streamlit select box (not an Altair binding);
# its explanation text is written right below the widget.
metric_dropdown = st.selectbox('Music Metric:', music_metrics)
st.write(spotify_features_explanations[metric_dropdown])

# Scatter of the chosen metric against time of day; y axis fixed to [0, 1].
base_danceability_vs_hour = alt.Chart(df).mark_point().encode(
    x=alt.X('hoursminutes(endTime_loc):O', title="Hour of the Day", scale = alt.Scale(domain=hoursMinutesOrdered)),
    y=alt.Y(metric_dropdown, type="quantitative", scale=alt.Scale(zero=False, domain=[0.0, 1.0]))
).properties(
    width=700, height=500
)
    'Asian & Pacific Islander', 'Hispanic'
]

# useful constants for mapping
# URL of the us-10m TopoJSON file in the vega repository.
GEOJSON_STATES_URL = ('https://raw.githubusercontent.com/'
                      'vega/vega/master/docs/data/us-10m.json')

# Projection name and color scheme name used by the map charts.
MAP_PROJECTION = 'albersUsa'
COLOR_SCHEME = 'yellowgreenblue'

# range for year slider
START_YEAR = 2000
END_YEAR = 2018

# fixed dropdown objects and selection options
DROPDOWN_OBJ_AGE = alt.binding_select(options=CATEGORIES_AGE)
DROPDOWN_OBJ_SEX = alt.binding_select(options=CATEGORIES_SEX)

# accompanying selection options
# Single-value selection on the 'Group' field, driven by the age dropdown and
# starting on 'Total'.
SELECT_OBJ_AGE = alt.selection_single(
    fields=['Group'],
    bind=DROPDOWN_OBJ_AGE,
    name='Age',
    init={'Group':'Total'}
)

SELECT_OBJ_SEX = alt.selection_single(
    fields=['Group'],
    bind=DROPDOWN_OBJ_SEX,
    name='Demographics',
    init={'Group':'Total'}
                    facet=alt.Facet('bf_id_s:N',
                                    columns=len(unq_bf),
                                    spacing=-.5),
                    color='np_id_s:N',
                    size='a_33:O',
                ).properties(width=180).interactive()

                p_radio = alt.binding_radio(options=unq_p.tolist())
                p_select = alt.selection_single(fields=['p'],
                                                bind=p_radio,
                                                name="Aspect ratio, p")
                p_color_condition = alt.condition(
                    p_select, alt.Color('p:N', legend=None),
                    alt.value('lightgray'))

                np_dropdown = alt.binding_select(options=unq_np.tolist())
                np_select = alt.selection_single(fields=['np_id_s'],
                                                 bind=np_dropdown,
                                                 name="Nanoparticle")

                radio_p = base.add_selection(p_select).encode(
                    color=p_color_condition, ).add_selection(
                        np_select).transform_filter(np_select).properties(
                            title="Select Aspect Ratio (p) and Nanoparticle")
                st.write(radio_p)

        if newplot_toggle:
            st.subheader('Extra Plots')
            "Pick variables to plot"
            plot_variable_y = st.selectbox('Y Variable', data.columns, 35)
            if 1 == 1:
Beispiel #27
0
# In[13]:

#hide_input
# Shared encodings: confirmed cases (log scale) against days since the 100th
# case, colored per country with an explicit domain/range and no legend.
base = alt.Chart(dff2, width=600).encode(
    x='Days since 100 cases:Q',
    y=alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log')),
    color=alt.Color('Country:N',
                    scale=alt.Scale(domain=color_domain, range=color_range),
                    legend=None),
    tooltip=['Country', 'Date', 'Confirmed Cases', 'Days since 100 cases'])

# Dropdown listing every country that is not a baseline country, sorted
# alphabetically; starts on 'US'.
country_selection = alt.selection_single(
    name='Select',
    fields=['Country'],
    bind=alt.binding_select(
        options=list(sorted(set(countries) - set(baseline_countries)))),
    init={'Country': 'US'})

# Predicate keeping rows whose 'Date' is at or after max_date, expressed in
# epoch milliseconds.
date_filter = alt.datum['Date'] >= int(max_date.timestamp() * 1000)
# base2: baseline countries only; base3: the selected country;
# base4: the selected country restricted by date_filter.
base2 = base.transform_filter(
    alt.FieldOneOfPredicate('Country', baseline_countries))
base3 = base.transform_filter(country_selection)
base4 = base3.transform_filter(date_filter)

# Reference curve: 100 cases growing by a factor of 1.33 per day, for every
# day from 0 to the largest 'Days since 100 cases' in dff2.
max_day = dff2['Days since 100 cases'].max()
ref = pd.DataFrame([[x, 100 * 1.33**x] for x in range(max_day + 1)],
                   columns=['Days since 100 cases', 'Confirmed Cases'])
base_ref = alt.Chart(ref).encode(x='Days since 100 cases:Q',
                                 y='Confirmed Cases:Q')
# Only the last point of the reference curve (day >= max_day).
base_ref_f = base_ref.transform_filter(
    alt.datum['Days since 100 cases'] >= max_day)
def plot_on_PA(bar_dataPA):
    """Draw per-county values as circles over a county map and show it.

    Rows whose ``geo_value`` starts with '42' are kept, each county name is
    geocoded with Nominatim, and the resulting points are layered over the
    county shapes of state id 42. A dropdown selects the date to display.
    The layered chart is written to the Streamlit page; nothing is returned.

    :param bar_dataPA: DataFrame; the code reads the columns ``geo_value``,
        ``time_value`` (string; only its first 10 characters are kept) and
        ``value``.

    NOTE: the pandasql queries below refer to local DataFrames by variable
    name (``bar_dataPA``, ``gb``, ``kal``) - presumably pandasql resolves
    them from the calling scope, so these locals must not be renamed.
    """
    from vega_datasets import data
    counties = alt.topo_feature(data.us_10m.url, 'counties')
    # Keep only rows whose geo_value starts with '42'.
    bar_dataPA = pandasql.sqldf(
        "select * from bar_dataPA where geo_value like '42%'")
    county_data = pandasql.sqldf("select distinct geo_value from bar_dataPA")
    # Maps county name -> geo_value.
    county_details = dict()

    l = county_data["geo_value"].tolist()

    # The key is the string form of fips_to_name(...) with its first two and
    # last two characters removed (presumably the "['" and "']" around a
    # one-element list - confirm against covidcast's return type).
    for i in range(county_data.shape[0]):
        county_details.update({
            str(covidcast.fips_to_name(county_data.iloc[i]))[2:len(
                str(covidcast.fips_to_name(county_data.iloc[i]))) - 2]:
            l[i]
        })

    # Same filter as already applied to bar_dataPA above.
    data6hrs = pandasql.sqldf(
        "select * from bar_dataPA where geo_value like '42%'")

    # County shapes whose id, integer-divided by 1000, equals 42.
    # NOTE(review): lookup='data6hrs' is the name of the local DataFrame;
    # transform_lookup expects the name of a field of the chart's own data,
    # so this join probably matches nothing - confirm and fix separately.
    map_pennsylvania = (alt.Chart(data=counties, ).mark_geoshape(
        stroke='black', strokeWidth=1, fill='lightyellow').transform_calculate(
            state_id="(datum.id / 1000)|0").transform_filter(
                (alt.datum.state_id) == 42).transform_lookup(
                    lookup='data6hrs',
                    from_=alt.LookupData(data6hrs, 'geo_value', ['value']),
                ).properties(width=500, height=400))

    geolocator = Nominatim(user_agent="streamlit_app.py")

    # Filled in place by sw() below.
    lat = []
    lon = []
    coun = []

    # Geocodes "<county name>, PA" for every county and appends the results
    # to the enclosing lat/lon/coun lists. `lsd` is built but never used.
    # NOTE(review): geocode() results are used without a None check, and the
    # st.cache-decorated closure mutates outer lists - confirm this behaves
    # as intended on cached reruns.
    @st.cache
    def sw():
        lsd = {}
        for i in county_details.keys():
            sss = i + ", PA"
            y = geolocator.geocode(sss)
            r = [y.latitude, y.longitude]
            lat.append(y.latitude)
            #st.write(y.latitude)
            lon.append(y.longitude)
            coun.append(county_details[i])
            lsd.update({county_details[i]: r})
        return lat, lon, coun

    [lat, lon, coun] = sw()
    det = {'County': coun, 'Latitude': lat, 'Longitude': lon}

    # One row per county: geo_value (as 'County') with its coordinates.
    gb = pd.DataFrame(det)

    # Keep the first 10 characters of time_value.
    bar_dataPA['time_value'] = bar_dataPA['time_value'].str.slice(0, 10)
    # Attach coordinates to every data row by joining on geo_value.
    kal = pandasql.sqldf(
        "select bar_dataPA.geo_value,bar_dataPA.time_value,bar_dataPA.value,gb.Latitude,gb.Longitude from bar_dataPA,gb where gb.County=bar_dataPA.geo_value"
    )

    # Distinct dates offered in the dropdown.
    dg = pandasql.sqldf("select distinct time_value from kal")

    input_drop = alt.binding_select(options=dg['time_value'].tolist(),
                                    name="Select Date")
    # Selection projected over the color encoding (time_value).
    picked = alt.selection_single(encodings=["color"], bind=input_drop)

    # One circle per county/date, sized by value; rows not matching the
    # selection are filtered out.
    points = alt.Chart(kal).mark_circle().encode(
        longitude='Longitude:Q',
        latitude='Latitude:Q',
        size='value:Q',
        color=alt.condition(picked,
                            'time_value',
                            alt.value('lightgray'),
                            legend=None),
        opacity=alt.condition(picked, alt.value(0.5), alt.value(0)),
        tooltip=['geo_value', 'value'
                 ]).add_selection(picked).transform_filter(picked).properties(
                     width=500, height=400)

    # Render the map with the points layered on top.
    st.write(map_pennsylvania + points)
        'Unsafe water source',
        'Unsafe sanitation',
        'Household air pollution from solid fuels',
        'Air pollution',
        'Outdoor air pollution'
    ],
    var_name='Risk Factor')

# Country Selection
countries = deaths['country'].unique()  # get unique country names
countries.sort()  # sort alphabetically (in place)
selectCountry = alt.selection_single(
    name='Select',  # name the selection 'Select'
    fields=['country'],  # limit selection to the country field
    init={'country': countries[0]},  # use first country entry as initial value
    bind=alt.binding_select(
        options=countries)  # bind to a menu of unique country values
)

# Year selection
# Horizontal brush over a small line chart of the summed value per year for
# the selected country; the brush is used to filter other charts.
brush = alt.selection_interval(encodings=['x'])
years = alt.Chart(deaths).mark_line().add_selection(brush).transform_filter(
    selectCountry).encode(
        alt.X('year:O', title='Year'),
        alt.Y('sum(value)',
              title='Smoking Deaths (all ages)')).properties(height=100)

# Area chart - Smoking deaths by ages
base = alt.Chart(deaths).mark_area().add_selection(
    selectCountry).transform_filter(selectCountry).transform_filter(
        brush).encode(alt.X('year:O', title='Year'),
                      y=alt.Y('value:Q',
Beispiel #30
0
import json

import altair as alt
import geopandas as gpd
import pandas as pd

# Read the shapefile and extract the GeoJSON features, which Altair can use
# directly as inline data. `json` must be imported for json.loads below.
dt = gpd.read_file('harvest.shp')
json_f = dt.to_json()
json_features = json.loads(json_f)
data_geo = alt.Data(values=json_features['features'])

# Plot the shapes, colored by harvest period, with a dropdown that selects
# the scenario to display.
scen_list = ['1', '65', '129', '193', '257', '321', '385', '449']
selectScen = alt.selection_single(
    name='Select',
    # NOTE: per the original author, this fails when the field is given as
    # 'properties.scenario'.
    fields=['scenario'],
    bind=alt.binding_select(options=scen_list))
alt.Chart(data_geo).mark_geoshape(
    fill='lightgray',
    stroke='white',
).encode(
    color=alt.Color('properties.harvest:N', title='Period'),
    tooltip=['properties.AREAAC:Q', 'properties.StandAge:Q'],
).properties(
    width=500,
    projection={'type': 'mercator'},
).add_selection(selectScen).transform_filter(selectScen)