Esempio n. 1
0
def _add_clique_edges(graph, member_lists):
    """Connect, in place, every pair of members that share a group.

    Each member is also connected to itself (a self-loop), so a member that is
    alone in all of its groups still ends up as a node of ``graph``.
    """
    for members in member_lists:
        for start, first in enumerate(members):
            for second in members[start:]:
                graph.add_edge(first, second)


def create_graph():
    """Build two linked co-occurrence network charts and return them as JSON.

    The left chart links authors that share a paper; the right chart links
    papers that share an author. Both charts use the same single-value
    selection on ``ID_author``.

    Returns
    -------
    str
        Result of ``to_json()`` on the horizontally concatenated Altair chart.
    """
    # Lift Altair's default limit on the number of rows that can be plotted.
    alt.data_transformers.disable_max_rows()

    # Query the database.
    autores = queryDB('author', ['ID_author', 'author'])
    artigos = queryDB('paper', ['ID_paper', 'paper'])
    author_paper = queryDB('author_paper', ['ID_paper', 'ID_author'])

    # NOTE(review): the IDs in author_paper are not cast to str, unlike the two
    # lookups below -- confirm the merges further down see matching dtypes.
    autores['ID_author'] = autores['ID_author'].astype(str)
    artigos['ID_paper'] = artigos['ID_paper'].astype(str)

    ### Render the charts

    ## Graph 1 - authors
    print('Preparando grafo dos autores...')
    graph = nx.Graph()

    # One list of authors per paper; co-authors become connected nodes.
    _add_clique_edges(
        graph, author_paper.groupby('ID_paper')['ID_author'].apply(list))

    # Force-directed layout.
    # NOTE(review): networkx documents `weight` as an edge-attribute name or
    # None; 0.1 is passed here -- confirm the intent.
    pos = nx.spring_layout(graph, k=0.2, iterations=50, weight=0.1, center=(0.5, 0.5))

    # Collect nodes.
    nodes = to_pandas_nodes(graph, pos)
    nodes.reset_index(inplace=True)
    nodes.rename(columns={'index': 'ID_author'}, inplace=True)
    nodes = pd.merge(nodes, autores, on='ID_author')  # bring in author names
    nodes = pd.merge(nodes, author_paper, on='ID_author')  # bring in ID_paper

    # Collect edges.
    edges = to_pandas_edges(graph, pos)

    # Chart 1
    print('Criando interatividade com o Altair (autores) ...')

    selector = alt.selection_single(empty='all', fields=['ID_author'])  # shared selector

    points = alt.Chart(nodes).add_selection(selector).mark_point(filled=True, size=90).encode(
                alt.X('x', axis=alt.Axis(title='')),
                alt.Y('y', axis=alt.Axis(title='')),
                tooltip='author',
                opacity=alt.condition(selector, alt.value(0.95), alt.value(0.4), legend=None),
                color=alt.condition(selector, 'ID_author', alt.value('lightgray'), legend=None)
            ).properties(selection=selector).transform_filter(selector)

    # Grey background layer so filtered-out nodes stay visible behind the
    # selected ones.
    bk = alt.Chart(nodes).mark_point(color='lightgray', filled=True, size=90).encode(
                alt.X('x', axis=alt.Axis(title='')),
                alt.Y('y', axis=alt.Axis(title='')),
                tooltip='author',
                opacity=alt.value(0.4),
    )

    lines = alt.Chart(edges).mark_line(color='salmon').encode(
                alt.X('x', axis=alt.Axis(title='')),
                alt.Y('y', axis=alt.Axis(title='')),
                detail='edge',
                opacity=alt.value(0.15)
            )

    chart = alt.LayerChart(layer=(lines, bk + points)).properties(
                height=350,
                width=450
                ).interactive()

    ## Graph 2 - papers
    print('Preparando grafo dos artigos...')

    graph1 = nx.Graph()

    # One list of papers per author; papers sharing an author become connected.
    _add_clique_edges(
        graph1, author_paper.groupby('ID_author')['ID_paper'].apply(list))

    # Force-directed layout (same settings as graph 1).
    pos1 = nx.spring_layout(graph1, k=0.2, iterations=50, weight=0.1, center=(0.5, 0.5))

    # Collect nodes.
    nodes1 = to_pandas_nodes(graph1, pos1)
    nodes1.reset_index(inplace=True)
    nodes1.rename(columns={'index': 'ID_paper'}, inplace=True)
    nodes1 = pd.merge(nodes1, artigos, on='ID_paper')  # bring in paper names
    nodes1 = pd.merge(nodes1, author_paper, on='ID_paper')  # bring in ID_author

    # Collect edges.
    edges1 = to_pandas_edges(graph1, pos1)

    # Chart 2
    print('Criando interatividade com o Altair (artigos)...')

    points1 = alt.Chart(nodes1).add_selection(selector).mark_point(filled=True, size=90).encode(
                alt.X('x', axis=alt.Axis(title='')),
                alt.Y('y', axis=alt.Axis(title='')),
                tooltip='paper',
                opacity=alt.condition(selector, alt.value(0.95), alt.value(0.4), legend=None),
                color=alt.condition(selector, 'ID_author', alt.value('lightgray'), legend=None)
    ).transform_filter(selector)

    # Grey background layer, as in chart 1.
    bk1 = alt.Chart(nodes1).mark_point(color='lightgray', filled=True, size=90).encode(
                alt.X('x', axis=alt.Axis(title='')),
                alt.Y('y', axis=alt.Axis(title='')),
                tooltip='paper',
                opacity=alt.value(0.4),
    )

    lines1 = alt.Chart(edges1).mark_line(color='lightblue').encode(
                alt.X('x', axis=alt.Axis(title='')),
                alt.Y('y', axis=alt.Axis(title='')),
                detail='edge',
                opacity=alt.value(0.2)
    )

    chart1 = alt.LayerChart(layer=(lines1, bk1 + points1)).properties(
                height=350,
                width=450
                ).interactive()

    ### Concatenate charts 1 and 2 horizontally, hiding all axis decoration.
    horiz_chart = alt.hconcat(chart, chart1).configure_axis(
                ticks=False,
                grid=False,
                domain=False,
                labels=False).configure_view(
                strokeWidth=0
            )

    return horiz_chart.to_json()
Esempio n. 2
0
def get_altair_chart(df,
                     x_col,
                     y_cols='ALL',
                     cat_col=None,
                     sel_cols=None,
                     sliders=None,
                     ns_opacity=1.0,
                     chart_title='',
                     scheme='lightmulti',
                     mark_type='line',
                     sort_values=False,
                     y_index=-1,
                     stack=None):
    """Build an interactive Altair chart from a wide-form DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; every column is shown in the tooltip.
    x_col : str
        Column (or Altair shorthand) used for the x axis.
    y_cols : 'ALL', str or list of str
        Columns offered for the y axis. ``'ALL'`` takes every column after the
        first one, additionally skipping one column when ``cat_col`` is given
        and ``len(sel_cols)`` columns when ``sel_cols`` is given. With more
        than one column a dropdown picks the column to display.
    cat_col : str, optional
        Column used for colour. When given, a legend-bound multi selection is
        added and a bar chart is concatenated that plots the column named
        ``'total'`` of ``df`` per category.
    sel_cols : list of str, optional
        Columns that each get a dropdown filter.
    sliders : dict, optional
        Maps ``'min'`` and/or ``'max'`` to either a column name or a list
        ``[column, initial_value]``. Other keys are reported and ignored, and
        columns that are not in ``df`` are skipped.
    ns_opacity : float
        Opacity of the categories that are not selected in the legend.
    chart_title : str
        Accepted for interface compatibility; not used by this function.
    scheme : str
        Vega colour scheme name used when ``cat_col`` is given.
    mark_type : {'line', 'bar', 'area'}
        Mark to draw; anything other than 'bar' or 'area' draws a line with
        points.
    sort_values : bool
        When true, x is encoded as nominal and sorted by y.
    y_index : int
        Index into ``y_cols`` of the column selected initially.
    stack : {'normalize', 'sum', None}
        y encoding variant used when several y columns are offered.

    Returns
    -------
    altair chart object
    """
    if mark_type == 'bar':
        chart = alt.Chart(df).mark_bar()
    elif mark_type == 'area':
        chart = alt.Chart(df).mark_area()
    else:
        chart = alt.Chart(df).mark_line(point=True, strokeWidth=2)

    x_col_ed = alt.X(f'{x_col}:N', sort='y') if sort_values else x_col

    chart = chart.encode(
        x=x_col_ed,
        tooltip=list(df.columns),
    ).properties(width=600, height=400)

    if sliders:
        # key -> (comparison used in the filter, function giving the default
        # initial slider value)
        slider_rules = {'min': ('>=', min), 'max': ('<=', max)}
        for key, value in sliders.items():
            if key not in slider_rules:
                print(
                    f"Atenção: a chave '{key}' não é válida para a variável sliders. Usar apenas 'min' ou 'max'"
                )
                continue
            comparison, extreme = slider_rules[key]

            if isinstance(value, list):
                slider_col = value[0]
                explicit_init = value[1:2]
            else:
                slider_col = value
                explicit_init = []

            # Check the column before touching df[slider_col]; otherwise a
            # missing column raises instead of being skipped.
            if slider_col not in df.columns:
                continue

            column = df[slider_col]
            init_value = explicit_init[0] if explicit_init else extreme(column)

            slider = alt.binding_range(min=min(column),
                                       max=max(column),
                                       step=1)
            slider_selector = alt.selection_single(
                bind=slider,
                name=key,
                fields=[slider_col],
                init={slider_col: init_value})
            chart = chart.add_selection(slider_selector).transform_filter(
                f'datum.{slider_col} {comparison} {key}.{slider_col}[0]')

    if isinstance(y_cols, str):
        if y_cols == 'ALL':
            index = 1
            if cat_col:
                index += 1
            if sel_cols:
                index += len(sel_cols)
            y_cols = df.columns[index:].to_list()
        else:
            # A single column name: do not treat it as a sequence of characters.
            y_cols = [y_cols]

    if len(y_cols) > 1:
        y_col_name = 'Y_col'
        select_box = alt.binding_select(options=y_cols, name=y_col_name)
        sel = alt.selection_single(fields=[y_col_name],
                                   bind=select_box,
                                   init={y_col_name: y_cols[y_index]})

        # Fold the candidate columns into long form and keep only the one
        # chosen in the dropdown.
        chart = chart.transform_fold(
            y_cols, as_=[y_col_name, 'Valor']).transform_filter(sel)
        if stack == 'normalize':
            chart = chart.encode(y=alt.Y("Valor:Q", stack="normalize"))
        elif stack == 'sum':
            chart = chart.encode(y='sum(Valor):Q')
        else:
            chart = chart.encode(y='Valor:Q')
        chart = chart.add_selection(sel)
    else:
        chart = chart.encode(y=y_cols[0])

    # TODO: add a range (brush) filter below the main chart.

    if cat_col:
        chart = chart.encode(
            color=alt.Color(cat_col, scale=alt.Scale(scheme=scheme)))

        sel_base = alt.selection_multi(empty='all',
                                       fields=[cat_col],
                                       bind='legend')

        chart = chart.add_selection(sel_base).encode(opacity=alt.condition(
            sel_base, alt.value(1.0), alt.value(ns_opacity)))

        bar = alt.Chart(df).mark_bar().encode(
            y=alt.Y(f'{cat_col}:O', title=None),
            x='total',
            color=alt.condition(
                sel_base,
                alt.Color(f'{cat_col}:N', scale=alt.Scale(scheme=scheme)),
                alt.ColorValue("lightgrey"),
                legend=None)).add_selection(sel_base).properties(width=100,
                                                                 height=400)

        chart = alt.concat(chart, bar)

    if sel_cols:
        # Sorted, de-duplicated string options for each dropdown.
        options_lists = [
            df[cat].dropna().astype(str).sort_values().unique().tolist()
            for cat in sel_cols
        ]

        selection = alt.selection_single(
            name='Selecione',
            fields=sel_cols,
            init={
                cat: options[0]
                for cat, options in zip(sel_cols, options_lists)
            },
            bind={
                cat: alt.binding_select(options=options)
                for cat, options in zip(sel_cols, options_lists)
            })

        chart = chart.add_selection(selection).transform_filter(selection)

    return chart
Esempio n. 3
0
# Build one random-walk time series per object: n_times rows of normal noise,
# cumulatively summed down the time axis, one column per id in `locations`.
# (n_times, n_objects and locations are defined before this fragment.)
timeseries = pd.DataFrame(np.random.randn(n_times, n_objects).cumsum(0),
                          columns=locations['id'],
                          index=pd.RangeIndex(0, n_times, name='time'))

# Melt the wide-form timeseries into a long-form view
timeseries = timeseries.reset_index().melt('time')

# Merge the (x, y) metadata into the long-form view
timeseries['id'] = timeseries['id'].astype(int)  # make merge not complain
data = pd.merge(timeseries, locations, on='id')

# Data is prepared, now make a chart

# Single-value selection keyed on 'id'; with empty='all' nothing selected
# means everything counts as selected.
selector = alt.selection_single(empty='all', fields=['id'])

# Shared base chart: charts derived from it carry the same data, size and
# selection.
base = alt.Chart(data).properties(
    width=250,
    height=250
).add_selection(selector)

# One point per id at its mean (x, y); points outside the selection are
# drawn in light gray.
points = base.mark_point(filled=True, size=200).encode(
    x='mean(x)',
    y='mean(y)',
    color=alt.condition(selector, 'id:O', alt.value('lightgray'), legend=None),
).interactive()

timeseries = base.mark_line().encode(
    x='time',
    y=alt.Y('value', scale=alt.Scale(domain=(-15, 15))),
def get_histograms(df_scores_in, selected_score, selected_score_desc,
                   selected_score_axis):
    """Build a per-day score histogram linked to a median timeline.

    The top chart is a histogram of ``selected_score`` over all names for the
    hovered date; the bottom chart shows the daily median over time and
    carries the date selection.

    Parameters
    ----------
    df_scores_in : pandas.DataFrame
        Must contain the columns ``"date"``, ``"name"`` and
        ``selected_score``. The frame is not modified.
    selected_score : str
        Name of the score column to plot.
    selected_score_desc : str
        Text used as the histogram title.
    selected_score_axis : str
        Axis title for the score.

    Returns
    -------
    Vertically concatenated Altair chart (histogram above timeline).
    """
    # Prepare the dataframe: keep only the selected score and drop NaN rows.
    df_scores = df_scores_in[["date", "name", selected_score]].copy()
    df_scores = df_scores.dropna(axis=0, how="any")

    # Daily average in case of multiple datapoints per name and day.
    df_scores = df_scores.groupby(["name", "date"]).mean().reset_index()
    df_scores["date"] = pd.to_datetime(df_scores["date"])
    df_scores["date_str"] = df_scores["date"].dt.strftime("%Y-%m-%d")

    # Integer id per distinct date, in chronological order; the selection
    # below is keyed on it.
    dates = sorted(set(df_scores["date_str"]))
    date2idx = {date: idx for idx, date in enumerate(dates)}
    df_scores["date_id"] = df_scores["date_str"].map(date2idx)

    # Median dataframe. numeric_only=True skips the string columns
    # ("name", "date_str"), which otherwise raise on pandas >= 2.0; "date_id"
    # is constant within a date, so its median is the id itself.
    df_median = df_scores.groupby("date").median(
        numeric_only=True).reset_index()

    # Largest score, rounded up to the next multiple of 10.
    maxval = max(df_scores[selected_score])
    maxval = 10 * np.ceil(maxval / 10)

    # Plot title.
    title = {
        "text":
        ["", "{}".format(selected_score_desc)
         ],  # use two lines as hack so the umlauts at Ö are not cut off
        "subtitle": "EveryoneCounts.de",
        "color": "black",
        "subtitleColor": "lightgray",
        "subtitleFontSize": 12,
        "subtitleFontWeight": "normal",
        "fontSize": 15,
        "lineHeight": 5,
    }

    # Special treatment for webcam score b/c it uses absolute values.
    if selected_score == "webcam_score":
        scale = alt.Scale(domain=(1.05 * maxval, 0), scheme="blues")
        hist_bin = alt.Bin(extent=[0, maxval], step=maxval / 20)
    elif selected_score == "tomtom_score":
        scale = alt.Scale(domain=(200, 0), scheme="redyellowgreen")
        upper = max(50, maxval)
        hist_bin = alt.Bin(extent=[0, upper], step=upper / 20)
    else:
        scale = alt.Scale(domain=(200, 0), scheme="redyellowgreen")
        upper = max(200, maxval)
        hist_bin = alt.Bin(extent=[0, upper], step=upper / 20)

    # Hover selection on the date id, starting on the second-to-last date
    # (clamped so a single-date dataset starts on a valid id).
    selector = alt.selection_single(empty="none",
                                    fields=['date_id'],
                                    on='mouseover',
                                    nearest=True,
                                    init={'date_id': max(len(dates) - 2, 0)})

    #--- Altair charts from here on ---#
    # Histogram chart
    chart = alt.Chart(df_scores).mark_bar(
        #clip=True
    ).encode(
        alt.X(selected_score + ":Q", title=selected_score_axis, bin=hist_bin),
        alt.Y(
            'count():Q',
            title="Anzahl Landkreise",
        ),
        color=alt.Color(
            selected_score + ":Q",
            scale=scale,
            legend=None,
        ),
    ).transform_filter(selector).properties(width='container',
                                            height=300,
                                            title=title)

    # Rule at 100%
    rule100 = alt.Chart(df_scores).mark_rule(
        color='lightgray', size=3).encode(x="a:Q").transform_calculate(a="100")

    # Rule for the median
    rulemedian = alt.Chart(df_median).mark_rule(color='#F63366').encode(
        x=selected_score + ":Q",
        size=alt.value(3),
        tooltip=[alt.Tooltip(selected_score + ':Q',
                             title="Median")]).transform_filter(selector)

    # Median plot
    median_points = alt.Chart(df_median).mark_point(
        filled=True,
        size=150,
        color="gray",
    ).encode(
        alt.X("date:T", axis=alt.Axis(title='Datum', format=("%d %b"))),
        alt.Y(selected_score + ':Q', title="Median " +
              selected_score_axis)).properties(width='container',
                                               height=180,
                                               title={
                                                   "text": "Wähle ein Datum:",
                                                   "color": "black",
                                                   "fontWeight": "normal",
                                                   "fontSize": 12
                                               })
    # Fully transparent points that carry the selection and the tooltip.
    selectorchart = alt.Chart(df_median).mark_point().encode(
        x='date:T',
        opacity=alt.value(0),
        tooltip=[
            alt.Tooltip("date:T", title="Datum", format=("%A %d %B")),
            alt.Tooltip(selected_score + ":Q", title="Median")
        ]).add_selection(selector)
    median_line = alt.Chart(df_median).mark_line(
        point=False, color="gray", size=1).encode(
            alt.X("date:T"),
            alt.Y(selected_score + ':Q'),
        ).properties(width='container', )
    # Highlight and cross-hair rules for the selected date.
    median_selected = alt.Chart(df_median).mark_point(
        filled=True, size=400, color="#F63366", opacity=0.7).encode(
            alt.X("date:T"),
            alt.Y(selected_score + ':Q'),
        ).properties(width='container', ).transform_filter(selector)
    median_selected_rule = alt.Chart(df_median).mark_rule(
        point=False, color="gray", size=1,
        opacity=1).encode(alt.X("date:T"), ).properties(
            width='container', ).transform_filter(selector)
    median_selected_rule2 = alt.Chart(df_median).mark_rule(
        point=False, color="#F63366", size=1,
        opacity=1).encode(alt.Y(selected_score + ":Q"), ).properties(
            width='container', ).transform_filter(selector)

    # The 100% reference rule is omitted for these three scores.
    if selected_score in ["airquality_score", "webcam_score", "tomtom_score"]:
        chart_top = chart + rulemedian
    else:
        chart_top = rule100 + chart + rulemedian
    chart_bottom = median_selected_rule + median_line + median_points + median_selected + median_selected_rule2 + selectorchart
    return chart_top & chart_bottom
Esempio n. 5
0
plt.title("Wealth Distribution by Income")

# %%
# alternative way to plot equilibrium

import altair as alt
# `eq` is defined earlier in the script; as_df() is expected to provide the
# columns 'a', 'μ' and 'i_m' used below -- TODO confirm.
df = eq.as_df()
# One line of μ against a per value of i_m.
spec = alt.Chart(df).mark_line().encode(x='a', y='μ', color='i_m:N')
spec

# %%
# alternative way to plot equilibrium (with some interactivity)
# TODO: function to generate it automatically.

import altair as alt
# Hover selection that picks the nearest mark.
single = alt.selection_single(on='mouseover', nearest=True)
df = eq.as_df()
ch = alt.Chart(df)
# Operator precedence: `+` (layer) binds tighter than `&` (vertical concat),
# so the result is
#   (distribution lines + black sum(μ) line) & decision-rule lines.
# Both line charts colour by i_m inside the selection and light gray outside.
spec = ch.properties(title='Distribution', height=100).mark_line().encode(
    x='a', y='μ', color=alt.condition(single, 'i_m:N', alt.value('lightgray'))
).add_selection(single) + ch.mark_line(color='black').encode(
    x='a', y='sum(μ)') & ch.properties(
        title='Decision Rule', height=100).mark_line().encode(
            x='a',
            y='i',
            color=alt.condition(single, 'i_m:N',
                                alt.value('lightgray'))).add_selection(single)

# %%

# Resulting object can be saved to a file. (try to open this file in jupyterlab)
Esempio n. 6
0
# Derive a custom theme from the currently active one, forcing the colour
# encoding's scale to the 'bluepurple' scheme, then register and enable it.
my_theme = alt.themes.get()()  # Get current theme as dict.
my_theme.setdefault('encoding', {}).setdefault('color', {})['scale'] = {
    'scheme': 'bluepurple',
}
alt.themes.register('my_theme', lambda: my_theme)
alt.themes.enable('my_theme')

# In[90]:

# Range of marg_imp in df_comp (defined earlier in the notebook).
start = df_comp.marg_imp.min()
end = df_comp.marg_imp.max()

# In[91]:

# Hover selection on the nearest mark, keyed on base_seg_id; with empty='all'
# nothing selected means everything counts as selected.
selector = alt.selection_single(on='mouseover',
                                nearest=True,
                                empty='all',
                                fields=['base_seg_id'])

# In[92]:

base = alt.Chart(df_comp).mark_point(filled=True).encode(
    alt.X('Coef_value'),
    alt.Y('cr'),
    size=alt.Size('impressions', scale=alt.Scale(domain=[100, 100000])),
    color=alt.Color('marg_imp',
                    scale=alt.Scale(scheme='bluepurple', domain=[start, end])),
    tooltip=[
        alt.Tooltip('base_seg_id'),
        alt.Tooltip('Coef_value'),
        alt.Tooltip('marg_imp')
    ],
Esempio n. 7
0
def tl_summary(df,
               values,
               time,
               bars,
               col,
               text,
               title='',
               bars_w=810,
               bars_h=200,
               bars_stack='zero',
               timeline_w=450,
               timeline_h=200,
               slope_avg='Average',
               slope_w=300,
               slope_h=200,
               slope_y_pos=10,
               palette='tableau10'):
    '''
    Plots 3 charts: bars, timeline and slopegraph

    The bars show the latest time step; hovering a bar filters the timeline
    (and the slopegraph) to that bar. The per-``col``/``time`` averages are
    appended to the data under the ``slope_avg`` label and drawn as a dashed
    reference line on the timeline.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is copied, not modified.
    values : str
        Name of the column used for values.
    time : str
        Name of the column used for time values.
    bars : str
        Name of the column used to plot as X-axis on the bars.
    col : str
        Name of the column used for colors.
    text : str
        Passed as the title of the value axes and forwarded to the
        slopegraph builder.
    title : str
        Title of the plot.
    bars_w : int
        Bars plot width.
    bars_h : int
        Bars plot height.
    bars_stack : str
        Stack mode passed to the Y encoding of the bars.
    timeline_w : int
        Timeline plot width.
    timeline_h : int
        Timeline plot height.
    slope_avg : str
        Title for the avg measures on slopegraph.
    slope_w : int
        Slopegraph plot width.
    slope_h : int
        Slopegraph plot height.
    slope_y_pos : int
        Slopegraph titles position.
    palette : str
        Check https://vega.github.io/vega/docs/schemes/#reference

    Returns
    -------
        altair.Chart
    '''
    df = df.copy()
    df['slope_x'] = 'measures'
    # numeric_only=True: the frame holds string columns (``bars``, 'slope_x')
    # that cannot be averaged and raise on pandas >= 2.0.
    df_avg = df.groupby([col, time]).mean(numeric_only=True).reset_index()
    df_avg[bars] = slope_avg
    df_avg['slope_x'] = 'averages'
    df = pd.concat([df, df_avg], ignore_index=True, sort=True)
    df[values] = df[values].round(2)
    df['slope_text'] = df[values].astype(str) + ' ' + df[col]

    # Order the bars by their total at the latest time step, largest first,
    # leaving out the synthetic "average" bar.
    max_time = df[time].max()
    latest_totals = (df[df[time] == max_time].groupby(bars)[values].sum()
                     .sort_values(ascending=False))
    orders = [label for label in latest_totals.index.tolist()
              if label != slope_avg]

    filter_in = alt.selection_single(fields=[bars],
                                     on='mouseover',
                                     empty='none')
    base = alt.Chart(df)
    barsplot = base.mark_bar().encode(
        alt.X(f'{bars}:N', title=None, scale=alt.Scale(domain=orders)),
        alt.Y(f'{values}:Q', title=text, stack=bars_stack),
        alt.Color(col,
                  legend=alt.Legend(orient='bottom-left', title=None),
                  scale=alt.Scale(scheme=palette)),
        # Opacity values are numbers, not strings.
        opacity=alt.condition(
            filter_in, alt.value(1), alt.value(0.6))).transform_filter({
                'and':
                [f'datum.{time} == {max_time}', 'datum.slope_x == "measures"']
            }).properties(title=title,
                          selection=filter_in,
                          width=bars_w,
                          height=bars_h)

    # The y domain is fixed to the full value range so the scale does not
    # change when the hover filter changes.
    timeline_base = base.mark_line().encode(
        alt.X(f'{time}:O'),
        alt.Y(f'{values}:Q',
              title=text,
              scale=alt.Scale(domain=[df[values].min(), df[values].max()])),
        alt.Color(col, legend=None)).properties(width=timeline_w,
                                                height=timeline_h)

    # Lines for the hovered bar, with a circle on every data point.
    timeline = timeline_base.transform_filter(filter_in)
    timeline += timeline.mark_circle(size=25)

    # Dashed reference lines for the averages.
    timeline_avg = timeline_base.mark_line(
        strokeDash=[4, 2],
        opacity=0.45).transform_filter(f'datum.{bars} == {slope_avg!r}')

    slope = _build_slope(df, values, time, bars, col, text, filter_in,
                         slope_y_pos, slope_w, slope_h)
    chart = barsplot & ((timeline_avg + timeline) | slope)

    return chart
Esempio n. 8
0
def main():
    """Render the Streamlit page exploring David Bowie's acoustic features.

    The page loads data from ``./billboard-200.db``, shows the introductory
    text, lets the user pick a feature, and draws a scatter chart of songs per
    album (filtered by issue year) and a bar chart comparing album averages
    with decade averages.
    """
    # Connection config
    db_conn = get_connection('./billboard-200.db')
    all_data = get_data(db_conn, 'acoustic_features')

    # Paragraph: intro
    st.title(
        "Explore the acoustic and meta features of albums and songs by David Bowie"
    )
    st.header("Introduction")
    st.write(
        "This 3-week project is for the Interactive Data Science - (Spring 2021) course under Adam Perer and Hendrik Strobelt, created by Vivian Young and Carol Ho. After navigating the Acoustic and meta-features of albums and songs data from Spotify, with 340,000 rows containing acoustic data for tracks from Billboard 200 albums from 1/5/1963 to 1/19/2019. We're intrigued by the feature label on each piece - the danceability, the energy, the beats, and the valence. To better explore the feature label's trend and distribution, we decided on David Bowie's work. They are well-known for their diverse music styles, and their creation has been influential since the 60s till now."
    )
    st.write(
        "The project consists of three parts; the first is the charts that allow the user to read each song's features and how albums distribute these features. The second part is a comparison of the albums' features with the overall music features by decade. With the holistic understanding of Bowie's work, the last part is an interactive search function that allows the users to search for their music by features."
    )
    st.write(
        " The analysis results aim to provide a different view of interpreting the albums and songs. Moreover, besides searching for particular songs or albums, how might we help users find the pieces that better fit the context and mood?"
    )
    st.write(
        "The original dataset: https://components.one/datasets/billboard-200/")

    # Checkbox: original dataset
    agree = st.checkbox('show original data.(David Bowie)')
    if agree:
        st.text(
            'original data set - accoustic features of songs of David Bowie from 1969-2018'
        )
        st.dataframe(all_data)
        # NOTE(review): this SQL text looks like a placeholder unrelated to
        # the data shown -- confirm.
        st.markdown(
            "```SELECT * FROM EMP JOIN DEPT ON EMP.DEPTNO = DEPT.DEPTNO;```")

    # Paragraph: intro to features
    st.header("Intro to Features")
    st.write(
        "Spotify labeled the songs with features to maximize the recommendation result. We picked the below features that are more relevant to the use-case of a music listener."
    )
    st.markdown(
        ":point_right:Danceability: Describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity."
    )
    st.markdown(
        ":point_right:Energy: Represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy. For example, death metal has high energy, while a Bach prelude scores low on the scale."
    )
    st.markdown(
        ":point_right:Instrumentalness: Predicts whether a track contains no vocals. “Ooh” and “aah” sounds are treated as instrumental in this context. Rap or spoken word tracks are clearly “vocal”."
    )
    st.markdown(
        ":point_right:Tempo: The overall estimated tempo of a track in beats per minute (BPM). In musical terminology, tempo is the speed or pace of a given piece, and derives directly from the average beat duration."
    )
    st.markdown(
        ":point_right:Valence: Describes the musical positiveness conveyed by a track. Tracks with high valence sound more positive (e.g. happy, cheerful, euphoric), while tracks with low valence sound more negative (e.g. sad, depressed, angry)."
    )

    # Select: feature
    option = st.selectbox('What feature are you interested in dicover?',
                          ('danceability', 'energy', 'instrumentalness'))

    # Feature-dependent queries
    all_dacade_avg = get_all_decade_avg(db_conn, option)
    bowie_data = get_bowie_data(db_conn, option)

    # Paragraph: chart 1
    st.header("Scatter Chart-David Bowie's albums")
    st.subheader(
        ":musical_note: How the selected feature shapes the distribution of songs by album?"
    )
    st.markdown(
        "Instruction: The slider allows you to zoom in albums by issued year, and clicking on the valence allows you to see the distribution of songs, from high valence(Happy) to low valence(Sad)."
    )

    # Slider: year range. Both ends are inclusive; an exclusive lower bound
    # would make albums from the slider's minimum year impossible to show.
    start_year = st.slider("Show me the albums within these issued year!",
                           1969, 2018, (1969, 2000))
    issue_year = bowie_data['date'].dt.year
    albums_in_range = bowie_data[issue_year.between(start_year[0],
                                                    start_year[1])]
    select_scatter = alt.selection_multi(fields=['valence'], bind='legend')

    # Colour palette for the scatter chart
    range_ = [
        '#D64550', '#EE8189', '#FC8B4A', '#F7B801', '#B9F18C', '#71DA1B',
        '#439A86', '#00BECC', '#7678ED', '#3D348B'
    ]

    # Chart: scatter
    scatter = alt.Chart(albums_in_range).mark_circle().encode(
        alt.X('album',
              scale=alt.Scale(zero=True),
              sort={
                  "field": "date",
                  "order": "ascending"
              },
              title="Albums order by Issued Date"),
        alt.Y(option, scale=alt.Scale(zero=True), title=option),
        alt.Color('valence:O',
                  sort='descending',
                  scale=alt.Scale(range=range_)),
        tooltip=['album', 'song', 'date', option, 'valence', 'tempo'],
        size=alt.Size('tempo',
                      scale=alt.Scale(domain=[0, 100], range=[1, 200]),
                      legend=alt.Legend(values=[50, 100, 150, 200])),
        opacity=alt.condition(select_scatter, alt.value(1),
                              alt.value(0.1))).properties(
                                  width=1200,
                                  height=900,
                              ).add_selection(select_scatter)
    st.write(scatter)

    # Checkbox: Bowie's albums
    agree = st.checkbox('show original dataset.', key='album')
    if agree:
        st.text(
            'original data set - accoustic features of songs from 1969-2018')
        st.dataframe(bowie_data)

    # Paragraph: chart 2
    st.header("Bar Chart-David Bowie's albums with average features")
    st.subheader(
        ":musical_note: How is the feature of the album different from the songs in that decade?"
    )
    st.markdown(
        "Instruction: Click on the checkbox to compare with the songs at that decade. Click on the bar for highlight."
    )

    # Chart: bars per album
    selector = alt.selection_single(empty='all', fields=['album'])

    bar_album = alt.Chart(all_dacade_avg).mark_bar(
        color='#1FC3AA', opacity=0.5, thickness=10).encode(
            alt.X('album',
                  sort={
                      "field": "date",
                      "order": "ascending"
                  },
                  title="(B)Albums order by Issued Date"),
            alt.Y('avg_feature',
                  scale=alt.Scale(zero=False),
                  title='(B)Average_' + option + '_by_Albums'),
            tooltip=['album', 'date', 'avg_feature'],
            color=alt.condition(selector,
                                'album:O',
                                alt.value('lightgray'),
                                legend=None),
        ).properties(
            width=1200,
            height=600,
        ).add_selection(selector)

    # Chart: bars per decade (no selection attached)
    bar_decade = alt.Chart(all_dacade_avg).mark_bar(
        color='#8624F5', opacity=0.5, thickness=10).encode(
            alt.X('album',
                  sort={
                      "field": "date",
                      "order": "ascending"
                  },
                  title="(V)The correspondent decade of albums Issued Date"),
            alt.Y('trend_feature',
                  scale=alt.Scale(zero=False),
                  title='(V)Average_' + option + '_by_Decade'),
        ).properties(
            width=1200,
            height=600,
        )

    # Chart: numbers drawn on the decade bars
    text_decade = bar_decade.mark_text(align='center', color='white',
                                       dy=80).encode(text='avg_feature:N')

    # Checkbox: chart comparison
    agree = st.checkbox('Compare the\n' + option +
                        '\nof the albums with the average\n' + option +
                        '\nof songs by decede.')
    if agree:
        st.write(bar_decade + bar_album + text_decade)
    else:
        st.write(bar_album)

    agree = st.checkbox('show original dataset.', key='decade')
    if agree:
        st.text(
            'David Bowie album average feature and all songs averge feature by decade'
        )
        st.dataframe(all_dacade_avg)

    # Paragraph: search
    st.header("Search with features!")
    st.subheader(":musical_note: What are the songs that fit my mood?")
    st.markdown("Instruction: blablabla.")
Esempio n. 9
0
def slope_comparison(df,
                     values,
                     bars,
                     col,
                     text,
                     bars_w=200,
                     bars_h=515,
                     slope_avg='Average',
                     slope_w=350,
                     slope_h=240,
                     slope_y_pos=10,
                     slope_y_title=None):
    '''
    Plots a mean/CI chart next to two stacked slopegraphs for comparison.

    The left panel shows, for every `bars` category, the mean of `values`
    as a point layered over a confidence-interval rule. The right panel
    stacks two charts returned by `_build_slope`: the upper one is driven
    by a mouse-over selection made on the left panel, the lower one by a
    click selection.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; it is copied, the caller's frame is not modified.
    values : str
        Name of the column used for values.
    bars : str
        Name of the column plotted on the Y-axis of the left panel and
        used as the selection field.
    col : str
        Name of the column used for colors. Averages are computed per
        distinct value of this column.
    text : str
        Name of the column used to show text on slopegraph.
    bars_w : int
        Left panel width.
    bars_h : int
        Left panel height.
    slope_avg : str
        Label written into the `bars` column of the per-`col` average rows.
    slope_w : int
        Slopegraph plot width.
    slope_h : int
        Slopegraph plot height.
    slope_y_pos : int
        Slopegraph titles position.
    slope_y_title : str
        Title to use on slope y axis (passed to the mouse-over slopegraph
        only).

    Returns
    -------
    altair chart
        `(bars_ci + barsplot) | (slope_mouse & slope_click)`.
    '''
    df = df.copy()
    df['slope_x'] = 'measures'

    # numeric_only=True is required on pandas >= 2.0: the frame now holds the
    # string column 'slope_x' (and usually `bars`/`text`), and averaging those
    # raises TypeError. Older pandas silently dropped such columns, so the
    # result is the same there.
    df_avg = df.groupby(col).mean(numeric_only=True).reset_index()
    df_avg[bars] = slope_avg
    df_avg['slope_x'] = 'averages'

    # Stack the raw measures and the per-`col` averages; 'slope_x' tells the
    # two kinds of rows apart.
    df = pd.concat([df, df_avg], ignore_index=True, sort=True)
    df[values] = df[values].round(2)
    df['slope_text'] = df[values].astype(str) + ' ' + df[col]

    mouse = alt.selection_single(on='mouseover',
                                 fields=[bars],
                                 empty='none',
                                 nearest=True)
    click = alt.selection_single(fields=[bars], empty='none')

    base = alt.Chart(df)

    # Mean of `values` per `bars` category; hovered points are drawn larger.
    barsplot = base.mark_point(filled=True).encode(
        alt.X(f'mean({values})',
              scale=alt.Scale(zero=False),
              axis=alt.Axis(title=None)),
        alt.Y(f'{bars}:N', axis=alt.Axis(title=None)),
        size=alt.condition(mouse, alt.value(400), alt.value(
            200))).transform_filter('datum.slope_x == "measures"').properties(
                selection=mouse, width=bars_w, height=bars_h)

    # Second layer of the same points reacting to clicks (size and colour).
    barsplot += barsplot.encode(
        size=alt.condition(click, alt.value(350), alt.value(200)),
        color=alt.condition(click, alt.ColorValue('#800000'),
                            alt.value('#879cab'))).properties(selection=click)

    # Confidence interval of the mean, drawn as a horizontal rule.
    bars_ci = base.mark_rule().encode(
        x=f'ci0({values})', x2=f'ci1({values})',
        y=f'{bars}:N').transform_filter(
            'datum.slope_x == "measures"').properties(width=bars_w,
                                                      height=bars_h)

    slope_mouse = _build_slope(df, values, None, bars, col, text, mouse,
                               slope_y_pos, slope_w, slope_h, slope_y_title)
    # The click slopegraph is built without slope_y_title, so whatever
    # default _build_slope defines for that argument applies.
    slope_click = _build_slope(df, values, None, bars, col, text, click,
                               slope_y_pos, slope_w, slope_h)
    chart = (bars_ci + barsplot) | (slope_mouse & slope_click)

    return chart
Esempio n. 10
0
def pdp_plot_filter(filter_in,
                    df,
                    rows,
                    columns,
                    values,
                    variables,
                    clusters=True,
                    cluster_centers=3,
                    cluster_lines=True,
                    columns_type='N',
                    x_title=None,
                    y_title=None,
                    width=700,
                    height=400):
    """Build a layered line chart of individual/cluster lines plus their average.

    Every layer is restricted with ``transform_filter(filter_in)``.

    Parameters
    ----------
    filter_in
        Filter handed to ``transform_filter`` on every layer.
    df : pandas.DataFrame
        Source data; a copy is used.
    rows : str
        Column encoded as opacity (one line per value) and used as the
        hover-selection field.
    columns : str
        Column plotted on the X axis.
    values : str
        Column plotted on the Y axis and averaged for the summary line.
    variables : str
        Column grouped together with `columns` when averaging `values`;
        also forwarded to ``utils.pdp_clusters``.
    clusters : bool
        When true, draw the lines returned by ``utils.pdp_clusters`` with a
        hover highlight; otherwise draw the raw lines in grey.
    cluster_centers : int
        Forwarded to ``utils.pdp_clusters``.
    cluster_lines : bool
        When true, put dashed grey raw lines underneath the background.
    columns_type : str
        Altair type shorthand appended to `columns`.
    x_title, y_title : str or None
        Axis titles.
    width, height : int
        Size of the line layers.

    Returns
    -------
    Layered Altair chart: background lines followed by the average layers.
    """
    df = df.copy()
    x_shorthand = f'{columns}:{columns_type}'

    def _line_layer(frame, thickness, stroke_color, selection=None,
                    **mark_opts):
        # One line per `rows` value, drawn in a single colour.
        layer = alt.Chart(frame).mark_line(strokeWidth=thickness, **mark_opts)
        layer = layer.encode(
            alt.X(x_shorthand, title=x_title, axis=alt.Axis(minExtent=30)),
            alt.Y(values, title=y_title),
            alt.Opacity(rows, legend=None),
            alt.ColorValue(stroke_color),
        )
        layer = layer.transform_filter(filter_in)
        layer = layer.properties(width=width, height=height)
        if selection:
            # Selected lines are drawn twice as thick.
            size_rule = alt.condition(selection,
                                      alt.value(thickness * 2),
                                      alt.value(thickness))
            layer = layer.encode(size=size_rule)
            layer = layer.properties(selection=selection)
        return layer

    if clusters:
        hover = alt.selection_single(on='mouseover',
                                     fields=[rows],
                                     empty='none',
                                     nearest=True)
        clustered = utils.pdp_clusters(cluster_centers, df, rows, columns,
                                       values, variables)
        background = _line_layer(clustered, 2, '#468499', selection=hover)
    else:
        background = _line_layer(df, 1, '#bbbbbb')

    if cluster_lines:
        dashed_raw = _line_layer(df, 1, '#bbbbbb', strokeDash=[2, 2])
        background = dashed_raw + background

    # Average of `values` per (columns, variables) pair.
    means = df.groupby([columns, variables])[values].mean().reset_index()
    avg_base = alt.Chart(means).encode(
        alt.X(x_shorthand, title=x_title),
        alt.Y(values, title=y_title),
    ).transform_filter(filter_in)

    # Thick gold line, a thinner line on top of it, then point markers.
    gold_halo = avg_base.mark_line(strokeWidth=5, color='gold')
    thin_line = avg_base.mark_line(strokeWidth=2)
    markers = avg_base.mark_point(filled=True, size=55)
    avg = gold_halo + thin_line
    avg += markers

    return background + avg
import altair as alt
import pandas as pd
import numpy as np

# generate fake data: 1000 'M' rows followed by 1000 'F' rows, each with
# height, weight and age drawn from a normal distribution whose mean and
# standard deviation differ per gender
source = pd.DataFrame({'gender': ['M']*1000 + ['F']*1000,
               'height':np.concatenate((np.random.normal(69, 7, 1000),
                                       np.random.normal(64, 6, 1000))),
               'weight': np.concatenate((np.random.normal(195.8, 144, 1000),
                                        np.random.normal(167, 100, 1000))),
               'age': np.concatenate((np.random.normal(45, 8, 1000),
                                        np.random.normal(51, 6, 1000)))
        })

# single-value selection on the 'gender' field; empty='all' means every row
# counts as selected while nothing has been picked
selector = alt.selection_single(empty='all', fields=['gender'])

# fixed colour per gender: 'M' -> '#1FC3AA', 'F' -> '#8624F5'
color_scale = alt.Scale(domain=['M', 'F'],
                        range=['#1FC3AA', '#8624F5'])

# shared 250x250 base chart carrying the selection; the charts below derive
# from it
base = alt.Chart(source).properties(
    width=250,
    height=250
).add_selection(selector)

points = base.mark_point(filled=True, size=200).encode(
    x=alt.X('mean(height):Q',
            scale=alt.Scale(domain=[0,84])),
    y=alt.Y('mean(weight):Q',
            scale=alt.Scale(domain=[0,250])),
    color=alt.condition(selector,
Esempio n. 12
0
                          columns=["order"])
# order rows by Year ascending and, within a year, by Medal descending;
# reset_index() keeps the previous index as a regular column
medal_count_year = medal_count_year.sort_values(by=["Year", "Medal"],
                                                ascending=[True, False
                                                           ]).reset_index()
# NOTE(review): count_year is built above this excerpt; presumably it is
# aligned row-for-row with the sorted frame -- confirm
medal_count_year["Order"] = count_year

# merge count with order and count medals by categories
# (outer join on Year and NOC, so rows present in only one frame are kept)
medal_count_year_withCate = pd.merge(medal_count_year,
                                     merge_medalCate,
                                     how='outer',
                                     on=["Year", "NOC"])

# Altair part
# slider from 1896 to 2016 in steps of 4, bound to a single-value selection
# on 'Year' that starts at 2016
slider_year = alt.binding_range(min=1896, max=2016, step=4, name='Year:')
selector_year = alt.selection_single(fields=['Year'],
                                     bind=slider_year,
                                     init={'Year': 2016})
# NOTE(review): despite its name this selection is keyed on 'Year', not on a
# country field -- confirm that is intended
select_country = alt.selection(type="single", fields=['Year'])

# map backdrop: light-gray sphere with thin white graticule lines
sphere = alt.sphere()
graticule = alt.graticule()
background1 = alt.Chart(sphere).mark_geoshape(fill='lightgray')
background2 = alt.Chart(graticule).mark_geoshape(stroke='white',
                                                 strokeWidth=0.5)
chart_medal_year = alt.Chart(medal_count_year_withCate).mark_geoshape(
    stroke='darkgray').encode(
        color=alt.Color(field="Medal",
                        type="quantitative",
                        scale=alt.Scale(type="sqrt"),
                        legend=alt.Legend(title="Medals",
                                          labelFontSize=15,
def plot_interactive_histograms_sm():
    """Serve three side-by-side histograms filtered by month and layer.

    Downloads the twelve monthly ``all_data2018-MM-01.csv`` result files and
    the node list from the ``multilayer_design_network_tool`` GitHub
    repository, tags each monthly frame with its month name, inner-joins it
    with the node list (``NodeID`` == ``ID``) and concatenates everything.

    The resulting chart shows binned counts of the ``Cn``, ``Rn`` and ``Id``
    columns. Two drop-down selections (month and layer) filter all three
    histograms.

    Returns:
        None. The chart is displayed through ``chart.serve()``.
    """
    base_url = ('https://raw.githubusercontent.com/jamescoller/'
                'multilayer_design_network_tool/master/')

    months = [
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December'
    ]
    layers = ['Algorithm', 'Physical', 'Task', 'Function', 'Information']

    node_info = pd.read_csv(base_url + 'Data/Node_List.csv')

    # One frame per month: download, tag with the month name, then attach
    # the node attributes. The month column is added before the merge so the
    # column order matches the month-by-month version this replaces.
    monthly_frames = []
    for month_number, month_name in enumerate(months, start=1):
        monthly = pd.read_csv(
            f'{base_url}results/all_data2018-{month_number:02d}-01.csv')
        monthly['month'] = month_name
        monthly_frames.append(
            pd.merge(monthly,
                     node_info,
                     how='inner',
                     left_on='NodeID',
                     right_on='ID'))

    all_data = pd.concat(monthly_frames)

    input_dropdown = alt.binding_select(options=months)
    selection = alt.selection_single(fields=['month'],
                                     bind=input_dropdown,
                                     name='Month')

    layer_dropdown = alt.binding_select(options=layers)
    layer_selection = alt.selection_single(fields=['Layer'],
                                           bind=layer_dropdown,
                                           name='Layer')

    def rating_histogram(field, title, bar_color):
        # Binned count of one rating column, filtered by both drop-downs.
        return alt.Chart(all_data).mark_bar().encode(
            x=alt.X(f'{field}:Q', bin=alt.Bin(maxbins=20), title=title),
            y=alt.Y('count()', title='Number of Nodes'),
            color=alt.value(bar_color),
        ).add_selection(selection).transform_filter(
            selection).add_selection(layer_selection).transform_filter(
                layer_selection)

    connectedness = rating_histogram('Cn', 'Connectedness Rating', '#4e79a7')
    reliability = rating_histogram('Rn', 'Reliability Rating', '#f28e2b')
    interdependency = rating_histogram('Id', 'Interdependency Rating',
                                       '#e15759')

    chart = alt.hconcat(connectedness, reliability, interdependency)
    chart.serve()
Esempio n. 14
0
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
import altair as alt
import pandas as pd
from vega_datasets import data as vega_data

# load the Morse-code table from a pinned commit of the project repository
morse = pd.read_csv(
    'https://raw.githubusercontent.com/jvelleu/649_final_project/7648fb227384b3dad2707efdf8fb960f502b52ad/morse_data.csv',
    encoding='utf-8')

# hover selection: picks the point nearest to the pointer; nothing counts as
# selected until the pointer is over the chart (empty='none')
mouseSelection = alt.selection_single(on="mouseover",
                                      nearest=True,
                                      empty='none')

# selected point fully opaque, every other point at 60% opacity
opacityCondition = alt.condition(mouseSelection, alt.value(1.0),
                                 alt.value(0.6))

# 400x400 scatter of the 'x'/'y' columns with both axes hidden; tooltip shows
# 'char' and 'code', point size encodes 'components' as an ordinal field
scatter1 = alt.Chart(
    morse, width=400, height=400).mark_point(filled=True).encode(
        alt.X("x", title="", axis=None), alt.Y("y", title="", axis=None),
        alt.Tooltip(["char", "code"], title=None),
        alt.Size("components:O")).add_selection(mouseSelection).encode(
            opacity=opacityCondition)

scatter2 = alt.Chart(
    morse, width=400, height=400).mark_point(filled=True).encode(
        alt.X("x", title="", axis=None),
Esempio n. 15
0
def plot_iroas_over_time(iroas_df: pd.DataFrame,
                         experiment_dates: pd.DataFrame,
                         cooldown_date: pd.DataFrame):
    """Returns a layered chart of the iROAS estimate over time.

    The chart combines the iROAS mean line, a hover rule with a tooltip, the
    hovered point, vertical rules marking the experiment period and the end
    of the cooldown period, and a gray band between the lower and upper
    bounds.

    Args:
      iroas_df: a dataframe with columns: date, lower, mean, upper.
      experiment_dates: dataframe with columns (date, color) which contains
        two dates for each period (start, end); the column color is the label
        used in the chart to refer to the corresponding period, e.g.
        "Experiment period" or "Pretest period". Only the rows labelled
        "Experiment period" are drawn.
      cooldown_date: dataframe with columns (date, color) with only one
        entry, where date indicates the last day in the cooldown period and
        color is the label used in the plot legend, e.g. "End of cooldown
        period".

    Returns:
      The layered Altair chart.
    """
    date_axis = alt.Axis(title='', format='%b %e')
    base = alt.Chart(iroas_df).mark_line().encode(
        x=alt.X('date:T', axis=date_axis))

    # Picks the date nearest to the pointer; cleared when the pointer leaves.
    hover = alt.selection_single(on='mouseover',
                                 clear='mouseout',
                                 nearest=True,
                                 empty='none',
                                 fields=['date'])

    estimate_axis = alt.Axis(title=' ', format='.1')
    estimate_line = base.mark_line().encode(
        y=alt.Y('mean:Q', axis=estimate_axis))

    # Marker shown only on the hovered date.
    hovered_point = estimate_line.mark_point().transform_filter(hover)

    # Vertical rule carrying the tooltip; invisible unless hovered.
    rule_opacity = alt.condition(hover, alt.value(0.3), alt.value(0))
    hover_rule = base.mark_rule().encode(
        tooltip=['date:T', 'mean:Q', 'lower:Q', 'upper:Q'],
        opacity=rule_opacity,
    ).add_selection(hover)

    confidence_band = alt.Chart(iroas_df).mark_area(color='gray').encode(
        alt.X('date:T'), y='lower:Q', y2='upper:Q', opacity=alt.value(0.5))

    # Shared legend entries and their colours.
    legend_scale = alt.Scale(
        domain=[
            'Experiment period', 'End of cooldown period', 'iROAS estimate'
        ],
        range=['black', 'black', '#1f77b4'],
    )
    is_experiment = experiment_dates['color'] == 'Experiment period'
    experiment_rule = alt.Chart(experiment_dates[is_experiment]).mark_rule(
        strokeWidth=2).encode(
            x='date:T', color=alt.Color('color', scale=legend_scale))

    cooldown_rule = alt.Chart(cooldown_date).mark_rule(
        strokeWidth=2, strokeDash=[5, 2],
        color='black').encode(x='date:T', color='color:N')

    # Compile chart
    return alt.layer(estimate_line, hover_rule, hovered_point,
                     experiment_rule, cooldown_rule, confidence_band)
Esempio n. 16
0
    def chart(self):
        """Build the ticker-mention chart and pass its JSON to ``save_semantic_chart``.

        The data comes from ``self.read_curated()`` piped through
        ``self.transform`` and ``self.clean_ticker``; rows whose ticker is
        listed in ``self.words`` are filtered out.

        The final chart is a vertical concatenation of:

        * a bar chart with the number of rows per ticker, limited by two
          count sliders (``ticker_min`` / ``ticker_max``), and
        * a text "table" (created date, ticker, score, and either title or
          comment depending on ``self.cols_with_ticker``) restricted to the
          ticker selected on the chart.

        Both parts are additionally filtered by two date inputs (``start``
        and ``end``).

        Returns:
            None. The chart JSON is handed to ``self.save_semantic_chart``.
        """
        df = self.clean_ticker(self.transform(self.read_curated()))

        # NOTE(review): the original author was unsure this filter works;
        # verify that tickers listed in self.words are really dropped.
        df = df.query("ticker not in @self.words")

        # used in data table
        # (assumes df["created"] holds datetime-like values -- TODO confirm)
        df["date_str"] = df["created"].map(
            lambda x: x.strftime("%Y-%m-%d %H:%M"))
        # used for date filters: "date" is the field of the two date-input
        # selections, "date2" is the column the filter expression compares
        df["date"] = df["created"].map(lambda x: x.strftime("%Y-%m-%d"))
        df["date2"] = df["created"].map(lambda x: x.strftime("%Y-%m-%d"))
        data_start = df["date"].min()
        data_end = df["date"].max()

        # DATETIME RANGE FILTERS
        # https://github.com/altair-viz/altair/issues/2008#issuecomment-621428053
        # Two date inputs, initialised to the first and last day in the data.
        range_start = alt.binding(input="date")
        range_end = alt.binding(input="date")
        select_range_start = alt.selection_single(name="start",
                                                  fields=["date"],
                                                  bind=range_start,
                                                  init={"date": data_start})
        select_range_end = alt.selection_single(name="end",
                                                fields=["date"],
                                                bind=range_end,
                                                init={"date": data_end})

        # Disabled alternative: a range slider over timestamps.
        # slider timestamp is javascript timestamp. not human readable
        # slider = alt.binding_range(
        #     min=self.timestamp(min(self.datelist)),
        #     max=self.timestamp(max(self.datelist)),
        #     step=1, name='Created Date'
        # )
        # slider_selection = alt.selection_single(
        #     name="SelectorName", fields=['created_date'],
        #     bind=slider, init={'created': self.timestamp('2021-01-01')}
        # )

        # count slider filter: both sliders run from 0 to the largest number
        # of rows any single ticker has
        max_count = df.groupby(["ticker"])["ticker"].count().max()
        slider_max = alt.binding_range(min=0, max=max_count, step=1)
        slider_min = alt.binding_range(min=0, max=max_count, step=1)
        select_max_count = alt.selection_single(name='ticker_max',
                                                fields=['count'],
                                                bind=slider_max,
                                                init={"count": max_count})
        select_min_count = alt.selection_single(name='ticker_min',
                                                fields=['count'],
                                                bind=slider_min,
                                                init={"count": 0})

        # zoom = alt.selection_interval(bind='scales')
        # Ticker selection; with empty='all' every ticker counts as selected
        # until one is picked.
        selector = alt.selection_single(empty='all', fields=['ticker'])

        # Shared base: keeps rows whose date2 lies between the two date
        # inputs and registers every selection used below.
        base = alt.Chart(df.reset_index()).transform_filter(
            # slider_selection
            (alt.datum.date2 >= select_range_start.date)
            & (alt.datum.date2 <= select_range_end.date)).add_selection(
                selector,
                select_range_start,
                select_range_end,
                select_max_count,
                select_min_count,
            )

        # BAR CHART
        # https://stackoverflow.com/questions/52385214/how-to-select-a-portion-of-data-by-a-condition-in-altair-chart
        # Rows are aggregated to one count per ticker; the trailing filter
        # keeps tickers whose count lies between the two count sliders.
        bars = base.mark_bar(
        ).transform_aggregate(count='count()', groupby=['ticker']).encode(
            x=alt.X(
                'ticker',
                # https://altair-viz.github.io/gallery/bar_chart_sorted.html
                sort="-y",
                axis=alt.Axis(title='Stock Tickers')),
            y=alt.Y(
                "count:Q",
                axis=alt.Axis(title='Number of Mentions'),
                # scale=alt.Scale(zero=False)
            ),
            color=alt.condition(selector,
                                'id:O',
                                alt.value('lightgray'),
                                legend=None),
            tooltip=['ticker', 'count:Q'],
        ).properties(
            # title="Stock Ticker mentions on r/wallstreetbets",
            width=1400,
            height=400).transform_filter(
                (alt.datum.count <= select_max_count.count)
                & (alt.datum.count >= select_min_count.count))

        # base chart for data tables
        # href: https://altair-viz.github.io/gallery/scatter_href.html
        # Transform order matters: rows are numbered by score/created over
        # the whole (date-filtered) data, then restricted to the selected
        # ticker, then re-ranked so the visible rows are contiguous.
        ranked_text = base.transform_calculate(
            # url='https://www.reddit.com' + alt.datum.permalink
            url=alt.datum.built_url
        ).mark_text(
            align='left',
            dx=-12,
            # dy=0,
            color="white",
            strokeWidth=0,
            strokeOpacity=0,
        ).encode(
            y=alt.Y('row_number:O', axis=None),
            href='url:N',
            tooltip=['url:N'],
            # color=alt.condition(selector,'id:O',alt.value('lightgray'),legend=None),
        ).transform_window(
            # groupby=["ticker"],  # causes overlap
            # https://altair-viz.github.io/user_guide/generated/core/altair.SortField.html#altair.SortField
            sort=[
                alt.SortField("score", "descending"),
                alt.SortField("created", "descending"),
            ],
            row_number='row_number()').transform_filter(
                selector).transform_window(
                    rank='rank(row_number)').transform_filter(
                        # keeps only rows whose rank is below 20
                        alt.datum.rank < 20).properties(width=30, height=300)

        # Data Tables: one text column per field, all sharing ranked_text
        created = ranked_text.encode(text='date_str').properties(
            title='Created Date')
        ticker = ranked_text.encode(text='ticker').properties(
            title='Stock Ticker')
        score = ranked_text.encode(text='score').properties(title='Upvotes')
        # Shows the "title" column when it is listed in
        # self.cols_with_ticker, otherwise the "comment" column.
        title = ranked_text.encode(text="title" if "title" in self.
                                   cols_with_ticker else "comment").properties(
                                       title='Submission Title' if "title" in
                                       self.cols_with_ticker else 'Comment')

        # Combine data tables
        text = alt.hconcat(created, ticker, score, title)

        # Build final chart
        chart = alt.vconcat(
            bars,
            text,
            # autosize="fit"
        ).resolve_legend(color="independent")

        self.save_semantic_chart(chart.to_json(indent=None))
        return
Esempio n. 17
0
import altair as alt
from vega_datasets import data

source = data.cars()

# Drop-down listing the three origins, bound to a single-value selection on
# the 'Origin' field.
input_dropdown = alt.binding_select(options=['Europe', 'Japan', 'USA'])
selection = alt.selection_single(
    name='Country of ',
    fields=['Origin'],
    bind=input_dropdown,
)

# Selected points keep their per-origin colour; all others turn light gray.
color = alt.condition(
    selection,
    alt.Color('Origin:N', legend=None),
    alt.value('lightgray'),
)

# Horsepower vs. miles-per-gallon scatter wired to the drop-down selection.
vega = alt.Chart(source).mark_circle(size=60)
vega = vega.encode(x='Horsepower', y='Miles_per_Gallon', color=color)
vega = vega.add_selection(selection)

vega.save('a.html')
Esempio n. 18
0
def map_state_slider(state_txt, state_counties, confirmed, confirmed_min,
                     confirmed_max, deaths, deaths_min, deaths_max,
                     state_fips):
    """Build a state map of confirmed cases and deaths with a day slider.

    Three layers are combined: white county outlines, counties coloured by
    confirmed cases, and points sized by deaths. A range slider bound to
    ``day_num`` picks the day shown by the two data layers.

    Args:
        state_txt: Not used by this function.
            NOTE(review): the chart title is hard-coded to "WA State";
            presumably this argument was meant to feed it -- confirm.
        state_counties: Data for the white county-outline base layer.
        confirmed: DataFrame with columns ``fips``, ``day_num`` and
            ``confirmed``.
        confirmed_min: Lower bound of the confirmed-cases colour scale.
        confirmed_max: Upper bound of the confirmed-cases colour scale.
        deaths: DataFrame with columns ``lat``, ``long_``, ``day_num`` and
            ``deaths``.
        deaths_min: Lower bound of the deaths size scale.
        deaths_max: Upper bound of the deaths size scale.
        state_fips: Value compared with ``(county id / 1000)|0`` to keep
            only the counties of one state.

    Returns:
        The layered Altair chart ``base_state + base_state_counties + points``.

    Raises:
        ValueError: If ``confirmed`` yields no ``day_num`` columns.
    """
    # Pivot confirmed data by day_num
    confirmed_pv = confirmed[['fips', 'day_num', 'confirmed']].copy()
    confirmed_pv['fips'] = confirmed_pv['fips'].astype(str)
    confirmed_pv['day_num'] = confirmed_pv['day_num'].astype(str)
    confirmed_pv['confirmed'] = confirmed_pv['confirmed'].astype('int64')
    confirmed_pv = confirmed_pv.pivot_table(index='fips',
                                            columns='day_num',
                                            values='confirmed',
                                            fill_value=0).reset_index()

    # Pivot deaths data by day_num
    deaths_pv = deaths[['lat', 'long_', 'day_num', 'deaths']].copy()
    deaths_pv['day_num'] = deaths_pv['day_num'].astype(str)
    deaths_pv['deaths'] = deaths_pv['deaths'].astype('int64')
    deaths_pv = deaths_pv.pivot_table(index=['lat', 'long_'],
                                      columns='day_num',
                                      values='deaths',
                                      fill_value=0).reset_index()

    # Day columns for the slider: everything after the leading `fips` column
    column_names = confirmed_pv.columns.tolist()[1:]
    if not column_names:
        raise ValueError('confirmed contains no day_num values')
    column_values = [int(name) for name in column_names]

    # Disable max_rows to see more data
    alt.data_transformers.disable_max_rows()

    # Topographic information (topo_usa is defined at module level)
    us_counties = alt.topo_feature(topo_usa, 'counties')

    # state county boundaries
    base_state = alt.Chart(state_counties).mark_geoshape(
        fill='white',
        stroke='lightgray',
    ).properties(
        width=800,
        height=600,
    ).project(type='mercator')

    # Slider choices. The pivoted column labels are strings and therefore
    # sorted lexicographically ('10' < '2'), so the first/last column is not
    # necessarily the smallest/largest day; take the numeric extremes.
    min_day_num = min(column_values)
    max_day_num = max(column_values)
    slider = alt.binding_range(min=min_day_num, max=max_day_num, step=1)
    slider_selection = alt.selection_single(fields=['day_num'],
                                            bind=slider,
                                            name="day_num",
                                            init={'day_num': min_day_num})

    # Confirmed cases by county: look up the per-day columns by county id,
    # fold them back into (day_num, confirmed) rows, keep the requested state
    # and the day chosen on the slider. Counties without a positive count are
    # drawn white.
    base_state_counties = alt.Chart(us_counties).mark_geoshape(
        stroke='black', strokeWidth=0.05).transform_lookup(
            lookup='id',
            from_=alt.LookupData(confirmed_pv, 'fips', column_names)
        ).transform_fold(column_names, as_=[
            'day_num', 'confirmed'
        ]).transform_calculate(
            state_id="(datum.id / 1000)|0",
            day_num='parseInt(datum.day_num)',
            confirmed='isValid(datum.confirmed) ? datum.confirmed : -1'
        ).encode(color=alt.condition(
            'datum.confirmed > 0',
            alt.Color('confirmed:Q',
                      scale=alt.Scale(domain=(confirmed_min, confirmed_max),
                                      type='symlog')), alt.value('white')
        )).properties(
            # update figure title
            title='COVID-19 WA State Confirmed Cases by County'
        ).transform_filter((alt.datum.state_id
                            ) == state_fips).transform_filter(slider_selection)

    # deaths by long, latitude
    # NOTE(review): the fold reuses the day columns of the confirmed pivot;
    # this assumes `deaths` covers the same day_num values -- confirm.
    points = alt.Chart(deaths_pv).mark_point(
        opacity=0.75, filled=True).transform_fold(
            column_names, as_=['day_num', 'deaths']).transform_calculate(
                day_num='parseInt(datum.day_num)',
                deaths='isValid(datum.deaths) ? datum.deaths : -1').encode(
                    longitude='long_:Q',
                    latitude='lat:Q',
                    size=alt.Size('deaths:Q',
                                  scale=alt.Scale(domain=(deaths_min,
                                                          deaths_max),
                                                  type='symlog'),
                                  title='deaths'),
                    color=alt.value('#BD595D'),
                    stroke=alt.value('brown'),
                ).add_selection(slider_selection).transform_filter(
                    slider_selection)

    # confirmed cases (base_counties) and deaths (points)
    return (base_state + base_state_counties + points)
Esempio n. 19
0
def selectors_figure(text, o_html, full_pds, ts, ts_step, decays, decays_step,
                     knns, knns_step):
    """Render a PHATE scatter plot with parameter sliders and save it as HTML.

    A range slider is attached for each of ``knn``, ``decay`` and ``t`` whose
    step argument is truthy; every slider filters the points down to the
    selected value. When ``full_pds`` has a ``variable`` column, one panel per
    kind found in its ``dtype`` column ('categorical' and/or 'numerical') is
    built through ``make_subplot`` with a drop-down over the variable names,
    and those panels are saved instead of the plain scatter.

    Args:
        text: Main title text (see the note on ``title`` below).
        o_html: Output path handed to ``Chart.save``.
        full_pds: Table with at least ``PHATE1`` and ``PHATE2`` columns.
        ts, decays, knns: Candidate values for each parameter.
        ts_step, decays_step, knns_step: Slider step for each parameter; a
            falsy value disables that slider.
    """
    param_lines = ['Parameters:']
    hover_fields = ['sample_name', 'PHATE1', 'PHATE2']

    scatter = alt.Chart(full_pds).mark_point(size=20).encode(x='PHATE1:Q',
                                                             y='PHATE2:Q')

    # (step, candidate values, field/selection name, slider label, subtitle)
    slider_specs = (
        (knns_step, knns, 'knn', 'knn', 'knn ("k") = %s\n'),
        (decays_step, decays, 'decay', 'decay', 'decay ("alpha") = %s\n'),
        (ts_step, ts, 't', 't:', 't = %s\n'),
    )
    for step, values, field, label, template in slider_specs:
        if not step:
            continue
        lowest = min(values)
        binding = alt.binding_range(min=lowest,
                                    max=max(values),
                                    step=step,
                                    name=label)
        picker = alt.selection_single(name=field,
                                      fields=[field],
                                      bind=binding,
                                      init={field: lowest})
        hover_fields.append(field)
        scatter = scatter.add_selection(picker).transform_filter(picker)
        param_lines.append(template % ', '.join(str(v) for v in values))

    def variable_panel(kind, type_code):
        # Drop-down over the variables of one kind; the longest variable
        # name (first one on ties) is the initial choice.
        subset = full_pds.loc[full_pds['dtype'] == kind]
        names = [v for v in subset['variable'] if str(v) != 'nan']
        initial = sorted(names, key=len, reverse=True)[0]
        dropdown = alt.binding_select(options=subset['variable'].unique(),
                                      name='variable:')
        chooser = alt.selection_single(fields=['variable'],
                                       bind=dropdown,
                                       name='%s variable' % kind,
                                       init={'variable': initial})
        return make_subplot(scatter, chooser, list(hover_fields), type_code)

    has_cats = False
    has_nums = False
    if 'variable' in full_pds.columns:
        kinds_present = set(full_pds['dtype'])
        if 'categorical' in kinds_present:
            cats_plot = variable_panel('categorical', 'N')
            has_cats = True
        if 'numerical' in kinds_present:
            nums_plot = variable_panel('numerical', 'Q')
            has_nums = True

    # NOTE(review): this title is assembled but never attached to the chart
    # that gets saved below — confirm whether that is intended.
    title = {
        "text": text,
        "color": "black",
    }
    if len(param_lines) > 1:
        title["subtitle"] = param_lines + ["(based on altair)"]
        title["subtitleColor"] = "grey"

    figure = scatter
    if has_nums and has_cats:
        figure = alt.hconcat(cats_plot, nums_plot)
    elif has_nums:
        figure = nums_plot
    elif has_cats:
        figure = cats_plot

    figure.save(o_html)
    print('-> Written:', o_html)
Esempio n. 20
0
    COUNTY = pd.read_csv("health_ineq_online_table_12.csv", encoding = "latin-1")
    COUNTY["cty"] = COUNTY["cty"].astype(int)
    COVID["geo_value"] = COVID["geo_value"].astype(int)
    DATA = COUNTY.join(COVID.set_index("geo_value"), how = "inner", on = "cty")
    DATA['Date'] = pd.to_datetime(DATA.time_value)
    DATA['Date'] = DATA.Date.dt.strftime('%d').astype(int)
    return DATA[["Date", "cty", "statename", "state_id", "county_name", "value", "median_house_value", "puninsured2010"]]

# Load the joined county / COVID table once for the whole page.
DATA = load_data()

st.title("Percentage of (COVID) Doctor Visits by State and County")
st.write("In this section, we explore the percentage of doctor visits for COVID by State and County.  We begin by highlighting Pennsylvania and as we can see, there are some interesting observations for the state.  ")
# Lift Altair's default row limit so the full table can be embedded.
alt.data_transformers.disable_max_rows()

# Slider over the integer 'Date' field (1..31), starting on day 1.
slider = alt.binding_range(min=1, max=31, step=1)
select_date = alt.selection_single(
    name="January",
    fields=['Date'],
    bind=slider,
    init={'Date': 1},
)

# Multi-select over states; Pennsylvania is selected initially.
state_selector = alt.selection_multi(
    fields=['statename'],
    init=[{'statename': 'Pennsylvania'}],
)

# Mean value per state for the day chosen on the slider; selected states
# are drawn in the highlight colour.
States = alt.Chart(DATA).mark_bar().encode(
    x=alt.X('value:Q',
            title="% of Visits to Doctor about COVID",
            aggregate="mean",
            scale=alt.Scale(domain=[0, 35])),
    y=alt.Y('statename:N', title="State"),
    color=alt.condition(state_selector,
                        alt.value("#f76f5c"),
                        alt.value("#451076")),
    tooltip=[
        alt.Tooltip("statename:N", title='State'),
        alt.Tooltip("value:Q",
                    aggregate="mean",
                    title="% of COVID Doctor Visits",
                    format='.2f'),
    ],
).add_selection(
    state_selector
).add_selection(
    select_date
).transform_filter(
    select_date
).interactive()
Esempio n. 21
0
import pandas as pd
import altair as alt

# Input spreadsheets.
df2 = pd.read_excel('mianjiandjiage.xlsx')
df0 = pd.read_excel('ziru.xlsx')

# Relationship between rent and floor area across Guangzhou districts.
# The first drop-down entry is the "please choose a district" placeholder.
areas = [
    '请选择区域',
    '荔湾区',
    '白云区',
    '海珠区',
    '黄埔区',
    '萝岗区',
    '南沙区',
    '从化区',
    '花都区',
    '番禺区',
    '天河区',
    '越秀区',
    '增城区',
]
areas_dropdown = alt.binding_select(options=areas)
areas_select = alt.selection_single(
    name="district",
    fields=['区域'],
    bind=areas_dropdown,
)

# Bars: average usable area per district, districts ordered by that area.
mj = alt.Chart(df2).mark_bar().encode(
    x=alt.X("区域", sort=alt.EncodingSortField(field="平均使用面积/㎡")),
    y=alt.Y("平均使用面积/㎡"),
    color='平均使用面积/㎡',
)

# Points: average monthly rent per district, same district ordering.
mj2 = alt.Chart(df2).mark_point(color="orange").encode(
    x=alt.X("区域", sort=alt.EncodingSortField(field="平均使用面积/㎡")),
    y=alt.Y("每月平均租金/元"),
)

# Text layer restricted to the district picked in the drop-down.
mj3 = alt.Chart(df2).mark_text(color="red").encode(
    x=alt.X("区域", sort=alt.EncodingSortField(field="平均使用面积/㎡")),
    y=alt.Y("每月平均租金/元"),
).add_selection(
    areas_select
).transform_filter(
    areas_select
)
#mark_text(align='left', dx=5)
zj = alt.Chart(df2).mark_line(color="orange").encode(
Esempio n. 22
0
                     "datum.Production_Budget > 100000000.0 ? 100 : 10",
                     Release_Year="year(datum.Release_Date)").transform_filter(
                         alt.datum.IMDB_Rating > 0).transform_filter(
                             alt.FieldOneOfPredicate(
                                 field='MPAA_Rating',
                                 oneOf=ratings)).encode(x=alt.X(
                                     'Worldwide_Gross:Q',
                                     scale=alt.Scale(domain=(100000, 10**9),
                                                     clamp=True)),
                                                        y='IMDB_Rating:Q',
                                                        tooltip="Title:N")

# Slider filter: one release year at a time.
year_slider = alt.binding_range(min=1969, max=2018, step=1)
slider_selection = alt.selection_single(
    name="Release Year_",
    fields=['Release_Year'],
    bind=year_slider,
)

filter_year = base.add_selection(
    slider_selection
).transform_filter(
    slider_selection
).properties(
    title="Slider Filtering"
)

# Drop-down filter: one major genre at a time.
genre_dropdown = alt.binding_select(options=genres)
genre_select = alt.selection_single(
    name="Genre",
    fields=['Major_Genre'],
    bind=genre_dropdown,
)

filter_genres = base.add_selection(
    genre_select
).transform_filter(
    genre_select
).properties(
    title="Dropdown Filtering"
)

#color changing marks
def plot_results_timeconstant_static():
    """Build the faceted rate-versus-tau chart from the module-level ``data``.

    Four layers are stacked (error bars, points, connecting line and a
    horizontal rule at 0.95) and faceted by ``trueRho`` in three columns.
    The points, line and error-bar layers each get their own legend-bound
    selection on ``methodName``; unselected marks turn light grey and
    half-transparent. Colours come from the module-level ``col`` encoding.

    Returns:
        The configured faceted chart.
    """

    def legend_toggle():
        # A fresh legend-bound selection plus the matching opacity rule;
        # every layer that registers a selection needs its own instance.
        picker = alt.selection_single(fields=['methodName'],
                                      empty='all',
                                      bind='legend')
        fade = alt.condition(picker, alt.value(1.0), alt.value(0.5))
        return picker, fade

    # Derived fields shared by every layer.
    derived = alt.Chart(data).transform_calculate(
        x_jittered='0.15*random()*datum.taus+datum.taus',
        ymin="datum.confIntLow",
        ymax="datum.confIntHigh",
        goal='0.95',
    )

    # Scatter points.
    point_pick, point_fade = legend_toggle()
    points = derived.mark_point(filled=True).add_selection(point_pick).encode(
        x=alt.X('x_jittered:Q',
                scale=alt.Scale(type='log'),
                title='Length of Timeseries (τ)'),
        y=alt.Y('rate:Q',
                scale=alt.Scale(domain=[0, 1.04]),
                title='Rate of correct SEM'),
        size=alt.value(80),
        color=alt.condition(point_pick, col, alt.value('lightgrey')),
        opacity=point_fade,
    )

    # Line connecting the points.
    line_pick, line_fade = legend_toggle()
    line = derived.mark_line().add_selection(line_pick).encode(
        x=alt.X('x_jittered:Q'),
        y=alt.Y('rate:Q'),
        color=alt.condition(line_pick, col, alt.value('lightgrey')),
        opacity=line_fade,
    )

    # Horizontal rule at the 95% mark.
    rule = derived.mark_rule(color='black').encode(y=alt.Y('goal:Q'))

    # Confidence-interval bars.
    bar_pick, bar_fade = legend_toggle()
    errorbars = derived.mark_rule(strokeWidth=3).add_selection(bar_pick).encode(
        x=alt.X("x_jittered:Q"),
        y=alt.Y("ymin:Q", title=''),
        y2=alt.Y2("ymax:Q"),
        color=alt.condition(bar_pick, col, alt.value('lightgrey')),
        opacity=bar_fade,
    )

    layered = alt.layer(errorbars, points, line, rule).properties(
        width=250,
        height=200,
    )
    faceted = layered.facet(
        facet=alt.Facet('trueRho:N', title='Autocorrelation parameter (ρ)'),
        columns=3,
    )

    return faceted.configure_header(
        titleColor='darkred',
        titleFontSize=16,
        labelColor='darkred',
        labelFontSize=14,
    ).configure_legend(
        strokeColor='gray',
        fillColor='#EEEEEE',
        padding=10,
        cornerRadius=10,
        orient='top',
    )
def get_timeline_plots(df_scores, selected_score, selected_score_axis,
                       selected_score_desc, use_states, countys):
    """Build the interactive timeline chart for one score.

    Args:
        df_scores: Table with ``name``, ``date`` and score columns.
        selected_score: Name of the score column to plot.
        selected_score_axis: Title used for the y axis and the tooltip.
        selected_score_desc: Text shown as the chart title.
        use_states: Truthy to plot all rows with the state legend title and
            colour scheme; falsy to plot only the rows named in ``countys``.
        countys: Names to keep when ``use_states`` is falsy.

    Returns:
        A layered chart of points and lines, with a horizontal rule at 100
        added for every score except the air-quality, webcam and tomtom
        ones; ``None`` when ``use_states`` is falsy and ``countys`` is empty.
    """
    chart_title = {
        # two lines as a hack so the umlauts at Ö are not cut off
        "text": ["", selected_score_desc],
        "subtitle": "EveryoneCounts.de",
        "color": "black",
        "subtitleColor": "lightgray",
        "subtitleFontSize": 12,
        "subtitleFontWeight": "normal",
        "fontSize": 15,
        "lineHeight": 5,
    }
    titlestr = "Bundesland" if use_states else "Landkreis"
    scheme = 'category20' if use_states else 'category10'

    anything_selected = len(countys) > 0
    if not use_states:
        if not anything_selected:
            return None  # county mode, nothing selected
        # keep only the requested counties and the columns that are plotted
        chosen_rows = df_scores["name"].isin(countys)
        df_scores = df_scores[chosen_rows].dropna(axis=1, how="all")
        df_scores = df_scores[["name", "date", selected_score]].dropna()

    # hover selections: one per series (lines), one per single data point
    hover_kwargs = dict(empty="none",
                        on='mouseover',
                        nearest=True,
                        clear="mouseout")
    highlight = alt.selection_single(fields=['name'], **hover_kwargs)
    highlight_circles = alt.selection_single(fields=['date', 'name'],
                                             **hover_kwargs)

    # encodings shared by the point and line layers
    plotted = df_scores[["name", "date", selected_score]].dropna()
    score_field = selected_score + ':Q'
    hover_info = [
        alt.Tooltip("name:N", title=titlestr),
        alt.Tooltip(selected_score + ":Q", title=selected_score_axis),
        alt.Tooltip("date:T", title="Datum", format=("%A %d %B")),
    ]
    base = alt.Chart(plotted).encode(
        x=alt.X('date:T', axis=alt.Axis(title='Datum', format=("%d %b"))),
        y=alt.Y(score_field, title=selected_score_axis),
        color=alt.Color('name',
                        title=titlestr,
                        scale=alt.Scale(scheme=scheme),
                        legend=alt.Legend(orient="bottom", columns=2)),
        tooltip=hover_info,
    )

    points = base.mark_circle().encode(
        opacity=alt.value(1),
        size=alt.condition(~highlight_circles, alt.value(40), alt.value(300)),
    ).add_selection(
        highlight
    ).add_selection(
        highlight_circles
    ).properties(
        width='container', height=450, title=chart_title
    )

    lines = base.mark_line().encode(
        size=alt.condition(~highlight, alt.value(2), alt.value(6)),
        opacity=alt.condition(~highlight, alt.value(0.5), alt.value(1)),
    )

    scores_without_rule = ("airquality_score", "webcam_score", "tomtom_score")
    if selected_score in scores_without_rule:
        return points + lines

    # add horizontal rule at 100%
    rule = alt.Chart(df_scores).mark_rule(color='lightgray').encode(
        y="a:Q"
    ).transform_calculate(a="100")
    return rule + points + lines
Esempio n. 25
0
def getBattingChart():
    """Build the side-by-side batting chart from the ``Batting`` table.

    Left panel: average versus strike rate for players with more than 450
    runs in the year chosen on the slider (initially 1998), coloured by
    country. Clicking a player there drives the right panel, which plots
    that player's average (blue) and strike rate (red) over the years with
    the player's name overlaid. 'SR Tendulkar' is selected initially.

    Returns:
        The horizontally concatenated, configured chart.
    """
    source = pd.DataFrame(
        list(Batting.objects.all().values(
            "name", "country", "average", "year", "strikerate", "runs")))
    print(source.head())
    alt.data_transformers.disable_max_rows()

    # The year is driven only by the slider: chart events neither set nor
    # clear it.
    slider = alt.binding_range(min=1990, max=2018, step=1)
    select_year = alt.selection_single(name="year",
                                       fields=['year'],
                                       on='none',
                                       clear='none',
                                       bind=slider,
                                       init={'year': 1998})
    singlePlayer = alt.selection_single(empty='none',
                                        fields=['name'],
                                        init={'name': 'SR Tendulkar'})

    # Fixed colour per country code.
    domain = ["INDIA", "AUS", "PAK", "ENG", "SA", "NZ", "WI", "BAN", "SL"]
    range_ = ["#6baed6", "yellow", "green", "red", "orange", "black",
              "brown", "purple", "pink"]

    base = alt.Chart(source).mark_circle().encode(
        x='average',
        y='strikerate',
        color=alt.Color('country',
                        legend=alt.Legend(title='Country', orient='left'),
                        scale=alt.Scale(domain=domain, range=range_)),
        tooltip=['name', 'country', 'average', 'strikerate'],
    ).add_selection(
        select_year,
        singlePlayer,
    ).transform_filter(
        datum.runs > 450
    ).transform_filter(
        select_year
    ).properties(
        title="Batting Records Year Wise"
    )

    # Name of the selected player; the encoded text replaces the mark's
    # placeholder text.
    titleLine = alt.Chart(source).mark_text(
        dy=100, size=30, opacity=0.5, text='foo-baz', color='#d6616b'
    ).encode(
        text='name:N',
        opacity=alt.value(0.5),
    ).transform_filter(
        singlePlayer
    )

    # Static captions acting as a hand-made legend for the two lines.
    label1 = alt.Chart(source).mark_text(
        align='left', dy=-140, size=15, opacity=0.5,
        text=' --- average', color='blue')
    label2 = alt.Chart(source).mark_text(
        dy=-140, size=15, align='right', opacity=0.5,
        text=' --- strikerate  ', color='red')

    combinedLine = alt.Chart(source).mark_line(point=True).encode(
        x='year:Q',
    )

    z = alt.layer(
        combinedLine.mark_line(color='blue', opacity=.5).encode(y='average'),
        combinedLine.mark_line(color='red', opacity=.5).encode(y='strikerate'),
    ).transform_filter(
        singlePlayer
    ).properties(
        title='Selected Player Record over Years'
    )

    # '+' binds tighter than '|': the right panel is the whole layered stack.
    myChart = base | (z + titleLine + label1 + label2)
    myChart1 = myChart.configure_circle(
        filled=True,
        size=200,
    ).properties(
        autosize='fit'
    )
    return myChart1
    squad = allteams2.loc[index]

    a = make_team(squad, stand)
    df_ind = a.at[13, "player"]
    teams_dict[df_ind] = a
# Rank teams by points, best first.
stand = stand.sort_values("Total Points", ascending=False).reset_index(drop=True)

# create full standing: roster details joined with each GM's totals
full_stand = roster_infos.merge(
    stand[["Team", "Total Points", "Num Alive"]],
    left_on="GM",
    right_on="Team",
)
full_stand = full_stand.sort_values(
    ["Total Points", "Num Alive"],
    ascending=[False, False],
).reset_index(drop=True)
full_stand_cols = [
    "GM", "Total Points", "Num Alive",
    "QB1", "QB2",
    "K1", "K2",
    "D1", "D2",
    "P1", "P2", "P3", "P4", "P5", "P6", "P7",
    "SB_Champ", "Runner_Up", "SB_Points",
]
full_stand = full_stand[full_stand_cols]


#### VIZZES
single = alt.selection_single()
bar = alt.Chart(stand).mark_bar().encode(
    x=alt.X(
        "Team",
        sort=alt.SortField(field="Total Points", order='descending'),
        title="Team",
    ),
    y=alt.Y("Total Points"),
    tooltip=alt.Tooltip([
        "Team", "Total Points", "Num Alive", "QBs Remaining",
        "Ks Remaining", "Ds Remaining", "Positions Remaining", "Dead",
    ]),
    color=alt.Color(
        "Num Alive",
        scale=alt.Scale(scheme="lighttealblue", reverse=True),
    ),
    #color=alt.condition(single, 'count()', alt.value('lightgray'), legend = None)
).properties(
    width=1200,
    height=600,
    title="Points by Team",
).configure_axis(
    labelFontSize=30,
    titleFontSize=35,
).configure_title(
    fontSize=45
).add_selection(
    single
)

circ = alt.Chart(stand).mark_circle(size=100).encode(
    x=alt.Y("Total Points", scale=alt.Scale(domain=(min(stand["Total Points"]), max(stand["Total Points"])))),
    y=alt.Y("Num Alive", title="Players Remaining"),
Esempio n. 27
0
def wsb_chart(
    data: pd.DataFrame,
    xvar: str = "start",
    x2var: str = "end",
    xvar_middle: str = "middle",
    yvar: str = "mantissa",
    vvar: str = "original",
    evar: str = "multiplier",
    xcat: str = "category",
    w: int = 400,
    h: int = 400,
    color_scheme: str = "orangered",
    title: str = "Width-Scale Bar Chart",
) -> alt.LayerChart:
    """Draw a width-scale bar chart with category labels under the bars.

    Each row becomes a rectangle spanning ``xvar``..``x2var`` horizontally
    and 0..``yvar`` vertically, coloured by ``evar``. Clicking a legend
    entry dims the bars of the other ``evar`` values. A copy of ``data`` is
    shifted horizontally to open gaps between bars; the input frame is not
    modified.

    Args:
        data: Input table.
        xvar: Column holding each bar's left edge.
        x2var: Column holding each bar's right edge.
        xvar_middle: Column that receives each bar's (padded) centre; it is
            recomputed here and used to position the labels.
        yvar: Column holding the bar height.
        vvar: Column shown as "Value" in the tooltip.
        evar: Column driving the colour and the legend selection.
        xcat: Column holding the category label of each bar.
        w: Chart width in pixels.
        h: Chart height in pixels.
        color_scheme: Named colour scheme for ``evar``.
        title: Chart title, anchored at the start.

    Returns:
        The layered chart (bars plus category labels).
    """
    _n_bars = len(data[xcat].unique())
    _padding_width = (w / _n_bars) * 0.1

    # The edge/centre columns are addressed through the xvar / x2var /
    # xvar_middle arguments so that non-default column names work too.
    data_with_padding = data.copy()
    # Per-row shift that grows with the index value.
    # NOTE(review): assumes a 0..n-1 integer index in bar order — confirm.
    _index_shift = _padding_width / 2 * data_with_padding.index
    data_with_padding[xvar] = (data_with_padding[xvar] +
                               _padding_width / 4 + _index_shift)
    data_with_padding[x2var] = (data_with_padding[x2var] +
                                _padding_width / 4 + _index_shift)
    data_with_padding[xvar_middle] = (
        (data_with_padding[x2var] - data_with_padding[xvar]) /
        2) + data_with_padding[xvar]

    # The legend-bound selection must project over the colour field.
    selection = alt.selection_single(fields=[evar], bind="legend")

    base = alt.Chart(data_with_padding, width=w, height=h)

    bar = (
        base.mark_rect().encode(
            x=alt.X(
                f"{xvar}:Q",
                axis=alt.Axis(
                    titleY=(-0.5 + 22),
                    labels=False,
                    title=xcat.capitalize(),
                    grid=False,
                    # one tick per bar, at the bar's centre
                    values=data_with_padding[xvar_middle].to_list(),
                ),
            ),
            x2=alt.X2(f"{x2var}:Q"),
            y=alt.Y(
                f"{yvar}:Q",
                axis=alt.Axis(
                    title=yvar.capitalize(),
                    titleAngle=0,
                    titleAlign="left",
                    titleY=-5,
                    titleX=0,
                    labelExpr="datum.value + ' ×'",
                ),
                scale=alt.Scale(domain=[0, 10]),
            ),
            color=alt.Color(
                f"{evar}:O",
                title="Magnitude Multiplier",
                legend=alt.Legend(labelExpr="'× ' + format(datum.value, ',')"),
                scale=alt.Scale(scheme=color_scheme),
            ),
            tooltip=[
                alt.Tooltip(f"{xcat}:N", title=xcat.capitalize()),
                alt.Tooltip(f"{vvar}:N", title="Value"),
                alt.Tooltip(f"{yvar}:Q", title=yvar.capitalize()),
                alt.Tooltip(f"{evar}:O",
                            format=",",
                            title="Magnitude Multiplier"),
            ],
            opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
        ).add_selection(selection))

    # Altair/Vega-Lite:
    # Default `labelFontSize` = 10
    # Default `tickSize` = 5
    # Default `labelPadding` = 2
    # Default `translate` = 0.5
    # The label y position below is the chart height plus those defaults,
    # which places the text where the hidden axis labels would be.
    text = base.mark_text(align="center", baseline="middle",
                          fontSize=10).encode(
                              x=alt.X(f"{xvar_middle}:Q"),
                              y=alt.value(h + (10 / 2) + 5 + 2 + 0.5),
                              text=alt.Text(f"{xcat}:N"),
                          )

    return alt.layer(bar, text, title=alt.TitleParams(title, anchor="start"))
Esempio n. 28
0
from typing import Optional

import altair as alt

idle_color = "lightgray"

# Interval brush along the x axis.
time_selection_brush = alt.selection_interval(
    name="time_select",
    encodings=["x"],
)
# Hover brush snapping to the nearest x value; matches everything when empty.
horizon_hover_brush = alt.selection_single(
    encodings=["x"],
    on="mouseover",
    nearest=True,
    empty="all",
)
# Multi-selection over the "source" field.
source_selection_brush = alt.selection_multi(
    name="source_select",
    fields=["source"],
)

# Create selection brushes that choose the nearest point & selects based on x-value
nearest_x_hover_brush = alt.selection_single(
    name="nearest_x_hover",
    encodings=["x"],
    on="mouseover",
    nearest=True,
    empty="none",
)
nearest_x_select_brush = alt.selection_single(
    name="nearest_x_select",
    encodings=["x"],
    nearest=True,
    empty="all",
)


def horizon_selection_brush(init_belief_horizon=None) -> alt.MultiSelection:
    """Create a brush for selecting one or multiple horizons.

    :param init_belief_horizon: Optional initialisation value
Esempio n. 29
0
def omm_chart(
    data: pd.DataFrame,
    xvar: str = "category",
    e_yvar: str = "exponent",
    m_yvar: str = "mantissa",
    v_var: str = "original",
    w: int = 400,
    h: int = 400,
    m_color: str = "#F3852A",
    e_color: str = "#707070",
    title: str = "Order of Magnitude Markers",
) -> alt.LayerChart:
    """Draw an order-of-magnitude-markers chart.

    For every ``xvar`` category a wide bar of height ``e_yvar`` is drawn
    with a bar one fifth as wide, of height ``m_yvar``, layered on top.
    Both layers are coloured through a calculated ``to_color`` field
    ('Exponent' / 'Mantissa'), and clicking a legend entry dims the other
    part.

    Args:
        data: Input table.
        xvar: Column with the category of each bar.
        e_yvar: Column with the exponent value.
        m_yvar: Column with the mantissa value.
        v_var: Column shown as "Value" in the tooltips.
        w: Chart width in pixels.
        h: Chart height in pixels.
        m_color: Colour of the mantissa bars.
        e_color: Colour of the exponent bars.
        title: Chart title, anchored at the start.

    Returns:
        The layered chart.
    """
    # Default `bandPaddingInner` = 0.1
    # More info: https://altair-viz.github.io/user_guide/configuration.html#scale-configuration
    band = w / len(data[xvar].unique())
    exponent_width = band - (band * 0.1)
    mantissa_width = exponent_width / 5

    selection = alt.selection_single(fields=["to_color"], bind="legend")
    dim_unselected = alt.condition(selection, alt.value(1), alt.value(0.2))

    def hover_info(value_field):
        # Tooltip entries shared by both layers, differing only in the
        # quantitative field they report.
        return [
            alt.Tooltip(f"{xvar}:N", title=xvar.capitalize()),
            alt.Tooltip(f"{value_field}:Q", title=value_field.capitalize()),
            alt.Tooltip(f"{v_var}:N", title="Value"),
        ]

    base = alt.Chart(data)

    exponent_marks = base.mark_bar(color=e_color, size=exponent_width)
    e_bar = exponent_marks.encode(
        x=alt.X(f"{xvar}:N", axis=alt.Axis(title=xvar.capitalize())),
        y=alt.Y(
            f"{e_yvar}:Q",
            axis=alt.Axis(title=None),
            scale=alt.Scale(domain=[0, 10]),
        ),
        tooltip=hover_info(e_yvar),
        color=alt.Color(
            "to_color:N",
            legend=alt.Legend(title="Part"),
            scale=alt.Scale(domain=["Exponent", "Mantissa"],
                            range=[e_color, m_color]),
        ),
        opacity=dim_unselected,
    ).transform_calculate(to_color="'Exponent'")

    mantissa_marks = base.mark_bar(color=m_color, size=mantissa_width)
    m_bar = mantissa_marks.encode(
        x=alt.X(f"{xvar}:N"),
        y=alt.Y(f"{m_yvar}:Q"),
        tooltip=hover_info(m_yvar),
        color=alt.Color("to_color:N"),
        opacity=dim_unselected,
    ).transform_calculate(to_color="'Mantissa'")

    # Extra selection on the mantissa layer, added in connection with this
    # open issue: https://github.com/altair-viz/altair/issues/2009
    m_bar = m_bar.add_selection(alt.selection_single())

    stacked = alt.layer(e_bar,
                        m_bar,
                        title=alt.TitleParams(title, anchor="start"))
    return stacked.properties(width=w, height=h).add_selection(selection)
Esempio n. 30
0
=======================
This chart visualizes the age distribution of the US population over time.
It uses a slider widget that is bound to the year to visualize the age
distribution over time.
"""
# category: case studies
import altair as alt
from vega_datasets import data

source = data.population.url

# Fixed colour for each sex label produced by the calculate transform below.
pink_blue = alt.Scale(
    domain=('Male', 'Female'),
    range=["steelblue", "salmon"],
)

# Year slider in steps of ten, bound to a single-value selection.
slider = alt.binding_range(min=1900, max=2000, step=10)
select_year = alt.selection_single(
    bind=slider,
    fields=['year'],
    name="year",
)

# One narrow column per age group, one bar per sex.
alt.Chart(source).mark_bar().encode(
    x=alt.X('sex:N', axis=alt.Axis(title=None)),
    y=alt.Y('people:Q', scale=alt.Scale(domain=(0, 12000000))),
    color=alt.Color('sex:N', scale=pink_blue),
    column='age:O',
).transform_calculate(
    # replace the numeric sex code with a readable label
    sex=alt.expr.if_(alt.datum.sex == 1, "Male", "Female")
).transform_filter(
    select_year
).add_selection(
    select_year
).properties(
    width=20
)
Esempio n. 31
0
def viz_paired(df, input_city, reference_city='New York, New York'):
    """Draw a slope chart comparing category shares between two cities.

    For every (city, category_name) pair the mean ``total_business_count``
    is divided by the city's summed ``total_business_count``; the resulting
    share is plotted for ``reference_city`` and ``input_city`` as points
    joined by one line per category. Hovering a line emphasises it and a
    click clears the emphasis.

    Args:
        df: Table with ``city``, ``category_name``,
            ``total_business_count``, ``sample_rating`` and
            ``sample_review_count`` columns.
        input_city: City compared against the reference city.
        reference_city: City shown first on the x axis.

    Returns:
        The layered chart (lines under points), 600 pixels high.
    """
    # Data Transform
    # Only the numeric measures are aggregated. Selecting the string key
    # columns as well relies on pandas silently dropping non-numeric
    # columns, which pandas >= 2.0 no longer does.
    measures = ['total_business_count', 'sample_rating',
                'sample_review_count']
    cities = df.groupby(['city', 'category_name'])[measures].mean()
    city_totals = df.groupby(['city'])[['total_business_count']].sum()
    cities = cities.join(city_totals, on='city', rsuffix='_by_city')
    cities['%_of_total'] = (cities['total_business_count'] /
                            cities['total_business_count_by_city'])

    ## Input city
    selected_cities = [reference_city, input_city]

    cities_pair = cities.reset_index()
    cities_pair = cities_pair[cities_pair['city'].isin(selected_cities)]

    # Encodings shared by the point layer and the line layer.
    x_city = alt.X('city:N',
                   sort=alt.Sort(selected_cities),
                   axis=alt.Axis(labelAngle=0))
    y_share = alt.Y('%_of_total:Q',
                    axis=alt.Axis(format='.2p',
                                  title='Percent of Total Businesses'))
    by_category = alt.Color('category_name:N', legend=None)
    hover_info = [
        alt.Tooltip('category_name:N', title='Ethnic Category'),
        alt.Tooltip('%_of_total:Q', format='.2%',
                    title='Percentage of Total'),
        alt.Tooltip('total_business_count:Q', title='Count'),
    ]

    # Create layered visualization
    viz_cities_slope_circles = alt.Chart(cities_pair).mark_point(
        size=40, filled=True, opacity=1).encode(
            x=x_city,
            y=y_share,
            color=by_category,
            tooltip=hover_info,
        ).interactive(bind_x=False)

    # Hovering selects by y value; nothing is emphasised until then.
    selection_opacity = alt.selection_single(encodings=['y'],
                                             on='mouseover',
                                             clear="click",
                                             empty='none')

    condition_opacity = alt.condition(selection_opacity, alt.value(1),
                                      alt.value(0.2))
    condition_size = alt.condition(selection_opacity, alt.value(3),
                                   alt.value(2))

    viz_cities_slope_line = alt.Chart(cities_pair).mark_line().add_selection(
        selection_opacity).encode(
            x=x_city,
            y=y_share,
            color=by_category,
            opacity=condition_opacity,
            size=condition_size,
            tooltip=hover_info,
        ).interactive(bind_x=False)

    viz_cities_slope = (viz_cities_slope_line +
                        viz_cities_slope_circles).properties(height=600)

    return viz_cities_slope