Ejemplo n.º 1
0
def _wb_ratio_chart(source, variables, demographic_labels, title):
    """Build one normalized stacked bar chart of white/black ratios per year.

    Only the rows of ``source`` whose ``variable`` is in ``variables`` are
    plotted. ``demographic_labels`` maps each demographic name to its hex
    color and supplies the color scale's domain and range.
    """
    return alt.Chart(
        source[source['variable'].isin(variables)], height=150, width=500
    ).mark_bar().encode(
        x=alt.X("year:O", axis=Axis(title='Year')),
        y=alt.Y("value:Q", stack="normalize", axis=Axis(title='Ratio')),
        color=alt.Color('demographic:N',
                        legend=None,
                        scale=alt.Scale(
                            domain=list(demographic_labels.keys()),
                            range=list(demographic_labels.values())))
    ).properties(title=title)


def multiline():
    """Return the white/black ratio charts for the current county as JSON.

    The county is read from the database using the state and county stored in
    the session. The general-population chart is always included; the jail
    and prison charts are added only when the session flags
    ``jail_data_exists`` / ``prison_data_exists`` are truthy. When more than
    one chart is present they are stacked vertically in the order
    population, jail, prison.

    Returns
    -------
    str
        The chart specification produced by ``chart.to_json()``.
    """
    county_data = read_county_from_db(session.get('current_state'),
                                      session.get('current_county'))

    source = helper_functions.process_data(county_data)

    # Create a column for the label
    source['value_label'] = source['value'].apply(
        helper_functions.to_percentage)

    demographics = [
        'Total white population (15-64)', 'Total black population (15-64)',
        'White jail population', 'Black jail population',
        'White prison population', 'Black prison population'
    ]

    # Define color pairs matched to above demographics
    hex_colors = [
        '#cccec1', '#272727', '#cccec1', '#272727', '#cccec1', '#272727'
    ]

    # Combine demographic and colors into a dictionary
    demographic_labels = dict(zip(demographics, hex_colors))

    # Create pairs of variables to be used in the stacked charts
    wb_general = ['perc_white_total_pop', 'perc_black_total_pop']
    wb_jail = ['perc_white_jail_pop', 'perc_black_jail_pop']
    wb_prison = ['perc_white_prison_pop', 'perc_black_prison_pop']

    # The general population chart is always shown.
    panels = [
        _wb_ratio_chart(
            source, wb_general, demographic_labels,
            'Ratio of white/black residents in total county population (15-64)')
    ]

    # Optional panels, appended in display order (jail before prison).
    if session.get('jail_data_exists'):
        panels.append(
            _wb_ratio_chart(
                source, wb_jail, demographic_labels,
                'Ratio of white/black  inmates in jail population'))

    if session.get('prison_data_exists'):
        panels.append(
            _wb_ratio_chart(
                source, wb_prison, demographic_labels,
                'Ratio of white/black  inmates in prison population'))

    # A lone chart is returned as-is rather than wrapped in a concat chart.
    chart = panels[0] if len(panels) == 1 else alt.vconcat(*panels)
    return chart.to_json()
Ejemplo n.º 2
0
"""
World Projections
-----------------
This example shows a map of the countries of the world using four available
geographic projections. For more details on the projections available in
Altair, see https://vega.github.io/vega-lite/docs/projection.html
"""
# category: maps
import altair as alt
from vega_datasets import data

# Country shapes taken from the 'countries' feature of the world-110m dataset.
source = alt.topo_feature(data.world_110m.url, 'countries')

# Shared look and size; every projection below is derived from this chart.
base = alt.Chart(source)
base = base.mark_geoshape(fill='#666666', stroke='white')
base = base.properties(width=300, height=180)

projections = ['equirectangular', 'mercator', 'orthographic', 'gnomonic']

# One chart per projection, titled with the projection name.
charts = [
    base.project(proj).properties(title=proj)
    for proj in projections
]

# Lay the charts out as two rows: the first two on top, the rest below.
top_row = charts[:2]
bottom_row = charts[2:]
alt.vconcat(alt.hconcat(*top_row), alt.hconcat(*bottom_row))
Ejemplo n.º 3
0
def make_plots(tidy_ballots_df, benchmarks_df, colors):
    '''Build the vote-total bar chart and its linked cumulative-vote line chart.

    Parameters
    ----------
    tidy_ballots_df : pandas DataFrame
        DataFrame of the form produced by tidy_ballots(). It is the shared
        data of the concatenated chart and the input to
        `get_cum_ballots_by_date()` for the dashed `line75` series.
    benchmarks_df : pandas DataFrame
        DataFrame of the form produced by `calculate_benchmarks()`. Its
        `induction_pace` column is drawn as a rule on both panels.
    colors : dict
        Dictionary of the form `'player name': 'hex_color'`. It is passed
        unchanged as the `scale` of the player color encodings.

    Returns
    -------
    altair Chart object
        A vertical concatenation of a horizontal bar plot (top) and a line
        plot (bottom) filtered by the players clicked in the bar plot.
    '''
    player_colors = alt.Color('player:N', legend=None, scale=colors)

    # Multi-selection on `player`: clicked bars drive the bottom panel.
    click = alt.selection_multi(fields=['player'])

    # --- Top panel: total votes per player ---
    total_votes_x = alt.X('sum(votes):Q',
                          scale=alt.Scale(domain=(0, 412)),
                          axis=alt.Axis(title='total votes'))
    players_y = alt.Y('player:N',
                      sort=alt.EncodingSortField(field='votes',
                                                 op='sum',
                                                 order='descending'),
                      axis=alt.Axis(title=None))
    bars = alt.Chart().mark_bar().encode(
        x=total_votes_x,
        y=players_y,
        tooltip=alt.Tooltip('sum(votes):Q', title='votes'),
        # Unselected players are greyed out.
        color=alt.condition(click, player_colors, alt.value('lightgray')),
    )
    top = bars.properties(width=600, height=350).add_selection(click)

    # --- Bottom panel: cumulative votes over time for the selected players ---
    cumulative = alt.Chart().mark_line(point=True).encode(
        x=alt.X('yearmonthdate(date):T', axis=alt.Axis(title='date')),
        y=alt.Y('cumulative_votes:Q', axis=alt.Axis(title='cumulative votes')),
        color=alt.Color('player:N', legend=None, scale=colors),
        tooltip=alt.Tooltip('player:N', title='null'),
    )
    bottom = cumulative.properties(width=600, height=300)
    bottom = bottom.transform_filter(click).interactive()

    # Dashed gray reference line built from the `line75` column.
    line75_df = get_cum_ballots_by_date(tidy_ballots_df)
    line75 = alt.Chart(line75_df).mark_line(
        point=True, strokeDash=[4, 4]
    ).encode(
        x=alt.X('yearmonthdate(date):T'),
        y=alt.Y('line75:Q'),
        color=alt.value('gray'),
        tooltip=alt.Tooltip("line75:Q"),
    )

    # The same benchmark value is a vertical rule on the bar chart (x) and
    # a horizontal rule on the line chart (y).
    pace_tooltip_field = 'induction_pace:Q'
    current_pace_bars = alt.Chart(benchmarks_df).mark_rule(
        color='orangered'
    ).encode(
        x='induction_pace:Q',
        tooltip=alt.Tooltip(pace_tooltip_field),
    )
    current_pace_lines = alt.Chart(benchmarks_df).mark_rule(
        color='orangered'
    ).encode(
        y='induction_pace:Q',
        tooltip=alt.Tooltip(pace_tooltip_field),
    )

    upper_panel = top + current_pace_bars
    lower_panel = bottom + current_pace_lines + line75
    return alt.vconcat(upper_panel, lower_panel, data=tidy_ballots_df)
Ejemplo n.º 4
0

# 16 logarithmically spaced values from 10**0 down to 10**-4.
stds = np.logspace(0, -4, num=16)
# Combine the trainer with every std value and both boolean flags.
prod = product(trainer, stds, [True, False])  # type:ignore
# `prod` is rebound to whatever fit() returns.
prod = prod.fit(data, epoch_data=data[:])
prod
# -

# Look at the first three items yielded when iterating over `prod`.
it = iter(prod)
print(next(it))
print(next(it))
print(next(it))

# Run the training and return the results as a data frame.
df = prod.to_frame(columns=["std", "bn"])
df.tail()
# Visualize the results.
import altair as alt  # isort:skip


def plot(std, df):
    """Return a small accuracy-vs-epoch line chart for one ``std`` value.

    ``df`` supplies the ``epoch``, ``accuracy`` and ``bn`` fields; lines are
    colored by ``bn`` and the y axis is fixed to the range [0, 1]. ``std`` is
    only used in the chart title.
    """
    accuracy_axis = alt.Y("accuracy", scale=alt.Scale(domain=[0, 1]))
    lines = alt.Chart(df).mark_line().encode(
        x="epoch",
        y=accuracy_axis,
        color="bn",
    )
    return lines.properties(width=80, height=80, title=f"std={std:.05f}")


# One chart per "std" group, in reverse group order.
charts = [plot(*x) for x in df.groupby("std")][::-1]
# Four charts per row; the number of rows follows the number of charts
# instead of assuming there are exactly 16 of them.
alt.vconcat(*(alt.hconcat(*charts[k:k + 4])
              for k in range(0, len(charts), 4)))
Ejemplo n.º 5
0
import altair as alt
from vega_datasets import data

source = data.movies.url

# Single selection on the x encoding; it is attached to the bar chart below.
pts = alt.selection(type="single", encodings=['x'])

# Binned IMDB vs. Rotten Tomatoes heatmap, colored by record count.
rect = alt.Chart(source).mark_rect().encode(
    alt.X('IMDB_Rating:Q', bin=True),
    alt.Y('Rotten_Tomatoes_Rating:Q', bin=True),
    alt.Color(
        'count()',
        scale=alt.Scale(scheme='greenblue'),
        legend=alt.Legend(title='Total Records'),
    ),
)

# Grey points over the heatmap, sized by the count of the selected records.
circ = rect.mark_point().encode(
    alt.ColorValue('grey'),
    alt.Size('count()', legend=alt.Legend(title='Records in Selection')),
)
circ = circ.transform_filter(pts)

# Genre bar chart that owns the selection; the selected bar is highlighted.
bar = alt.Chart(source).mark_bar().encode(
    x='Major_Genre:N',
    y='count()',
    color=alt.condition(pts,
                        alt.ColorValue("steelblue"),
                        alt.ColorValue("grey")),
)
bar = bar.properties(selection=pts, width=550, height=200)

# Keep the color and size legends of the two panels separate.
alt.vconcat(rect + circ, bar).resolve_legend(
    color="independent",
    size="independent",
)
Ejemplo n.º 6
0
def facet_wrap(subplts, plots_per_row):
    """Arrange charts in a grid with at most ``plots_per_row`` charts per row.

    Parameters
    ----------
    subplts : sequence of altair charts
        Charts to lay out, in reading order.
    plots_per_row : int
        Maximum number of charts in each horizontal row.

    Returns
    -------
    altair chart
        The rows stacked vertically, configured with no axis grid, no view
        stroke, and horizontal bottom-axis labels.

    Notes
    -----
    When ``len(subplts)`` is not a multiple of ``plots_per_row`` the last row
    is shorter; the previous floor-division row count dropped those trailing
    charts.
    """
    rows = [
        alt.hconcat(*subplts[start:start + plots_per_row])
        for start in range(0, len(subplts), plots_per_row)
    ]
    return alt.vconcat(*rows).configure_axis(grid=False).configure_view(
        strokeOpacity=0
    ).configure_axisBottom(labelAngle=0)
#         brush
#     ).properties(width=500,height=500,title="Number of People Going to Bars and Restaurants")
# # st.write(behavioral)

# vCombine = alt.vconcat(emotionalBase,behavioralBase) #vertical concat two combined charts
# st.write(vCombine)

#---version 2 combine
#st.write(commWorryChart|selfWorryChart|barDataChart|restaurantDataChart) #horizontal concat

#---version 3 combine
#st.write(commWorryChart+selfWorryChart) #layer

#---version 4 combine #vertical concat all 4
# Stack all four charts vertically, then register both selections on the
# combined chart before handing it to Streamlit.
vconcatChart = alt.vconcat(
    commWorryChart,
    selfWorryChart,
    barDataChart,
    restaurantDataChart,
)
vconcatChart = vconcatChart.add_selection(selectedCounty, brush)
st.write(vconcatChart)

    
   #---Show raw data tables

# Each checkbox reveals the raw data frame behind one of the charts.
# Bar visits
if st.checkbox("Show me the raw data for bar visits"):
    st.write(barDatadf)
# Restaurant visits
if st.checkbox("Show me the raw data for restaurant visits"):
    st.write(restaurantDatadf)
# Community worry (label reworded: the original sentence had its words
# transposed)
if st.checkbox("Show me the raw data for those who worry about illness in community"):
    st.write(commWorrydf)
# Rotten Tomatoes vs. IMDB ratings, both axes fixed to [0, 1]; points outside
# the `brush` selection are faded.
ratings = alt.Chart(sh_movies_df).mark_circle().encode(
    alt.X(
        'Rotten_Tomatoes_Rating:Q',
        title='Rotten Tomatoes Rating (Normalized)',
        scale=alt.Scale(domain=[0, 1]),
    ),
    alt.Y(
        'IMDB_Rating:Q',
        title='IMDB Rating (Normalized)',
        scale=alt.Scale(domain=[0, 1]),
    ),
    alt.Tooltip(
        ['Title:N', 'Rotten_Tomatoes_Rating:Q', 'IMDB_Rating:Q', 'Year:O']
    ),
    opacity=alt.condition(brush, alt.value(0.75), alt.value(0.05)),
)
ratings = ratings.properties(
    width=650,
    height=400,
    title='Movie Ratings (Normalized) in IMDB and Rotten Tomatoes',
)

# Production budget vs. worldwide gross on base-10 log scales, with the same
# brush-driven fading.
gross = alt.Chart(sh_movies_df).mark_circle().encode(
    alt.X(
        'Production_Budget:Q',
        scale=alt.Scale(type='log', base=10, zero=False),
        axis=alt.Axis(format="~s"),
        title='Production Budget ($) (log_10 scale)',
    ),
    alt.Y(
        'Worldwide_Gross:Q',
        scale=alt.Scale(type='log', base=10, zero=False),
        axis=alt.Axis(format="~s"),
        title='Worldwide Gross ($) (log_10 scale)',
    ),
    alt.Tooltip(
        ['Title:N', 'Production_Budget:Q', 'Worldwide_Gross:Q', 'Year:O']
    ),
    opacity=alt.condition(brush, alt.value(0.75), alt.value(0.05)),
)
gross = gross.properties(
    width=650,
    height=400,
    title='World Gross and Production Budget',
)

# Stack the three panels vertically with 10px between them.
alt.vconcat(years, ratings, gross).properties(spacing=10)
        width=250,
        height=250,
    ).add_selection(brush).add_selection(hover)

# Name labels for the PCA panel. They are fully transparent unless the point
# is in the `hover` selection, and are filtered by `selection` and `hover`.
text = alt.Chart(df).mark_text(dy=-5, align='right').encode(
    alt.Text('name', type='nominal'),
    x=alt.X(
        "Principal component 1",
        axis=alt.Axis(ticks=False, labels=False, grid=False),
    ),
    y=alt.Y(
        "Principal component 2",
        axis=alt.Axis(ticks=False, labels=False, grid=False),
    ),
    tooltip=[
        'accession', 'name', 'country', 'admixed group', 'continent',
        'country code', 'CS number', 'latitude', 'longitude', 'collector',
        'site', 'seq by'
    ],
    opacity=alt.condition(~hover, alt.value(0), alt.value(1)),
)
text = text.transform_filter(selection).transform_filter(hover)

# Layer the labels onto the PCA chart (in-place add kept as in the original).
chart_PCA += text

# Country legend drawn as a rect chart; it owns `selection` and only lists
# countries inside the current brush.
legend = alt.Chart().mark_rect().encode(
    y=alt.Y('country:N', axis=alt.Axis(orient='left', title="Countries")),
    color=color,
)
legend = legend.add_selection(selection).transform_filter(brush)

# Top row: the three embeddings. Bottom row: legend next to `chart`.
hcharts = alt.hconcat(chart_GTM, chart_tSNE, chart_PCA, data=df)
chart = alt.hconcat(legend, chart, data=df)
vcharts = alt.vconcat(hcharts, chart, data=df).properties()

# Output path is the second command-line argument plus an .html suffix.
vcharts.save(argv[2] + '.html')
Ejemplo n.º 10
0
"""
Histogram with Responsive Bins
------------------------------
This shows an example of a histogram with bins that are responsive to a
selection domain. Click and drag on the bottom panel to see the bins
change on the top panel.
"""
# category: histograms
import altair as alt
from vega_datasets import data

source = data.flights_5k()

# Horizontal interval selection, drawn on the bottom panel.
brush = alt.selection_interval(encodings=['x'])

# Shared base: bar counts over a derived fractional hour-of-day field.
base = alt.Chart(source).transform_calculate(
    time="hours(datum.date) + minutes(datum.date) / 60"
)
base = base.mark_bar().encode(y='count():Q')
base = base.properties(width=600, height=100)

alt.vconcat(
    # Top panel: both the bin extent and the x domain follow the brush.
    base.encode(
        alt.X(
            'time:Q',
            bin=alt.Bin(maxbins=30, extent=brush),
            scale=alt.Scale(domain=brush),
        )
    ),
    # Bottom panel: fixed bins; this is where the brush is drawn.
    base.encode(
        alt.X('time:Q', bin=alt.Bin(maxbins=30))
    ).add_selection(brush),
)
Ejemplo n.º 11
0
# category: interactive

import altair as alt
from vega_datasets import data

# Fixed color for each weather type, shared by both panels.
scale = alt.Scale(
    domain=['sun', 'fog', 'drizzle', 'rain', 'snow'],
    range=['#e7ba52', '#c7c7c7', '#aec7e8', '#1f77b4', '#9467bd'],
)

# Horizontal interval selection drawn on the scatter plot.
brush = alt.selection_interval(encodings=['x'])

# Scatter of daily maximum temperature by date; points outside the brush are
# light gray and point size encodes precipitation.
points = alt.Chart().mark_point().encode(
    alt.X('date:T', timeUnit='monthdate', axis=alt.Axis(title='Date')),
    alt.Y('temp_max:Q', axis=alt.Axis(title='Maximum Daily Temperature (C)')),
    color=alt.condition(
        brush,
        'weather:N',
        alt.value('lightgray'),
        scale=scale,
    ),
    size=alt.Size('precipitation:Q', scale=alt.Scale(range=[5, 200])),
)
points = points.properties(width=600, height=400, selection=brush)

# Count of records per weather type, restricted to the brushed range.
bars = alt.Chart().mark_bar().encode(
    x='count(*):Q',
    y='weather:N',
    color=alt.Color('weather:N', scale=scale),
)
bars = bars.transform_filter(brush.ref()).properties(width=600)

# Both panels read the same dataset, supplied once at the top level.
chart = alt.vconcat(points, bars, data=data.seattle_weather.url)
Ejemplo n.º 12
0
    def one_variable(self, df, variable, axis_title, color_scheme):
        """Build a heatmap of one variable with marginal bar charts.

        Rows of ``df`` are filtered to ``variable`` with positive ``value``.
        The heatmap plots day of ``datum_date`` (x, titled ``axis_title``)
        against day of ``bulletin_date`` (y), colored by the summed value on
        a base-2 log scale using ``color_scheme``. A bar chart above sums the
        value per datum day and one to the right sums it per bulletin day.

        Returns the vertical concatenation of the top bar chart and the
        heatmap/right-bar row.
        """
        heatmap_size = 160
        histogram_size = 40

        def summed_value_tooltip():
            # A fresh tooltip channel for the summed value, one per chart.
            return alt.Tooltip(field='value',
                               type='quantitative',
                               aggregate='sum')

        # Shared filters and color encoding for all three panels.
        color = alt.Color('sum(value):Q',
                          title=None,
                          scale=alt.Scale(type='log',
                                          base=2,
                                          scheme=color_scheme))
        base = alt.Chart(df)
        base = base.transform_filter(alt.datum.variable == variable)
        base = base.transform_filter(alt.datum.value > 0)
        base = base.encode(color=color)

        heatmap = base.mark_rect().encode(
            x=alt.X('day(datum_date):O', title=axis_title),
            y=alt.Y('day(bulletin_date):O', title='Día boletín'),
            tooltip=[
                'variable',
                'day(bulletin_date):O',
                'day(datum_date):O',
                summed_value_tooltip(),
            ],
        )

        # Marginal totals per bulletin day (axes hidden).
        right = base.mark_bar().encode(
            x=alt.X('sum(value):Q', title=None, axis=None),
            y=alt.Y('day(bulletin_date):O', title=None, axis=None),
            tooltip=[
                'variable',
                'day(bulletin_date):O',
                summed_value_tooltip(),
            ],
        )

        # Marginal totals per datum day (axes hidden).
        top = base.mark_bar().encode(
            x=alt.X('day(datum_date):O', title=None, axis=None),
            y=alt.Y('sum(value):Q', title=None, axis=None),
            tooltip=[
                'variable',
                'day(datum_date):O',
                summed_value_tooltip(),
            ],
        )

        # This title should logically belong to the whole chart,
        # but assigning it to the concat chart anchors it wrong.
        # See: https://altair-viz.github.io/user_guide/generated/core/altair.TitleParams.html
        title = alt.TitleParams(text=variable,
                                anchor='middle',
                                align='center',
                                fontSize=14,
                                fontWeight='normal')
        top_panel = top.properties(width=heatmap_size,
                                   height=histogram_size,
                                   title=title)
        bottom_row = alt.hconcat(
            heatmap.properties(width=heatmap_size, height=heatmap_size),
            right.properties(width=histogram_size, height=heatmap_size),
            spacing=3,
        )
        return alt.vconcat(top_panel, bottom_row, spacing=3)
Ejemplo n.º 13
0
def _violation_charts(source, color, brush, click, size, records_max, width,
                      title):
    """Build the stacked "Records" and "Injuries" charts for ``int_vega``.

    Parameters
    ----------
    source : DataFrame
        Data plotted by both panels.
    color : alt.Color
        Color encoding used for points inside the brush.
    brush, click : altair selections
        ``brush`` is drawn on the top panel and filters the bottom one;
        ``click`` is drawn on the bottom panel and filters the top one.
    size : str or altair value
        Size encoding applied to both panels.
    records_max : int
        Upper bound of the top panel's y domain.
    width : int
        Width of both panels.
    title : str
        Title of the concatenated chart.
    """
    tooltip_fields = ["YEAR:O", "MONTH:O", "sum(RECORDS):Q", "sum(INJURIES):Q"]

    def month_x():
        # Month axis shared by both panels (a fresh channel per chart).
        return alt.X('MONTH:O', title='Month',
                     axis=alt.Axis(
                         offset=10,
                         labelAngle=0,
                         ticks=True,
                         minExtent=30,
                         grid=False
                     ))

    def value_axis():
        # Y-axis styling shared by both panels.
        return alt.Axis(
            offset=10,
            ticks=True,
            minExtent=30,
            grid=False,
        )

    # Top panel: summed records per month; owns the brush, filtered by click.
    points = alt.Chart(source).mark_point().encode(
        alt.Tooltip(tooltip_fields),
        month_x(),
        alt.Y('sum(RECORDS):Q',
              scale=alt.Scale(domain=[0, records_max]),
              axis=value_axis(), title="Records"),
        color=alt.condition(brush, color, alt.value('darkgray')),
        size=size
    ).properties(
        width=width,
        height=400,
    ).add_selection(
        brush
    ).transform_filter(
        click
    )

    # Bottom panel: summed injuries per month; owns click, filtered by brush.
    lines = alt.Chart(source).mark_circle().encode(
        alt.Tooltip(tooltip_fields),
        month_x(),
        alt.Y("sum(INJURIES):Q",
              title="Injuries",
              axis=value_axis()),
        color=alt.condition(brush, color, alt.value('red')),
        size=size
    ).transform_filter(
        brush
    ).properties(
        width=width,
    ).add_selection(
        click
    )

    return alt.vconcat(points, lines, title=title)


def int_vega():
    """Return the interactive records-vs-injuries chart for years after 2015.

    The data comes from ``doc(0)``; its ``YEAR`` column is cast to int in
    place before filtering. A Streamlit button switches between two variants:

    * button pressed: mark size encodes ``DAMAGE``, records domain
      [0, 6000], width 650, title "Cases VS Injured";
    * otherwise: constant mark size 80, records domain [0, 12000],
      width 750, title "Cases VS Injuries".

    Previously only the ``points`` chart was inside the ``else`` branch, so
    the default ``lines`` chart and title always replaced the damage-level
    ones even when the button was pressed.

    Returns
    -------
    altair chart
        The vertically concatenated pair of charts.
    """
    rl_vio = doc(0)
    rl_vio["YEAR"] = rl_vio["YEAR"].astype("int")
    source = rl_vio[rl_vio["YEAR"] > 2015]
    scale = alt.Scale(domain=[2016, 2017, 2018, 2019, 2020, 2021],
                      range=["#e7ba52", "#c7c7c7", "#aec7e8", "#659CCA", "#1f77b4", "#9467bd"])
    color = alt.Color('YEAR:O', scale=scale)
    click = alt.selection_multi(encodings=['color'])
    brush = alt.selection_interval()

    st.text("How about damage level? Click 'Show Damage Level'!")
    if st.button("Show Damage Level"):
        return _violation_charts(source, color, brush, click,
                                 size="DAMAGE:N",
                                 records_max=6000,
                                 width=650,
                                 title="Cases VS Injured")

    return _violation_charts(source, color, brush, click,
                             size=alt.value(80),
                             records_max=12000,
                             width=750,
                             title="Cases VS Injuries")
Ejemplo n.º 14
0
"""
Interval Selection Example
==========================

This is an example of creating a stacked chart for which the domain of the
top chart can be selected by interacting with the bottom chart.
"""
# category: area charts

import altair as alt
from vega_datasets import data

source = data.sp500.url

# Horizontal interval selection, drawn on the lower chart.
brush = alt.selection(type='interval', encodings=['x'])

# Upper area chart: its x domain follows the brushed interval.
upper = alt.Chart().mark_area().encode(
    alt.X('date:T', scale={'domain': brush.ref()}),
    y='price:Q',
)
upper = upper.properties(width=600, height=200)

# Lower chart: a shorter copy of the upper one that carries the brush.
lower = upper.properties(height=60)
lower = lower.add_selection(brush)

# Both charts share the dataset supplied at the top level.
alt.vconcat(upper, lower, data=source)
Ejemplo n.º 15
0
def bv_areaPlot(data, engine, xlabel, ylabel1, ylabel2):
    """Plot two series against the DataFrame index as area charts.

    Parameters
    ----------
    data : pandas DataFrame
        Must contain the columns ``'plotY'`` and ``'plotX1'``. A copy is made
        and those columns are renamed to ``ylabel1`` and ``ylabel2``; the
        caller's frame is not modified.
    engine : str
        ``'Static'`` for a Matplotlib figure, ``'Interactive'`` for an Altair
        chart.
    xlabel : str
        X-axis label; only used by the static figure.
    ylabel1, ylabel2 : str
        Names and axis labels of the first and second series.

    Returns
    -------
    ``pn.pane.Matplotlib`` for ``'Static'``, an Altair chart for
    ``'Interactive'``, and ``None`` for any other ``engine`` value.

    Notes
    -----
    The interactive chart encodes the index as a temporal (``:T``) field
    named after ``data.index.name`` - NOTE(review): this presumably requires
    a named datetime index; confirm against callers.
    """
    data = data.copy()
    data.rename(columns={'plotY':ylabel1, 'plotX1':ylabel2}, inplace=True)

    if engine == 'Static':
        fig, axes = plt.subplots(figsize=(9,6))

        _index = data.index.tolist()

        # Two filled areas on twin y axes that share the index as x.
        axes.fill_between(_index, data[ylabel1].values)
        axes.legend([ylabel1], loc=0)
        axes_r = axes.twinx()
        axes_r.fill_between(_index, data[ylabel2].values, color='orange')
        axes_r.legend([ylabel2], loc=0)

        axes.set_xlabel(xlabel, fontsize = 15)
        axes.set_ylabel(ylabel1, fontsize = 15)
        axes_r.set_ylabel(ylabel2, fontsize = 15)
        # Pass the on/off flag positionally: its keyword was renamed from
        # `b` to `visible` in Matplotlib 3.5, so `b=True` fails on newer
        # releases while the positional form works on all of them.
        axes.grid(True, which='major', color='k', linewidth=0.25)

        # Close the figure in pyplot; the pane below keeps its own reference.
        plt.close()
        return pn.pane.Matplotlib(fig, tight=True)

    elif engine == 'Interactive':
        data=data.dropna()
        # Selection Brush
        brush = alt.selection(type='interval', encodings=['x'], name='isel')
        # Base Plot
        base = alt.Chart(data.reset_index())
        base = base.encode(x = alt.X('{0}:T'.format(data.index.name), title=''),
                        tooltip = ylabel1)
        base = base.properties(width = 580, height = 275)
        # Upper Plot
        upper1 = base.mark_area(line={'color':'#3d84ba'},
                                    color=alt.Gradient(
                                        gradient='linear',
                                        stops=[alt.GradientStop(color='white', offset=0),
                                            alt.GradientStop(color='#3d84ba', offset=1)],
                                        x1=1, x2=1,
                                        y1=1, y2=0
                                    ))
        # The x domain of both upper areas follows the brushed interval.
        upper1 = upper1.encode(x = alt.X('{0}:T'.format(data.index.name), scale=alt.Scale(domain=brush), title=''),
                               y = alt.Y('{0}:Q'.format(ylabel1), scale=alt.Scale(zero=False), axis=alt.Axis(format='~s')))
        upper2 = base.mark_area(line={'color':'#f57542'},
                                    color=alt.Gradient(
                                        gradient='linear',
                                        stops=[alt.GradientStop(color='white', offset=0),
                                            alt.GradientStop(color='#f57542', offset=1)],
                                        x1=1, x2=1,
                                        y1=1, y2=0
                                    ))
        upper2 = upper2.encode(x = alt.X('{0}:T'.format(data.index.name), scale=alt.Scale(domain=brush), title=''),
                            y = alt.Y('{0}:Q'.format(ylabel2), scale=alt.Scale(zero=False), axis=alt.Axis(format='~s')))
        # Lower Plot
        lower = base.mark_area(line={'color':'darkgray'},
            color=alt.Gradient(
                gradient='linear',
                stops=[alt.GradientStop(color='white', offset=0),
                    alt.GradientStop(color='darkgray', offset=1)],
                x1=1, x2=1,
                y1=1, y2=0
            ))

        # Thin strip of the first series on which the brush is drawn.
        lower = lower.encode(y=alt.Y('{0}:Q'.format(ylabel1), title='', axis=None))
        lower = lower.properties(height=20)
        lower = lower.add_selection(brush)
        lower.encoding.x.title = 'Interval Selection'

        # Base Statistics1
        # Mean / standard deviation / variance of the first series over the
        # brushed rows, rendered as a text label on the upper chart.
        base_stat1 = upper1.transform_filter(brush)
        base_stat1 = base_stat1.transform_aggregate(Mean1='mean({0})'.format(ylabel1),
                                                    StdDev1='stdev({0})'.format(ylabel1),
                                                    Var1='variance({0})'.format(ylabel1))
        label_stat1 = base_stat1.transform_calculate(stat_label1="'Mean = ' + format(datum.Mean1, '~s') + \
                                                    '; Standard Deviation = ' + format(datum.StdDev1, '~s') +\
                                                    '; Variance = ' + format(datum.Var1, '~s')")
        label_stat1 = label_stat1.mark_text(align='left', baseline='bottom', color='#3d84ba')
        label_stat1 = label_stat1.encode(x=alt.value(0.0), y=alt.value(12.0), text=alt.Text('stat_label1:N'))
        # Base Statistics2
        # Same statistics for the second series; the calculated field reuses
        # the name `stat_label1`, which the text encoding below reads.
        base_stat2 = upper2.transform_filter(brush)
        base_stat2 = base_stat2.transform_aggregate(Mean2='mean({0})'.format(ylabel2),
                                                    StdDev2='stdev({0})'.format(ylabel2),
                                                    Var2='variance({0})'.format(ylabel2))
        label_stat2 = base_stat2.transform_calculate(stat_label1="'Mean = ' + format(datum.Mean2, '~s') + \
                                                    '; Standard Deviation = ' + format(datum.StdDev2, '~s') +\
                                                    '; Variance = ' + format(datum.Var2, '~s')")
        label_stat2 = label_stat2.mark_text(align='left', baseline='bottom', color='#f57542')
        label_stat2 = label_stat2.encode(x=alt.value(0.0), y=alt.value(25.0), text=alt.Text('stat_label1:N'))

        upper1 = upper1 + label_stat1
        upper2 = upper2 + label_stat2
        # Each series keeps its own y scale in the layered upper chart.
        upper = (upper1+upper2).resolve_scale(y='independent')

        ## Y LABEL 1
        # Values
        # The *_uu* values bound the bar scales; the plain values are the
        # whole-data statistics drawn as red rules.
        _ymean_uu1 = data[ylabel1].max()
        _ymean1 = data[ylabel1].mean()
        # Inspired from :- https://stats.stackexchange.com/a/350278
        _maxvar_in_slice1 = ((data[ylabel1].max()-data[ylabel1].min())/2)**2
        _ystd_uu1 = np.sqrt(_maxvar_in_slice1)
        _ystd1 = data[ylabel1].std()
        _yvar_uu1 = _maxvar_in_slice1
        _yvar1 = data[ylabel1].var()
        # Stat Bar Base
        stats_barbase1 = base_stat1.mark_bar(color='#3d84ba')
        stats_barbase1 = stats_barbase1.properties(width = 188, height = 20)
        # Mean Bar
        mean_bar1 = stats_barbase1.encode(x=alt.X('Mean1:Q', title='',
                                                scale=alt.Scale(domain=[-_ymean_uu1,_ymean_uu1]),
                                                axis=alt.Axis(format='~s')), y=alt.value(10.5))
        totmean_line1 = alt.Chart(pd.DataFrame({'x': [_ymean1]}))
        totmean_line1 = totmean_line1.mark_rule(color='red', size=5)
        totmean_line1 = totmean_line1.encode(x='x')
        mean_bar1 += totmean_line1
        # Standard Deviation Bar
        std_bar1 = stats_barbase1.encode(x=alt.X('StdDev1:Q', title='',
                                                scale=alt.Scale(domain=[-_ystd_uu1,_ystd_uu1]),
                                                axis=alt.Axis(format='~s')), y=alt.value(10.5))
        totstd_line1 = alt.Chart(pd.DataFrame({'x': [_ystd1]}))
        totstd_line1 = totstd_line1.mark_rule(color='red', size=5)
        totstd_line1 = totstd_line1.encode(x='x')
        std_bar1 += totstd_line1
        # Variance Bar
        var_bar1 = stats_barbase1.encode(x=alt.X('Var1:Q', title='',
                                                scale=alt.Scale(domain=[-_yvar_uu1,_yvar_uu1]),
                                                axis=alt.Axis(format='~s')), y=alt.value(10.5))
        totvar_line1 = alt.Chart(pd.DataFrame({'x': [_yvar1]}))
        totvar_line1 = totvar_line1.mark_rule(color='red', size=5)
        totvar_line1 = totvar_line1.encode(x='x')
        var_bar1 += totvar_line1

        ## Y LABEL 2
        # Values
        _ymean_uu2 = data[ylabel2].max()
        _ymean2 = data[ylabel2].mean()
        # Inspired from :- https://stats.stackexchange.com/a/350278
        _maxvar_in_slice2 = ((data[ylabel2].max()-data[ylabel2].min())/2)**2
        _ystd_uu2 = np.sqrt(_maxvar_in_slice2)
        _ystd2 = data[ylabel2].std()
        _yvar_uu2 = _maxvar_in_slice2
        _yvar2 = data[ylabel2].var()
        # Stat Bar Base
        stats_barbase2 = base_stat2.mark_bar(color='#f57542')
        stats_barbase2 = stats_barbase2.properties(width = 188, height = 20)
        # Mean Bar
        mean_bar2 = stats_barbase2.encode(x=alt.X('Mean2:Q', title='Mean',
                                                scale=alt.Scale(domain=[-_ymean_uu2,_ymean_uu2]),
                                                axis=alt.Axis(format='~s')), y=alt.value(10.5))
        totmean_line2 = alt.Chart(pd.DataFrame({'x': [_ymean2]}))
        totmean_line2 = totmean_line2.mark_rule(color='red', size=5)
        totmean_line2 = totmean_line2.encode(x='x')
        mean_bar2 += totmean_line2
        # Standard Deviation Bar
        std_bar2 = stats_barbase2.encode(x=alt.X('StdDev2:Q', title='Std',
                                                scale=alt.Scale(domain=[-_ystd_uu2,_ystd_uu2]),
                                                axis=alt.Axis(format='~s')), y=alt.value(10.5))
        totstd_line2 = alt.Chart(pd.DataFrame({'x': [_ystd2]}))
        totstd_line2 = totstd_line2.mark_rule(color='red', size=5)
        totstd_line2 = totstd_line2.encode(x='x')
        std_bar2 += totstd_line2
        # Variance Bar
        var_bar2 = stats_barbase2.encode(x=alt.X('Var2:Q', title='Var',
                                                scale=alt.Scale(domain=[-_yvar_uu2,_yvar_uu2]),
                                                axis=alt.Axis(format='~s')), y=alt.value(10.5))
        totvar_line2 = alt.Chart(pd.DataFrame({'x': [_yvar2]}))
        totvar_line2 = totvar_line2.mark_rule(color='red', size=5)
        totvar_line2 = totvar_line2.encode(x='x')
        var_bar2 += totvar_line2

        # Concatenated
        # Rows: upper chart, series-1 stat bars, series-2 stat bars, brush strip.
        p = alt.vconcat(upper, mean_bar1|std_bar1|var_bar1, mean_bar2|std_bar2|var_bar2, lower).configure_concat(spacing=2)
        # Color the left/right axes to match the first/second series.
        p = p.configure_axisLeft(labelColor = '#3d84ba', titleColor = '#3d84ba')
        p = p.configure_axisRight(labelColor = '#f57542', titleColor = '#f57542')

        return p
Ejemplo n.º 16
0
def main():
    """Render the Streamlit COVID-19 dashboard.

    Downloads the Our World in Data COVID CSV, aggregates it per location and
    week, derives a latest-week snapshot and a per-country dimension table,
    and writes three sections to the Streamlit page:

    1. weekly new cases layered over the stringency index for a country
       chosen in a select box, plus a cases-vs-stringency scatter plot;
    2. a grid of scatter plots built by ``plot_scatter_with_regression``;
    3. a bar chart of tests per population for the 60 highest-testing
       locations, coloured by GDP per capita.

    Takes no arguments and returns nothing; all output goes through ``st``.
    """

    # Extract data
    data = pd.read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")
    data["date"] = pd.to_datetime(data.date)
    # Snap every date back to the Monday of its week (weekday is 0 on
    # Mondays). Computed column-wise rather than with a per-row lambda.
    data["week"] = data["date"] - pd.to_timedelta(
        data["date"].dt.weekday, unit="D"
    )
    # Store the week as a plain date string so it can be used as a group key.
    data["week"] = data["week"].dt.date.apply(str)

    # Aggregate data
    # Weekly aggregation; rows with any missing aggregate are dropped.
    weekly_data = (
        data.groupby(["location", "week"])
        .agg(
            {
                "new_cases": "sum",
                "stringency_index": "max",
                "total_deaths": "max",
                "population": "max",
                "total_tests": "max",
            }
        )
        .reset_index()
        .dropna()
    )
    # Latest snapshot: keep, per location, only the row of its last week.
    # The merge joins on the shared columns ("location" and "week").
    max_week = weekly_data.groupby("location").agg({"week": "max"}).reset_index()
    latest_data = weekly_data.merge(max_week)
    latest_data["death_per_population_pct"] = (
        latest_data["total_deaths"] * 100 / latest_data["population"]
    )
    latest_data["tests_per_population"] = (
        latest_data["total_tests"] / latest_data["population"]
    )

    list_of_countries = latest_data["location"]

    # Country dimension table
    country_dimension = (
        data.groupby(["location"])
        .agg(
            {
                "median_age": "max",
                "aged_65_older": "max",
                "aged_70_older": "max",
                "gdp_per_capita": "max",
                "cardiovasc_death_rate": "max",
                "diabetes_prevalence": "max",
                "female_smokers": "max",
                "male_smokers": "max",
                "handwashing_facilities": "max",
                "life_expectancy": "max",
                "human_development_index": "max",
            }
        )
        .reset_index()
    )

    # Left join so locations without a latest snapshot are still kept.
    country_dimension = country_dimension.merge(
        latest_data[["location", "death_per_population_pct", "tests_per_population"]],
        how="left",
    )

    # Plot dashboards
    st.title("Covid visualizer")
    # Adjacent string literals are concatenated verbatim, so each piece must
    # carry its own trailing space.
    st.markdown(
        "THis is an example eda of covid data solely using python scripts. "
        "There are several interesting attributes to consider."
    )

    st.header("Stringency Index and Cases")
    st.subheader(
        "This is a composite measure based on nine response indicators including school closures, workplace closures, "
        "and travel bans, rescaled to a value from 0 to 100, 100 being strictest"
    )

    country_filter = st.selectbox("Select country", list_of_countries, index=0)
    st.markdown("There is some more explanation which I like to give")

    # All three charts of this section show the same country, so filter once.
    country_weekly = weekly_data[weekly_data.location == country_filter]

    weekly_new_cases = (
        alt.Chart(country_weekly)
        .mark_line()
        .encode(x="week:T", y="sum(new_cases)")
        .properties(width=1000, height=500)
    )
    weekly_stringency = (
        alt.Chart(country_weekly)
        .mark_bar(color="#CFD8DC", opacity=0.5)
        .encode(x="week:T", y="mean(stringency_index)")
        .properties(width=1000, height=500)
    )

    cases_stringency_corr = (
        alt.Chart(country_weekly)
        .mark_point()
        .encode(y="new_cases", x="stringency_index")
        .properties(width=300, height=300)
    )

    # Cases and stringency live on very different scales, hence the
    # independent y axes for the two layers.
    final = alt.layer(weekly_stringency, weekly_new_cases).resolve_scale(
        y="independent"
    )

    # NOTE(review): `beta_columns` looks like the pre-release name of
    # Streamlit's column layout API -- confirm against the pinned version
    # before renaming.
    col1, col2 = st.beta_columns((3, 1))

    col1.write(final)
    col2.markdown(
        "THis is an example where I write additional data with many more analysis. Showing India as an example for visualizzation. There will be "
        "more and more text to explain how to inteact with this chart and pick a country of choice. "
        "Add a lot more text to make the point clear and also how to take the data with a grain of salt."
    )
    col2.write(cases_stringency_corr)

    # Plot Section 2
    st.header("Death % against various socioeconomic variables")
    st.subheader("Scatterplot grid show several values")
    grid = alt.vconcat(
        alt.hconcat(
            plot_scatter_with_regression("median_age", source=country_dimension),
            plot_scatter_with_regression("gdp_per_capita", source=country_dimension),
        ),
        alt.hconcat(
            plot_scatter_with_regression(
                "cardiovasc_death_rate", source=country_dimension
            ),
            plot_scatter_with_regression(
                "diabetes_prevalence", source=country_dimension
            ),
        ),
        alt.hconcat(
            plot_scatter_with_regression(
                "handwashing_facilities", source=country_dimension
            ),
            plot_scatter_with_regression(
                "human_development_index", source=country_dimension
            ),
        ),
    )

    col1, col2 = st.beta_columns((3, 1))
    col1.write(grid)
    col2.markdown("Here is where the explanation goes by each chart")

    # Plot section 3

    st.header("Testing and gdc per capita")
    st.subheader("Scatterplot grid show several values")

    test_by_gdp = country_dimension[
        ["location", "gdp_per_capita", "tests_per_population"]
    ].dropna()

    # Keep only the 60 locations with the highest tests per population.
    test_by_gdp = test_by_gdp.sort_values('tests_per_population', ascending=False).iloc[:60, ]

    tests_chart = (
        alt.Chart(test_by_gdp)
        .mark_bar()
        .encode(
            x=alt.X("location", sort="-y"),
            y="tests_per_population",
            color="gdp_per_capita",
        )
    ).properties(height=500)

    st.write(tests_chart)
Ejemplo n.º 17
0
        legend=alt.Legend(title='Total Records')
    )
)

# Point layer derived from `rect` (defined above this excerpt): grey points
# whose size encodes the number of records that pass the `pts` selection
# filter.
circ = rect.mark_point().encode(
    alt.ColorValue('grey'),
    alt.Size('count()',
        legend=alt.Legend(title='Records in Selection')
    )
).transform_filter(
    pts
)

# Record count per genre. Bars covered by the `pts` selection are drawn in
# steelblue, the rest in grey; the selection itself is attached to this chart.
bar = alt.Chart(source).mark_bar().encode(
    x='Major_Genre:N',
    y='count()',
    color=alt.condition(pts, alt.ColorValue("steelblue"), alt.ColorValue("grey"))
).properties(
    selection=pts,
    width=550,
    height=200
)

# Stack the layered rect/point chart above the bar chart. Colour and size
# legends are resolved independently so each sub-chart keeps its own legend.
# The expression is left unassigned (its value is the final chart).
alt.vconcat(
    rect + circ,
    bar
).resolve_legend(
    color="independent",
    size="independent"
)
Ejemplo n.º 18
0
                def get_group_chart(grid_df,
                                    min_value: float,
                                    max_value: float,
                                    title: str = ''):
                    """Place one bar chart per group value side by side.

                    For every entry of the enclosing scope's ``group_values``
                    the rows of ``grid_df`` whose ``group_var`` column equals
                    that entry are passed to ``get_bar_chart`` together with
                    ``min_value`` and ``max_value``.  The resulting charts are
                    concatenated horizontally under a top-oriented title.
                    """
                    group_charts = [
                        get_bar_chart(grid_df[grid_df[group_var] == value],
                                      min_value, max_value)
                        for value in group_values
                    ]
                    header = alt.TitleParams(title,
                                             anchor='middle',
                                             align='center',
                                             orient='top')
                    return alt.hconcat(*group_charts, title=header)

                chart = alt.vconcat()

                min_unit = 0.05
                for r, rv in enumerate(row_values):
                    rcharts = list()
                    row_df = to_inspect[to_inspect[row_var] == rv]
                    min_value = math.floor(row_df['value'].min() /
                                           min_unit) * min_unit  # type: ignore
                    max_value = math.ceil(row_df['value'].max() /
                                          min_unit) * min_unit  # type: ignore
                    for cv in col_values:
                        grid_df = to_inspect[(to_inspect[row_var] == rv)
                                             & (to_inspect[col_var] == cv)]
                        # Only add title at the top row.
                        title = f'{col_var} = {cv}' if r == 0 else ''
                        rcharts.append(
def make_plot(df):
    """
    Build the figure shown on tab 2 of the dashboard.

    Registers and enables the 'mds_special' Altair theme, then assembles:
      * a line chart of 'price' over 'date', one line per 'company';
      * a short strip with the same lines on which an x-interval can be
        dragged; the interval drives the x-domain of the other charts;
      * bar charts of 'monthly_return', faceted by 'company' in two columns
        and filtered by the mouse-over highlight selection.

    Input - Data frame to be plotted; the charts read its 'date', 'price',
            'company' and 'monthly_return' columns.
    Returns - The price chart stacked on the strip, stacked on the faceted
              bar charts.
    """
    def _mds_theme():
        """
        Return the theme dictionary (fonts, default view size and axis
        styling) that is registered under the name 'mds_special'.
        """
        font = "Arial"
        axis_color = "#000000"
        grid_color = "#DEDDDD"

        title_cfg = {
            "fontSize": 24,
            "font": font,
            "anchor": "middle",
            "fontColor": "#000000"
        }
        view_cfg = {
            "height": 300,
            "width": 400
        }
        axis_x_cfg = {
            "domain": True,
            "gridColor": grid_color,
            "domainWidth": 1,
            "grid": False,
            "labelFont": font,
            "labelFontSize": 12,
            "labelAngle": 0,
            "tickColor": axis_color,
            "tickSize": 5,
            "titleFont": font,
            "titleFontSize": 16,
            "titlePadding": 10,
            "title": "X Axis Title (units)",
        }
        axis_y_cfg = {
            "domain": False,
            "grid": True,
            "gridColor": grid_color,
            "gridWidth": 1,
            "labelFont": font,
            "labelFontSize": 14,
            "labelAngle": 0,
            "titleFont": font,
            "titleFontSize": 16,
            "titlePadding": 10,
            "title": "Y Axis Title (units)",
        }
        return {
            "config": {
                "title": title_cfg,
                'view': view_cfg,
                "axisX": axis_x_cfg,
                "axisY": axis_y_cfg,
            }
        }

    # Make the theme known to Altair under its public name and switch it on.
    alt.themes.register('mds_special', _mds_theme)
    alt.themes.enable('mds_special')

    # Mouse-over selection of the nearest company, and an x-axis brush.
    highlight = alt.selection(type='single',
                              on='mouseover',
                              nearest=True,
                              fields=['company'])
    brush = alt.selection(type='interval', encodings=['x'])

    # Price history: lines are drawn with size 3 while not highlighted and
    # size 5 otherwise.
    history_x = alt.X('date', title='Date', scale=alt.Scale(domain=brush))
    history_y = alt.Y('price', title='Stock price (USD)')
    chart = alt.Chart(df).mark_line().encode(
        history_x,
        history_y,
        color=alt.Color('company', title="Company"),
        size=alt.condition(~highlight, alt.value(3), alt.value(5)),
    )
    chart = chart.add_selection(highlight)
    chart = chart.properties(title='Historical Stock Prices',
                             width=900,
                             height=350)

    # Monthly change: positive returns in steelblue, the rest in orange.
    return_color = alt.condition(
        alt.datum.monthly_return > 0,
        alt.value("steelblue"),
        alt.value("orange"),
    )
    bars = alt.Chart(df).mark_bar().encode(
        y=alt.Y('monthly_return',
                title='Monthly Change %',
                axis=alt.Axis(format='%')),
        x=alt.X('date', title='Month', scale=alt.Scale(domain=brush)),
        color=return_color,
    )
    bars = bars.properties(width=470, title='Monthly price change (%)')
    bars = bars.transform_filter(highlight)
    bars = bars.facet(facet='company',
                      title="Monthly stock price % changes",
                      columns=2)

    # Strip under the price history that carries the brush selection.
    strip_x = alt.X('date', title=' ', scale=alt.Scale(domain=brush))
    strip_y = alt.Y('price', title=' ', axis=None)
    lower = alt.Chart(df).mark_line().encode(
        strip_x,
        strip_y,
        color=alt.Color('company', title="Company"),
        size=alt.condition(~highlight, alt.value(3), alt.value(5)),
    )
    lower = lower.add_selection(highlight)
    lower = lower.properties(
        title='Feel free to drag across a time period below to zoom in the chart!',
        height=60,
        width=900)
    lower = lower.add_selection(brush)

    return alt.vconcat(chart, lower) & bars
Ejemplo n.º 20
0
def main(_):
  """Run the rater-agreement / PPCA analysis and write its plots.

  Stages, in order:
    1. Load and concatenate every ``.csv`` file found in ``FLAGS.data``.
    2. Drop neutral and unrated rows and keep examples with at least three
       ratings; pack the ratings into an (examples x raters x emotions)
       array together with a rater mask.
    3. Compute leave-out correlations per rater via ``LeaveOut``, test each
       dimension with a Wilcoxon signed-rank test and apply a Bonferroni
       correction.
    4. Split the raters of every example randomly in two halves, run
       ``PPCA`` on the half means and rotate the components with
       ``Varimax``.
    5. Write ``component_loadings.pdf``, ``emotion_agreements.csv``,
       ``label_distr_agreement.pdf`` and ``tsne.html`` into
       ``FLAGS.plot_dir``.

  Args:
    _: Ignored positional argument.

  Raises:
    OSError: from ``os.makedirs`` if ``FLAGS.plot_dir`` already exists.
    AssertionError: if the per-rater correlations contain NaN values.
  """
  print("Loading data...")
  dfs = [
      pd.read_csv(os.path.join(FLAGS.data, filename), encoding="utf-8")
      for filename in os.listdir(FLAGS.data)
      if filename.endswith(".csv")
  ]
  data = pd.concat(dfs)
  print("%d Examples" % (len(set(data["id"]))))
  print("%d Annotations" % len(data))
  # NOTE(review): this fails when the directory already exists; kept as-is
  # because it may be a deliberate guard against overwriting earlier plots.
  os.makedirs(FLAGS.plot_dir)

  with open(FLAGS.emotion_file, "r") as f:
    all_emotions = f.read().splitlines()
  all_emotions_neutral = all_emotions + ["neutral"]
  emotion2idx = {e: i for i, e in enumerate(all_emotions)}
  print("%d emotion Categories" % len(all_emotions))

  print("Processing data...")

  # Remove neutral labels
  data = data[data["neutral"] == 0]

  # Remove examples with no ratings (difficult examples)
  data = data[data[all_emotions_neutral].sum(axis=1) != 0]

  # Convert into num_examples x num_raters x num_ratings format
  data = data.groupby("id").filter(lambda x: len(x) >= 3)
  id_groups = data.groupby("id")

  worker2examples = {}  # dict mapping worker ids to (example, rater id) tuples
  max_num_raters = data.groupby("id").size().max()
  ratings = np.zeros(
      (len(id_groups), max_num_raters, len(all_emotions)))  # ignore "neutral"
  rater_msk = np.zeros(
      (len(id_groups), max_num_raters))  # for masking out non-existent raters
  print("Ratings shape", ratings.shape)

  # Get ratings and rater mask
  texts = []
  for ex_idx, (_, g) in enumerate(id_groups):
    texts.append(g.iloc[0]["text"])
    rater_count = 0

    # iterate through workers
    for _, row in g.iterrows():
      for e in all_emotions:
        ratings[ex_idx, rater_count, emotion2idx[e]] = row[e]
      # The mask is per rater, so set it once rather than once per emotion.
      rater_msk[ex_idx, rater_count] = 1

      worker_id = row["rater_id"]
      if worker_id in worker2examples:
        worker2examples[worker_id].append((ex_idx, rater_count))
      else:
        worker2examples[worker_id] = [(ex_idx, rater_count)]
      rater_count += 1

  print("Calculating leave-out (partial) correlations...")
  partial_corr_per_rater = []
  corr_per_rater = []
  for worker_id in worker2examples:
    partial_corrs, corrs = LeaveOut(ratings, rater_msk, worker2examples,
                                    worker_id)
    # Skip raters for whom not every emotion received a partial correlation.
    if len(partial_corrs) < len(all_emotions):
      continue

    partial_corr_per_rater.append(partial_corrs)
    corr_per_rater.append(corrs)
  corr_per_rater = np.array(corr_per_rater)
  partial_corr_per_rater = np.array(partial_corr_per_rater)

  # Verify that there are no NaN values
  assert np.isnan(corr_per_rater).sum() == 0

  # Apply Wilcoxon signed rank test to test significance of each dimension
  p_vals = np.apply_along_axis(wilcoxon, 0, partial_corr_per_rater)[1]

  # Apply Bonferroni correction
  reject, corr_pvals, _, newalpha = multipletests(
      p_vals, alpha=0.05, method="bonferroni")
  print("Which dimensions to keep?")
  print(reject)
  print(corr_pvals)
  print(newalpha)

  print("Running PPCA on all the data...")
  # Take all raters and split them randomly
  x = []
  y = []
  rater_counts = rater_msk.sum(axis=1).astype(int)
  all_ratings_avg = []
  for i, ex in enumerate(ratings):
    # Get actual raters based on mask
    keep = [list(worker_rating) for worker_rating in ex[:rater_counts[i]]]
    all_ratings_avg.append(list(np.array(keep).mean(axis=0)))

    # Shuffle raters randomly
    random.shuffle(keep)

    # First half of the shuffled raters goes to x, the remainder to y.
    half = len(keep) // 2
    x.append(list(np.array(keep[:half]).mean(axis=0)))
    y.append(list(np.array(keep[half:]).mean(axis=0)))

  x = np.array(x)
  y = np.array(y)
  all_ratings_avg = np.array(all_ratings_avg)
  w, v = PPCA(x, y)  # final components (p-values determine which ones to keep)

  print("Plotting percentage of covariance explained...")
  PlotCovar(v)

  # Apply varimax rotation
  w_vari = Varimax(w)

  # Get mapping between ppcs and emotions
  map_df = pd.DataFrame(
      w_vari, index=all_emotions, columns=np.arange(len(all_emotions))).round(4)
  # Sort to move values to diagonal.
  # `pd.Series.nonzero` no longer exists in current pandas; `np.nonzero` on
  # the underlying values returns the same index array.
  map_df = map_df[list(
      np.argsort(map_df.apply(lambda x: np.nonzero(x.values)[0]).values)[0])]
  plt.figure(figsize=(10, 6), dpi=300)
  sns.heatmap(
      map_df,
      center=0,
      cmap=sns.diverging_palette(240, 10, n=50),
      yticklabels=all_emotions)
  plt.xlabel("Component")
  plt.savefig(
      FLAGS.plot_dir + "/component_loadings.pdf",
      dpi=600,
      format="pdf",
      bbox_inches="tight")
  # Each component is named after the emotion with the largest absolute
  # loading on it.
  ppc2emotion = map_df.abs().idxmax().to_dict()
  emotion2ppc = {e: i for i, e in ppc2emotion.items()}
  print(ppc2emotion)

  print("Plotting frequency and mean left-out rater correlations...")
  corr_mean = corr_per_rater.mean(axis=0)
  corr_mean_ordered = [corr_mean[emotion2ppc[e]] for e in all_emotions]
  df_plot = pd.DataFrame({
      "emotion": all_emotions,
      "agreement": corr_mean_ordered
  })
  df_plot["count"] = df_plot["emotion"].map(
      data[all_emotions].sum(axis=0).to_dict())
  df_plot.sort_values("count", ascending=False, inplace=True)
  df_plot.to_csv(FLAGS.plot_dir + "/emotion_agreements.csv", index=False)

  # Get colors
  norm = plt.Normalize(df_plot["agreement"].min(), df_plot["agreement"].max())
  sm = plt.cm.ScalarMappable(cmap="BuPu", norm=norm)
  sm.set_array([])

  # Generate figure
  fig = plt.figure(dpi=600, figsize=(5, 6))
  ax = sns.barplot(
      data=df_plot,
      y="emotion",
      x="count",
      orient="h",
      hue="agreement",
      palette="BuPu",
      dodge=False,
      edgecolor="black",
      linewidth=1)
  # Replace the discrete hue legend by a continuous colour bar.
  ax.get_legend().remove()
  ax.figure.colorbar(sm)
  plt.text(18000, 31, "Interrater\nCorrelation", ha="center")
  plt.xlabel("Number of Examples")
  plt.ylabel("")
  # Draw first so the tick labels exist before they are rewritten as "Nk".
  plt.draw()
  labels = [item.get_text() for item in ax.get_xticklabels()]
  ax.set_xticklabels(["%dk" % (int(int(label) / 1000)) for label in labels])
  plt.tight_layout()
  fig.savefig(
      FLAGS.plot_dir + "/label_distr_agreement.pdf",
      dpi=600,
      format="pdf",
      bbox_inches="tight")

  print("Generating t-SNE plot...")
  # Get PPC scores for all examples
  all_ratings_avg = Demean(all_ratings_avg)  # demean all ratings
  ppc_scores = all_ratings_avg.dot(w_vari)  # project onto ppcs
  ppc_scores_abs = np.absolute(ppc_scores)

  # Load maximally distinct colors
  colors = pd.read_csv(
      FLAGS.rgb_colors, sep="\t", header=None, names=np.arange(3))

  # Set colors (todo(ddemszky): add names to colors in file)
  palette_rgb = colors.values
  with open(FLAGS.emotion_color_order) as f:
    color_order = f.read().splitlines()
  ppc2color = {emotion2ppc[e]: i for i, e in enumerate(color_order)}
  # get rgb value for each example based on weighted average of top emotions
  rgb_vals = []
  hex_vals = []
  top_categories = []
  threshold = 0.5  # exclude points not loading on any of the top 10 categories
  counter = 0
  rgb_max = 255
  # The palette row right after the per-emotion rows is used for "other".
  other_color = palette_rgb[len(all_emotions), :]
  for i, scores in enumerate(ppc_scores_abs):

    # Up to two strongest components, kept only if above the threshold.
    top_ppcs = [
        idx for idx in (-scores).argsort()[:2] if scores[idx] > threshold
    ]
    top_emotions = ",".join([ppc2emotion[idx] for idx in top_ppcs
                            ]) if top_ppcs else "other"
    top_categories.append(top_emotions)
    if len(top_ppcs) < 1:  # doesn't have top emotions from list
      color = other_color  # use grey
      counter += 1
    else:
      # Weighted average of top emotions (square->weighted average->square root)
      color_ids = [ppc2color[idx] for idx in top_ppcs]
      weights = [scores[idx] for idx in top_ppcs]
      # Need to round, otherwise floating point precision issues will result
      # in values slightly above 1
      avg = np.round(
          np.sqrt(
              np.average(
                  np.power(palette_rgb[color_ids] * rgb_max, 2),
                  axis=0,
                  weights=weights)) / rgb_max, 4)
      if (avg > 1).sum() > 0:
        print(avg)
      color = avg
    rgb_vals.append(list(color))
    hex_vals.append("#%02x%02x%02x" %
                    tuple(np.array(color * rgb_max, dtype=int)))
  rgb_vals = np.array(rgb_vals)

  # Create t-SNE model
  tsne_model = TSNE(
      perplexity=30,
      n_components=2,
      n_iter=1000,
      random_state=23,
      learning_rate=500,
      init="pca")
  new_values = tsne_model.fit_transform(ppc_scores)
  x = [value[0] for value in new_values]
  y = [value[1] for value in new_values]
  # Put data in dataframe
  df = pd.DataFrame({
      "x": x,
      "y": y,
      "color": hex_vals,
      "label(s)": top_categories,
      "text": texts
  })

  df = df[df["label(s)"] != "other"]
  df["top_label"] = df["label(s)"].str.split(",").str[0]

  # Two selections:
  # - a brush that is active on the top panel
  # - a multi-click that is active on the bottom panel
  brush = alt.selection(type="interval")
  click = alt.selection_multi(encodings=["color"])

  # At most 5000 examples can be plotted. Cap the sample size at the number
  # of available rows: `DataFrame.sample` raises when asked for more rows
  # than exist (sampling is without replacement by default).
  sample = df.sample(min(len(df), 5000))
  points = alt.Chart(sample).mark_point(
      filled=True, size=50).encode(
          x="x:Q",
          y="y:Q",
          color=alt.Color("color", scale=None),
          tooltip=["label(s)", "text"]).properties(
              width=700, height=600).add_selection(brush)

  # Bottom panel is a bar chart
  bars = alt.Chart(sample).mark_bar().encode(
      x="count()",
      y="top_label:N",
      color=alt.condition(click, alt.Color("color:N", scale=None),
                          alt.value("lightgray")),
  ).transform_filter(brush.ref()).properties(
      width=700, selection=click)

  chart = alt.vconcat(
      points, bars, data=sample, title="t-SNE Projection of Examples")

  chart.save(FLAGS.plot_dir + "/tsne.html", format="html")
Ejemplo n.º 21
0
def macd_chart(source, width=900, height=600):
    """
    Build a two-panel MACD chart from pre-computed indicator columns.

    The upper panel layers the 'EMA_12' and 'EMA_26' lines over the chart
    returned by ``candlestick_chart``; the lower panel layers the 'MACD' and
    'DEA' lines over bars of 'OSC'.  A bar is green when MACD is above DEA
    and red otherwise.  Both panels carry the same x-axis zoom selection.

    Reading the chart as a signal:

    1. When DIF and DEA are positive, the MACD line passes (exceeds) the
     OSC line going upwards, and the divergence is positive,
     there is a buy signal confirmation.
    2. When DIF and DEA are negative, the MACD line exceeds the OSC line
     going downwards, and the divergence is negative, there is a
     sell signal confirmation.

    Args:
        source (pandas.DataFrame): Stock data holding at least the columns
            'Date', 'MACD', 'DEA', 'OSC', 'EMA_12' and 'EMA_26'.

        width (int): Chart width in pixels.

        height (int): Total chart height in pixels; 70% goes to the upper
            panel and 30% to the lower one.

    Returns:
        Altair chart: the two panels concatenated vertically.
    """
    def _stack_columns(columns):
        # One (Date, VALUE, Label) frame per requested column, stacked
        # vertically so the lines can be coloured by 'Label'.
        frames = [
            source[['Date', name]]
            .rename(columns={name: 'VALUE'})
            .assign(Label=name)
            for name in columns
        ]
        return pd.concat(frames)

    # One zoom selection, bound to the x scale, shared by both panels.
    zoom = alt.selection_interval(bind='scales', encodings=['x'])

    # Bar colour depends on whether MACD lies above the DEA signal line.
    open_close_color = alt.condition("datum.MACD > datum.DEA",
                                     alt.value("#06982d"),
                                     alt.value("#ae1325"))

    # MACD and signal line, slightly transparent.
    macd_lines = alt.Chart(_stack_columns(['MACD', 'DEA'])).mark_line().encode(
        x='Date:T',
        y='VALUE',
        color='Label',
        opacity=alt.value(0.8),
    )

    # OSC bars for the lower panel.
    bar = alt.Chart(source).mark_bar()
    bar = bar.properties(width=width, height=int(height*0.3))
    bar = bar.encode(x='Date:T', y='OSC', color=open_close_color)
    bar = bar.add_selection(zoom)

    # Candlestick chart for the upper panel.
    candle = candlestick_chart(source, width=width, height=int(0.7 * height))

    # EMA_12 and EMA_26 lines drawn over the candlesticks.
    ema_lines = alt.Chart(_stack_columns(['EMA_12', 'EMA_26'])).mark_line().encode(
        x='Date:T',
        y='VALUE',
        color='Label',
    )
    ema_lines = ema_lines.add_selection(zoom)

    upper_panel = candle + ema_lines
    lower_panel = bar + macd_lines
    return alt.vconcat(upper_panel, lower_panel)
Ejemplo n.º 22
0
    def interactive_plot(self,
                         element='Cu',
                         display=True,
                         save=False,
                         filename='altair_dashboard.html',
                         base_dir=None):
        """Build an interactive Altair dashboard around one element.

        The dashboard stacks a grid of scatter plots (``element`` on the x
        axis against entries of ``self.elements`` on the y axis, three per
        row) on top of a clickable legend of ``element_group`` values and a
        bar chart of summed concentrations.  All parts share one
        multi-selection on ``element_group``.

        Calling this method disables Altair's max-rows check globally.

        Args:
            element: Column of ``self.data`` used for the x axis; rows where
                it is null are left out.
            display: When truthy, the assembled chart is returned.
            save: When truthy, the chart is saved to
                ``os.path.join(base_dir, filename)``.
            filename: File name used when saving.
            base_dir: Target directory; required when ``save`` is truthy.

        Returns:
            The assembled chart when ``display`` is truthy, otherwise None.

        Raises:
            ValueError: If ``save`` is truthy and ``base_dir`` is None.
        """
        # Only keep rows that have a value for `element`.  The explicit copy
        # makes the column assignment below write to an independent frame
        # instead of a filtered slice of self.data (which triggers pandas'
        # chained-assignment warning).
        cu_points = self.data[self.data[element].notnull()].copy()
        alt.data_transformers.disable_max_rows()
        # 'element_group' holds '|'-separated names; count them per row.
        cu_points['num_present_elements'] = cu_points['element_group'].apply(
            lambda x: len(x.split('|')))

        #unpivot the table into a new table with ['element_group', 'element', 'concentration']
        cu_points_melted = cu_points.melt(id_vars='element_group',
                                          value_vars=self.elements,
                                          var_name='element',
                                          value_name='concentration')

        selection = alt.selection_multi(fields=['element_group'])

        #base bar chart
        bar_base = alt.Chart(cu_points_melted).transform_filter(
            selection).encode(y='element',
                              x='sum(concentration)',
                              text=alt.Text('sum(concentration)',
                                            format='0.2f'))

        def scatter_base(e):
            # Scatter plot of `element` (x) against column `e` (y); points
            # outside the selection are greyed out.
            return alt.Chart(cu_points).mark_circle().encode(
                x=element,
                y=e,
                tooltip=self.elements,
                color=alt.condition(selection,
                                    'element_group:N',
                                    alt.value('lightgray'),
                                    legend=None)).add_selection(selection)

        # Text mark showing how many rows are currently selected.
        counts = alt.Chart(cu_points).transform_filter(
            selection).transform_aggregate(
                count='count()').transform_calculate(
                    text="number of points selected: " +
                    alt.datum.count).mark_text(dy=-20,
                                               baseline="top",
                                               align="left").encode(
                                                   x=alt.value(100),
                                                   y=alt.value(5),
                                                   text='text:N',
                                               )

        # legend plot
        legend = alt.Chart(cu_points).mark_circle(size=100).encode(
            x='element_group:N',
            color=alt.condition(
                selection, 'element_group:N',
                alt.value('lightgray'))).add_selection(selection)

        #bar plots
        bar_final = bar_base.mark_bar()
        bar_text = bar_base.mark_text(align='left', baseline='middle', dx=3)

        #scatter plots, three per row
        # NOTE(review): the filter compares against the literal 'cu' while
        # the default `element` is 'Cu'; if the intent is to skip the plot
        # of `element` against itself this should probably be
        # `e != element` -- confirm before changing.
        # NOTE(review): because of the `- 1` in the range bound, the last
        # entry of self.elements gets no plot whenever
        # len(self.elements) % 3 == 1 -- confirm whether that is intended.
        scatters = alt.vconcat(*[
            alt.hconcat(*[(scatter_base(e).properties(width=300, height=150) +
                           counts) for e in self.elements[i:i + 3]
                          if e != 'cu'])
            for i in range(0,
                           len(self.elements) - 1, 3)
        ])

        #final plot: scatter grid above the legend and the labelled bars
        final_plot = alt.vconcat(scatters,
                                 legend | (bar_final + bar_text),
                                 center=True)

        if save:
            if base_dir is None:
                raise ValueError('base_dir cannot be None if saving')
            final_plot.save(os.path.join(base_dir, filename))

        if display:
            return final_plot
Ejemplo n.º 23
0
===================
This chart shows an example of using an interval selection to filter the
contents of an attached histogram, allowing the user to see the proportion
of items in each category within the selection.
"""
# category: interactive charts
import altair as alt
from vega_datasets import data

# The charts reference the dataset by URL; it is supplied once, to the
# concatenated chart at the bottom, instead of to each sub-chart.
cars = data.cars.url

# Interval selection ("brush") shared by both charts.
brush = alt.selection(type='interval')

# Scatter plot that owns the brush: points inside the selection are coloured
# by Origin, points outside are drawn light gray.
points = alt.Chart().mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q',
    color=alt.condition(brush, 'Origin:N', alt.value('lightgray'))
).properties(
    selection=brush
)

# Count of records per Origin, restricted to the brushed points through the
# selection filter.
bars = alt.Chart().mark_bar().encode(
    y='Origin:N',
    color='Origin:N',
    x='count(Origin):Q'
).transform_filter(
    brush.ref()
)

# Stack the two charts vertically and attach the shared data source.  The
# expression is left unassigned (its value is the final chart).
alt.vconcat(points, bars, data=cars)
Ejemplo n.º 24
0
                         legend=alt.Legend(title="Clade"))

# Size encoding: mark size follows the quantitative `value` field; the
# legend is titled "% Prevalence".
size_encode = alt.Size('value:Q', legend=alt.Legend(title="% Prevalence"))

# ==== CHART CREATION ====

# Circle chart of `cd_data` using the x, y, color and size encodings.
# `cd_data`, `x_encode`, `y_encode` and `color_encode` are defined above this
# excerpt.
main_chart = alt.Chart(cd_data).mark_circle().encode(x=x_encode,
                                                     y=y_encode,
                                                     color=color_encode,
                                                     size=size_encode)

# Bar chart of `sum_data`: 'Total Disease' per 'Species', sharing the colour
# encoding of the main chart and limited to a height of 100.
sum_chart = alt.Chart(sum_data).mark_bar().encode(
    x='Species:O',
    y=alt.Y('Total Disease:Q', title='% with disease'),
    color=color_encode,
).properties(height=100)

# combine the two charts into one image: summary bars on top, main chart
# below, on a white background

full_chart = alt.vconcat(sum_chart, main_chart).configure(background="white")

# save graph
# NOTE(review): the target path is relative, so the result depends on the
# working directory the script is started from -- confirm.

full_chart.save('../figures/summarydata.png')

#open image of chart: reload the file just written and show it

img = Image.open('../figures/summarydata.png')
img.show()
Ejemplo n.º 25
0
def describe_cat_var(dataframe, cat_vars, n_cols=3):
    """
    Plot a histogram (bar chart of counts) for each categorical variable.

    Rows that contain a missing value in any column of `dataframe` are
    dropped before plotting. The histograms are arranged in a grid with
    `n_cols` plots per row, following the order of `cat_vars`.

    Parameters
    -----------
    dataframe: `pandas.DataFrame`
        The dataframe whose EDA analysis is to be performed
    cat_vars: `list`
        A non-empty list containing names of categorical variables
    n_cols: `int`, optional
        A number indicating how many plots should be displayed in a row

    Returns
    --------
    `altair`
        a grid of altair plot containing all histograms

    Raises
    -------
    Exception
        If `dataframe` is not a `pandas.DataFrame`, if `cat_vars` is not a
        non-empty list of strings, if `n_cols` is not a positive integer,
        or if any name in `cat_vars` is not a column of `dataframe`.

    Examples
    ---------
    >>> import pandas
    >>> X = pandas.DataFrame({
    ...     'type': ['Car', 'Bus', 'Car'],
    ...     'height': [10, 20, 30]
    ... })
    >>> cat_vars = ['type']
    >>> plot = describe_cat_var(X, cat_vars)
    """

    # Checking for valid inputs
    if not isinstance(dataframe, pd.DataFrame):
        raise Exception("The value of the argument 'dataframe' must be " +
                        "of type 'pandas.DataFrame'")

    if not isinstance(cat_vars, list) or \
            not all(isinstance(x, str) for x in cat_vars):
        raise Exception("The value of the argument 'cat_vars' must be " +
                        "a list of strings")

    # Without at least one variable there is nothing to plot; fail with a
    # clear message instead of an UnboundLocalError at the final return.
    if not cat_vars:
        raise Exception("The value of the argument 'cat_vars' must be " +
                        "a non-empty list of strings")

    if not isinstance(n_cols, int) or n_cols <= 0:
        raise Exception("The value of the argument 'n_cols' must be " +
                        "a positive non zero integer")

    if not set(cat_vars).issubset(dataframe.columns):
        raise Exception("The input categorical column names must belong to " +
                        "the dataframe")

    # Select columns with a list, not a set: pandas 2.x rejects set indexers
    # and a set has no stable order. dict.fromkeys removes duplicate names
    # while keeping the caller's order.
    unique_vars = list(dict.fromkeys(cat_vars))
    data = dataframe.dropna()[unique_vars]

    # Build the grid row by row; each slice holds the variables of one row.
    plot = None
    for row_start in range(0, len(cat_vars), n_cols):
        row_plot = None
        for col in cat_vars[row_start:row_start + n_cols]:
            hist = alt.Chart(data).mark_bar(width=40).encode(
                x=alt.X(col + ':O'),
                y='count()').properties(height=200,
                                        width=300,
                                        title='Histogram of ' + col)
            row_plot = hist if row_plot is None else alt.hconcat(row_plot,
                                                                 hist)
        plot = row_plot if plot is None else alt.vconcat(plot, row_plot)

    return plot
Ejemplo n.º 26
0
 # Horizontal bar chart of 'Change' per 'Postcode District', wrapped in a
 # vconcat so that a second, footer-style title can be attached to the whole
 # figure. `change`, `name` and `order` come from the enclosing scope.
 plt = altair.vconcat(
     altair.Chart(change).mark_bar().encode(
         y=altair.Y(
             'Postcode District:N',
             title='Postcode District (highest %s at top, lowest at bottom)' %name,
             # Districts are sorted descending by the field named in `order`.
             sort=altair.SortField(
                 order,
                 'descending'
             )
         ),
         x=altair.X('Change:Q', title='New vaccinations this week'),
         # Fixed colour per 'Colour' category; the legend title is blank.
         color=altair.Color(
             'Colour',
             scale=altair.Scale(
                 range=['grey','blue','orange'],
                 domain=['None','Regional Centre','Mobile Clinic'],
             ),
             legend=altair.Legend(title='')
         ),
     ).properties(
         height=1000,
         width=450,
         title='NI COVID-19 Vaccinations last week by Postcode District'
     )
 ).properties(
     # Multi-line attribution placed at the bottom, anchored to the end.
     # NOTE(review): '%-d' (day without zero padding) is a platform-specific
     # strftime extension and is not supported on Windows - confirm the
     # target platform.
     title=altair.TitleParams(
         ['Vaccinations data from HSCNI COVID-19 dashboard, mid-2018 populations from NISRA',
         'Mobile vaccination clinic locations for last week from nidirect',
         'https://twitter.com/ni_covid19_data on %s' %datetime.datetime.now().strftime('%A %-d %B %Y')],
         baseline='bottom',
         orient='bottom',
         anchor='end',
         fontWeight='normal',
         fontSize=10,
         dy=10
     ),
 )
Ejemplo n.º 27
0
                "count": uniques,
                "pct": uniques / df_len,
            })
        else:
            head_sr = uniques.head(half_lim_values)
            tail_sr = uniques.tail(half_lim_values)
            remainder = uniques[half_lim_values:-half_lim_values]
            counts = np.concatenate((head_sr, [remainder.sum()], tail_sr))
#                 counts = pd.concat((head_sr, [remainder.sum()], tail_sr))
            chart_df = pd.DataFrame({
                "value": head_sr.index.to_list() + ["..[Others].."] + tail_sr.index.to_list(),
                "count": counts,
                "pct": counts / df_len,
            })
#         display(chart_df.head())
        bars = alt.Chart(chart_df, title=f"Field: {col}").mark_bar().encode(
            x=alt.X("pct:Q", axis=alt.Axis(format='%'), title="Percentage of records"),
            y=alt.Y("value:N", sort=None, title="Value"),
        )
        text = bars.mark_text(
            align="left",
            baseline="middle",
            dx=3  # Nudges text to right so it doesn't appear on top of the bar
        ).encode(text="count:Q")
        charts.append(bars + text)

# Stack every per-field chart vertically under one overall title.
overall_title = (
    f"Most- and least-common values per categorical field (from {df_len} total records)"
)
chart = alt.vconcat(*charts, title=overall_title)
Ejemplo n.º 28
0
    alt.Y('Miles_per_Gallon', type='quantitative'),
    alt.Color('Origin', type='nominal')
)


# Shared scatter plot; each variant below only adds a colour encoding.
base = (
    alt.Chart(cars)
    .mark_point()
    .encode(x='Horsepower:Q', y='Miles_per_Gallon:Q')
    .properties(width=150, height=150)
)

# The same field declared with three different type codes, one chart each.
alt.vconcat(*[
    base.encode(color=color_shorthand).properties(title=type_name)
    for color_shorthand, type_name in (
        ('Cylinders:Q', 'quantitative'),
        ('Cylinders:O', 'ordinal'),
        ('Cylinders:N', 'nominal'),
    )
])


# ---

pop = data.population.url

# Bar chart base with the mean of 'people' on the y axis.
base = (
    alt.Chart(pop)
    .mark_bar()
    .encode(alt.Y('mean(people):Q', title='total population'))
    .properties(width=200, height=200)
)
Ejemplo n.º 29
0
def showRatingDistribution(data, name=''):
    """Create an interactive view of hotspot counts next to a point map.

        Despite its name, this function does not read any rating column.

        Args:
        data (DataFrame): the input data frame; the code reads the columns
        'BORO', 'PROVIDER', 'LON' and 'LAT'.
        name (str): the name of the platform (optional); it is inserted into
        the title "{name} Hotspot Distribution".

        Return:
        Chart: an Altair chart: the two bar charts stacked vertically,
        placed to the left of the layered map (background + points).
        """
    ## Vega expression used to highlight the bar under the mouse: it compares
    ## the id held by the "highlight" selection with the id of the datum.
    color_expression = "highlight._vgsid_==datum._vgsid_"
    color_condition = alt.ConditionalPredicateValueDef(color_expression,
                                                       "SteelBlue")

    ## There are two types of selection in our chart:
    ## (1) A selection for highlighting a bar when the mouse is hovering over
    highlight_selection = alt.selection_single(name="highlight",
                                               empty="all",
                                               on="mouseover")

    ## (2) A selection used to filter the second bar chart and the map points.
    ## Note the encodings=['y'] parameter is needed to specify that once a selection
    ## is triggered, it will propagate the encoding channel 'y' as a condition for
    ## any subsequent filter done on this selection. In short, it means use the data
    ## field associated with the 'y' axis as a potential filter condition.
    ## The selection is named "PROVIDER", but it is attached to barMean below,
    ## whose 'y' channel is the BORO field.
    rating_selection = alt.selection_single(name="PROVIDER",
                                            empty="all",
                                            encodings=['y'])

    ## We need to compute the max count to scale our distribution appropriately
    maxCount_BORO = int(data['BORO'].value_counts().max())
    maxCount_SSID = int(data['PROVIDER'].value_counts().max())

    ## Our visualization contains two bar charts. The first one counts the
    ## records per BORO, sorted by descending count. Note the compound selection
    ## that is constructed by adding the two selections together.
    barMean = alt.Chart() \
        .mark_bar(stroke="Black") \
        .encode(
                alt.Y('BORO:O', axis=alt.Axis(title="Location of Hotspot"),
                      sort=alt.SortField(field="BORO", op="count", order='descending'),
                      ),
                alt.X("count()", axis=alt.Axis(title="Number of Hotspot"),
                      scale = alt.Scale(domain=(0,maxCount_BORO)),
                      ),
                alt.ColorValue("LightGrey", condition=color_condition),
                ).properties(
                             selection = highlight_selection+rating_selection
                             )

    ## The second one counts the records per PROVIDER after filtering the data
    ## with rating_selection. Note the use of rating_selection.ref() as a
    ## condition for transform_filter(). The Y scale is fixed to
    ## (0, maxCount_SSID), the largest unfiltered PROVIDER count, so the axis
    ## does not rescale when the filter changes.
    barRating = alt.Chart() \
        .mark_bar(stroke="Black") \
        .encode(
                alt.X("PROVIDER:O", axis=alt.Axis(title="PROVIDER"),
                      sort=alt.SortField(field="PROVIDER", op="count", order='descending'),
                      ),
                alt.Y("count()", axis=alt.Axis(title="Number of Hotspot"),
                      scale=alt.Scale(domain=(0,maxCount_SSID)),
                      ),
                alt.ColorValue("LightGrey"),
                ).properties(
                             selection = highlight_selection
                             ).transform_filter(
                                                rating_selection.ref()
                                                )

    ## GeoJSON used for the map background (the URL points at nyc_zip.geojson).
    states = "https://raw.githubusercontent.com/hvo/datasets/master/nyc_zip.geojson"

    # Map background drawn from the GeoJSON above
    background = alt.Chart(states).mark_geoshape(
        fill='lightgray',
        stroke='white').properties(title='Map', width=500,
                                   height=500).project('albersUsa')

    ## One point per record at (LON, LAT), filtered by the same selection as
    ## barRating. The size=alt.value(30) encoding is set in addition to the
    ## mark-level size=200.
    points = alt.Chart(data).mark_point(filled=True, size=200).encode(
        longitude='LON:Q',
        latitude='LAT:Q',
        color=alt.value('SteelBlue'),
        size=alt.value(30)).transform_filter(rating_selection.ref())

    ## Stack the two bar charts vertically, put the layered map to their right,
    ## and return the result.
    return alt.hconcat(alt.vconcat(
        barMean,
        barRating,
        data=data,
        title="{} Hotspot Distribution".format(name)), (background + points),
                       data=data)
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-09/firsts.csv'
)
# Keep a local CSV copy of the downloaded data.
# NOTE(review): absolute, user-specific path - only works on that machine.
firsts.to_csv('/Users/vivekparashar/Downloads/firsts.csv')

# Create/Convert a pandas dataframe to dplython df
firsts = DplyFrame(firsts)

# Bare expressions for interactive inspection; they have no effect when the
# file runs as a script.
firsts.columns
firsts.gender.unique()
firsts.category.unique()

# firsts df summary by category: year_grp is year/10, rounded, times 10;
# nrows counts 'accomplishment' per (year_grp, category).
t1 = (firsts >> mutate(year_grp=((X.year / 10).round()) * 10) >> group_by(
    X.year_grp, X.category) >> summarize(nrows=X.accomplishment.count()))
# c1: circles sized by nrows on a year_grp x category grid.
c1 = alt.Chart(t1).mark_circle().encode(x='year_grp:O',
                                        y='category:O',
                                        size='nrows:Q')
# c3: bars of nrows per year_grp, coloured by category.
c3 = alt.Chart(t1).mark_bar().encode(x='year_grp', y='nrows', color='category')
# firsts df summary by gender (same grouping as above, with gender)
t2 = (firsts >> mutate(year_grp=((X.year / 10).round()) * 10) >> group_by(
    X.year_grp, X.gender) >> summarize(nrows=X.accomplishment.count()))
# c2: circles sized by nrows on a year_grp x gender grid.
c2 = alt.Chart(t2).mark_circle().encode(x='year_grp:O',
                                        y='gender:O',
                                        size='nrows:Q')

# Stack the three charts vertically: gender grid, category grid, bars.
chart = alt.vconcat(c2, c1, c3)

# Save as PNG at 2x scale.
# NOTE(review): absolute, user-specific output path.
chart.save(
    '/Users/vivekparashar/OneDrive/OneDrive-GitHub/Challenges-and-Competitions/TidyTuesday/Data/2020-11-17/chart.png',
    scale_factor=2.0)
def plot_planning(planning, need, timeline):
    """Chart required vs. planned hours, save the chart and publish it.

    Two faceted bar charts (one facet per date) are stacked vertically: the
    planned hours per line on top and the required hours below.

    Args:
        planning: DataFrame with a "Solution" column; only the rows whose
            index label contains "Total hours" are used.
            NOTE(review): the labels are parsed as "...[<date>,<line>]..." -
            confirm the exact label format against the caller.
        need: DataFrame whose column ``0`` holds the required hours and whose
            index holds the dates.
        timeline: sized collection; only its length is used, to compute the
            width of each date facet.

    Returns:
        None. The chart is written to "planning_time_model3.html" and
        published as a Datapane report, which is opened after publishing.

    Raises:
        ZeroDivisionError: if ``timeline`` is empty.
    """
    # Both charts use the same facet width, derived from the number of dates.
    facet_width = 550 / len(timeline) - 22

    # Plot graph - Requirement
    source = need.copy()
    source = source.rename(columns={0: "Hours"})
    source["Date"] = source.index

    bars_need = (
        alt.Chart(source)
        .mark_bar()
        .encode(
            y="Hours:Q",
            column=alt.Column("Date:N"),
            tooltip=["Date", "Hours"],
        )
        .interactive()
        .properties(
            width=facet_width,
            height=75,
            title='Requirement',
        )
    )

    # Plot graph - Optimized planning
    source = planning.filter(like="Total hours", axis=0).copy()
    source["Date"] = list(source.index.values)
    source = source.rename(columns={"Solution": "Hours"}).reset_index()
    # Split each label at the comma, then keep the text after "[" as the
    # date and the text before "]" as the line.
    source[["Date", "Line"]] = source["Date"].str.split(",", expand=True)
    source["Date"] = source["Date"].str.split("[").str[1]
    source["Line"] = source["Line"].str.split("]").str[0]
    source["Min capacity"] = 7
    source["Max capacity"] = 12
    source = source.round({"Hours": 1})
    # Load is expressed relative to 8 hours.
    # NOTE(review): presumably the nominal shift length - confirm.
    source["Load%"] = pd.Series(
        ["{0:.0f}%".format(val / 8 * 100) for val in source["Hours"]],
        index=source.index,
    )

    bars = (
        alt.Chart(source)
        .mark_bar()
        .encode(
            x="Line:N",
            y="Hours:Q",
            column=alt.Column("Date:N"),
            color="Line:N",
            tooltip=["Date", "Line", "Hours", "Load%"],
        )
        .interactive()
        .properties(
            width=facet_width,
            height=150,
            title="Optimized Production Schedule",
        )
    )

    chart = alt.vconcat(bars, bars_need)
    chart.save("planning_time_model3.html")

    # The keyword was previously misspelled "visibily", so the requested
    # visibility was never passed under the name the API expects.
    dp.Report(dp.Plot(chart, caption="Production schedule model 3 - Time")).publish(
        name="Optimized production schedule model 3 - Time",
        description="Optimized production schedule model 3 - Time",
        open=True,
        visibility="PUBLIC",
    )
Ejemplo n.º 32
0
def _frozen_weights_ridge_chart(data, step, width, subtitle, xaxis_title,
                                title_main, overlap, max_bins, color_scheme):
    """Build one ridge plot of 'Dev Metric', one row per 'Frozen Weights Pct'."""
    lo, hi = min(data['Dev Metric']), max(data['Dev Metric'])
    group_keys = ['Frozen Weights Pct', 'mean_acc']
    row_header = alt.Header(
        labelAngle=0, labelAlign='right', labelFontSize=15, labelFont='Lato',
        labelColor=berkeley_palette['pacific'], titleFontSize=20
    )

    return (
        alt.Chart(data, height=step)
        # Attach the per-group mean so it can drive the fill colour.
        .transform_joinaggregate(mean_acc='mean(Dev Metric)', groupby=['Frozen Weights Pct'])
        .transform_bin(['bin_max', 'bin_min'], 'Dev Metric', bin=alt.Bin(maxbins=max_bins))
        .transform_aggregate(value='count()', groupby=group_keys + ['bin_min', 'bin_max'])
        # NOTE(review): missing bin counts are imputed with the minimum
        # 'Dev Metric' value rather than 0 - kept as in the original; confirm.
        .transform_impute(impute='value', groupby=group_keys, key='bin_min', value=lo)
        .mark_area(interpolate='monotone', fillOpacity=0.8, stroke='lightgray', strokeWidth=0.5)
        .encode(
            alt.X('bin_min:Q', bin='binned', title=xaxis_title, scale=alt.Scale(domain=[lo, hi])),
            # A negative upper range lets each ridge overlap the row above it.
            alt.Y('value:Q', scale=alt.Scale(range=[step, -step * overlap]), axis=None),
            # Reversed colour domain, pulled in by 0.05 at each end.
            alt.Fill('mean_acc:Q', legend=None,
                     scale=alt.Scale(domain=[hi - 0.05, lo + 0.05], scheme=color_scheme)))
        .properties(width=width, height=step)
        .facet(
            row=alt.Row(
                'Frozen Weights Pct:O',
                title='Frozen Weights Pct (Binned)',
                header=row_header
            )
        )
        .properties(title={'text': title_main, 'subtitle': subtitle}, bounds='flush')
    )


def altair_frozen_weights_performance_ridge_plot(data, xaxis_title = "Dev Metric", title_main = "Dense Variably Unfrozen", task_name = "MSR", 
    step_all = 75, width_all = 600, step_small = 30, width_small = 400, overlap = 1, max_bins = 30, color_scheme = 'redyellowblue', return_all = True):
    """Ridge plots of 'Dev Metric' per 'Frozen Weights Pct', overall and per epoch.

    Args:
        data: pandas DataFrame with the columns 'Frozen Weights Pct', 'Epoch'
            and 'Dev Metric'.
        xaxis_title: title of the x axis.
        title_main: main title of every plot.
        task_name: prefix of each subtitle ("<task_name> - All Epochs",
            "<task_name> - Epoch <i>").
        step_all, width_all: row height and width of the all-epochs plot.
        step_small, width_small: row height and width of each per-epoch plot.
        overlap: how far a ridge may extend above its row, in row heights.
        max_bins: maximum number of histogram bins.
        color_scheme: colour scheme for the fill, which encodes the mean
            'Dev Metric' of each 'Frozen Weights Pct' group.
        return_all: if False, return only the all-epochs plot; otherwise
            return a 2x2 grid for epochs 1-4 with the all-epochs plot to
            its right.

    Returns:
        A configured Altair chart (facet spacing 0, no view stroke, centred
        titles).

    Raises:
        AssertionError: if `data` is not a DataFrame or lacks a required
            column.
        ValueError: if `data`, or the rows of any epoch 1-4 when
            `return_all` is true, is empty (min() of an empty sequence).
    """
    # Raised explicitly (not via `assert`) so the checks survive `python -O`;
    # the exception type and messages are unchanged for existing callers.
    if not isinstance(data, pd.DataFrame):
        raise AssertionError("Parameter `data` must be of type pandas.core.frame.DataFrame.")
    required = ['Frozen Weights Pct', 'Epoch', 'Dev Metric']
    if not all(col in data.columns for col in required):
        raise AssertionError("Parameter `data` must contain the following columns: ['Frozen Weights Pct', 'Epoch', 'Dev Metric'].")

    shared = dict(xaxis_title=xaxis_title, title_main=title_main, overlap=overlap,
                  max_bins=max_bins, color_scheme=color_scheme)

    # generate the combined epochs plot
    c0 = _frozen_weights_ridge_chart(
        data, step_all, width_all, " ".join([task_name, "- All Epochs"]), **shared)

    # if not returning all plots, then return the main "All Epochs" plot
    if not return_all:
        return c0.configure_facet(spacing=0).configure_view(stroke=None).configure_title(anchor='middle')

    # generate the individual epochs plots (epochs 1-4 fill the 2x2 grid)
    subplots = [
        _frozen_weights_ridge_chart(
            data[data['Epoch'] == epoch], step_small, width_small,
            " ".join([task_name, "- Epoch", str(epoch)]), **shared)
        for epoch in range(1, 5)
    ]

    epoch_grid = alt.vconcat(alt.hconcat(subplots[0], subplots[1]),
                             alt.hconcat(subplots[2], subplots[3]))
    viz = alt.hconcat(epoch_grid, c0)\
        .configure_facet(spacing=0)\
        .configure_view(stroke=None)\
        .configure_title(anchor='middle')

    return viz
Ejemplo n.º 33
0
===================
This chart shows an example of using an interval selection to filter the
contents of an attached histogram, allowing the user to see the proportion
of items in each category within the selection.
"""
# category: interactive charts
import altair as alt
from vega_datasets import data

# Cars dataset; both panels receive it through the final concat.
source = data.cars()

# Interval selection shared by the scatter plot and the bar chart.
brush = alt.selection(type='interval')

# Points inside the selection keep their Origin colour, the rest turn gray.
point_color = alt.condition(brush, 'Origin:N', alt.value('lightgray'))
points = (
    alt.Chart()
    .mark_point()
    .encode(x='Horsepower:Q', y='Miles_per_Gallon:Q', color=point_color)
    .add_selection(brush)
)

# Record counts per Origin, filtered by the same selection.
bars = (
    alt.Chart()
    .mark_bar()
    .encode(y='Origin:N', color='Origin:N', x='count(Origin):Q')
    .transform_filter(brush)
)

alt.vconcat(points, bars, data=source)
Ejemplo n.º 34
0
    color=alt.condition(brush, color, alt.value('lightgray')),
    size=alt.Size('precipitation:Q', scale=alt.Scale(range=[5, 200]))
).properties(
    width=600,
    height=300
).add_selection(
    brush
).transform_filter(
    click
)

# Bottom panel: record counts per weather type, filtered by the `brush`
# selection. Bars matching the `click` selection keep their colour; the
# others turn light gray.
bar_color = alt.condition(click, color, alt.value('lightgray'))
bars = (
    alt.Chart()
    .mark_bar()
    .encode(x='count()', y='weather:N', color=bar_color)
    .transform_filter(brush)
    .properties(width=600)
    .add_selection(click)
)

# Stack both panels on the shared dataset under a single title.
alt.vconcat(points, bars, data=source, title="Seattle Weather: 2012-2015")