def plot_planning(planning, need, timeline):
    """Chart the requirement and the optimized schedule, then save and publish.

    Two column-faceted bar charts (one facet per date) are stacked vertically,
    written to ``planning_time_model2.html`` and published as a Datapane
    report.

    Args:
        planning: DataFrame with a ``Solution`` column. Only rows whose index
            label contains ``"Total hours"`` are plotted; the date and line
            are parsed out of the label text between ``[`` and ``]``,
            separated by a comma.
        need: DataFrame whose column ``0`` holds the required hours; its index
            supplies the dates.
        timeline: Sized collection; its length determines the facet width.
    """
    # Both charts share the same per-facet width.
    facet_width = 550 / len(timeline) - 22

    # Plot graph - Requirement
    requirement = need.copy()
    requirement = requirement.rename(columns={0: "Hours"})
    requirement["Date"] = requirement.index

    bars_need = (alt.Chart(requirement).mark_bar().encode(
        y="Hours:Q",
        column=alt.Column("Date:N"),
        tooltip=["Date", "Hours"],
    ).interactive().properties(
        width=facet_width,
        height=75,
        title='Requirement',
    ))

    # Plot graph - Optimized planning
    schedule = planning.filter(like="Total hours", axis=0).copy()
    schedule["Date"] = list(schedule.index.values)
    schedule = schedule.rename(columns={"Solution": "Hours"}).reset_index()
    # Index labels look like "...[<date>,<line>]": split on the comma first,
    # then strip the surrounding brackets from each half.
    schedule[["Date", "Line"]] = schedule["Date"].str.split(",", expand=True)
    schedule["Date"] = schedule["Date"].str.split("[").str[1]
    schedule["Line"] = schedule["Line"].str.split("]").str[0]
    schedule["Min capacity"] = 7
    schedule["Max capacity"] = 12
    schedule = schedule.round({"Hours": 1})
    # Load is expressed as a percentage of an 8-hour reference.
    schedule["Load%"] = pd.Series(
        ["{0:.0f}%".format(hours / 8 * 100) for hours in schedule["Hours"]],
        index=schedule.index,
    )

    bars = (alt.Chart(schedule).mark_bar().encode(
        x="Line:N",
        y="Hours:Q",
        column=alt.Column("Date:N"),
        color="Line:N",
        tooltip=["Date", "Line", "Hours", "Load%"],
    ).interactive().properties(
        width=facet_width,
        height=150,
        title="Optimized Production Schedule",
    ))

    chart = alt.vconcat(bars, bars_need)
    chart.save("planning_time_model2.html")

    report = dp.Report(
        dp.Plot(chart, caption="Production schedule model 2 - Time"))
    report.publish(
        name="Optimized production schedule model 2 - Time",
        description="Optimized production schedule model 2 - Time",
        open=True,
        # Was misspelled "visibily", so the setting never reached Datapane
        # under its real keyword.
        visibility="PUBLIC",
    )
Exemple #2
0
def plot_classes(counters, state):
    """Build a faceted bar chart of class counts plus matching text labels.

    Args:
        counters: Data with ``class``, ``n_count``, ``sample`` and ``state``
            fields.
        state: Not used by this function; kept for interface compatibility.

    Returns:
        A ``(bars, text)`` tuple of Altair charts. ``text`` is derived from
        ``bars`` with a text mark showing ``n_count``.
    """
    bar_encoding = {
        "y": "class",
        "x": "n_count",
        "row": alt.Row("sample:N"),
        "column": alt.Column("state:N", sort="descending"),
    }
    bars = alt.Chart(counters).mark_bar().encode(**bar_encoding)

    # dx nudges the label to the right so it does not sit on top of the bar.
    labels = bars.mark_text(align="left", baseline="middle", dx=3)
    text = labels.encode(
        text="n_count",
        row=alt.Row("sample:N"),
        column=alt.Column("state:N"),
    )
    return bars, text
Exemple #3
0
def main():
    """Chart species populations per year and save the chart as JSON.

    Reads ``Endangered_Species.csv`` and writes
    ``Endangered_Species_Population.json``.
    """
    species = pd.read_csv('Endangered_Species.csv')
    channels = [
        alt.X('Species:N'),
        alt.Y('Population:Q'),
        alt.Column('Year'),
        alt.Color('Species:N'),
    ]
    bar_chart = alt.Chart(species).mark_bar().encode(*channels)
    bar_chart = bar_chart.properties(title="Endangered Species Population")
    bar_chart.save('Endangered_Species_Population.json')
def summarize_repeat_classes(prefix, alignment_methods, output_class,
                             map_methods):
    """Tabulate per-repeat-class counts and fractions for each alignment method.

    For every method, ``get_variant_sets_and_fractions`` is queried for five
    repeat classes; the results are flattened into one table and written to
    ``output_class`` as tab-separated text without an index column.

    Args:
        prefix: Forwarded unchanged to ``get_variant_sets_and_fractions``.
        alignment_methods: Iterable of method identifiers.
        output_class: Destination path for the TSV summary.
        map_methods: Mapping from method identifier to its display name.
    """
    repeat_classes = ['SINE', 'LINE', 'LTR', 'Simple_repeat', 'Satellite']
    # Display labels use a space in place of the underscore
    # ("Simple_repeat" -> "Simple repeat").
    display_classes = [label.replace('_', ' ') for label in repeat_classes]

    table = {
        'Number': [],
        'Fraction': [],
        'Repeat Class': [],
        'Alignment Method': [],
    }
    for method in alignment_methods:
        repeat_sets, class_fractions = get_variant_sets_and_fractions(
            method=method, repeats=repeat_classes, prefix=prefix)
        method_fractions = list(class_fractions.values())
        table['Fraction'] += method_fractions
        table['Number'] += [len(members) for members in repeat_sets.values()]
        table['Repeat Class'] += display_classes
        table['Alignment Method'] += (
            [map_methods[method]] * len(method_fractions))

    df_repeat_classes = pd.DataFrame(table)
    df_repeat_classes.to_csv(output_class, sep='\t', index=None)

    if not PLOT:
        return

    # Y: number of biased HETs.
    # NOTE(review): the chart is built but neither returned, displayed nor
    # saved here - confirm whether that is intended.
    facet_order = ['SINE', 'LINE', 'LTR', 'Satellite', 'Simple repeat']
    alt.Chart(df_repeat_classes).mark_bar().encode(
        x=alt.X('Alignment Method:N', sort=alignment_methods, title=None),
        y='Number',
        color='Alignment Method:N',
        column=alt.Column('Repeat Class:N', sort=facet_order))
Exemple #5
0
def scatter_matrix(df):
    """Build a scatter-matrix chart.

    Every column of ``df`` is plotted against every column, one facet per
    pair, with a red degree-4 polynomial regression line layered on the
    points. Each facet has independent x and y scales.

    Returns:
        The faceted Altair chart.
    """
    names = list(df.columns)

    # Fold the frame twice so every (key_x, key_y) column pair is available.
    folded = alt.Chart(df).transform_fold(names, as_=['key_x', 'value_x'])
    folded = folded.transform_fold(names, as_=['key_y', 'value_y'])

    x_channel = alt.X('value_y:Q', title=None, scale=alt.Scale(zero=False))
    y_channel = alt.Y('value_x:Q', title=None, scale=alt.Scale(zero=False))
    base = folded.encode(x=x_channel, y=y_channel)
    base = base.properties(width=150, height=150)

    points = base.mark_circle()
    trend = base.transform_regression(
        'value_y', 'value_x', method='poly', order=4)
    trend = trend.mark_line(color='red')

    grid = alt.layer(points, trend).facet(
        column=alt.Column('key_x:N', sort=names, title=None),
        row=alt.Row('key_y:N', sort=names[::-1], title=None),
    )
    plot = grid.resolve_scale(x='independent', y='independent')
    plot = plot.configure_header(labelFontStyle='bold')
    return plot
Exemple #6
0
def plot_leaderboard_twist(data):
    """Plot the per-day twistiness ranking as a bar chart and as a heatmap.

    Twistiness is the summed ``two_stars`` divided by the summed ``one_star``
    for each (year, day). The bar chart is written to ``out/twist.html`` and
    the heatmap to ``out/twist.heat.html``.
    """
    print('leaderboard_twist')

    per_day = data.groupby(level=('year', 'day')).sum()
    ratio = per_day['two_stars'] / per_day['one_star']
    twist = pd.DataFrame({'twist': ratio}).reset_index()

    # One facet per contest day, one bar per year.
    bar_chart = alt.Chart(twist).encode(
        x='year:O',
        y=alt.Y('twist:Q', title='Twistiness'),
        color='year:N',
        column=alt.Column('day:O', title='Day of contest'),
    ).mark_bar()
    bar_chart.configure_scale(bandPaddingInner=0.2).save('out/twist.html')

    # Day x year grid coloured by twistiness on a log scale.
    color_scale = alt.Scale(scheme='yelloworangered', type='log')
    heatmap = alt.Chart(twist).encode(
        x=alt.X('day:O', title='Day of contest'),
        y='year:O',
        color=alt.Color('twist:Q', title='Twistiness (log)',
                        scale=color_scale),
    ).mark_bar()
    heatmap.configure_scale(bandPaddingInner=0.1).save('out/twist.heat.html')
Exemple #7
0
def plot_leaderboard_time(data, series, file_name):
    """Plot the per-day time distribution for the leaderboard rankings.

    Writes a faceted chart to ``<file_name>.html`` and a by-day rule chart to
    ``<file_name>.byday.html``.

    Args:
        data: Leaderboard data frame.
        series: Which series to use; 'one_star' or 'two_stars'.
        file_name: Output file base name.
    """
    print(f'leaderboard_time:{series}')

    selected_ranks = [1, 25, 50, 75, 100]
    quantiles = data.loc[(slice(None), slice(None), selected_ranks)].unstack()
    # Values are divided by 60 and the rank columns renamed to r1, r25, ...
    scaled = quantiles[series] / 60
    points = scaled.rename(columns=lambda rank: f'r{rank}').reset_index()

    series_label = series.replace("_", " ")
    y_title = f'Time to get {series_label} (ranks 25..75, min)'
    y_scale = alt.Scale(type='log')

    # Faceted view: a rule from rank 1 to 100 with a bar for ranks 25..75.
    base = alt.Chart(points).encode(x='year:O', color='year:N')
    full_range = base.mark_rule().encode(
        alt.Y('r1:Q', title=y_title, scale=y_scale),
        alt.Y2('r100:Q'),
    )
    middle_range = base.mark_bar().encode(alt.Y('r25:Q'), alt.Y2('r75:Q'))
    faceted = (full_range + middle_range).facet(
        column=alt.Column('day:O', title='Day of contest'))
    faceted.configure_scale(bandPaddingInner=0.4).save(file_name + '.html')

    # Offset each year slightly around its day so the rules sit side by side.
    year_span = points.year.max() - points.year.min()
    year_offset = (points.year - points.year.mean()) / (1.5 * year_span)
    points['yday'] = points.day + year_offset

    by_day = alt.Chart(points).encode(
        x=alt.X('yday:Q', title='day'), color='year:N')
    by_day = by_day.mark_rule().encode(
        alt.Y('r25:Q', title=y_title, scale=y_scale),
        alt.Y2('r75:Q'),
    )
    by_day.properties(width=1000, height=600).save(file_name + '.byday.html')
Exemple #8
0
def plot_forces(variables: pd.DataFrame) -> None:
    """Describe the force variables and render their faceted KDE plots.

    ``Height`` and ``Weight`` are dropped, the rest is melted to long form and
    each variable name is split on whitespace into ``type`` and ``variable``.
    Rows with type ``Imb`` are drawn in a separate panel placed to the right
    of the others.
    """
    forces = variables.drop(["Height", "Weight"], axis=1).melt()
    forces[["type", "variable"]] = forces["variable"].str.split(expand=True)

    tables.describe_table(
        forces, groupby=["variable", "type"], description="variables")

    type_column = alt.Column("type", title=None)

    def faceted_kde(condition, row_header):
        # Shared facet layout; only the row subset and row header differ.
        rows = alt.Row(
            shorthand="variable",
            title=None,
            sort=forces_order,
            header=row_header,
        )
        panel = plot_kde().facet(
            data=forces.query(condition), row=rows, column=type_column)
        return panel.resolve_scale(y="independent").properties(bounds="flush")

    forces_plot = faceted_kde(
        "type != 'Imb'", alt.Header(labelAngle=0, labelAlign="left"))
    # Row labels get font size 0 in the second panel.
    imb_plot = faceted_kde("type == 'Imb'", alt.Header(labelFontSize=0))

    plots = (forces_plot | imb_plot).configure_facet(spacing=5)
    st.altair_chart(plots)
Exemple #9
0
    def player_roll_chart(self):
        """Build the "Roll Count by Player" bar chart.

        Uses ``self.player_count`` (rounded to 2 decimals) as data, facets by
        ``Roll`` and colours bars by ``Player`` using ``self.player_names``
        and ``self.player_colors``.

        Returns:
            The configured Altair chart.
        """
        rounded_counts = self.player_count.round(2)

        hidden_axis = alt.Axis(title=None, labels=False, ticks=False)
        player_scale = alt.Scale(
            domain=self.player_names, range=self.player_colors)
        roll_header = alt.Header(
            title=None, labelOrient="bottom", labelFontSize=22)

        bars = alt.Chart(rounded_counts).mark_bar(
            strokeWidth=0.5, stroke="black")
        bars = bars.encode(
            x=alt.X("Player:O", axis=hidden_axis),
            y='Count:Q',
            color=alt.Color('Player:N', scale=player_scale,
                            legend=alt.Legend()),
            column=alt.Column("Roll:N", header=roll_header),
            tooltip=list(self.player_count.columns),
        )

        roll_chart = bars.configure_view(strokeWidth=0)
        roll_chart = roll_chart.configure_title(
            fontSize=32, limit=800, dx=45, dy=-50,
            font="Arial", align="center", anchor="middle")
        roll_chart = roll_chart.configure_legend(
            strokeColor="black", padding=10, orient="bottom", cornerRadius=10,
            direction="horizontal", labelFontSize=10)
        roll_chart = roll_chart.properties(
            title="Roll Count by Player", width=self.screen_width / 45)
        roll_chart = roll_chart.configure_axis(
            grid=False, labelFontSize=14, titleFontSize=16)
        return roll_chart
Exemple #10
0
def dur_dist_plot(dur_dist, to_json_for_lab=None):
    """Build a per-cluster violin-style density chart of ``duration``.

    Args:
        dur_dist: Data with ``duration`` and ``cluster`` fields.
        to_json_for_lab: Optional data transformer; when given it is
            registered under the name ``"json"`` before that transformer is
            enabled.

    Returns:
        The configured Altair chart, faceted into one column per cluster.
    """
    if to_json_for_lab is not None:
        alt.data_transformers.register("json", to_json_for_lab)
    alt.data_transformers.enable("json")

    density = alt.Chart(dur_dist).transform_density(
        "duration",
        as_=["duration", "density"],
        extent=[0, 70],
        groupby=["cluster"],
    )

    # Centre-stacked horizontal areas give the mirrored violin shape.
    density_channel = alt.X(
        "density:Q",
        stack="center",
        impute=None,
        title=None,
        axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
    )
    cluster_header = alt.Header(
        titleOrient="bottom", labelOrient="bottom", labelPadding=0)

    violins = density.mark_area(orient="horizontal").encode(
        y="duration:Q",
        color="cluster:N",
        x=density_channel,
        column=alt.Column("cluster:N", header=cluster_header),
    )
    violins = violins.properties(width=100)
    return violins.configure_facet(spacing=0).configure_view(stroke=None)
Exemple #11
0
def punchcode():
    """Return the punch-card chart of workshop contribution as a JSON string.

    Works on a copy of the module-level ``df``. Every workshop category other
    than ``'Corporate'`` is relabelled ``'Public'``, contribution is
    ``workshop_hours * class_size``, and rows named ``'Capstone'`` are
    excluded from the chart.
    """
    muted = '#bbc6cbe6'

    def to_category(value):
        return 'Corporate' if (value == 'Corporate') else 'Public'

    dat = df.copy()
    dat['mnth_yr'] = dat['workshop_start'].dt.to_period('M').astype(str)
    dat['workshop_category'] = dat['workshop_category'].apply(to_category)
    dat['contrib'] = dat['workshop_hours'] * dat['class_size']

    category_header = alt.Header(
        titleColor=muted,
        labelColor=muted,
        labelAngle=30,
        titleFontSize=40,
        titleAngle=30,
    )
    category_column = alt.Column(
        'workshop_category:O',
        title=None,
        sort="descending",
        header=category_header,
    )

    circles = alt.Chart(dat[dat.name != 'Capstone']).mark_circle(color=muted)
    circles = circles.encode(
        x=alt.X('mnth_yr:T', axis=alt.Axis(title='')),
        y='name:O',
        size=alt.Size('sum(contrib):Q', legend=None),
        column=category_column,
    )
    chart = circles.properties(width=300, height=320).configure_axis(
        labelColor=muted, titleColor=muted, grid=False)
    return chart.to_json()
Exemple #12
0
def cbo_bar_chart(cbo_data, var, title, bar_width=30, width=600, height=250):
    """
    Build a bar chart comparing the current and new CBO projections.

    Parameters
    ----------
    cbo_data: data containing both current and new CBO projections
        concatenated together
    var: Y-axis variable
    title: title of the chart
    bar_width: width of the bars in the plot
    width: total width of the chart, shared equally among the facets
    height: height of the chart

    Returns
    -------
    The configured Altair chart, faceted by the ``index`` column.
    """
    # One facet per distinct "index" value; split the total width evenly.
    facet_count = cbo_data["index"].nunique()
    facet_width = width / facet_count

    projection_axis = alt.Axis(
        title=None, labels=False, ticks=False, labelFontSize=15)
    value_axis = alt.Axis(labelFontSize=10, titleFontSize=15)
    index_header = alt.Header(title=None, labelOrient="bottom")

    bars = alt.Chart(cbo_data, title=title).mark_bar(width=bar_width)
    bars = bars.encode(
        x=alt.X("Projections", axis=projection_axis),
        y=alt.Y(var, axis=value_axis),
        color=alt.Color("Projections"),
        column=alt.Column("index", header=index_header),
    )
    chart = bars.properties(height=height, width=facet_width)
    chart = chart.configure_view(stroke="transparent")
    chart = chart.configure_facet(spacing=0)
    chart = chart.configure_title(fontSize=20)
    return chart
Exemple #13
0
def price_subplot(df,
                  color='Category',
                  color_sort_order=['Clothes', 'Accessory'],
                  color_scale='tableau10',
                  price_scale=alt.Scale()):
    """Build a jittered price strip plot, faceted into one column per year.

    Args:
        df: Data with ``Price``, ``Price-2019``, ``Product``, ``Year`` and
            the ``color`` field.
        color: Field used (as nominal) for the point colour.
        color_sort_order: Optional sort order for the colour channel; skipped
            when falsy.
        color_scale: Name of the colour scheme.
        price_scale: Scale applied to the price axis.

    Returns:
        The faceted Altair chart: a red rule at ``median(Price-2019)`` with
        the jittered points layered on top.
    """
    color_options = {"scale": alt.Scale(scheme=color_scale)}
    if color_sort_order:
        color_options['sort'] = color_sort_order

    base = alt.Chart(df, width=50)

    jitter_channel = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    points = base.mark_circle(size=12, opacity=0.7).encode(
        x=jitter_channel,
        y=alt.Y("Price:Q", axis=alt.Axis(), scale=price_scale),
        color=alt.Color(f'{color}:N', **color_options),
        tooltip=['Product', 'Price', 'Price-2019'],
    )
    # Generate Gaussian jitter with a Box-Muller transform
    points = points.transform_calculate(
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())')

    median_rule = base.mark_rule(color='red', size=2).encode(
        y=alt.Y("median(Price-2019):Q"))
    layered = median_rule + points

    year_header = alt.Header(
        labelAngle=-90,
        titleOrient='top',
        labelOrient='bottom',
        labelAlign='right',
        labelPadding=3,
    )
    return layered.facet(column=alt.Column('Year:O', header=year_header))
def _build_base_change_chart(data):
    """Create the base change chart.

    Args:
        data: Rows of ``(ref, alt, count)``.

    Returns:
        A bar chart faceted by ``ref``, or the "No biallelic SNPs" placeholder
        when ``data`` yields an empty frame.
    """
    facet_width, facet_height = 100, 200
    title = 'Biallelic base changes from reference'
    base_change_data = pd.DataFrame(data, columns=['ref', 'alt', 'count'])

    # 4 charts, plus constant spacing
    placeholder = _placeholder_for_empty_chart(
        'No biallelic SNPs',
        width=(4 * facet_width) + 80,
        height=facet_height,
        title=title,
    )
    if base_change_data.empty:
        return placeholder

    alt_colors = alt.Color(
        'alt',
        legend=None,
        sort=BASES,
        scale=alt.Scale(scheme='category20', domain=BASES),
    )
    bars = alt.Chart(base_change_data).mark_bar().encode(
        x=alt.X('alt', title='to alt'),
        y=alt.Y('count', title='Count', axis=alt.Axis(format='s')),
        color=alt_colors,
        tooltip=alt.Tooltip('count', format='.4s'),
    )
    # Label each bar with its alt base just above the bar.
    labels = bars.mark_text(dy=-5, fontWeight='bold').encode(text='alt')

    layered = (bars + labels).properties(
        width=facet_width, height=facet_height)
    return layered.facet(column=alt.Column('ref', title=title, sort=BASES))
Exemple #15
0
def view_annotation_summary(df):
    """Build a bar-chart summary of annotation durations.

    When ``df`` has a ``pid`` column, one chart per distinct ``pid`` is
    stacked vertically. Otherwise ``df`` must have an ``annotator`` column,
    and a single chart is faceted by ``LABEL_NAME`` with one bar per
    annotator.

    Args:
        df: DataFrame with ``DURATION_IN_SECONDS`` and ``LABEL_NAME`` columns
            plus either ``pid`` or ``annotator``.

    Returns:
        The Altair chart.

    Raises:
        ValueError: If ``df`` has neither a ``pid`` nor an ``annotator``
            column. (Previously this case failed with an UnboundLocalError.)
    """
    has_pid = 'pid' in df.columns
    if not has_pid and 'annotator' not in df.columns:
        raise ValueError(
            "df must contain a 'pid' or an 'annotator' column")

    if has_pid:
        charts = [
            alt.Chart(df[df['pid'] == pid],
                      title=pid,
                      width=1200,
                      height=800).mark_bar().encode(
                          x=alt.X('DURATION_IN_SECONDS:Q',
                                  axis=alt.Axis(title='Duration (seconds)')),
                          y=alt.Y('LABEL_NAME:N',
                                  axis=alt.Axis(title='Annotations')))
            for pid in df['pid'].unique()
        ]
        return alt.vconcat(*charts).configure_axis(labelFontSize=14)

    # NOTE(review): alt.Column is given an `axis` argument below; recent
    # Altair versions configure facet labels through `header`/`title`
    # instead - confirm against the Altair version in use.
    return alt.Chart(df, width=1200, height=800).mark_bar().encode(
        x=alt.X('DURATION_IN_SECONDS:Q',
                axis=alt.Axis(title='Duration (seconds)')),
        column=alt.Column('LABEL_NAME:N',
                          axis=alt.Axis(title='Annotations')),
        y=alt.Y('annotator:N', axis=alt.Axis(title='Annotator')),
        color='annotator:N')
def bv_violinPlot(data, engine, xlabel, ylabel1, ylabel2):
    """Draw violin plots of two columns with the selected plotting engine.

    Args:
        data: DataFrame with ``plotY`` and ``plotX1`` columns; it is copied,
            not modified.
        engine: ``'Static'`` for a seaborn/Matplotlib figure wrapped in a
            Panel pane, ``'Interactive'`` for an Altair chart.
        xlabel: Not used by this function; kept for interface compatibility.
        ylabel1: Display name given to the ``plotY`` column.
        ylabel2: Display name given to the ``plotX1`` column.

    Returns:
        A ``pn.pane.Matplotlib`` pane, an Altair chart, or ``None`` when
        ``engine`` is neither of the two supported values.
    """
    data = data.copy()
    data.rename(columns={'plotY': ylabel1, 'plotX1': ylabel2}, inplace=True)
    data = data[[ylabel1, ylabel2]].copy()

    if engine == 'Static':
        plt.rcParams['figure.figsize'] = (9, 6)
        ax = sns.violinplot(x='variable', y='value', data=data.melt())
        # The on/off flag is passed positionally: its keyword was renamed
        # from `b` to `visible` and `b=` is no longer accepted by recent
        # Matplotlib, whereas the positional form works on every version.
        ax.grid(True, which='major', color='k', linewidth=0.25)
        ax.grid(True, which='minor', color='k', linewidth=0.125)
        # Close the pyplot figure; the pane keeps its own reference.
        plt.close()
        return pn.pane.Matplotlib(ax.figure, tight=True)

    elif engine == 'Interactive':
        p = alt.Chart(data.dropna().melt())
        p = p.transform_density('value',
                                as_=['value', 'density'],
                                groupby=['variable'])
        # Centre-stacked horizontal areas give the mirrored violin shape.
        p = p.mark_area(orient='horizontal').encode(
            y=alt.Y('value:Q', axis=alt.Axis(format='~s')),
            color='variable:N',
            x=alt.X('density:Q', stack='center',
                    impute=None, title=None,
                    axis=alt.Axis(labels=False, values=[0], grid=False,
                                  ticks=True)),
            column=alt.Column('variable:N',
                              header=alt.Header(titleOrient='bottom',
                                                labelOrient='bottom',
                                                labelPadding=0)))
        p = p.properties(width=200, height=280)
        p = p.configure_facet(spacing=0)
        p = p.configure_view(stroke=None)
        return p
Exemple #17
0
def squares(proba, actual, predicted):
    """Build a "squares" chart of binned prediction scores per actual class.

    For each sample only the score of its actual class is kept; the scores
    are cut into ten bins over [0, 1] and counted per bin, actual class and
    predicted class.

    Args:
        proba: Per-class scores, one column per class; column labels are
            compared against the values in ``actual``.
        actual: Actual class label per sample.
        predicted: Predicted class label per sample.

    Returns:
        The Altair chart, faceted by actual class and coloured by predicted
        class.
    """
    df = pd.DataFrame(proba, copy=True)
    metadata = pd.DataFrame({'predicted': predicted, 'actual': actual})
    for col in df.columns:
        # Blank out scores of classes that are not the sample's actual class.
        # A single .loc assignment replaces the chained `df[col][mask] = ...`
        # form, which may write to a temporary copy and never reach `df`.
        df.loc[metadata['actual'] != col, col] = None

    df['predicted'] = predicted
    df['actual'] = actual
    df.columns = df.columns.astype(str)

    df = df.melt(id_vars=['predicted', 'actual'])

    bins = [b / 10 for b in range(11)]
    df['bins'] = pd.cut(df['value'], bins=bins, include_lowest=True)
    counts = df.groupby(
        ['bins', 'variable', 'actual', 'predicted'])['bins'].size()
    b = pd.DataFrame({'count': counts}).reset_index()
    # Interval labels are converted to strings before plotting.
    b['bins'] = b['bins'].astype(str)

    chart = alt.Chart(b, title='').mark_bar().encode(
        x=alt.X('count', axis=alt.Axis(ticks=False, labels=False, title='')),
        y=alt.Y('bins:N',
                title='Prediction Score',
                sort=alt.EncodingSortField(field='bins',
                                           op='sum',
                                           order='descending')),
        column=alt.Column('actual', title=''),
        color='predicted:N',
        tooltip=['count', 'predicted']).properties(width=100).configure_axis(
            grid=False).configure_view(strokeOpacity=0)

    return chart
Exemple #18
0
def tibble(actual, predicted, ncol=10):
    """Build a dot-grid chart of predicted labels, one facet per actual class.

    Within each actual class the predicted labels are sorted and laid out on
    a grid ``ncol`` cells wide, filled row by row.

    Args:
        actual: Array of actual class labels.
        predicted: Array of predicted labels, indexable by a boolean mask of
            the same length as ``actual``.
        ncol: Number of grid cells per row.

    Returns:
        The Altair chart, coloured by predicted label.

    Raises:
        ValueError: From ``pd.concat`` when ``actual`` contains no classes.
    """
    frames = []
    max_rows = 0
    for class_name in np.unique(actual):
        predicted_f = np.sort(predicted[actual == class_name])
        positions = np.arange(len(predicted_f))
        frames.append(pd.DataFrame({
            'Predicted Label': predicted_f,
            # 'row' is plotted on x and holds the position within a grid
            # row; 'column' is plotted on y and holds the grid row number.
            'row': positions % ncol,
            'column': positions // ncol,
            'Class Index': class_name
        }))
        # Grid rows needed by this class, counting a trailing partial row.
        max_rows = max(max_rows, -(-len(predicted_f) // ncol))
    df = pd.concat(frames)

    # Height is sized for the tallest class. Previously it came from the
    # last class's count of full rows only, which is 0 whenever that class
    # has fewer than `ncol` samples.
    chart = alt.Chart(df).mark_circle().encode(
        x=alt.X('row:O', axis=alt.Axis(ticks=False, labels=False, title='')),
        y=alt.Y('column:O', axis=alt.Axis(ticks=False, labels=False,
                                          title='')),
        column=alt.Column('Class Index', title=''),
        color='Predicted Label:N',
        tooltip=['Predicted Label'
                 ]).properties(width=ncol * 6,
                               height=max_rows * 8).configure_axis(
                     grid=False, domainWidth=0).configure_view(strokeOpacity=0)

    return chart
Exemple #19
0
    def bar_graph_specialities_availability(self, years, cities, specialities):
        """
        Plot a bar graph of available specialities from the selected cities.

        Shows a warning and returns early when no speciality is selected.
        With more than one year the chart is faceted into one column per
        year; otherwise a single horizontal bar chart is drawn.

        :param years: selected years
        :param cities: selected city / all cities
        :param specialities: list of selected specialities
        """
        if len(specialities) == 0:
            st.warning("Nicio specialitate nu a fost selectă")
            return

        st.title("Grafice comparative")
        df = self.__filter_available_specialities_by_year(
            years, cities, specialities)

        places_axis = alt.Axis(grid=False, title='Locuri disponibile')
        if len(years) > 1:
            # Several years: cities on x, one facet column per year.
            layout = {
                "x": alt.X('Oraș:N', axis=alt.Axis(title='Oraș')),
                "y": alt.Y('Locuri disponibile:Q', axis=places_axis,
                           sort="-x"),
                "column": alt.Column('An:N'),
            }
        else:
            # Single year: horizontal bars, one per city.
            layout = {
                "x": alt.X('Locuri disponibile:Q', axis=places_axis,
                           sort="-x"),
                "y": alt.Y('Oraș'),
            }

        chart = alt.Chart(df).mark_bar().encode(
            color=alt.Color('Specialitate:N'),
            tooltip=['Specialitate:N', 'Locuri disponibile:Q', 'Oraș:N'],
            **layout)
        st.altair_chart(chart)
def altairPlot():
    """Return a jittered strip plot of IMDB rating per major genre.

    Uses the movies dataset URL from ``vega_datasets``; one facet column per
    genre.
    """
    import altair as alt
    from vega_datasets import data

    source = data.movies.url

    jitter_channel = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    genre_header = alt.Header(
        labelAngle=-90,
        titleOrient='top',
        labelOrient='bottom',
        labelAlign='right',
        labelPadding=3,
    )

    points = alt.Chart(source, width=80).mark_circle(size=8).encode(
        x=jitter_channel,
        y=alt.Y('IMDB_Rating:Q'),
        color=alt.Color('Major_Genre:N', legend=None),
        column=alt.Column('Major_Genre:N', header=genre_header),
    )
    # Generate Gaussian jitter with a Box-Muller transform
    points = points.transform_calculate(
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())')

    stripplot = points.configure_facet(spacing=0).configure_view(stroke=None)
    return stripplot
Exemple #21
0
def create_viz(emotions_dict, src_list):
    """Chart summed emotion values per keyword and source, saved as SVG.

    Args:
        emotions_dict: Mapping of keyword to a mapping keyed by source name.
        src_list: Source names; the first three entries are used and are
            labelled "Trusted Source", "Other Source 1" and "Other Source 2".

    The chart is written to ``./emotion_sample.svg``.
    """
    source_labels = ["Trusted Source", "Other Source 1", 'Other Source 2']

    new_df = pd.DataFrame(columns=['keywords', 'c2', 'values', 'Emotion'])

    for word, per_source in emotions_dict.items():
        # Look up all three sources first, then build the rows, then append.
        source_values = [per_source[src_list[i]] for i in range(3)]
        source_rows = [
            get_rows_src(value, label, word)
            for value, label in zip(source_values, source_labels)
        ]
        for rows in source_rows:
            new_df = add_to_df(rows, new_df)

    emotion_colors = alt.Scale(range=[
        '#96ceb4', '#ffcc5c', '#ff6f69', '#ff9BD8', '#877DD8'
    ])
    bars = alt.Chart(new_df).mark_bar().encode(
        x=alt.X('c2:N', axis=alt.Axis(title='')),
        y=alt.Y('sum(values):Q', axis=alt.Axis(grid=False, title='')),
        column=alt.Column('keywords:N'),
        color=alt.Color('Emotion:N', scale=emotion_colors),
    )
    chart = bars.properties(width=200, height=500)
    chart.save('./emotion_sample.svg')
Exemple #22
0
def bestTeamPlot(year, top):
    """Chart wins, losses and draws for the five best or worst teams.

    Reads ``App/Data/CumulativeSeasons.csv``, keeps the season labelled
    ``"<2000 + year>/<2001 + year>"``, ranks teams by their number of
    ``'won'`` rows and charts the first five. The table behind the chart is
    also written to ``best.csv``.

    Args:
        year: Two-digit season start year (offset from 2000).
        top: Truthy for the five teams with the most wins, falsy for the five
            with the fewest.

    Returns:
        The Altair chart serialized as a JSON string.
    """
    bigDf = pd.read_csv("App/Data/CumulativeSeasons.csv")

    season_label = str(year + 2000) + "/" + str((year + 1) + 2000)
    dfSeason = bigDf[bigDf['season'] == season_label]

    wins_per_team = dfSeason.groupby(['result',
                                      'team_long_name']).size()['won']
    if top:
        df = wins_per_team.sort_values(ascending=False).iloc[:5]
    else:
        df = wins_per_team.sort_values().iloc[:5]
    teamList = df.index.tolist()

    # Row counts are divided by this to get matches.
    # NOTE(review): presumably the data has one row per player per match -
    # confirm against the CSV.
    num_players = 11

    # Computed once instead of three times per team.
    outcome_counts = dfSeason.groupby(['team_long_name', 'result']).size()

    df_won = []
    df_lost = []
    df_draw = []
    for team in df.index:
        team_outcomes = outcome_counts.loc[team]
        # Outcomes are read by position (0, 1, 2 in the group's sorted label
        # order), made explicit with .iloc because plain `[int]` on a
        # label-indexed Series is deprecated.
        # NOTE(review): this assumes every team has all three outcomes in
        # the season; a missing one shifts the positions - confirm.
        df_won.append(int(team_outcomes.iloc[2] / num_players))
        df_lost.append(int(team_outcomes.iloc[1] / num_players))
        df_draw.append(int(team_outcomes.iloc[0] / num_players))

    best = pd.DataFrame({
        'Team': teamList,
        'Wins': df_won,
        'Losts': df_lost,
        'Draw': df_draw
    })

    best.to_csv("best.csv", index=None)

    chart = alt.Chart(
        pd.melt(best, id_vars=['Team'], var_name='Result', value_name='Total'),
        height=400,
        width=165).mark_bar().encode(
            alt.X('Result:N', axis=alt.Axis(title="", labels=False)),
            alt.Y('Total:Q', axis=alt.Axis(title='Total', grid=False)),
            alt.Tooltip(["Total:Q"]),
            color=alt.Color('Result:N'),
            column=alt.Column(
                'Team:O',
                sort=alt.EncodingSortField("Total",
                                           op='max',
                                           order='descending'),
                title="")).configure_view(stroke='transparent').interactive()

    return chart.to_json()
Exemple #23
0
def outcome_bars(data, name=None, width=100):
    """
    Create a bar chart showing the percentage of hands won, lost, and pushed.

    Args:
        data: A DataFrame (one game), a list of dictionaries (one game, e.g.
            a player history), or a list whose items are each convertible to
            a DataFrame (one game per item).
        name: Game name(s) for the chart: a list with one entry per game, or
            a single string for a single game. Defaults to ``Game0``,
            ``Game1``, ...
        width: Width of each facet.

    Returns:
        The Altair chart, faceted by result with one bar per game.

    Raises:
        TypeError: If ``data`` is neither a DataFrame nor a list.
    """
    if isinstance(data, pd.DataFrame):
        data_list = [data]
    elif isinstance(data, list):
        # A non-empty list made only of dictionaries is a single game's
        # records. Checking every item (not just the last) and guarding the
        # empty list avoids the unbound-variable crash on `[]`.
        if data and all(isinstance(item, dict) for item in data):
            data_list = [pd.DataFrame(data)]
        else:
            data_list = [pd.DataFrame(item) for item in data]
    else:
        msg = "'data' must be a DataFrame or list"
        raise TypeError(msg)

    if not name:
        # One default name per game, not per row or record of `data`.
        name = [f"Game{i}" for i in range(len(data_list))]
    elif isinstance(name, str):
        # A bare string names one game; zip() would otherwise iterate it
        # character by character.
        name = [name]

    # Calculate percentages: three rows (Win/Loss/Push) per game.
    plot_rows = []
    for _name, _data in zip(name, data_list):
        win, loss, push = results_pct(_data)
        outcomes = (("Win", win), ("Loss", loss), ("Push", push))
        for order, (result, pct) in enumerate(outcomes, start=1):
            plot_rows.append({
                "game": _name,
                "result": result,
                "pct": pct,
                "order": order
            })
    # Explicit columns keep the schema stable even when there are no games.
    plot_data = pd.DataFrame(plot_rows,
                             columns=["game", "result", "pct", "order"])

    # create altair chart
    chart = alt.Chart(plot_data, width=width).mark_bar().encode(
        x=alt.X("game",
                axis=alt.Axis(labelAngle=-45),
                title=None,
                sort=["Win", "Loss", "Push"]),
        y=alt.Y("pct:Q"),
        color=alt.Color("game:O", legend=None),
        column=alt.Column("result:O", title="Result"),
        tooltip=[alt.Tooltip("pct", title="Pct")])
    return chart
def make_category_plot_separate_top_n(infile, n_to_separate=20):
    """Render a user-by-category traffic heatmap, split into top and bottom users.

    Reads flows from the parquet file ``infile``, totals up+down bytes per
    (user, category) in GB, and facets the heatmap into the ``n_to_separate``
    users with the most traffic versus the rest. The chart is saved to
    ``renders/users_per_category_split_outliers.png``.

    Note: this also sets three pandas display options as a side effect.
    """
    for option in ('display.max_columns', 'display.max_rows',
                   'display.width'):
        pd.set_option(option, None)

    flows = infra.pd.read_parquet(infile).reset_index()
    flows["bytes_total"] = flows["bytes_up"] + flows["bytes_down"]

    def ranked_totals(frame, key):
        # Per-key totals, largest first, renumbered 0..n-1 in sorted order.
        totals = frame.groupby(key).sum().reset_index()
        ordered = totals.sort_values("bytes_total", ascending=False)
        return ordered.reset_index(drop=True)

    # Figure out sorting order by total amount.
    cat_sort_list = ranked_totals(flows, "category")["category"].tolist()
    user_sort_order = ranked_totals(flows, "user")
    user_sort_list = user_sort_order["user"].tolist()

    # Tag every user as top or bottom; the renumbered index is the user's
    # rank, so the first n_to_separate rows are the top users.
    bottom_n = len(user_sort_order) - n_to_separate
    top_annotation_frame = user_sort_order[["user"]].assign(
        topN=f"Bottom {bottom_n}")
    is_top = top_annotation_frame.index < n_to_separate
    top_annotation_frame.loc[is_top, "topN"] = f"Top {n_to_separate}"

    flows["GB"] = flows["bytes_total"] / (1000**3)
    per_user_category = flows[["category", "user", "GB"]].groupby(
        ["user", "category"]).sum().reset_index()
    per_user_category["logGB"] = np.log10(per_user_category["GB"])
    per_user_category = per_user_category.merge(
        top_annotation_frame, on="user")

    user_channel = alt.X(
        "user:N",
        title="User (Sorted by Total GB)",
        axis=alt.Axis(labels=False),
        sort=user_sort_list,
    )
    category_channel = alt.Y(
        "category:N",
        title="Category (Sorted by Total GB)",
        sort=cat_sort_list,
    )
    gb_color = alt.Color(
        "GB:Q",
        title="Total GB",
        scale=alt.Scale(scheme="viridis"),
    )

    heatmap = alt.Chart(per_user_category).mark_rect().encode(
        x=user_channel, y=category_channel, color=gb_color)
    split = heatmap.facet(
        column=alt.Column("topN:N", sort="descending", title=""))
    # Each panel gets its own x and colour scale.
    split = split.resolve_scale(x="independent", color="independent")
    split.save(
        "renders/users_per_category_split_outliers.png",
        scale_factor=2,
    )
Exemple #25
0
def income_expenses_over_time(df_orig):
    """Render the "Income and Expenses over Time" grouped bar chart in Streamlit.

    Sidebar widgets select the aggregation interval (Month / Quarter / Year),
    whether the sign of the "Income" rows is inverted, and the plotting
    backend (pyplot, altair or bokeh). The data is aggregated with
    ``time_interval_aggregation`` and summed per ``Account_L0`` before being
    drawn with the chosen backend.

    NOTE(review): the code assumes the aggregated frame has two column levels
    (a value name, exposed as ``level_0`` after the transpose, and the time
    interval) -- confirm against ``time_interval_aggregation``.
    """
    # Aggregation granularity; index=1 preselects "Quarter".
    time_interval = st.sidebar.radio(
        "Time interval:", ("Month", "Quarter", "Year"), index=1)
    dfn, _n_levels = time_interval_aggregation(df_orig, time_interval)

    flip_income = st.sidebar.checkbox('Invert sign of "Income"', value=True)
    if flip_income:
        dfn.loc["Income", :] = -dfn.loc["Income", :].values

    st.subheader('Income and Expenses over Time')
    plot_type = st.sidebar.selectbox(
        'Plot type', ["pyplot", "altair", "bokeh"], key="plot_type")

    # One row per time interval, one column per top-level account.
    df_L0 = (
        dfn.groupby(["Account_L0"])
        .sum()
        .transpose()
        .reset_index()
    )
    df_L0.columns.name = "Account"

    if plot_type == "pyplot":
        fig = plt.figure(figsize=(14, 5))
        ax = plt.axes()
        y_label = df_L0["level_0"][0]
        df_L0.plot.bar(
            ax=ax,
            x=time_interval,
            y=["Income", "Expenses"],
            xlabel=time_interval,
            ylabel=y_label,
            rot=90,
        )
        ax.locator_params(axis="x", tight=True, nbins=40)
        st.pyplot(fig)

    elif plot_type == "altair":
        n_intervals = df_L0.shape[0]
        value_name = dfn.columns.levels[0][0]

        # Reshape to long form: one row per (time interval, account) pair.
        wide = df_L0.drop(columns="level_0").set_index(time_interval)
        df_new = wide.stack().reset_index().rename(columns={0: value_name})

        custom_spacing = 2
        value_field = f"{value_name}:Q"
        interval_header = alt.Header(
            title="Income and Expenses",
            labelOrient='bottom',
            labelAlign='right',
            labelAngle=-90,
        )
        hover = [
            alt.Tooltip('Account:O', title='Account'),
            alt.Tooltip(value_field, title=value_name),
            alt.Tooltip(f"{time_interval}:N", title=time_interval),
        ]
        # Split a 700px budget across the facets, minus the gaps between them.
        facet_width = (700 - n_intervals * custom_spacing) / n_intervals

        chart = alt.Chart(df_new).mark_bar().encode(
            column=alt.Column(
                time_interval, spacing=custom_spacing, header=interval_header),
            x=alt.X(
                'Account:O',
                axis=alt.Axis(title=None, labels=False, ticks=False)),
            y=alt.Y(value_field, title=value_name, axis=alt.Axis(grid=False)),
            color=alt.Color(
                'Account', scale=alt.Scale(range=['#EA98D2', '#659CCA'])),
            tooltip=hover,
        ).properties(width=facet_width)
        st.altair_chart(chart, use_container_width=False)

    elif plot_type == "bokeh":
        accounts = ["Income", "Expenses"]
        # Nested factors (interval, account) in the same order as `counts`.
        factors = [
            (interval, account)
            for interval in df_L0[time_interval]
            for account in accounts
        ]
        # Interleave the two columns: income_0, expenses_0, income_1, ...
        counts = tuple(
            value
            for pair in zip(df_L0['Income'], df_L0['Expenses'])
            for value in pair
        )
        source = ColumnDataSource(data=dict(x=factors, counts=counts))
        p = figure(
            x_range=FactorRange(*factors),
            plot_height=450,
            plot_width=900,
            title="Income and Expenses",
            toolbar_location="above",
            tooltips=[("Period, Account", "@x"), ("Value", "@counts")],
        )
        p.vbar(x='x', top='counts', width=0.9, source=source)
        p.y_range.start = 0
        p.x_range.range_padding = 0.5
        p.xaxis.major_label_orientation = 1
        p.xgrid.grid_line_color = None
        st.bokeh_chart(p)
def _build_vaf_histograms(histogram_json):
    """Create VAF histograms split by genotype.

    Parameters
    ----------
    histogram_json
        Mapping from a genotype key to data that ``pd.DataFrame`` can load.
        The charts read the columns ``s`` (bin start), ``e`` (bin end) and
        ``c`` (count) from each loaded frame.

    Returns
    -------
    tuple
        ``(vaf_histograms, other_vaf_histograms)``: the faceted chart for the
        rows tagged ``'main'`` by ``_prettify_genotype`` and the chart for the
        rows tagged ``'others'``.
    """
    # x position of the vertical guide line drawn for each main genotype.
    guide_positions = {REF: 0, HET: 0.5, HOM: 1}

    # Collect the per-genotype frames and concatenate once: DataFrame.append
    # was removed in pandas 2.0 and re-copied the whole frame every iteration.
    frames = []
    for key in histogram_json:
        frame = pd.DataFrame(histogram_json[key])
        pretty, group = _prettify_genotype(key)
        frame['GT'] = pretty  # pretty genotype name
        frame['g'] = group  # main/other genotypes
        frame['l'] = guide_positions.get(pretty)  # vertical line as guide
        frames.append(frame)
    # pd.concat rejects an empty list; fall back to the empty frame the
    # original accumulator would have produced.
    hist_data = pd.concat(frames) if frames else pd.DataFrame()

    main_hist_data = hist_data[hist_data['g'] == 'main']
    other_hist_data = hist_data[hist_data['g'] == 'others']

    # Main genotypes (ref, het, hom-alt)
    # Histogram bars themselves
    # s = bin_start, e = bin_end, c = count
    bars = alt.Chart(main_hist_data).mark_bar().encode(
        x=alt.X('s', title='VAF'),
        x2='e',
        y=alt.Y('c', title='Count', stack=True, axis=alt.Axis(format='s')))
    # Vertical lines
    guide_lines = alt.Chart(main_hist_data).mark_rule().encode(x='l')
    # Facet into 3 plots by genotype
    vaf_histograms = (bars + guide_lines) \
      .properties(width=200, height=200) \
      .facet(column=alt.Column('GT',
                               title='Main genotypes',
                               sort=[REF, HET, HOM])) \
      .resolve_scale(y='independent')

    # Other genotypes (uncalled, het with two alt alleles)
    # s = bin_start, e = bin_end, c = count
    other_vaf_histograms = alt.Chart(other_hist_data) \
      .mark_bar().encode(
          x=alt.X('s', title='VAF'),
          x2='e',
          y=alt.Y('c', title='Count', stack=True, axis=alt.Axis(format='s')),
          column=alt.Column('GT', title='Other genotypes')) \
      .properties(width=150, height=150) \
      .resolve_scale(y='independent')
    return vaf_histograms, other_vaf_histograms
Exemple #27
0
def plot(data):
    """
    Build a grouped bar chart from the Dataframe data, which holds
    population, educational attainment, and internet access information for
    U.S. counties, and save it to 'q4_chart.html'.
    The chart compares 5 urban and 5 rural counties on the relationship
    between attaining a Bachelor's Degree and lacking internet access.
    """
    counties = [
        'New York County', 'Los Angeles County', 'Cook County',
        'Harris County', 'Maricopa County', 'Chaves County',
        'Aroostook County', 'Clallam County', 'McCracken County',
        'St. Landry Parish'
    ]

    data = calculate_percentage(clean(data, counties))

    # Encodings: one bar per statistic, one facet column per county.
    statistic_axis = alt.X(
        'Statistic',
        type='nominal',
        sort=counties,
        title=None,
        axis=alt.Axis(labels=False),
    )
    statistic_color = alt.Color(
        'Statistic:N',
        scale=alt.Scale(range=['#96ceb4', '#ffcc5c']),
        title=None,
    )
    county_header = alt.Header(
        titleOrient='bottom',
        labelOrient='bottom',
        labelAngle=-90,
        labelPadding=90,
        labelBaseline='middle',
    )
    county_facet = alt.Column('County:N', sort=counties, header=county_header)

    # The subtitle is a hand-spaced row of Urban/Rural labels, one per county.
    headline = (
        'Internet Access and Education Attainment in Urban vs. '
        'Rural Counties (2016)'
    )
    area_labels = (
        '.         Urban         Urban         Urban      '
        '   Urban         Urban         Rural          Rural'
        '          Rural          Rural          Rural'
    )
    chart_title = {
        'text': [headline],
        'subtitle': ['', area_labels],
        'subtitlePadding': 10,
    }

    bars = alt.Chart(data).mark_bar().encode(
        x=statistic_axis,
        y='Percentage:Q',
        color=statistic_color,
        column=county_facet,
    )
    titled = bars.properties(title=chart_title)
    q4_chart = titled.configure_title(
        fontSize=18,
        orient='top',
        offset=12,
        anchor='start',
    ).configure_axisX(labelPadding=100)

    q4_chart.save('q4_chart.html')
def faceted_bar_chart(
        df: pd.DataFrame,
        xcol: str,
        xtitle: str,
        ycol: str,
        ytitle: str,
        colorcol: str,
        textcol: str,
        title: str,
        columncol: str,
        legend_title: str = "Hardware") -> alt.FacetChart:
    """
    Method that outputs a raw faceted bar chart. This does not process the input df, so it has to come already processed.
    Parameters
    ----------
    df: pd.DataFrame
        dataframe from which the bar chart will be created.
    xcol: str
        dataframe column name that will be used for the x axis of the plot.
    xtitle: str
        title of the x-axis.
    ycol: str
        dataframe column name that will be used for the y axis of the plot.
    ytitle: str
        title of the y-axis.
    colorcol: str
        dataframe column name which holds the separation between colors.
    textcol: str
        currently unused: the text labels drawn on the bars are taken from ycol.
    title: str
        Chart title (applied to the facet column header).
    columncol: str
        dataframe column name which holds the separation between all the faceted charts, x axis above plot.
    legend_title: str
        title of the color legend.

    Returns
    -------
    alt.FacetChart
        Interactive faceted bar chart created from the input dataframe.
    """
    # The base chart carries no data; it is attached once to the layered
    # chart below so the bars and their labels share it.
    bars = alt.Chart().mark_bar().encode(
        x=alt.X(xcol + ':N', title=xtitle),
        y=alt.Y(ycol + ':Q', title=ytitle),
        color=alt.Color(colorcol + ':N', title=legend_title),
    )
    text = bars.mark_text(
        angle=270,
        align='left',
        baseline='middle',
        dx=10  # Nudges text to right so it doesn't appear on top of the bar
    ).encode(text=alt.Text(ycol + ':Q', format='.1f'))
    return alt.layer(bars, text, data=df).facet(column=alt.Column(
        columncol + ':N',
        header=alt.Header(labelAngle=-85, labelAlign='right'),
        title=title)).interactive()
Exemple #29
0
def _generate_chart(results: pd.DataFrame, sorted: bool = False):
    """Scatter ``read_duration`` against ``yx_planes``, faceted by ``config``.

    Points are colored by ``reader``. When ``sorted`` is true the facet
    columns follow the order given by ``SELECTED_CLUSTERS_TO_VISUALIZE``;
    otherwise the plain ``"config:O"`` shorthand is used.
    """
    facet_column = (
        alt.Column("config:O", sort=SELECTED_CLUSTERS_TO_VISUALIZE)
        if sorted
        else "config:O"
    )
    points = alt.Chart(results).mark_circle()
    return points.encode(
        x="yx_planes:Q",
        y="read_duration:Q",
        color="reader:N",
        column=facet_column,
    )
Exemple #30
0
def gen_chart(data: pd.DataFrame) -> alt.Chart:
    """Plot ``coef`` against ``dimension`` as lines for the "test" split.

    Rows whose ``split`` is not ``"test"`` are dropped. Lines are colored by
    ``n`` and the chart is faceted into one column per ``regions`` value.
    """
    test_rows = data[data.split == "test"]
    encodings = {
        "x": alt.X("dimension:Q"),
        "y": alt.Y("coef:Q"),
        "color": "n:N",
        "column": alt.Column("regions:N"),
    }
    lines = alt.Chart(test_rows).mark_line(opacity=0.5).encode(**encodings)
    return lines.properties(width=200, height=300)