Example #1
    def plot_char_percent_vs_accuracy_smooth(self, category=False):
        if category:
            return (ggplot(self.char_plot_df) +
                    aes(x='char_percent', y='correct', color='category_jmlr') +
                    geom_smooth())
        else:
            return (ggplot(self.char_plot_df) +
                    aes(x='char_percent', y='correct') +
                    geom_smooth(method='mavg'))
Example #2
    def plot_char_percent_vs_accuracy_smooth(self, category=False):
        if category:
            return (
                ggplot(self.char_plot_df)
                + aes(x="char_percent", y="correct", color="category_jmlr")
                + geom_smooth()
            )
        else:
            return (
                ggplot(self.char_plot_df)
                + aes(x="char_percent", y="correct")
                + geom_smooth(method="mavg")
            )
Example #3
    def plot_char_percent_vs_accuracy_smooth(self, category=False):
        if category:
            return (
                ggplot(self.char_plot_df)
                + aes(x='char_percent', y='correct', color='category_jmlr')
                + geom_smooth()
            )
        else:
            return (
                ggplot(self.char_plot_df)
                + aes(x='char_percent', y='correct')
                + geom_smooth(method='mavg')
            )
Example #4
def test_non_linear_smooth_no_ci():
    p = (
        ggplot(df_linear, aes('x')) + geom_point(aes(y='y_noisy')) +
        geom_smooth(
            aes(y='y_noisy'), method='loess', span=.3, color='blue', se=False))

    assert p == 'non_linear_smooth_no_ci'
Example #5
def duration_TL(Data):
    print('======= Creating duration_TL =======')
    x = Data.Duration[pd.isna(Data.Duration)]

    # Treat the column as effectively empty if (almost) all Duration values are NA
    if (len(x) + 10) >= len(Data):
        print("WARNING: All values for Duration are NA's")
    else:
        # Filter symptom rows ("sy") and keep plausible durations (Duration < 180)
        Symptomes = Data[(Data.Group == "sy") & (Data.Duration < 180)].copy()

        # Parse dates so missing days can be filled in below
        Symptomes['Date'] = pd.to_datetime(Symptomes['Date'])

        if len(Symptomes) == 0:
            print('No duration for TL_2')
        else:
            sdate = min(Symptomes["Date"])   # start date
            edate = max(Symptomes["Date"])   # end date
            delta = edate - sdate            # as timedelta

            # Build a continuous daily date range so days without data appear in the plot
            from datetime import timedelta
            day = [sdate + timedelta(days=i) for i in range(delta.days + 1)]

            DF = pd.DataFrame(day)
            DF.columns = ['Date']
            data_with_missing_times = pd.merge(DF, Symptomes, on='Date', how='outer')
            data_with_missing_times.Date = pd.to_datetime(data_with_missing_times.Date)

            if delta.days > 1825:
                datebreaks = '18 months'
            elif delta.days > 1095:
                datebreaks = '12 months'
            else:
                datebreaks = '6 months'

            plot = (p9.ggplot(data=data_with_missing_times,
                              mapping=p9.aes(x='Date', y='Duration'))
                    + p9.geom_smooth(color='red', size=5, method="loess", se=False)
                    + p9.theme_classic()
                    + p9.theme(axis_text=p9.element_text(size=33),
                               axis_title=p9.element_text(size=33, face='bold'))
                    + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
                    + p9.labs(x='', y=''))

            if len(data_with_missing_times) > 0:
                plot.save(filename='TL_2.jpeg',
                          path="pdf/iteration/",
                          width=25, height=5,
                          dpi=320)
            else:
                print('Plot not created; no data found.')

        print('================================= duration_TL DONE =============================')
Example #6
def test_legend_fill_ratio():
    p = (ggplot(df_linear, aes('x', color='x<0.5'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='lm', size=0.5, span=.3)
         )

    assert p == 'legend_fill_ratio'
Example #7
def test_legend_fill_ratio():
    p = (ggplot(df_linear, aes('x', color='x<0.5'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='lm', size=0.5, span=.3)
         )

    assert p == 'legend_fill_ratio'
Example #8
def analyze_encodes() -> pd.DataFrame:
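    # Concatenates the decode CSVs listed in DECODES_PATHS (assumed to be defined at
    # module level, along with the pandas/plotnine/matplotlib imports), then plots the
    # mean malformed rate against alpha = num_channels / num_clients.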
    dfs = []
    for decode_path in DECODES_PATHS:
        df = pd.read_csv(decode_path)
        dfs.append(df)
    df = pd.concat(dfs)

    df['malformed'] = df['malformed'].astype(int)

    df['alpha'] = df['num_channels'] / df['num_clients']

    plot_df = df.groupby('alpha').mean()
    plot_df.index.name = 'alpha'
    plot_df.reset_index(inplace=True)

    n_alphas = df['alpha'].unique()
    print(f"Using {len(n_alphas)} different alphas from {len(DECODES_PATHS)} runs")

    plot = (
        p9.ggplot(plot_df)
        + p9.aes('alpha', 'malformed')
        + p9.geom_point()
        + p9.geom_smooth(method='lm')
        + p9.labs(x='Alpha', y='Collision Percentage')
    )

    plot.draw()
    plt.show()

    return df
Example #9
def _plot_regret_single(df: pd.DataFrame) -> gg.ggplot:
  """Plots the average regret through time for single variable."""
  p = (gg.ggplot(df)
       + gg.aes(x='episode', y='average_regret')
       + gg.geom_smooth(method=smoothers.mean, span=0.1, size=1.75, alpha=0.1,
                        colour='#313695', fill='#313695'))
  return p
Example #10
def plot_series(
        df,
        x=None,
        y=None,
        tick_text_size=6,
        line_size=1.5,
        y_axis_label="Point score",
        x_axis_label="",
        color="stock",
        use_smooth_line=False
):
    assert len(df) > 0
    assert len(x) > 0 and len(y) > 0
    assert line_size > 0.0
    assert isinstance(tick_text_size, int) and tick_text_size > 0
    assert y_axis_label is not None
    assert x_axis_label is not None
    args = {'x': x, 'y': y}
    if color:
        args['color'] = color
    plot = p9.ggplot(df, p9.aes(**args)) \
        + p9.labs(x=x_axis_label, y=y_axis_label) \
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=tick_text_size),
            axis_text_y=p9.element_text(size=tick_text_size),
            legend_position="none",
        )
    if use_smooth_line:
        plot += p9.geom_smooth(size=line_size)
    else:
        plot += p9.geom_line(size=line_size)
    return plot_as_inline_html_data(plot)
Example #11
def test_continuous_x():
    n = len(df_continuous_x)
    p = (ggplot(df_continuous_x, aes('x', 'y'))
         + geom_point()
         + geom_smooth(df_continuous_x[3:n-3], method='loess',
                       color='blue', fullrange=False))
    assert p == 'continuous_x'
Example #12
def test_continuous_x_fullrange():
    n = len(df_continuous_x)
    p = (ggplot(df_continuous_x, aes('x', 'y')) + geom_point() + geom_smooth(
        df_continuous_x[3:n - 3], method='loess', color='blue',
        fullrange=True))

    assert p == 'continuous_x_fullrange'
Example #13
    def test_gpr(self):
        try:
            from sklearn import gaussian_process  # noqa:401
        except ImportError:
            return

        p = self.p + geom_smooth(aes(y='y_noisy'), method='gpr')
        p.draw_test()
Example #14
    def test_gpr(self):
        try:
            from sklearn import gaussian_process  # noqa:401
        except ImportError:
            return

        p = self.p + geom_smooth(aes(y='y_noisy'), method='gpr')
        p.draw_test()
Example #15
def test_linear_smooth():
    p = (ggplot(df_linear, aes('x'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='lm', span=.3,
                       color='blue')
         )

    assert p == 'linear_smooth'
Example #16
def test_non_linear_smooth_no_ci():
    p = (ggplot(df_linear, aes('x'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='loess', span=.3,
                       color='blue', se=False)
         )

    assert p == 'non_linear_smooth_no_ci'
Example #17
def test_linear_smooth():
    p = (ggplot(df_linear, aes('x'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='lm', span=.3,
                       color='blue')
         )

    assert p == 'linear_smooth'
Example #18
    def __plot(
        self,
        plot_data,
        x,
        y,
        colour,
        lbl_x,
        lbl_y,
        facet,
        facet_scales,
        facet_by,
        smoothed,
        points,
        error_bars,
        save,
    ):
        cbbPalette = [
            "#000000",
            "#E69F00",
            "#56B4E9",
            "#009E73",
            "#0072B2",
            "#D55E00",
            "#CC79A7",
        ]
        plt = ggplot(data=plot_data, mapping=aes(x=x, y=y, colour=colour))
        plt += xlab(lbl_x)
        plt += ylab(lbl_y)
        # + facet_grid("site~", scales="free")
        # + geom_line()
        if facet:
            # TODO: use facet as save
            nrow, ncol = self.get_facet_rows(plot_data, facet_by)
            plt += facet_wrap(facet_by, nrow=nrow, ncol=ncol, scales=facet_scales)
        if points:
            plt += geom_point()
        if error_bars:
            # TODO use generic way to compute them
            pass
            # self.plt += geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
        # TODO: use smooth as save
        if smoothed:
            plt += geom_smooth(
                method="mavg",
                se=False,
                method_args={"window": 4, "center": True, "min_periods": 1},
            )
        else:
            plt += geom_line()
        plt += scale_colour_manual(values=cbbPalette, guide=False)
        plt += scale_x_continuous(labels=label_x)

        plt += theme(figure_size=(15, 18), dpi=150)

        if save:
            plt.save(**save)
        return plt
Example #19
def _plot_regret_group(df: pd.DataFrame, group_col: str) -> gg.ggplot:
  """Plots the average regret through time when grouped."""
  group_name = group_col.replace('_', ' ')
  df[group_name] = df[group_col].astype('category')
  p = (gg.ggplot(df)
       + gg.aes(x='episode', y='average_regret',
                group=group_name, colour=group_name, fill=group_name)
       + gg.geom_smooth(method=smoothers.mean, span=0.1, size=1.75, alpha=0.1)
       + gg.scale_colour_manual(values=FIVE_COLOURS)
       + gg.scale_fill_manual(values=FIVE_COLOURS))
  return p
Example #20
def plot_and_save(scale_data_df_cleaned, smooth_factor, temp_file_name):
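    # Scatter of weight over time with a smoothed trend line (span=smooth_factor),
    # a vertical line marking the fasting start date, and an "IF starts!" label;
    # `to_datetime` is assumed to be pandas.to_datetime imported at module level.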
    fasting_start = to_datetime('2019-10-15')
    plot_output = (
        ggplot(scale_data_df_cleaned, aes(x='timestamp', y='weight')) +
        #   facet_wrap('~', ncol = 1, scales = 'free') +
        geom_point(size=0.5) + geom_smooth(span=smooth_factor, color='red') +
        geom_vline(aes(xintercept=fasting_start), color='blue', size=1.2) +
        geom_label(aes(x=to_datetime('2019-11-30'),
                       y=max(scale_data_df_cleaned.loc[:, 'weight'])),
                   label='IF starts!',
                   size=15))
    plot_output.save(temp_file_name, width=13, height=10, dpi=80)
Example #21
    def plot_replicates_lowess_regression_smoothing(self):
        """
        Applies a lowess smoothing regression to the replicates plot in order to estimate the true function.

        """

        from plotnine import ggplot, ylab, xlab, geom_line, aes, geom_smooth, theme_bw, scale_color_grey

        plot = ((ggplot(self.data, aes('Time', 'Current', color='Channel')) +
                 ylab(u'Current (μA)') + xlab('Time (seconds)') + geom_line() +
                 geom_smooth(span=self.span, method='lowess')))

        print(plot)
        return plot
Example #22
def plot_company_rank(df):
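    # Smoothed rank-over-time lines per ASX code, coloured by sector, with code labels
    # and one facet row per rank bin; plot_as_inline_html_data is assumed to be a
    # project helper that renders the plot for embedding in HTML.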
    assert isinstance(df, pd.DataFrame)
    #assert 'sector' in df.columns
    n_bin = len(df['bin'].unique())
    plot = (p9.ggplot(
        df, p9.aes('date', 'rank', group='asx_code', color='sector')) +
            p9.geom_smooth(span=0.3, se=False) +
            p9.geom_text(p9.aes(label='asx_code', x='x', y='y'),
                         nudge_x=1.2,
                         size=6,
                         show_legend=False) + p9.xlab('') +
            p9.facet_wrap('~bin', nrow=n_bin, ncol=1, scales="free_y") +
            p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                     figure_size=(8, 20),
                     subplots_adjust={'right': 0.8}))
    return plot_as_inline_html_data(plot)
Example #23
def test_init_and_fit_kwargs():
    df = pd.DataFrame({
        'x': np.arange(11),
        'y': [0, 0, 0, 0.05, 0.25, 0.5, 0.75, 0.95, 1, 1, 1]
    })

    p = (
        ggplot(df, aes('x', 'y')) + geom_point() + geom_smooth(
            method='glm',
            method_args={
                'family': sm.families.Binomial(),  # init parameter
                'method': 'minimize'  # fit parameter
            },
            se=False))

    assert p == 'init_and_fit_kwargs'
Example #24
def plot_company_rank(ld: LazyDictionary) -> p9.ggplot:
    df = ld["rank"]
    # assert 'sector' in df.columns
    n_bin = len(df["bin"].unique())
    # print(df)
    plot = (p9.ggplot(
        df, p9.aes("date", "rank", group="asx_code", color="asx_code")) +
            p9.geom_smooth(span=0.3, se=False) + p9.geom_text(
                p9.aes(label="asx_code", x="x", y="y"),
                nudge_x=1.2,
                size=6,
                show_legend=False,
            ) + p9.facet_wrap("~bin", nrow=n_bin, ncol=1, scales="free_y"))
    return user_theme(
        plot,
        figure_size=(12, 20),
        subplots_adjust={"right": 0.8},
    )
Example #25
def cell_division(adata):
    """ Plots total_counts as a function of the principal circle nodes to
    visualize the moment of cell division.

    Parameters
    ----------------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.celldiv_moment`.

    Returns
    ------------
    A plotnine line-plot to help visualize the moment of cell division and
    direction of the cell cycle.

    If method = 'counts' when tl.celldiv_moment was run,
    cell division is defined by the largest drop in total_counts. The changes in
    counts are represented by the
    bars at the bottom, and the suggested moment of cell division is marked in
    red. The cell cycle should follow an incremental increase in total counts
    until around the moment of cell division.

    Alternatively, if method='g2m' in tl.celldiv_moment, the G2-M signature
    dynamics are used to define the moment of cell division.
    """
    ref_var = adata.uns['scycle']['cell_div_moment']['ref_var']
    edge_to_0 = adata.uns['scycle']['cell_div_moment']['cell_div_edge'][0]
    edges = adata.uns['princirc_gr']['edges']
    edges['cell_div'] = edges['e1'] == edge_to_0

    cell_div_count = edges[edges['e1'] == edge_to_0]['mean_var']

    cell_div_plot = (ggplot(edges, aes('e1', 'mean_var'))
     + geom_point(aes(y = 'mean_var'), size = 2)
     + geom_path(aes(y = 'mean_var'))
     + geom_smooth(aes(y = 'mean_var'), method = 'lm', linetype = 'dashed')
     + annotate("point", x = edge_to_0, y = cell_div_count, color = 'red', size = 2)
     + labs(x = 'Edge position', y = ref_var)
     + geom_col(aes(y = 'diff_var', fill = 'cell_div'))
     + scale_fill_manual(values = ['darkgrey', 'red'], guide = False)
     + theme_std)

    return cell_div_plot
Example #26
def plot_portfolio_stock_performance(ld: LazyDictionary,
                                     figure_width: int = 12,
                                     date_text_size=7) -> p9.ggplot:

    df = ld["df"]
    df = df[df["stock_cost"] > 0.0]

    # latest_date = df.iloc[-1, 6]
    # latest_profit = df[df["date"] == latest_date]
    # print(df)
    pivoted_df = df.pivot(index="stock", columns="date", values="stock_profit")
    latest_date = pivoted_df.columns[-1]
    # print(latest_date)
    mean_profit = pivoted_df.mean(axis=1)
    n_stocks = len(mean_profit)
    # if we want ~4 stocks per facet plot, then we need to specify the appropriate calculation for pd.qcut()
    bins = pd.qcut(mean_profit, int(100 / n_stocks) + 1)
    # print(bins)
    df = df.merge(bins.to_frame(name="bins"),
                  left_on="stock",
                  right_index=True)
    # print(df)
    textual_df = df[df["date"] == latest_date]
    # print(textual_df)
    # melted_df = make_portfolio_dataframe(df, melt=True)

    plot = (p9.ggplot(
        df, p9.aes("date", "stock_profit", group="stock", colour="stock")) +
            p9.geom_smooth(size=1.0, span=0.3, se=False) +
            p9.facet_wrap("~bins", ncol=1, nrow=len(bins), scales="free_y") +
            p9.geom_text(
                p9.aes(x="date", y="stock_profit", label="stock"),
                color="black",
                size=9,
                data=textual_df,
                position=p9.position_jitter(width=10, height=10),
            ))
    return user_theme(
        plot,
        y_axis_label="$ AUD",
        figure_size=(figure_width, int(len(bins) * 1.2)),
        axis_text_x=p9.element_text(angle=30, size=date_text_size),
    )
Example #27
def plot_company_rank(df: pd.DataFrame):
    # assert 'sector' in df.columns
    n_bin = len(df["bin"].unique())
    #print(df)
    plot = (
        p9.ggplot(df, p9.aes("date", "rank", group="asx_code", color="asx_code"))
        + p9.geom_smooth(span=0.3, se=False)
        + p9.geom_text(
            p9.aes(label="asx_code", x="x", y="y"),
            nudge_x=1.2,
            size=6,
            show_legend=False,
        )
        + p9.xlab("")
        + p9.facet_wrap("~bin", nrow=n_bin, ncol=1, scales="free_y")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=7),
            figure_size=(8, 20),
            subplots_adjust={"right": 0.8},
        )
    )
    return plot_as_inline_html_data(plot)
Example #28
def plot_series(
    df,
    x=None,
    y=None,
    tick_text_size=6,
    line_size=1.5,
    y_axis_label="Point score",
    x_axis_label="",
    color="stock",
    use_smooth_line=False,
):
    if df is None or len(df) < 1:
        return None

    assert len(x) > 0 and len(y) > 0
    assert line_size > 0.0
    assert isinstance(tick_text_size, int) and tick_text_size > 0
    assert y_axis_label is not None
    assert x_axis_label is not None
    args = {"x": x, "y": y}
    if color:
        args["color"] = color
    plot = p9.ggplot(df, p9.aes(**args))
    if use_smooth_line:
        plot += p9.geom_smooth(
            size=line_size, span=0.2, se=False
        )  # plotnine doesn't support confidence intervals with loess smoothing, so se=False
    else:
        plot += p9.geom_line(size=line_size)
    return user_theme(
        plot,
        x_axis_label=x_axis_label,
        y_axis_label=y_axis_label,
        axis_text_x=p9.element_text(angle=30, size=tick_text_size),
        axis_text_y=p9.element_text(size=tick_text_size),
    )
Example #29
from plotnine.data import economics
from plotnine import ggplot, aes, facet_grid, labs, geom_point, geom_smooth, xlab, ylab

g = (ggplot(economics)
        + aes(x="date", y="uempmed")
        + geom_point()
        + geom_smooth(color="red", span=0.5)
        + xlab("date (year)")
        + ylab("unemployment"))

g.save("19.png")
Example #30
    def test_mavg(self):
        p = self.p + geom_smooth(aes(y='y_noisy'), method='mavg',
                                 method_args={'window': 10})
        p.draw_test()
Example #31
def label_x(dates):
    res = [(datetime.datetime(2018, 1, 1) + datetime.timedelta(x)).strftime("%d-%m") for x in dates]
    print(res)
    return res


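# NOTE: `res` (the aggregated detections dataframe) and `cbbPalette` (a colour-blind-safe
# palette) are assumed to be defined earlier in the source script; label_x() above turns
# Julian day offsets from 2018-01-01 into "%d-%m" tick labels for scale_x_continuous.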
(ggplot(data=res, mapping=aes(x='julian', y='value', colour='type'))
    + xlab("Day")
    + ylab("Mean number of detected songs")
    + facet_grid("type~", scales="free")
    # + geom_line()
    # + facet_wrap("type", nrow=2, ncol=1)
    + geom_point()
    # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
    + geom_smooth(method="mavg", se=False, method_args={"window": 4, "center": True, "min_periods": 1})
    + scale_colour_manual(values=cbbPalette, guide=False)
    + scale_x_continuous(labels=label_x)).save("figs/song_events_aci_BARROW_mean_smoothed.png", height=10, width=16, dpi=150)

(ggplot(data=res, mapping=aes(x='julian', y='n_events_sum', colour='site'))
    + xlab("Day")
    + ylab("Total number of detected songs")
    # + facet_grid("site~", scales="free")
    # + facet_wrap("site", nrow=2, ncol=3)
    + geom_point()
    # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
    + geom_smooth(method="mavg", se=False, method_args={"window": 4, "center": True, "min_periods": 1})
    + scale_colour_manual(values=cbbPalette, guide=False)
    + scale_x_continuous(labels=label_x)).save("figs/song_events_BARW0_sum.png", height=10, width=16, dpi=150)

Example #32
def plot_scatter(dat, figsize=(16, 12)):
    return (pn.ggplot(dat, pn.aes(x='val', y='response')) + pn.geom_point() +
            pn.geom_smooth(method='lm') +
            pn.facet_wrap("var", scales='free_x') + pn.theme_bw() +
            pn.theme(figure_size=figsize, subplots_adjust={'hspace': 0.25}))
Example #33
                iglo.julian.max() + 2)
hatch_lbl_pos = hatch_start + (hatch_end - hatch_start) / 2

xmin = min(inc_start, iglo.julian.min())
xmax = min(iglo_nest[iglo_nest.type == "hatch"].julian.max(),
           iglo.julian.max() + 2)

(ggplot(data=iglo, mapping=aes(x='julian', y='ACI_mean', colour='site'))
 #+ facet_grid("panel~", scales="free")
 + xlab("Day") + ylab("Mean daily ACI (standardized)") + geom_point() +
 theme(legend_position="none")
 # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
 + geom_smooth(method="mavg",
               se=False,
               method_args={
                   "window": 4,
                   "center": True,
                   "min_periods": 1
               }) + annotate("rect",
                             xmin=[inc_start, hatch_start],
                             xmax=[inc_end, hatch_end],
                             ymin=-math.inf,
                             ymax=math.inf,
                             alpha=0.1,
                             fill=["red", "blue"]) +
 annotate("text",
          x=[inc_lbl_pos, hatch_lbl_pos],
          y=1.8,
          label=["Incubation initiation", "Hatch"])
 # + geom_line(data = inc, mapping=aes(x="julian", y="uniqueID"), colour="black")
 #    + geom_smooth(data=inc, mapping=aes(x="julian", y="uniqueID"), colour="black", method="mavg", se=False, method_args={"window": 4, "center": True, "min_periods": 1})
Example #34
def eval(fold=BUZZER_DEV_FOLD):
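    # Runs the trained RNN buzzer over the validation fold, records a buzz probability at
    # each question position, then plots accuracy and buzz rates (at 0.3/0.5/0.7 thresholds)
    # against relative question position with geom_smooth, plus a stacked-area breakdown of
    # oracle-vs-buzzer agreement. Names such as args, report_dir, read_data, RNNBuzzer,
    # convert_seq and buzzes_dir are assumed to be defined elsewhere in the module.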
    if not os.path.isdir(report_dir):
        os.mkdir(report_dir)

    valid = read_data(fold)
    print('# {} data: {}'.format(fold, len(valid)))
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  args.batch_size,
                                                  repeat=False,
                                                  shuffle=False)

    args.n_input = valid[0][1][0].shape[0]
    model = RNNBuzzer(args.n_input, args.n_layers, args.n_hidden,
                      args.n_output, args.dropout)
    chainer.serializers.load_npz(args.model_path, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    predictions = []
    buzzes = dict()
    for batch in tqdm(valid_iter):
        qids, vectors, labels, positions = list(map(list, zip(*batch)))
        batch = convert_seq(batch, device=args.gpu)
        preds = model.predict(batch['xs'], softmax=True)
        preds = [p.tolist() for p in preds]
        predictions.extend(preds)
        for i in range(len(qids)):
            buzzes[qids[i]] = []
            for pos, pred in zip(positions[i], preds[i]):
                buzzes[qids[i]].append((pos, pred))
            buzzes[qids[i]] = list(map(list, zip(*buzzes[qids[i]])))

    buzz_dir = os.path.join(buzzes_dir.format(fold))
    with open(buzz_dir, 'wb') as f:
        pickle.dump(buzzes, f)

    results = dict()
    for example_idx in range(len(valid)):
        qid, vectors, labels, positions = valid[example_idx]
        preds = predictions[example_idx]
        q_len = positions[-1]
        for i, pos in enumerate(positions):
            rel_pos = int(100 * pos / q_len)
            if rel_pos not in results:
                results[rel_pos] = []
            results[rel_pos].append((labels[i], preds[i][1]))

    freq = {'x': [], 'y': [], 'type': []}
    for k, rs in results.items():
        rs, scores = list(map(list, zip(*rs)))
        freq['x'].append(k / 100)
        freq['y'].append(sum(rs) / len(rs))
        freq['type'].append('acc')

        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.5 for x in scores) / len(scores))
        freq['type'].append('0.5')

        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.3 for x in scores) / len(scores))
        freq['type'].append('0.3')

        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.7 for x in scores) / len(scores))
        freq['type'].append('0.7')
    freq_df = pd.DataFrame(freq)

    p0 = ggplot(freq_df) + geom_smooth(aes(x='x', y='y', color='type'))
    p0.save(os.path.join(report_dir, '{}_acc_buzz.pdf'.format(fold)))

    stack_freq = {'x': [], 'y': [], 'type': []}
    threshold = 0.5
    for k, rs in results.items():
        num = len(rs)
        only_oracle = sum((c == 1 and b <= threshold) for c, b in rs)
        only_buzzer = sum((c == 0 and b > threshold) for c, b in rs)
        both = sum((c == 1 and b > threshold) for c, b in rs)
        neither = sum((c == 0 and b <= threshold) for c, b in rs)

        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(only_oracle / num)
        stack_freq['type'].append('only_oracle')

        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(only_buzzer / num)
        stack_freq['type'].append('only_buzzer')

        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(both / num)
        stack_freq['type'].append('both')

        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(neither / num)
        stack_freq['type'].append('neither')

    stack_freq_df = pd.DataFrame(stack_freq)

    p1 = ggplot(stack_freq_df) + geom_area(aes(x='x', y='y', fill='type'))
    p1.save(os.path.join(report_dir, '{}_stack_area.pdf'.format(fold)))
Example #35
def quick_color_check(target_matrix, source_matrix, num_chips):
    """ Quickly plot target matrix values against source matrix values to determine
    over saturated color chips or other issues.

    Inputs:
    source_matrix      = a 22x4 matrix containing the average red value, average green value, and
                             average blue value for each color chip of the source image
    target_matrix      = a 22x4 matrix containing the average red value, average green value, and
                             average blue value for each color chip of the target image
    num_chips          = number of color card chips included in the matrices (integer)

    :param source_matrix: numpy.ndarray
    :param target_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, facet_grid, geom_label, scale_x_continuous, \
        scale_y_continuous, scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels
    red = []
    blue = []
    green = []
    for i in range(num_chips):
        red.append('red')
        blue.append('blue')
        green.append('green')

    # Make a column of chip numbers
    chip = np.arange(0, num_chips).reshape((num_chips, 1))
    chips = np.row_stack((chip, chip, chip))

    # Combine info
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    all_color_data = np.row_stack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({'source': all_color_data[:, 0], 'target': all_color_data[:, 1],
                            'color': all_color_data[:, 2]})

    # Add chip numbers to the dataframe
    dataset['chip'] = chips
    dataset = dataset.astype({'color': str, 'chip': str, 'target': float, 'source': float})

    # Make the plot
    p1 = ggplot(dataset, aes(x='target', y='source', color='color', label='chip')) + \
        geom_point(show_legend=False, size=2) + \
        geom_smooth(method='lm', size=.5, show_legend=False) + \
        theme_seaborn() + facet_grid('.~color') + \
        geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5, show_legend=False) + \
        scale_x_continuous(limits=(-5, 270)) + scale_y_continuous(limits=(-5, 275)) + \
        scale_color_manual(values=['blue', 'green', 'red'])

    # Reset debug
    if params.debug is not None:
        if params.debug == 'print':
            p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'))
        elif params.debug == 'plot':
            print(p1)
Example #36
from plotnine.data import economics
from plotnine import ggplot, aes, facet_grid, labs, geom_point, geom_smooth, xlab, ylab

g = (ggplot(economics)
        + aes(x="date", y="uempmed")
        + geom_point()
        + geom_smooth(color="red")
        + xlab("date (year)")
        + ylab("unemployment"))

g.save("18.png")
Example #37
def label_x(dates):
    res = [(datetime.datetime(2018, 1, 1) +
            datetime.timedelta(x)).strftime("%d-%m") for x in dates]
    print(res)
    return res


(ggplot(data=res, mapping=aes(x='julian', y='value', colour='site')) +
 xlab("Day") + ylab("Mean number of detected songs")
 # + facet_grid("site~", scales="free")
 # + geom_line()
 + facet_wrap("site", nrow=6, ncol=2, scales="free_y") + geom_point()
 # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
 + geom_smooth(method="mavg",
               se=False,
               method_args={
                   "window": 4,
                   "center": True,
                   "min_periods": 1
               }) +  # + scale_colour_manual(values=cbbPalette, guide=False)
 scale_x_continuous(labels=label_x)).save("figs/song_events_all_smoothed.png",
                                          height=10,
                                          width=16,
                                          dpi=150)

(ggplot(data=res, mapping=aes(x='julian', y='value', colour='site')) +
 xlab("Day") + ylab("Mean number of detected songs")
 # + facet_grid("site~", scales="free")
 + geom_line() + facet_wrap("site", nrow=6, ncol=2, scales="free_y") +
 geom_point()
 # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
 +  # + geom_smooth(method="mavg", se=False, method_args={"window": 4, "center": True, "min_periods": 1})
Example #38
    def test_gls(self):
        p = self.p + geom_smooth(aes(y='y_noisy'), method='gls')
        p.draw_test()
Example #39
    def test_lowess(self):
        p = self.p + geom_smooth(aes(y='y_noisy'), method='lowess')
        with pytest.warns(UserWarning):
            p.draw_test()
Example #40
def test_discrete_x():
    p = (ggplot(df_discrete_x, aes('x', 'y'))
         + geom_point()
         + geom_smooth(color='blue'))

    assert p == 'discrete_x'
Example #41
def test_discrete_x_fullrange():
    p = (ggplot(df_discrete_x, aes('x', 'y'))
         + geom_point()
         + geom_smooth(color='blue', fullrange=True))

    assert p == 'discrete_x_fullrange'
Example #42
def quick_color_check(target_matrix, source_matrix, num_chips):
    """ Quickly plot target matrix values against source matrix values to determine
    over saturated color chips or other issues.

    Inputs:
    source_matrix      = a 22x4 matrix containing the average red value, average green value, and
                             average blue value for each color chip of the source image
    target_matrix      = a 22x4 matrix containing the average red value, average green value, and
                             average blue value for each color chip of the target image
    num_chips          = number of color card chips included in the matrices (integer)

    :param source_matrix: numpy.ndarray
    :param target_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, facet_grid, geom_label, scale_x_continuous, \
        scale_y_continuous, scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels
    red = []
    blue = []
    green = []
    for i in range(num_chips):
        red.append('red')
        blue.append('blue')
        green.append('green')

    # Make a column of chip numbers
    chip = np.arange(0, num_chips).reshape((num_chips, 1))
    chips = np.row_stack((chip, chip, chip))

    # Combine info
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    all_color_data = np.row_stack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({
        'source': all_color_data[:, 0],
        'target': all_color_data[:, 1],
        'color': all_color_data[:, 2]
    })

    # Add chip numbers to the dataframe
    dataset['chip'] = chips
    dataset = dataset.astype({
        'color': str,
        'chip': str,
        'target': float,
        'source': float
    })

    # Make the plot
    p1 = ggplot(dataset, aes(x='target', y='source', color='color', label='chip')) + \
        geom_point(show_legend=False, size=2) + \
        geom_smooth(method='lm', size=.5, show_legend=False) + \
        theme_seaborn() + facet_grid('.~color') + \
        geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5, show_legend=False) + \
        scale_x_continuous(limits=(-5, 270)) + scale_y_continuous(limits=(-5, 275)) + \
        scale_color_manual(values=['blue', 'green', 'red'])

    # Autoincrement the device counter
    params.device += 1

    # Reset debug
    if params.debug is not None:
        if params.debug == 'print':
            p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'))
        elif params.debug == 'plot':
            print(p1)
Example #43
def test_sorts_by_x():
    df = pd.DataFrame({'x': [5, 0, 1, 2, 3, 4],
                       'y': range(6)})
    p = ggplot(df, aes('x', 'y')) + geom_smooth(stat='identity')
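    # With stat='identity' no model is fitted; the test checks that geom_smooth still
    # orders the points by x before drawing the line.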

    assert p == 'sorts_by_x'