def plot_char_percent_vs_accuracy_smooth(self, category=False):
    if category:
        return (
            ggplot(self.char_plot_df)
            + aes(x='char_percent', y='correct', color='category_jmlr')
            + geom_smooth()
        )
    else:
        return (
            ggplot(self.char_plot_df)
            + aes(x='char_percent', y='correct')
            + geom_smooth(method='mavg')
        )
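# A minimal, hedged usage sketch for the method above. The shape of
# self.char_plot_df (char_percent, correct, category_jmlr columns) is an
# assumption inferred from the aes() mappings, not taken from the source.
import numpy as np
import pandas as pd
from plotnine import ggplot, aes, geom_smooth

rng = np.random.default_rng(0)
char_plot_df = pd.DataFrame({
    'char_percent': np.tile(np.linspace(0, 1, 50), 4),
    'correct': (rng.random(200) < np.tile(np.linspace(0.2, 0.9, 50), 4)).astype(int),
    'category_jmlr': np.repeat(['Science', 'History', 'Literature', 'Arts'], 50),
})
p = (ggplot(char_plot_df)
     + aes(x='char_percent', y='correct', color='category_jmlr')
     + geom_smooth())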
def duration_TL(Data):
    print('======= Creating duration_TL =======')
    x = Data.Duration[pd.isna(Data.Duration)]
    if (len(x) + 10) >= len(Data):
        print("WARNING: All values for Duration are NA's")
    else:
        # Filter symptoms and keep plausible durations; copy to avoid
        # SettingWithCopyWarning on the assignment below
        Symptomes = Data[(Data.Group == "sy") & (Data.Duration < 180)].copy()
        Symptomes['Date'] = pd.to_datetime(Symptomes['Date'])
        if len(Symptomes) == 0:
            print('No duration for TL_2')
        else:
            sdate = min(Symptomes["Date"])  # start date
            edate = max(Symptomes["Date"])  # end date
            delta = edate - sdate  # as timedelta
            from datetime import timedelta

            # Build a continuous range of days so missing dates show as gaps
            day = [sdate + timedelta(days=i) for i in range(delta.days + 1)]
            DF = pd.DataFrame(day, columns=['Date'])
            data_with_missing_times = pd.merge(DF, Symptomes, on='Date', how='outer')
            data_with_missing_times.Date = pd.to_datetime(data_with_missing_times.Date)
            if delta.days > 1825:
                datebreaks = '18 months'
            elif delta.days > 1095:
                datebreaks = '12 months'
            else:
                datebreaks = '6 months'
            plot = (p9.ggplot(data=data_with_missing_times,
                              mapping=p9.aes(x='Date', y='Duration'))
                    + p9.geom_smooth(color='red', size=5, method="loess", se=False)
                    + p9.theme_classic()
                    + p9.theme(axis_text=p9.element_text(size=33),
                               axis_title=p9.element_text(size=33, face='bold'))
                    + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
                    + p9.labs(x='', y=''))
            if len(data_with_missing_times) > 0:
                plot.save(filename='TL_2.jpeg', path="pdf/iteration/",
                          width=25, height=5, dpi=320)
            else:
                print('Plot not created; no data found.')
    print('=============== duration_TL DONE ===============')
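# A hedged invocation sketch for duration_TL. The toy frame below assumes the
# Date/Group/Duration column semantics inferred from the function body; the
# output directory pdf/iteration/ must already exist, and method='loess'
# requires the scikit-misc package.
import numpy as np

rng = np.random.default_rng(1)
Data = pd.DataFrame({
    'Date': pd.date_range('2021-01-01', periods=200).astype(str),
    'Group': ['sy'] * 200,
    'Duration': rng.integers(5, 120, size=200).astype(float),
})
duration_TL(Data)  # writes pdf/iteration/TL_2.jpeg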
def test_legend_fill_ratio():
    p = (ggplot(df_linear, aes('x', color='x<0.5'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='lm',
                       size=0.5, span=.3)
         )

    assert p == 'legend_fill_ratio'
def analyze_encodes() -> pd.DataFrame:
    dfs = []
    for decode_path in DECODES_PATHS:
        dfs.append(pd.read_csv(decode_path))
    df = pd.concat(dfs)
    df['malformed'] = df['malformed'].astype(int)
    df['alpha'] = df['num_channels'] / df['num_clients']
    plot_df = df.groupby('alpha').mean()
    plot_df.reset_index(inplace=True)
    n_alphas = df['alpha'].nunique()
    print(f"Using {n_alphas} different alphas from {len(DECODES_PATHS)} runs")
    plot = (p9.ggplot(plot_df)
            + p9.aes('alpha', 'malformed')
            + p9.geom_point()
            + p9.geom_smooth(method='lm')
            + p9.labs(x='Alpha', y='Collision Percentage'))
    plot.draw()
    plt.show()
    return df
def _plot_regret_single(df: pd.DataFrame) -> gg.ggplot:
  """Plots the average regret through time for a single variable."""
  p = (gg.ggplot(df)
       + gg.aes(x='episode', y='average_regret')
       + gg.geom_smooth(method=smoothers.mean, span=0.1, size=1.75, alpha=0.1,
                        colour='#313695', fill='#313695'))
  return p
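# A hedged sketch of the input this helper expects: a frame with episode and
# average_regret columns. smoothers.mean is taken to be the project's own
# smoothing function passed as geom_smooth's method (an assumption); the toy
# data below is synthetic.
import numpy as np

rng = np.random.default_rng(2)
df = pd.DataFrame({'episode': np.arange(1, 501)})
df['average_regret'] = 1.0 / np.sqrt(df['episode']) + rng.normal(0, 0.02, 500)
p = _plot_regret_single(df)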
def plot_series(
    df,
    x=None,
    y=None,
    tick_text_size=6,
    line_size=1.5,
    y_axis_label="Point score",
    x_axis_label="",
    color="stock",
    use_smooth_line=False,
):
    assert len(df) > 0
    assert len(x) > 0 and len(y) > 0
    assert line_size > 0.0
    assert isinstance(tick_text_size, int) and tick_text_size > 0
    assert y_axis_label is not None
    assert x_axis_label is not None
    args = {'x': x, 'y': y}
    if color:
        args['color'] = color
    plot = (p9.ggplot(df, p9.aes(**args))
            + p9.labs(x=x_axis_label, y=y_axis_label)
            + p9.theme(
                axis_text_x=p9.element_text(angle=30, size=tick_text_size),
                axis_text_y=p9.element_text(size=tick_text_size),
                legend_position="none",
            ))
    if use_smooth_line:
        plot += p9.geom_smooth(size=line_size)
    else:
        plot += p9.geom_line(size=line_size)
    return plot_as_inline_html_data(plot)
def test_continuous_x():
    n = len(df_continuous_x)
    p = (ggplot(df_continuous_x, aes('x', 'y'))
         + geom_point()
         + geom_smooth(df_continuous_x[3:n-3], method='loess',
                       color='blue', fullrange=False))

    assert p == 'continuous_x'
def test_continuous_x_fullrange():
    n = len(df_continuous_x)
    p = (ggplot(df_continuous_x, aes('x', 'y'))
         + geom_point()
         + geom_smooth(df_continuous_x[3:n-3], method='loess',
                       color='blue', fullrange=True))

    assert p == 'continuous_x_fullrange'
def test_gpr(self):
    try:
        from sklearn import gaussian_process  # noqa: F401
    except ImportError:
        return

    p = self.p + geom_smooth(aes(y='y_noisy'), method='gpr')
    p.draw_test()
def test_linear_smooth():
    p = (ggplot(df_linear, aes('x'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='lm',
                       span=.3, color='blue')
         )

    assert p == 'linear_smooth'
def test_non_linear_smooth_no_ci():
    p = (ggplot(df_linear, aes('x'))
         + geom_point(aes(y='y_noisy'))
         + geom_smooth(aes(y='y_noisy'), method='loess',
                       span=.3, color='blue', se=False)
         )

    assert p == 'non_linear_smooth_no_ci'
def __plot(
    self,
    plot_data,
    x,
    y,
    colour,
    lbl_x,
    lbl_y,
    facet,
    facet_scales,
    facet_by,
    smoothed,
    points,
    error_bars,
    save,
):
    cbbPalette = [
        "#000000",
        "#E69F00",
        "#56B4E9",
        "#009E73",
        "#0072B2",
        "#D55E00",
        "#CC79A7",
    ]
    plt = ggplot(data=plot_data, mapping=aes(x=x, y=y, colour=colour))
    plt += xlab(lbl_x)
    plt += ylab(lbl_y)
    # + facet_grid("site~", scales="free")
    # + geom_line()
    if facet:
        # TODO: use facet as save
        nrow, ncol = self.get_facet_rows(plot_data, facet_by)
        plt += facet_wrap(facet_by, nrow=nrow, ncol=ncol, scales=facet_scales)
    if points:
        plt += geom_point()
    if error_bars:
        # TODO: use a generic way to compute them
        # plt += geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
        pass
    # TODO: use smooth as save
    if smoothed:
        plt += geom_smooth(
            method="mavg",
            se=False,
            method_args={"window": 4, "center": True, "min_periods": 1},
        )
    else:
        plt += geom_line()
    plt += scale_colour_manual(values=cbbPalette, guide=False)
    plt += scale_x_continuous(labels=label_x)
    plt += theme(figure_size=(15, 18), dpi=150)
    if save:
        plt.save(**save)
    return plt
def _plot_regret_group(df: pd.DataFrame, group_col: str) -> gg.ggplot:
  """Plots the average regret through time when grouped."""
  group_name = group_col.replace('_', ' ')
  df[group_name] = df[group_col].astype('category')
  p = (gg.ggplot(df)
       + gg.aes(x='episode', y='average_regret',
                group=group_name, colour=group_name, fill=group_name)
       + gg.geom_smooth(method=smoothers.mean, span=0.1, size=1.75, alpha=0.1)
       + gg.scale_colour_manual(values=FIVE_COLOURS)
       + gg.scale_fill_manual(values=FIVE_COLOURS))
  return p
def plot_and_save(scale_data_df_cleaned, smooth_factor, temp_file_name):
    fasting_start = to_datetime('2019-10-15')
    plot_output = (
        ggplot(scale_data_df_cleaned, aes(x='timestamp', y='weight'))
        # + facet_wrap('~', ncol=1, scales='free')
        + geom_point(size=0.5)
        + geom_smooth(span=smooth_factor, color='red')
        + geom_vline(aes(xintercept=fasting_start), color='blue', size=1.2)
        + geom_label(aes(x=to_datetime('2019-11-30'),
                         y=max(scale_data_df_cleaned.loc[:, 'weight'])),
                     label='IF starts!', size=15))
    plot_output.save(temp_file_name, width=13, height=10, dpi=80)
def plot_replicates_lowess_regression_smoothing(self):
    """
    Applies a lowess smoothing regression to the replicates plot
    in order to estimate the true function.
    """
    from plotnine import ggplot, ylab, xlab, geom_line, aes, geom_smooth

    plot = (ggplot(self.data, aes('Time', 'Current', color='Channel'))
            + ylab(u'Current (μA)')
            + xlab('Time (seconds)')
            + geom_line()
            + geom_smooth(span=self.span, method='lowess'))
    print(plot)
    return plot
def plot_company_rank(df):
    assert isinstance(df, pd.DataFrame)
    # assert 'sector' in df.columns
    n_bin = len(df['bin'].unique())
    plot = (p9.ggplot(df, p9.aes('date', 'rank', group='asx_code', color='sector'))
            + p9.geom_smooth(span=0.3, se=False)
            + p9.geom_text(p9.aes(label='asx_code', x='x', y='y'),
                           nudge_x=1.2, size=6, show_legend=False)
            + p9.xlab('')
            + p9.facet_wrap('~bin', nrow=n_bin, ncol=1, scales="free_y")
            + p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                       figure_size=(8, 20),
                       subplots_adjust={'right': 0.8}))
    return plot_as_inline_html_data(plot)
def test_init_and_fit_kwargs():
    df = pd.DataFrame({
        'x': np.arange(11),
        'y': [0, 0, 0, 0.05, 0.25, 0.5, 0.75, 0.95, 1, 1, 1]
    })

    p = (ggplot(df, aes('x', 'y'))
         + geom_point()
         + geom_smooth(
             method='glm',
             method_args={
                 'family': sm.families.Binomial(),  # init parameter
                 'method': 'minimize'               # fit parameter
             },
             se=False))

    assert p == 'init_and_fit_kwargs'
def plot_company_rank(ld: LazyDictionary) -> p9.ggplot:
    df = ld["rank"]
    # assert 'sector' in df.columns
    n_bin = len(df["bin"].unique())
    plot = (
        p9.ggplot(df, p9.aes("date", "rank", group="asx_code", color="asx_code"))
        + p9.geom_smooth(span=0.3, se=False)
        + p9.geom_text(
            p9.aes(label="asx_code", x="x", y="y"),
            nudge_x=1.2,
            size=6,
            show_legend=False,
        )
        + p9.facet_wrap("~bin", nrow=n_bin, ncol=1, scales="free_y")
    )
    return user_theme(
        plot,
        figure_size=(12, 20),
        subplots_adjust={"right": 0.8},
    )
def cell_division(adata):
    """Plots total_counts as a function of the principal circle nodes to
    visualize the moment of cell division.

    Parameters
    ----------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.celldiv_moment`.

    Returns
    -------
    A plotnine line plot to help visualize the moment of cell division and
    the direction of the cell cycle.

    If method = 'counts' when `tl.celldiv_moment` was run, cell division is
    defined by the largest drop in total_counts. The changes in counts are
    represented by the bars at the bottom, and the suggested moment of cell
    division is marked in red. The cell cycle should follow an incremental
    increase in total counts until around the moment of cell division.

    Alternatively, if method = 'g2m' in `tl.celldiv_moment`, the G2-M
    signature dynamics are used to define the moment of cell division.
    """
    ref_var = adata.uns['scycle']['cell_div_moment']['ref_var']
    edge_to_0 = adata.uns['scycle']['cell_div_moment']['cell_div_edge'][0]
    edges = adata.uns['princirc_gr']['edges']
    edges['cell_div'] = edges['e1'] == edge_to_0
    cell_div_count = edges[edges['e1'] == edge_to_0]['mean_var']

    cell_div_plot = (ggplot(edges, aes('e1', 'mean_var'))
                     + geom_point(aes(y='mean_var'), size=2)
                     + geom_path(aes(y='mean_var'))
                     + geom_smooth(aes(y='mean_var'), method='lm',
                                   linetype='dashed')
                     + annotate("point", x=edge_to_0, y=cell_div_count,
                                color='red', size=2)
                     + labs(x='Edge position', y=ref_var)
                     + geom_col(aes(y='diff_var', fill='cell_div'))
                     + scale_fill_manual(values=['darkgrey', 'red'], guide=False)
                     + theme_std)
    return cell_div_plot
def plot_portfolio_stock_performance(
    ld: LazyDictionary, figure_width: int = 12, date_text_size=7
) -> p9.ggplot:
    df = ld["df"]
    df = df[df["stock_cost"] > 0.0]
    pivoted_df = df.pivot(index="stock", columns="date", values="stock_profit")
    latest_date = pivoted_df.columns[-1]
    mean_profit = pivoted_df.mean(axis=1)
    n_stocks = len(mean_profit)
    # if we want ~4 stocks per facet plot, we need to specify the appropriate
    # number of quantile bins for pd.qcut()
    bins = pd.qcut(mean_profit, int(100 / n_stocks) + 1)
    df = df.merge(bins.to_frame(name="bins"), left_on="stock", right_index=True)
    textual_df = df[df["date"] == latest_date]

    plot = (
        p9.ggplot(df, p9.aes("date", "stock_profit", group="stock", colour="stock"))
        + p9.geom_smooth(size=1.0, span=0.3, se=False)
        + p9.facet_wrap("~bins", ncol=1, nrow=len(bins), scales="free_y")
        + p9.geom_text(
            p9.aes(x="date", y="stock_profit", label="stock"),
            color="black",
            size=9,
            data=textual_df,
            position=p9.position_jitter(width=10, height=10),
        )
    )
    return user_theme(
        plot,
        y_axis_label="$ AUD",
        figure_size=(figure_width, int(len(bins) * 1.2)),
        axis_text_x=p9.element_text(angle=30, size=date_text_size),
    )
def plot_company_rank(df: pd.DataFrame):
    # assert 'sector' in df.columns
    n_bin = len(df["bin"].unique())
    plot = (
        p9.ggplot(df, p9.aes("date", "rank", group="asx_code", color="asx_code"))
        + p9.geom_smooth(span=0.3, se=False)
        + p9.geom_text(
            p9.aes(label="asx_code", x="x", y="y"),
            nudge_x=1.2,
            size=6,
            show_legend=False,
        )
        + p9.xlab("")
        + p9.facet_wrap("~bin", nrow=n_bin, ncol=1, scales="free_y")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=7),
            figure_size=(8, 20),
            subplots_adjust={"right": 0.8},
        )
    )
    return plot_as_inline_html_data(plot)
def plot_series(
    df,
    x=None,
    y=None,
    tick_text_size=6,
    line_size=1.5,
    y_axis_label="Point score",
    x_axis_label="",
    color="stock",
    use_smooth_line=False,
):
    if df is None or len(df) < 1:
        return None

    assert len(x) > 0 and len(y) > 0
    assert line_size > 0.0
    assert isinstance(tick_text_size, int) and tick_text_size > 0
    assert y_axis_label is not None
    assert x_axis_label is not None

    args = {"x": x, "y": y}
    if color:
        args["color"] = color
    plot = p9.ggplot(df, p9.aes(**args))
    if use_smooth_line:
        # plotnine doesn't support confidence intervals with loess smoothing,
        # so se=False
        plot += p9.geom_smooth(size=line_size, span=0.2, se=False)
    else:
        plot += p9.geom_line(size=line_size)
    return user_theme(
        plot,
        x_axis_label=x_axis_label,
        y_axis_label=y_axis_label,
        axis_text_x=p9.element_text(angle=30, size=tick_text_size),
        axis_text_y=p9.element_text(size=tick_text_size),
    )
from plotnine.data import economics
from plotnine import ggplot, aes, geom_point, geom_smooth, xlab, ylab

g = (ggplot(economics)
     + aes(x="date", y="uempmed")
     + geom_point()
     + geom_smooth(color="red", span=0.5)
     + xlab("date (year)")
     + ylab("unemployment"))
g.save("19.png")
def test_mavg(self):
    p = self.p + geom_smooth(aes(y='y_noisy'), method='mavg',
                             method_args={'window': 10})
    p.draw_test()
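# A self-contained sketch of what method='mavg' computes: plotnine forwards
# method_args to pandas' rolling-window machinery, so the fitted line is a
# moving average of y. The data below is synthetic.
import numpy as np
import pandas as pd
from plotnine import ggplot, aes, geom_point, geom_smooth

x = np.arange(100)
y_noisy = np.sin(x / 10) + np.random.default_rng(3).normal(0, 0.2, 100)
df = pd.DataFrame({'x': x, 'y_noisy': y_noisy})
p = (ggplot(df, aes('x', 'y_noisy'))
     + geom_point()
     + geom_smooth(method='mavg', se=False,
                   method_args={'window': 10, 'center': True, 'min_periods': 1}))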
def label_x(dates):
    res = [(datetime.datetime(2018, 1, 1) + datetime.timedelta(x)).strftime("%d-%m")
           for x in dates]
    print(res)
    return res


(ggplot(data=res, mapping=aes(x='julian', y='value', colour='type'))
 + xlab("Day")
 + ylab("Mean number of detected songs")
 + facet_grid("type~", scales="free")
 # + geom_line()
 # + facet_wrap("type", nrow=2, ncol=1)
 + geom_point()
 # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
 + geom_smooth(method="mavg", se=False,
               method_args={"window": 4, "center": True, "min_periods": 1})
 + scale_colour_manual(values=cbbPalette, guide=False)
 + scale_x_continuous(labels=label_x)
 ).save("figs/song_events_aci_BARROW_mean_smoothed.png", height=10, width=16, dpi=150)

(ggplot(data=res, mapping=aes(x='julian', y='n_events_sum', colour='site'))
 + xlab("Day")
 + ylab("Total number of detected songs")
 # + facet_grid("site~", scales="free")
 # + facet_wrap("site", nrow=2, ncol=3)
 + geom_point()
 # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
 + geom_smooth(method="mavg", se=False,
               method_args={"window": 4, "center": True, "min_periods": 1})
 + scale_colour_manual(values=cbbPalette, guide=False)
 + scale_x_continuous(labels=label_x)
 ).save("figs/song_events_BARW0_sum.png", height=10, width=16, dpi=150)

#################
def plot_scatter(dat, figsize=(16, 12)):
    return (pn.ggplot(dat, pn.aes(x='val', y='response'))
            + pn.geom_point()
            + pn.geom_smooth(method='lm')
            + pn.facet_wrap("var", scales='free_x')
            + pn.theme_bw()
            + pn.theme(figure_size=figsize,
                       subplots_adjust={'hspace': 0.25}))
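# A hedged usage sketch; the long-format frame (val/response/var columns) is
# an assumption inferred from the aes() and facet_wrap() calls above, and the
# variable names are hypothetical.
import numpy as np
import pandas as pd

rng = np.random.default_rng(4)
val = np.concatenate([rng.normal(size=50), rng.uniform(size=50)])
dat = pd.DataFrame({
    'val': val,
    'var': ['age'] * 50 + ['income'] * 50,  # hypothetical variable names
    'response': 2 * val + rng.normal(0, 0.5, 100),
})
fig = plot_scatter(dat)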
iglo.julian.max() + 2)
hatch_lbl_pos = hatch_start + (hatch_end - hatch_start) / 2
xmin = min(inc_start, iglo.julian.min())
xmax = min(iglo_nest[iglo_nest.type == "hatch"].julian.max(),
           iglo.julian.max() + 2)

(ggplot(data=iglo, mapping=aes(x='julian', y='ACI_mean', colour='site'))
 # + facet_grid("panel~", scales="free")
 + xlab("Day")
 + ylab("Mean daily ACI (standardized)")
 + geom_point()
 + theme(legend_position="none")
 # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
 + geom_smooth(method="mavg", se=False,
               method_args={"window": 4, "center": True, "min_periods": 1})
 + annotate("rect",
            xmin=[inc_start, hatch_start],
            xmax=[inc_end, hatch_end],
            ymin=-math.inf, ymax=math.inf,
            alpha=0.1, fill=["red", "blue"])
 + annotate("text",
            x=[inc_lbl_pos, hatch_lbl_pos],
            y=1.8,
            label=["Incubation initiation", "Hatch"])
 # + geom_line(data=inc, mapping=aes(x="julian", y="uniqueID"), colour="black")
 # + geom_smooth(data=inc, mapping=aes(x="julian", y="uniqueID"), colour="black",
 #               method="mavg", se=False,
 #               method_args={"window": 4, "center": True, "min_periods": 1})
def eval(fold=BUZZER_DEV_FOLD):
    if not os.path.isdir(report_dir):
        os.mkdir(report_dir)
    valid = read_data(fold)
    print('# {} data: {}'.format(fold, len(valid)))
    valid_iter = chainer.iterators.SerialIterator(
        valid, args.batch_size, repeat=False, shuffle=False)

    args.n_input = valid[0][1][0].shape[0]
    model = RNNBuzzer(args.n_input, args.n_layers, args.n_hidden,
                      args.n_output, args.dropout)
    chainer.serializers.load_npz(args.model_path, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    predictions = []
    buzzes = dict()
    for batch in tqdm(valid_iter):
        qids, vectors, labels, positions = list(map(list, zip(*batch)))
        batch = convert_seq(batch, device=args.gpu)
        preds = model.predict(batch['xs'], softmax=True)
        preds = [p.tolist() for p in preds]
        predictions.extend(preds)
        for i in range(len(qids)):
            buzzes[qids[i]] = []
            for pos, pred in zip(positions[i], preds[i]):
                buzzes[qids[i]].append((pos, pred))
            buzzes[qids[i]] = list(map(list, zip(*buzzes[qids[i]])))

    buzz_dir = os.path.join(buzzes_dir.format(fold))
    with open(buzz_dir, 'wb') as f:
        pickle.dump(buzzes, f)

    results = dict()
    for example_idx in range(len(valid)):
        qid, vectors, labels, positions = valid[example_idx]
        preds = predictions[example_idx]
        q_len = positions[-1]
        for i, pos in enumerate(positions):
            rel_pos = int(100 * pos / q_len)
            if rel_pos not in results:
                results[rel_pos] = []
            results[rel_pos].append((labels[i], preds[i][1]))

    freq = {'x': [], 'y': [], 'type': []}
    for k, rs in results.items():
        rs, scores = list(map(list, zip(*rs)))
        freq['x'].append(k / 100)
        freq['y'].append(sum(rs) / len(rs))
        freq['type'].append('acc')
        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.5 for x in scores) / len(scores))
        freq['type'].append('0.5')
        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.3 for x in scores) / len(scores))
        freq['type'].append('0.3')
        freq['x'].append(k / 100)
        freq['y'].append(sum(x > 0.7 for x in scores) / len(scores))
        freq['type'].append('0.7')
    freq_df = pd.DataFrame(freq)
    p0 = ggplot(freq_df) + geom_smooth(aes(x='x', y='y', color='type'))
    p0.save(os.path.join(report_dir, '{}_acc_buzz.pdf'.format(fold)))

    stack_freq = {'x': [], 'y': [], 'type': []}
    threshold = 0.5
    for k, rs in results.items():
        num = len(rs)
        only_oracle = sum((c == 1 and b <= threshold) for c, b in rs)
        only_buzzer = sum((c == 0 and b > threshold) for c, b in rs)
        both = sum((c == 1 and b > threshold) for c, b in rs)
        neither = sum((c == 0 and b <= threshold) for c, b in rs)
        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(only_oracle / num)
        stack_freq['type'].append('only_oracle')
        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(only_buzzer / num)
        stack_freq['type'].append('only_buzzer')
        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(both / num)
        stack_freq['type'].append('both')
        stack_freq['x'].append(k / 100)
        stack_freq['y'].append(neither / num)
        stack_freq['type'].append('neither')
    stack_freq_df = pd.DataFrame(stack_freq)
    p1 = ggplot(stack_freq_df) + geom_area(aes(x='x', y='y', fill='type'))
    p1.save(os.path.join(report_dir, '{}_stack_area.pdf'.format(fold)))
def quick_color_check(target_matrix, source_matrix, num_chips):
    """Quickly plot target matrix values against source matrix values to
    determine oversaturated color chips or other issues.

    Inputs:
    source_matrix = a 22x4 matrix containing the average red, green, and blue
                    values for each color chip of the source image
    target_matrix = a 22x4 matrix containing the average red, green, and blue
                    values for each color chip of the target image
    num_chips     = number of color card chips included in the matrices (integer)

    :param source_matrix: numpy.ndarray
    :param target_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, \
        facet_grid, geom_label, scale_x_continuous, scale_y_continuous, \
        scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels
    red = ['red'] * num_chips
    green = ['green'] * num_chips
    blue = ['blue'] * num_chips

    # Make a column of chip numbers
    chip = np.arange(0, num_chips).reshape((num_chips, 1))
    chips = np.row_stack((chip, chip, chip))

    # Combine info
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    all_color_data = np.row_stack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({'source': all_color_data[:, 0],
                            'target': all_color_data[:, 1],
                            'color': all_color_data[:, 2]})

    # Add chip numbers to the dataframe
    dataset['chip'] = chips
    dataset = dataset.astype({'color': str, 'chip': str,
                              'target': float, 'source': float})

    # Make the plot
    p1 = (ggplot(dataset, aes(x='target', y='source', color='color', label='chip'))
          + geom_point(show_legend=False, size=2)
          + geom_smooth(method='lm', size=.5, show_legend=False)
          + theme_seaborn()
          + facet_grid('.~color')
          + geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5,
                       show_legend=False)
          + scale_x_continuous(limits=(-5, 270))
          + scale_y_continuous(limits=(-5, 275))
          + scale_color_manual(values=['blue', 'green', 'red']))

    # Save or display the plot depending on the debug mode
    if params.debug is not None:
        if params.debug == 'print':
            p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'))
        elif params.debug == 'plot':
            print(p1)
from plotnine.data import economics
from plotnine import ggplot, aes, geom_point, geom_smooth, xlab, ylab

g = (ggplot(economics)
     + aes(x="date", y="uempmed")
     + geom_point()
     + geom_smooth(color="red")
     + xlab("date (year)")
     + ylab("unemployment"))
g.save("18.png")
res = [(datetime.datetime(2018, 1, 1) + datetime.timedelta(x)).strftime("%d-%m")
       for x in dates]
print(res)
return res


(ggplot(data=res, mapping=aes(x='julian', y='value', colour='site'))
 + xlab("Day")
 + ylab("Mean number of detected songs")
 # + facet_grid("site~", scales="free")
 # + geom_line()
 + facet_wrap("site", nrow=6, ncol=2, scales="free_y")
 + geom_point()
 # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
 + geom_smooth(method="mavg", se=False,
               method_args={"window": 4, "center": True, "min_periods": 1})
 # + scale_colour_manual(values=cbbPalette, guide=False)
 + scale_x_continuous(labels=label_x)
 ).save("figs/song_events_all_smoothed.png", height=10, width=16, dpi=150)

(ggplot(data=res, mapping=aes(x='julian', y='value', colour='site'))
 + xlab("Day")
 + ylab("Mean number of detected songs")
 # + facet_grid("site~", scales="free")
 + geom_line()
 + facet_wrap("site", nrow=6, ncol=2, scales="free_y")
 + geom_point()
 # + geom_errorbar(aes(ymin="ACI_mean - ACI_std", ymax="ACI_mean + ACI_std"))
 # + geom_smooth(method="mavg", se=False,
 #               method_args={"window": 4, "center": True, "min_periods": 1})
def test_gls(self):
    p = self.p + geom_smooth(aes(y='y_noisy'), method='gls')
    p.draw_test()
def test_lowess(self):
    p = self.p + geom_smooth(aes(y='y_noisy'), method='lowess')
    with pytest.warns(UserWarning):
        p.draw_test()
def test_discrete_x():
    p = (ggplot(df_discrete_x, aes('x', 'y'))
         + geom_point()
         + geom_smooth(color='blue'))

    assert p == 'discrete_x'
def test_discrete_x_fullrange():
    p = (ggplot(df_discrete_x, aes('x', 'y'))
         + geom_point()
         + geom_smooth(color='blue', fullrange=True))

    assert p == 'discrete_x_fullrange'
def quick_color_check(target_matrix, source_matrix, num_chips):
    """Quickly plot target matrix values against source matrix values to
    determine oversaturated color chips or other issues.

    Inputs:
    source_matrix = a 22x4 matrix containing the average red, green, and blue
                    values for each color chip of the source image
    target_matrix = a 22x4 matrix containing the average red, green, and blue
                    values for each color chip of the target image
    num_chips     = number of color card chips included in the matrices (integer)

    :param source_matrix: numpy.ndarray
    :param target_matrix: numpy.ndarray
    :param num_chips: int
    """
    # Imports
    from plotnine import ggplot, geom_point, geom_smooth, theme_seaborn, \
        facet_grid, geom_label, scale_x_continuous, scale_y_continuous, \
        scale_color_manual, aes
    import pandas as pd

    # Extract and organize matrix info
    tr = target_matrix[:num_chips, 1:2]
    tg = target_matrix[:num_chips, 2:3]
    tb = target_matrix[:num_chips, 3:4]
    sr = source_matrix[:num_chips, 1:2]
    sg = source_matrix[:num_chips, 2:3]
    sb = source_matrix[:num_chips, 3:4]

    # Create columns of color labels
    red = ['red'] * num_chips
    green = ['green'] * num_chips
    blue = ['blue'] * num_chips

    # Make a column of chip numbers
    chip = np.arange(0, num_chips).reshape((num_chips, 1))
    chips = np.row_stack((chip, chip, chip))

    # Combine info
    color_data_r = np.column_stack((sr, tr, red))
    color_data_g = np.column_stack((sg, tg, green))
    color_data_b = np.column_stack((sb, tb, blue))
    all_color_data = np.row_stack((color_data_b, color_data_g, color_data_r))

    # Create a dataframe with headers
    dataset = pd.DataFrame({
        'source': all_color_data[:, 0],
        'target': all_color_data[:, 1],
        'color': all_color_data[:, 2]
    })

    # Add chip numbers to the dataframe
    dataset['chip'] = chips
    dataset = dataset.astype({
        'color': str,
        'chip': str,
        'target': float,
        'source': float
    })

    # Make the plot
    p1 = (ggplot(dataset, aes(x='target', y='source', color='color', label='chip'))
          + geom_point(show_legend=False, size=2)
          + geom_smooth(method='lm', size=.5, show_legend=False)
          + theme_seaborn()
          + facet_grid('.~color')
          + geom_label(angle=15, size=7, nudge_y=-.25, nudge_x=.5,
                       show_legend=False)
          + scale_x_continuous(limits=(-5, 270))
          + scale_y_continuous(limits=(-5, 275))
          + scale_color_manual(values=['blue', 'green', 'red']))

    # Autoincrement the device counter
    params.device += 1

    # Save or display the plot depending on the debug mode
    if params.debug is not None:
        if params.debug == 'print':
            p1.save(os.path.join(params.debug_outdir, 'color_quick_check.png'))
        elif params.debug == 'plot':
            print(p1)
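# A hedged invocation sketch with synthetic chip matrices. Column 0 is a chip
# index and columns 1-3 hold average R, G, B values, per the docstring; real
# values would come from a color card. What happens to the plot depends on
# params.debug ('print' saves it, 'plot' displays it).
import numpy as np

rng = np.random.default_rng(5)
idx = np.arange(22).reshape(-1, 1)
target_matrix = np.hstack([idx, rng.uniform(0, 255, size=(22, 3))])
source_matrix = np.hstack([idx, rng.uniform(0, 255, size=(22, 3))])
quick_color_check(target_matrix, source_matrix, num_chips=22)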
def test_sorts_by_x():
    df = pd.DataFrame({'x': [5, 0, 1, 2, 3, 4], 'y': range(6)})
    p = ggplot(df, aes('x', 'y')) + geom_smooth(stat='identity')
    assert p == 'sorts_by_x'