def test_custom_format():
    """Check custom_format with brace-style, 'old' style and an invalid style."""
    values = [3.987, 2, 42.42]
    expected = ['3.99 USD', '2.00 USD', '42.42 USD']

    # No style argument: the template is a brace-style format string.
    assert custom_format('{:.2f} USD')(values) == expected

    # style='old': the template is a %-style format string.
    assert custom_format('%.2f USD', style='old')(values) == expected

    # The formatter is built outside the raises-block on purpose: the
    # ValueError is expected from the call, not from construction.
    bad_formatter = custom_format('%.2f USD', style='ancient')
    with pytest.raises(ValueError):
        bad_formatter(values)
def plot_bargraph(count_plot_df, plot_df):
    """
    Build a flipped bar chart of lemma counts, faceted by repository.

    Arguments:
        count_plot_df - dataframe providing the "lemma", "count" and
                        "repository" columns that are plotted
        plot_df - dataframe providing "lemma" and "odds_ratio"; lemmas sorted
                  by ascending odds ratio define the discrete axis order

    Returns:
        The assembled plotnine ggplot object (nothing is drawn here).
    """
    counts = count_plot_df.astype({"count": int})
    lemma_order = plot_df.sort_values(
        "odds_ratio", ascending=True).lemma.tolist()

    layers = (
        p9.geom_col(position=p9.position_dodge(width=0.5), fill="#253494"),
        p9.coord_flip(),
        p9.facet_wrap("repository", scales='free_x'),
        p9.scale_x_discrete(limits=(lemma_order)),
        p9.scale_y_continuous(labels=custom_format('{:,.0g}')),
        p9.labs(x=None),
        p9.theme_seaborn(
            context='paper', style="ticks", font="Arial", font_scale=0.95),
        p9.theme(
            # 640 x 480
            figure_size=(6.66, 5),
            strip_background=p9.element_rect(fill="white"),
            strip_text=p9.element_text(size=12),
            axis_title=p9.element_text(size=12),
            axis_text_x=p9.element_text(size=10),
        ),
    )

    graph = p9.ggplot(counts, p9.aes(x="lemma", y="count"))
    # Components are added in the same order as the original "+" chain.
    for layer in layers:
        graph = graph + layer
    return graph
def scatter_plot(df, xcol, ycol, domain, xname=None, yname=None, log=False,
                 width=6, height=6, clamp=True, tickCount=5):
    """Build a plotnine scatter plot of ``df[ycol]`` against ``df[xcol]``.

    Args:
        df: DataFrame holding the data. When ``clamp`` is true a deep copy
            is clamped, so the caller's frame is left untouched.
        xcol: name of the column mapped to the x axis.
        ycol: name of the column mapped to the y axis.
        domain: two-element sequence used as the limits of both axes;
            ``domain[1]`` is also the clamp ceiling and the position of the
            dashed vertical/horizontal rules.
        xname: x-axis label; defaults to ``xcol``.
        yname: y-axis label; defaults to ``ycol``.
        log: use log10 scales on both axes instead of linear ones.
        width: figure width passed to ``figure_size``.
        height: figure height passed to ``figure_size``.
        clamp: replace values greater than ``domain[1]`` by ``domain[1]``.
        tickCount: currently unused; kept so existing callers keep working.

    Returns:
        The assembled ggplot object (nothing is drawn here).
    """
    # Kept as an assert so callers relying on AssertionError are unaffected.
    assert len(domain) == 2

    POINT_SIZE = 0.5
    DASH_PATTERN = (0, (3, 1))

    # Identity comparison: "== None" can be hijacked by objects that
    # overload __eq__ and is inconsistent with scatter_plot2.
    if xname is None:
        xname = xcol
    if yname is None:
        yname = ycol

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    if clamp:  # clamp overflowing values if required
        df = df.copy(deep=True)
        df.loc[df[xcol] > domain[1], xcol] = domain[1]
        df.loc[df[ycol] > domain[1], ycol] = domain[1]

    # generate scatter plot
    scatter = p9.ggplot(df)
    scatter += p9.aes(x=xcol, y=ycol)
    scatter += p9.geom_point(size=POINT_SIZE, na_rm=True)
    scatter += p9.labs(x=xname, y=yname)

    if log:  # log scale
        scatter += p9.scale_x_log10(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_log10(limits=domain, labels=ax_formatter)
    else:
        scatter += p9.scale_x_continuous(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_continuous(limits=domain, labels=ax_formatter)

    scatter += p9.theme_bw()
    scatter += p9.theme(
        panel_grid_major=p9.element_line(color='#666666', alpha=0.5))
    scatter += p9.theme(figure_size=(width, height))

    # generate additional lines
    scatter += p9.geom_abline(intercept=0, slope=1,
                              linetype=DASH_PATTERN)  # diagonal
    scatter += p9.geom_vline(xintercept=domain[1],
                             linetype=DASH_PATTERN)  # vertical rule
    scatter += p9.geom_hline(yintercept=domain[1],
                             linetype=DASH_PATTERN)  # horizontal rule

    return scatter
def scatter_plot2(df1, df2, xcol, ycol, domain, color1='black', color2='red',
                  xname=None, yname=None, log=False, width=6, height=6,
                  clamp=True, tickCount=5):
    """Overlay two data sets in one plotnine scatter plot with rug marks.

    ``df1`` is the plot's base data and is drawn in ``color1``; ``df2`` is
    drawn on top in ``color2``. Both axes share ``domain`` as limits, and
    ``domain[1]`` is the clamp ceiling as well as the position of the dashed
    vertical and horizontal rules. ``xname``/``yname`` default to the column
    names. ``tickCount`` is accepted but not used. Returns the ggplot object.
    """
    assert len(domain) == 2

    point_size = 1.5
    dash_pattern = (0, (6, 2))
    ceiling = domain[1]

    xname = xcol if xname is None else xname
    yname = ycol if yname is None else yname

    # Formatter shared by both axes.
    label_fmt = mizani.custom_format('{:n}')

    if clamp:
        # Work on deep copies so the callers' frames stay untouched, and cap
        # every value above the domain's upper bound.
        capped = []
        for frame in (df1, df2):
            frame = frame.copy(deep=True)
            for col in (xcol, ycol):
                frame.loc[frame[col] > ceiling, col] = ceiling
            capped.append(frame)
        df1, df2 = capped

    if log:
        make_x_scale, make_y_scale = p9.scale_x_log10, p9.scale_y_log10
    else:
        make_x_scale = p9.scale_x_continuous
        make_y_scale = p9.scale_y_continuous

    # Components in the exact order they must be added to the plot.
    components = [
        p9.aes(x=xcol, y=ycol),
        # points
        p9.geom_point(size=point_size, na_rm=True, color=color1, alpha=0.5),
        p9.geom_point(size=point_size, na_rm=True, data=df2, color=color2,
                      alpha=0.5),
        p9.labs(x=xname, y=yname),
        # rug plots
        p9.geom_rug(na_rm=True, sides="tr", color=color1, alpha=0.05),
        p9.geom_rug(na_rm=True, sides="tr", data=df2, color=color2,
                    alpha=0.05),
        # axis scales
        make_x_scale(limits=domain, labels=label_fmt),
        make_y_scale(limits=domain, labels=label_fmt),
        # theming
        p9.theme_bw(),
        p9.theme(panel_grid_major=p9.element_line(color='#666666',
                                                  alpha=0.5)),
        p9.theme(panel_grid_minor=p9.element_blank()),
        p9.theme(figure_size=(width, height)),
        p9.theme(text=p9.element_text(size=24, color="black")),
        # reference lines: diagonal, vertical rule, horizontal rule
        p9.geom_abline(intercept=0, slope=1, linetype=dash_pattern),
        p9.geom_vline(xintercept=ceiling, linetype=dash_pattern),
        p9.geom_hline(yintercept=ceiling, linetype=dash_pattern),
    ]

    plot = p9.ggplot(df1)
    for component in components:
        plot += component
    return plot
def test_empty_breaks():
    """Every formatter, built with default arguments, maps [] to []."""
    x = []
    formatter_factories = (
        custom_format,
        comma_format,
        currency_format,
        percent_format,
        scientific_format,
        date_format,
        mpl_format,
        log_format,
        timedelta_format,
    )
    for make_formatter in formatter_factories:
        assert make_formatter()(x) == []
def box_plot(df, x_col, fill_col, agg_col, agg_func='count', filter_col=None,
             filterby=None, no_outliers=False, title=None, ylabel=None,
             grouped=True, flip=True, dots=True):
    '''
    Draw and show a box plot (univariate analysis) of an aggregated column.

    :param df: source dataframe (expected to be cleaned beforehand)
    :param x_col: column placed on the x axis
    :param fill_col: column used as the fill aesthetic of the jittered dots
    :param agg_col: column the aggregation function is applied to
    :param agg_func: aggregation name forwarded to the grouping helper,
        for example: sum, min, mean, median, max
    :param filter_col: column forwarded to the grouping helper for filtering
    :param filterby: list of values forwarded to the grouping helper together
        with ``filter_col``; ``None`` (the default) means ``[""]``
    :param no_outliers: limit the y axis to the range between the minimum and
        the 75th percentile of the plotted column
    :param title: optional title, shown prefixed with "Box plot:"
    :param ylabel: y-axis label; falls back to the display text of ``agg_col``
    :param grouped: aggregate ``df`` through the grouping helper first;
        otherwise ``df`` is plotted as-is with ``agg_col`` on the y axis
    :param flip: flip the coordinates
    :param dots: overlay the observations as jittered dots
    :return: None; the figure is drawn and shown via matplotlib
    '''
    # A list literal as default would be one object shared by every call;
    # build a fresh list per call instead.
    if filterby is None:
        filterby = [""]

    # set display texts
    fill_label = get_display_text(fill_col)

    if grouped:
        aux, aux_str = _get_grouped_aux(df, x_col, fill_col, agg_col,
                                        agg_func, filter_col, filterby)
    else:
        aux = df
        aux_str = agg_col

    # sort x labels
    aux = aux.sort_values(by=[x_col])
    xcol_list = aux.astype(str)[x_col].unique().tolist()

    # graph creation
    graph = (ggplot(aux)
             + geom_boxplot(aes(x=x_col, y=aux_str))
             + theme_bw()
             + theme(axis_line_x=element_line(color='gray'),
                     axis_line_y=element_line(color='gray'),
                     line=element_line(color='white'))
             + scale_fill_manual(values=colors.OFICIAL_COLORS,
                                 name=fill_label)
             + scale_x_discrete(limits=xcol_list)
             + scale_y_continuous(labels=custom_format('{:,.0f}')))

    # dots
    if dots:
        graph += geom_jitter(aes(x=x_col, y=aux_str, fill=fill_col))

    # no outliers: the summary statistics are only needed in this branch
    if no_outliers:
        stats = aux[aux_str].describe()
        graph += ylim(stats.loc["min"], stats.loc["75%"])

    # title
    if title is not None:
        graph += ggtitle("Box plot:" + str(title))

    # flip
    if flip:
        graph += coord_flip()

    # set y label
    if not ylabel:
        ylabel = get_display_text(agg_col)
    graph += ylab(ylabel)

    # show
    graph.draw()
    plt.show()