Beispiel #1
0
def test_custom_format():
    """custom_format renders both supported styles and rejects unknown ones."""
    values = [3.987, 2, 42.42]
    expected = ['3.99 USD', '2.00 USD', '42.42 USD']

    # str.format-style template (the default style).
    new_style = custom_format('{:.2f} USD')
    assert new_style(values) == expected

    # printf-style template, selected with style='old'.
    old_style = custom_format('%.2f USD', style='old')
    assert old_style(values) == expected

    # An unrecognised style is expected to fail when the formatter is
    # called, so only the call sits inside the raises-context.
    unknown_style = custom_format('%.2f USD', style='ancient')
    with pytest.raises(ValueError):
        unknown_style(values)
Beispiel #2
0
def test_custom_format():
    """Exercise custom_format with its valid styles and one invalid style."""
    numbers = [3.987, 2, 42.42]
    want = ['3.99 USD', '2.00 USD', '42.42 USD']

    # (template, extra keyword arguments) for every style that must work;
    # both are expected to produce the same labels.
    valid_cases = (
        ('{:.2f} USD', {}),
        ('%.2f USD', {'style': 'old'}),
    )
    for template, options in valid_cases:
        fmt = custom_format(template, **options)
        assert fmt(numbers) == want

    # The bad style is only reported once the formatter is applied.
    bad = custom_format('%.2f USD', style='ancient')
    with pytest.raises(ValueError):
        bad(numbers)
Beispiel #3
0
def plot_bargraph(count_plot_df, plot_df):
    """
    Build a flipped bar chart of lemma counts, faceted by repository.

    Arguments:
        count_plot_df - dataframe with "lemma", "count" and "repository"
            columns; "count" is cast to int before plotting
        plot_df - dataframe with "lemma" and "odds_ratio" columns; it fixes
            the lemma order on the discrete axis (ascending odds ratio)

    Returns:
        The assembled plot object; nothing is drawn or saved here.
    """
    counts = count_plot_df.astype({"count": int})

    # Lemma order comes from the odds-ratio table, not from the counts.
    ordered = plot_df.sort_values("odds_ratio", ascending=True)
    lemma_order = ordered.lemma.tolist()

    graph = p9.ggplot(counts, p9.aes(x="lemma", y="count"))
    graph += p9.geom_col(position=p9.position_dodge(width=0.5),
                         fill="#253494")
    graph += p9.coord_flip()
    graph += p9.facet_wrap("repository", scales='free_x')
    graph += p9.scale_x_discrete(limits=lemma_order)
    graph += p9.scale_y_continuous(labels=custom_format('{:,.0g}'))
    graph += p9.labs(x=None)
    graph += p9.theme_seaborn(context='paper',
                              style="ticks",
                              font="Arial",
                              font_scale=0.95)
    graph += p9.theme(
        # 640 x 480
        figure_size=(6.66, 5),
        strip_background=p9.element_rect(fill="white"),
        strip_text=p9.element_text(size=12),
        axis_title=p9.element_text(size=12),
        axis_text_x=p9.element_text(size=10),
    )
    return graph
def scatter_plot(df,
                 xcol,
                 ycol,
                 domain,
                 xname=None,
                 yname=None,
                 log=False,
                 width=6,
                 height=6,
                 clamp=True,
                 tickCount=5):
    """
    Build a scatter plot of ``df[xcol]`` against ``df[ycol]``.

    Both axes share the same limits (``domain``). A dashed diagonal
    (y = x) and dashed rules at ``domain[1]`` on both axes are drawn on
    top of the points.

    :param df: dataframe holding the two columns to plot
    :param xcol: name of the column mapped to the x axis
    :param ycol: name of the column mapped to the y axis
    :param domain: two-element sequence used as the limits of both axes;
        ``domain[1]`` is also the clamping bound and the rule position
    :param xname: x axis label; defaults to ``xcol``
    :param yname: y axis label; defaults to ``ycol``
    :param log: use log10 scales on both axes instead of linear ones
    :param width: figure width passed to the theme's ``figure_size``
    :param height: figure height passed to the theme's ``figure_size``
    :param clamp: if true, values above ``domain[1]`` are replaced by
        ``domain[1]`` on a deep copy of ``df`` (the caller's frame is
        left untouched)
    :param tickCount: currently unused; kept so existing callers that
        pass it keep working
    :return: the assembled plot object (not drawn)
    """
    assert len(domain) == 2

    POINT_SIZE = 0.5
    DASH_PATTERN = (0, (3, 1))  # dash spec handed to linetype

    # Identity test: None is a singleton, and `==` could be hijacked by
    # an argument type that overloads equality.
    if xname is None:
        xname = xcol
    if yname is None:
        yname = ycol

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    if clamp:  # clamp overflowing values if required
        # Work on a copy so the caller's dataframe is not modified.
        df = df.copy(deep=True)
        df.loc[df[xcol] > domain[1], xcol] = domain[1]
        df.loc[df[ycol] > domain[1], ycol] = domain[1]

    # generate scatter plot
    scatter = p9.ggplot(df)
    scatter += p9.aes(x=xcol, y=ycol)
    scatter += p9.geom_point(size=POINT_SIZE, na_rm=True)
    scatter += p9.labs(x=xname, y=yname)

    if log:  # log scale
        scatter += p9.scale_x_log10(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_log10(limits=domain, labels=ax_formatter)
    else:
        scatter += p9.scale_x_continuous(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_continuous(limits=domain, labels=ax_formatter)

    # The partial theme() calls must come after theme_bw() so they adjust
    # it rather than being replaced by it.
    scatter += p9.theme_bw()
    scatter += p9.theme(
        panel_grid_major=p9.element_line(color='#666666', alpha=0.5))
    scatter += p9.theme(figure_size=(width, height))

    # generate additional lines
    scatter += p9.geom_abline(intercept=0, slope=1,
                              linetype=DASH_PATTERN)  # diagonal
    scatter += p9.geom_vline(xintercept=domain[1],
                             linetype=DASH_PATTERN)  # vertical rule
    scatter += p9.geom_hline(yintercept=domain[1],
                             linetype=DASH_PATTERN)  # horizontal rule

    return scatter
def scatter_plot2(df1, df2, xcol, ycol, domain,
                  color1='black', color2='red',
                  xname=None, yname=None,
                  log=False, width=6, height=6,
                  clamp=True, tickCount=5):
    """
    Overlay two scatter plots (``df1`` and ``df2``) on shared axes.

    Each frame contributes a point layer and a rug layer (top and right
    sides) in its own colour. Both axes use ``domain`` as limits; a dashed
    diagonal and dashed rules at ``domain[1]`` are drawn last.

    With ``clamp`` set, values above ``domain[1]`` in ``xcol``/``ycol`` are
    replaced by ``domain[1]`` on deep copies of the inputs. ``tickCount``
    is accepted but not used by this function.

    Returns the assembled plot object.
    """
    assert len(domain) == 2

    POINT_SIZE = 1.5
    DASH_PATTERN = (0, (6, 2))

    xname = xcol if xname is None else xname
    yname = ycol if yname is None else yname

    # Formatter shared by both axes.
    ax_formatter = mizani.custom_format('{:n}')

    if clamp:
        # Clamp overflowing values on copies so the callers' frames are
        # left untouched.
        clamped = []
        for frame in (df1, df2):
            frame = frame.copy(deep=True)
            for column in (xcol, ycol):
                frame.loc[frame[column] > domain[1], column] = domain[1]
            clamped.append(frame)
        df1, df2 = clamped

    # Pick the scale constructors once instead of branching per axis.
    if log:
        make_x_scale, make_y_scale = p9.scale_x_log10, p9.scale_y_log10
    else:
        make_x_scale, make_y_scale = (p9.scale_x_continuous,
                                      p9.scale_y_continuous)

    # Point layers: df1 is the plot's default data, df2 is passed per layer.
    plot = p9.ggplot(df1)
    plot += p9.aes(x=xcol, y=ycol)
    plot += p9.geom_point(size=POINT_SIZE, na_rm=True,
                          color=color1, alpha=0.5)
    plot += p9.geom_point(size=POINT_SIZE, na_rm=True, data=df2,
                          color=color2, alpha=0.5)
    plot += p9.labs(x=xname, y=yname)

    # Rug layers on the top and right edges.
    plot += p9.geom_rug(na_rm=True, sides="tr", color=color1, alpha=0.05)
    plot += p9.geom_rug(na_rm=True, sides="tr", data=df2,
                        color=color2, alpha=0.05)

    plot += make_x_scale(limits=domain, labels=ax_formatter)
    plot += make_y_scale(limits=domain, labels=ax_formatter)

    # Base theme first, then the individual adjustments on top of it.
    plot += p9.theme_bw()
    plot += p9.theme(
        panel_grid_major=p9.element_line(color='#666666', alpha=0.5))
    plot += p9.theme(panel_grid_minor=p9.element_blank())
    plot += p9.theme(figure_size=(width, height))
    plot += p9.theme(text=p9.element_text(size=24, color="black"))

    # Reference lines: diagonal, then vertical and horizontal rules.
    plot += p9.geom_abline(intercept=0, slope=1, linetype=DASH_PATTERN)
    plot += p9.geom_vline(xintercept=domain[1], linetype=DASH_PATTERN)
    plot += p9.geom_hline(yintercept=domain[1], linetype=DASH_PATTERN)

    return plot
Beispiel #6
0
def test_empty_breaks():
    """Every formatter built with default arguments maps [] to []."""
    x = []
    # Same order as the formatters were originally checked.
    factories = (
        custom_format,
        comma_format,
        currency_format,
        percent_format,
        scientific_format,
        date_format,
        mpl_format,
        log_format,
        timedelta_format,
    )
    for factory in factories:
        assert factory()(x) == []
Beispiel #7
0
def test_empty_breaks():
    """Formatting an empty sequence of breaks yields an empty list."""
    no_breaks = []
    formatter_classes = [
        custom_format, comma_format, currency_format,
        percent_format, scientific_format, date_format,
        mpl_format, log_format, timedelta_format,
    ]
    for make_formatter in formatter_classes:
        # Each formatter is constructed with its defaults only.
        formatter = make_formatter()
        assert formatter(no_breaks) == []
Beispiel #8
0
def box_plot(df,
             x_col,
             fill_col,
             agg_col,
             agg_func='count',
             filter_col=None,
             filterby=None,
             no_outliers=False,
             title=None,
             ylabel=None,
             grouped=True,
             flip=True,
             dots=True):
    '''
    Draw and show a box plot of an (optionally aggregated) column.

    :param df: input dataframe
    :param x_col: column placed on the x axis
    :param fill_col: column used to colour the jittered dots
    :param agg_col: column the aggregation is applied to; plotted as-is
        when ``grouped`` is false
    :param agg_func: aggregation name forwarded to ``_get_grouped_aux``
        (for example sum, min, mean, median, max)
    :param filter_col: filter column forwarded to ``_get_grouped_aux``
    :param filterby: values forwarded to ``_get_grouped_aux`` together with
        ``filter_col``; defaults to ``[""]``
    :param no_outliers: if true, restrict the y axis to the range from the
        minimum to the 75% quantile of the plotted values
    :param title: optional title, shown prefixed with "Box plot:"
    :param ylabel: y axis label; when empty, the display text of
        ``agg_col`` is used
    :param grouped: if true, plot the result of ``_get_grouped_aux``;
        otherwise plot ``df`` directly
    :param flip: if true, flip the coordinates
    :param dots: if true, overlay jittered points filled by ``fill_col``
    :return: None; the plot is drawn and shown as a side effect
    '''
    # A fresh list per call instead of a shared mutable default.
    if filterby is None:
        filterby = [""]

    # set display texts
    fill_label = get_display_text(fill_col)

    if grouped:
        aux, aux_str = _get_grouped_aux(df, x_col, fill_col, agg_col, agg_func,
                                        filter_col, filterby)
    else:
        aux = df
        aux_str = agg_col

    # sort x labels
    aux = aux.sort_values(by=[x_col])

    # Only the x column needs to be stringified to build the axis limits.
    xcol_list = aux[x_col].astype(str).unique().tolist()

    # graph creation
    graph = (ggplot(aux) + geom_boxplot(aes(x=x_col, y=aux_str)) + theme_bw() +
             theme(axis_line_x=element_line(color='gray'),
                   axis_line_y=element_line(color='gray'),
                   line=element_line(color='white')) +
             scale_fill_manual(values=colors.OFICIAL_COLORS, name=fill_label) +
             scale_x_discrete(limits=xcol_list) +
             scale_y_continuous(labels=custom_format('{:,.0f}')))

    # dots
    if dots:
        graph += geom_jitter(aes(x=x_col, y=aux_str, fill=fill_col))

    # no outliers: the summary statistics are only needed in this branch
    if no_outliers:
        aux_describe = aux[aux_str].describe().loc
        graph += ylim(aux_describe["min"], aux_describe["75%"])

    # title
    if title is not None:
        graph += ggtitle("Box plot:" + str(title))

    # flip
    if flip:
        graph += coord_flip()

    # set y label
    if not ylabel:
        ylabel = get_display_text(agg_col)
    graph += ylab(ylabel)

    # show
    graph.draw()
    plt.show()