Python geom_boxplotの例、plotnine.geom_boxplot Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_geom_boxplot.py プロジェクト: tr8dr/plotnine

class TestAesthetics:
    """Boxplot aesthetics tests that share one pre-built plot."""

    # Built once, when the class body is executed. Layers: one boxplot over
    # all of `df`, followed by three boxplots over row slices of `df`, each
    # with a shifted `y` expression and `x` mapped to a different aesthetic
    # (fill, color, linetype).
    p = (ggplot(df, aes('x')) + geom_boxplot(aes(y='y'), size=2) +
         geom_boxplot(df[:2 * m], aes(y='y+25', fill='x'), size=2) +
         geom_boxplot(df[2 * m:], aes(y='y+30', color='x'), size=2) +
         geom_boxplot(df[2 * m:], aes(y='y+55', linetype='x'), size=2))

    def test_aesthetics(self):
        # NOTE(review): comparing a plot with a string presumably goes
        # through the project's image-comparison harness -- confirm.
        assert self.p == 'aesthetics'

    def test_aesthetics_coordflip(self):
        # Same shared plot, with coord_flip() added.
        assert self.p + coord_flip() == 'aesthetics+coord_flip'

コード例 #2

0

ファイルを表示

def plot_boxplot_series(df, normalisation_method=None):
    """
    Render one horizontal boxplot per column of ``df``.

    Every column is drawn as its own box and every row is treated as an
    independent observation (ie. a different company), so a shift in
    performance between columns becomes visible.

    normalisation_method should be one of the values present in
    SectorSentimentSearchForm.normalisation_choices:
    ``None`` or ``"1"`` plots the data as-is, ``"2"`` applies min/max
    scaling per column, anything else divides each column by its maximum.
    """
    use_raw_values = normalisation_method is None or normalisation_method == "1"
    if use_raw_values:
        scaled = df
        axis_title = "Percentage change"
    elif normalisation_method == "2":
        column_min = df.min()
        column_max = df.max()
        scaled = (df - column_min) / (column_max - column_min)
        axis_title = "Percentage change (min/max. scaled)"
    else:
        # fallback for every other choice: divide by the column maximum
        scaled = df / df.max(axis=0)
        axis_title = "Percentage change (normalised by dividing by max)"

    # figure height grows with the number of boxes
    figure_height = len(df.columns) / 5
    long_form = scaled.melt(ignore_index=False).dropna()

    boxes = p9.geom_boxplot(outlier_colour="blue")
    plot = p9.ggplot(long_form, p9.aes(x="fetch_date", y="value"))
    plot = plot + boxes + p9.coord_flip()
    return user_theme(plot, y_axis_label=axis_title,
                      figure_size=(12, figure_height))

コード例 #3

0

ファイルを表示

def _make_plots(df_plt, out_file_base, y='AUC', facet_grid='', h_line=''):
    """Save a boxplot + jitter figure of ``y`` per ``resolution`` as a PNG.

    When ``df_plt`` has a ``sparsity_l1`` column it is copied into a new
    ``Sparsity`` column (``df_plt`` is modified in place) and, if it holds
    more than one distinct value, it is used as the fill/colour grouping.

    Args:
        df_plt: data frame with a ``resolution`` column and a ``y`` column.
        out_file_base: prefix of the output file name.
        y: name of the column plotted on the y axis.
        facet_grid: optional column used for row facets ('' disables it).
        h_line: optional y-intercept of a dash-dotted horizontal line
            ('' disables it).
    """
    n_resolutions = len(np.unique(df_plt['resolution']))
    n_sparsities = 0
    if 'sparsity_l1' in df_plt.columns:
        df_plt['Sparsity'] = df_plt['sparsity_l1']
        n_sparsities = len(np.unique(df_plt['Sparsity']))

    # Base mapping and jitter layer depend on whether Sparsity is a grouping.
    if n_sparsities > 1:
        mapping = plt9.aes(fill='Sparsity', x='resolution', y=y)
        jitter = plt9.geom_jitter(
            plt9.aes(color='Sparsity'), alpha=0.25, width=0.2)
    else:
        mapping = plt9.aes(x='resolution', y=y)
        jitter = plt9.geom_jitter(alpha=0.25, width=0.2)

    figure = plt9.ggplot(df_plt, mapping)
    figure = figure + plt9.geom_boxplot(alpha=0.8, outlier_alpha=0)
    figure = figure + jitter
    figure = figure + plt9.theme_bw(base_size=12)

    if facet_grid != '':
        figure = figure + plt9.facet_grid('{} ~ .'.format(facet_grid))

    # Human-readable y axis title.
    if y == 'f1-score':
        y_title = 'F1 score'
    elif y in ['AUC', 'MCC']:
        y_title = y
    else:
        y_title = y.capitalize().replace('_', ' ')
    figure = figure + plt9.labs(x='Resolution', y=y_title, title='')

    figure = figure + plt9.theme(
        axis_text_x=plt9.element_text(angle=-45, hjust=0))

    if n_sparsities != 0 and n_sparsities < 9:
        figure = figure + plt9.scale_fill_brewer(palette='Dark2', type='qual')

    if h_line != '':
        figure = figure + plt9.geom_hline(
            plt9.aes(yintercept=h_line), linetype='dashdot')

    target = '{}-resolution__{}.png'.format(out_file_base, y.replace('-', '_'))
    # Width scales with the number of boxes drawn along the x axis.
    figure.save(target,
                dpi=300,
                width=4 * ((n_resolutions + n_sparsities) / 4),
                height=5,
                limitsize=False)

コード例 #4

0

ファイルを表示

ファイル: sentiment_scoring_interpretation.py プロジェクト: AghilasSini/AT-Annotator

def plot_score(df, plot_fn):
    """Save a boxplot of ``score`` per ``emotion_cat`` to ``plot_fn``."""
    backdrop = p9.element_rect(fill=BG_COLOR, color=BG_COLOR, size=1)
    layers = (
        p9.geom_boxplot(),
        p9.labs(x="Model", y="EMOTION FEEL Score"),
        p9.theme_538(),
        p9.theme(legend_position="top",
                 legend_direction="horizontal",
                 figure_size=(10, 5)),
        p9.theme(plot_background=backdrop),
    )

    figure = p9.ggplot(df, p9.aes(x="emotion_cat", y="score"))
    for layer in layers:
        figure = figure + layer
    figure.save(plot_fn)

コード例 #5

0

ファイルを表示

def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=None,
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Boxplot of ``similarity_metric`` per ``group_replicate`` value.

    Args:
        df: data frame with ``group_replicate`` and ``similarity_metric``
            columns.
        batch, plate: used only to build the plot title.
        facet_string: facet specification; required when ``split_samples``
            is true.
        split_samples: when true, wrap the plot in facets.
        output_file_base: when given, the plot is handed to ``save_figure``
            together with the extensions, dpi, height and width.
        output_file_extensions: file extensions passed to ``save_figure``;
            defaults to ``[".png", ".pdf", ".svg"]``.
        dpi, height, width: forwarded to ``save_figure``.
        return_plot: when true, return the plot object.

    Returns:
        The plot when ``return_plot`` is true, otherwise ``None``.

    Raises:
        AssertionError: ``split_samples`` is set without ``facet_string``.
    """
    # A fresh list per call: a list literal in the signature would be one
    # shared object that any callee could mutate for all later calls.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    correlation_gg = (
        gg.ggplot(
            df,
            gg.aes(x="group_replicate", y="similarity_metric", fill="group_replicate"),
        )
        + gg.geom_boxplot(
            alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
        )
        + gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            subplots_adjust={"wspace": 0.2},
            title=gg.element_text(size=5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=5),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=5),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if split_samples:
        # Kept as an assert so the exception type seen by callers is unchanged.
        assert facet_string, "To split samples, specify a facet_string"
        correlation_gg += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            correlation_gg, output_file_base, output_file_extensions, dpi, height, width
        )
    if return_plot:
        return correlation_gg

コード例 #6

0

ファイルを表示

ファイル: plots.py プロジェクト: mappin/asxtrade

def plot_boxplot_series(df, normalisation_method=None):
    """
    Treating each column as a separate boxplot and each row as an independent observation
    (ie. different company)
    render a series of box plots to identify a shift in performance from the observations.
    normalisation_method should be one of the values present in
    SectorSentimentSearchForm.normalisation_choices

    Returns a 2-tuple:
      * the result of ``plot_as_inline_html_data`` for the boxplot figure
      * a list of ``(row label, number of wins, row sum)`` tuples, largest
        row sum first, restricted to rows whose sum is positive
    """
    # compute star performers: those who are above the mean on a given day counted over all days
    count = defaultdict(int)
    # Column means do not change inside the loop, so compute them once.
    column_means = df.mean(axis=0)
    for col in df.columns:
        winners = df[df[col] > column_means[col]][col]
        for winner in winners.index:
            count[winner] += 1
    winner_results = []
    for asx_code, n_wins in count.items():
        x = df.loc[asx_code].sum()
        # avoid "dead cat bounce" stocks which fall spectacularly and then post major increases in percentage terms
        if x > 0.0:
            winner_results.append((asx_code, n_wins, x))

    # and plot the normalised data
    if normalisation_method is None or normalisation_method == "1":
        normalized_df = df
        y_label = "Percentage change"
    elif normalisation_method == "2":
        normalized_df = (df - df.min()) / (df.max() - df.min())
        y_label = "Percentage change (min/max. scaled)"
    else:
        normalized_df = df / df.max(axis=0)  # div by max if all else fails...
        y_label = "Percentage change (normalised by dividing by max)"

    # figure height grows with the number of columns (one box each)
    n_inches = len(df.columns) / 5
    melted = normalized_df.melt(ignore_index=False).dropna()
    plot = (
        p9.ggplot(melted, p9.aes(x="fetch_date", y="value"))
        + p9.geom_boxplot(outlier_colour="blue")
        + p9.theme(
            axis_text_x=p9.element_text(size=7),
            axis_text_y=p9.element_text(size=7),
            figure_size=(12, n_inches),
        )
        + p9.labs(x="Date (YYYY-MM-DD)", y=y_label)
        + p9.coord_flip()
    )
    return (
        plot_as_inline_html_data(plot),
        # reversed(sorted(...)) is kept on purpose: it orders ties
        # differently from sorted(..., reverse=True).
        list(reversed(sorted(winner_results, key=lambda t: t[2]))),
    )

コード例 #7

0

ファイルを表示

ファイル: test_geom_boxplot.py プロジェクト: tr8dr/plotnine

def test_weight():
    """Boxplot with a ``weight`` aesthetic.

    The boxes of the two groups should differ slightly due to the method
    used to calculate weighted percentiles. There is no standard method
    for calculating weighted percentiles.
    """
    values_a = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 15]
    values_b = [1, 2, 3, 4, 15]
    weights_b = [1, 2, 3, 4, 1]

    labels = list('a' * len(values_a) + 'b' * len(values_b))
    df = pd.DataFrame({
        'x': labels,
        'y': np.hstack([values_a, values_b]),
        # group "a" is unweighted, group "b" carries explicit weights
        'weight': np.hstack([np.ones(len(values_a)), weights_b]),
    })

    mapping = aes(x='x', y='y', weight='weight')
    p = ggplot(df, mapping) + geom_boxplot()
    assert p == 'weight'

コード例 #8

0

ファイルを表示

def plot_market_cap_distribution(ld: LazyDictionary) -> p9.ggplot:
    """Boxplot of positive market caps per market, faceted by ``bin``.

    Reads the ``market_cap_df`` entry of ``ld``; that frame must contain the
    ``market``, ``market_cap`` and ``bin`` columns. The y axis is log10
    scaled and the plot is passed through ``user_theme`` before returning.
    """
    df = ld["market_cap_df"]
    required_columns = {"market", "market_cap", "bin"}
    assert required_columns.issubset(set(df.columns))

    # only strictly positive market caps are plotted
    has_market_cap = df["market_cap"] > 0.0
    boxes = p9.geom_boxplot(p9.aes(x="market", y="market_cap"))
    facets = p9.facet_wrap("bin", scales="free_y")
    plot = p9.ggplot(df[has_market_cap]) + boxes + facets
    plot = plot + p9.scales.scale_y_log10()

    return user_theme(
        plot,
        y_axis_label="Market cap. ($AUD Millions)",
        subplots_adjust={"wspace": 0.30},
    )

コード例 #9

0

ファイルを表示

ファイル: study_plots_utils.py プロジェクト: sealuzh/benchmark-instability-prediction-replication-package

def plot_preprocessing_boxplot_bymodel(dataframe,
                                       models_labels,
                                       metrics_labels,
                                       groups_labels,
                                       figure_size=(14, 4)):
    """
    Build a grid of dodged boxplots: one facet row per model, ``variable``
    on the x axis, ``value`` on the y axis, boxes filled by ``group``.

    ``models_labels``, ``metrics_labels`` and ``groups_labels`` are lookups
    from raw values to the labels displayed on the plot.
    """
    # x axis: one tick per metric, relabelled through metrics_labels.
    metric_axis = p9.scale_x_discrete(
        name='Metric',
        labels=lambda l: [metrics_labels[x] for x in l])

    # y axis: two-decimal tick labels.
    value_axis = p9.scale_y_continuous(
        name='Value',
        expand=(0, 0.05),
        labels=lambda l: ['{:.2f}'.format(x) for x in l])

    # Fill palette for the groups.
    group_fill = p9.scale_fill_brewer(
        name='Group',
        labels=lambda l: [groups_labels[x] for x in l],
        type='qual',
        palette='Set2')

    # One facet row per model, with a free y scale and relabelled strips.
    model_rows = p9.facet_grid(
        'model ~ .',
        scales='free_y',
        labeller=p9.labeller(rows=lambda x: f'{models_labels[x]}'))

    # No axis titles, small tick labels, untitled legend on top.
    look = p9.theme(
        axis_title_x=p9.element_blank(),
        axis_title_y=p9.element_blank(),
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    plot = p9.ggplot(dataframe, p9.aes(x='variable', y='value', fill='group'))
    additions = (p9.geom_boxplot(position='dodge'), metric_axis, value_axis,
                 group_fill, model_rows, look)
    for addition in additions:
        plot = plot + addition
    return plot

コード例 #10

0

ファイルを表示

    def create(self, file_path: str) -> None:
        """Save one boxplot PDF per distinct value of the ``metric`` column.

        For every metric in ``self._data`` a boxplot of ``value`` per
        ``category`` is written to ``"{file_path}.{metric}.pdf"``. The
        visible y range is the 2nd..98th percentile of the values, widened
        by 20 % of each bound's magnitude.

        The original passed ``ylim`` as (98th percentile * 0.8, 2nd
        percentile * 1.2), i.e. upper bound first, and multiplying by a
        factor moves a negative bound towards the data instead of away.
        """
        for metric in self._data["metric"].unique():
            data = self._data[self._data["metric"] == metric]
            lower, upper = np.percentile(data["value"], [2, 98])
            # Sign-aware padding: identical to *0.8 / *1.2 for positive
            # bounds, and still widens the window for negative ones.
            y_limits = (lower - 0.2 * abs(lower), upper + 0.2 * abs(upper))

            plot = (
                ggplot(data, aes(x="category", y="value"))
                + geom_boxplot(outlier_shape="")
                + coord_cartesian(ylim=y_limits)
                + ggtitle(metric)
                + xlab("Category")
                + ylab("Value")
                + theme_classic(base_size=28, base_family="Helvetica")
            )
            plot.save(f"{file_path}.{metric}.pdf", width=24, height=24)

コード例 #11

0

ファイルを表示

ファイル: main-checkpoint.py プロジェクト: murphycj/murphycj.github.io-posts

def grid_search_models(X,y):
    """Tune three classifiers and save a boxplot of their CV accuracies.

    Columns 3..11 of ``X`` (exons 4-12) are split 70/30 into train and test
    parts. An SVM and a logistic regression are tuned by grid search and a
    decision tree by randomized search, all on the training part. The best
    estimator of each search is then scored with 10-fold cross-validation
    on the training part, and the accuracies are drawn as one box per model.
    """
    n_folds = 10

    # get only exons 4-12
    exon_features = X[:, 3:12]
    X_train, X_test, y_train, y_test = train_test_split(
        exon_features, y, test_size=0.3)

    # SVM
    svm_grid = {
        'C': [0.5, 1, 2, 3, 5, 6, 7, 8, 9, 10],
        'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],
        'degree': [2, 3, 4, 5, 6],
    }
    svm_search = GridSearchCV(SVC(), svm_grid, scoring='accuracy')
    svm_search.fit(X_train, y_train)

    # logistic regression
    lr_grid = {'penalty': ['l1', 'l2'], 'C': [0.5, 1, 2, 3, 4, 5, 8, 10]}
    lr_search = GridSearchCV(LogisticRegression(), lr_grid, scoring='accuracy')
    lr_search.fit(X_train, y_train)

    # decision tree
    tree_grid = {
        'max_depth': [3, 10, 20, 30],
        'max_leaf_nodes': [2, 4, 6, 8],
        'min_samples_leaf': [1, 2, 3],
        'min_samples_split': [2, 4, 6],
    }
    tree_search = RandomizedSearchCV(DecisionTreeClassifier(), tree_grid,
                                     cv=10, scoring='accuracy')
    tree_search.fit(X_train, y_train)

    # plot performances: one accuracy per fold and per model
    tuned = [
        ('SVM', svm_search),
        ('LogisticRegression', lr_search),
        ('DecisionTree', tree_search),
    ]
    model_column = []
    accuracy_column = []
    for label, search in tuned:
        fold_scores = cross_val_score(
            search.best_estimator_, X_train, y_train, cv=n_folds)
        model_column.extend([label] * n_folds)
        accuracy_column.extend(fold_scores)

    data = pd.DataFrame({'Model': model_column, 'Accuracy': accuracy_column})
    # fixed category order so the boxes appear in this order on the x axis
    data['Model'] = pd.Categorical(
        data['Model'],
        categories=['SVM', 'LogisticRegression', 'DecisionTree'],
        ordered=True)

    p = pn.ggplot(data, pn.aes('Model', 'Accuracy'))
    p = p + pn.geom_boxplot() + pn.ylim(0, 1)
    p.save('./plots/tumor_genotype_prediction/accuracy-model.png')

コード例 #12

0

ファイルを表示

ファイル: q3_answers.py プロジェクト: vreuter/python_bootcamp_2020

def create_boxplot(box_df):
    """Build a boxplot of blood pressure per treated/control group.

    Input
    -----
    box_df: pandas.DataFrame
        The dataframe returned by melt_data

    Returns
    -------
    plot: plotnine.ggplot
        A boxplot visualizing the data in box_df
    """
    # the same column drives both the x position and the fill colour
    group_column = 'treated/control'
    mapping = aes(x=group_column, y='blood_pressure', fill=group_column)
    return ggplot(box_df, mapping) + geom_boxplot()

コード例 #13

0

ファイルを表示

ファイル: plots.py プロジェクト: mappin/asxtrade

def plot_market_cap_distribution(stocks, ymd: str, ymd_start_of_timeframe: str):
    """Boxplot of market cap per market, faceted by ``bin``, for two dates.

    Quote frames for ``ymd`` and ``ymd_start_of_timeframe`` are built with
    ``make_quote_df`` for the given ``stocks``, stacked into one frame and
    plotted. Returns whatever ``plot_as_inline_html_data`` returns for the
    finished plot.
    """
    # Local import: DataFrame.append was removed in pandas 2.0, pd.concat
    # is the supported way to stack the two frames.
    import pandas as pd

    latest_quotes = valid_quotes_only(ymd)
    earliest_quotes = valid_quotes_only(ymd_start_of_timeframe)
    asx_codes = set(stocks)

    latest_df = make_quote_df(latest_quotes, asx_codes, ymd)
    earliest_df = make_quote_df(earliest_quotes, asx_codes, ymd_start_of_timeframe)
    # Row labels are kept as-is, exactly as DataFrame.append did.
    df = pd.concat([latest_df, earliest_df])

    small_text = p9.element_text(size=7)
    plot = p9.ggplot(df) + \
           p9.geom_boxplot(p9.aes(x='market', y='market_cap')) + \
           p9.facet_wrap("bin", scales="free_y") + \
           p9.labs(x='', y='Market cap. ($AUD Millions)') + \
           p9.theme(subplots_adjust={'wspace': 0.30},
                    axis_text_x=small_text,
                    axis_text_y=small_text)
    return plot_as_inline_html_data(plot)

コード例 #14

0

ファイルを表示

ファイル: test_geom_boxplot.py プロジェクト: yejianye/plotnine

def test_params():
    """Exercise geom_boxplot parameters: notches, outlier styling, dodging."""
    layers = [
        # notches, default and explicit width
        geom_boxplot(df[:m], aes(y='y'), size=2, notch=True),
        geom_boxplot(df[m:2*m], aes(y='y'), size=2,
                     notch=True, notchwidth=0.8),
        # outliers: colour, transparency and shape
        geom_boxplot(df[2*m:3*m], aes(y='y'), size=2,
                     outlier_size=4, outlier_color='green'),
        geom_boxplot(df[2*m:3*m], aes(y='y+25'), size=2,
                     outlier_size=4, outlier_alpha=0.5),
        geom_boxplot(df[2*m:3*m], aes(y='y+60'), size=2,
                     outlier_size=4, outlier_shape='D'),
        # position dodge
        geom_boxplot(df[3*m:4*m], aes(y='y', fill='factor(y%2)')),
        theme(facet_spacing={'right': 0.85}),
    ]

    p = ggplot(df, aes('x'))
    for layer in layers:
        p = p + layer
    assert p == 'params'

コード例 #15

0

ファイルを表示

ファイル: test_geom_boxplot.py プロジェクト: jwhendy/plotnine

def test_params():
    """Exercise geom_boxplot parameters: notches, outlier styling, dodging."""
    layers = [
        # notches, default and explicit width
        geom_boxplot(df[:m], aes(y='y'), size=2, notch=True),
        geom_boxplot(df[m:2*m], aes(y='y'), size=2,
                     notch=True, notchwidth=0.8),
        # outliers: colour, transparency and shape
        geom_boxplot(df[2*m:3*m], aes(y='y'), size=2,
                     outlier_size=4, outlier_color='green'),
        geom_boxplot(df[2*m:3*m], aes(y='y+25'), size=2,
                     outlier_size=4, outlier_alpha=0.5),
        geom_boxplot(df[2*m:3*m], aes(y='y+60'), size=2,
                     outlier_size=4, outlier_shape='D'),
        # position dodge
        geom_boxplot(df[3*m:4*m], aes(y='y', fill='factor(y%2)')),
        theme(subplots_adjust={'right': 0.85}),
    ]

    p = ggplot(df, aes('x'))
    for layer in layers:
        p = p + layer
    assert p == 'params'

コード例 #16

0

ファイルを表示

def plot_box_plots(var, draws, measurements, variable_id_map):
    """Return a plotnine plot with boxplots of the draws of ``var``.

    Boxes are drawn per value of the id column ``variable_id_map[var]`` and
    faceted by ``experiments``. Rows of ``measurements[var]``, if any, are
    overlaid as points. Every variable except ``"flux"`` uses a log10 y
    scale; ``"flux"`` uses fixed breaks and limits instead.
    """
    id_column = variable_id_map[var]
    is_flux = var == "flux"

    boxes = p9.geom_boxplot(
        p9.aes(x=id_column, y=var, fill=id_column),
        outlier_shape="",
    )
    plot = p9.ggplot(data=draws[var]) + boxes

    observed = measurements[var]
    if observed.empty is False:
        plot += p9.geoms.geom_point(
            p9.aes(y="measurement", x=id_column), data=observed)

    if not is_flux:
        plot += p9.scale_y_log10()

    spacing_and_fonts = p9.themes.theme(
        panel_spacing_y=0.05,
        panel_spacing_x=0.35,
        axis_title=p9.element_text(size=10),
        axis_text=p9.themes.element_text(size=11),
    )
    plot += p9.facet_wrap("~experiments") + spacing_and_fonts

    if is_flux:
        plot += p9.scale_y_continuous(
            breaks=np.arange(-0.001, 0.002, 0.00025),
            limits=[-0.001, 0.002])

    # rotated, smaller x tick labels
    plot += p9.theme(axis_text_x=p9.themes.element_text(rotation=90, size=6))
    return plot

コード例 #17

0

ファイルを表示

ファイル: test_position.py プロジェクト: tr8dr/plotnine

def test_dodge2():
    """Boxplots coloured by ``c`` and positioned with 'dodge2'."""
    boxes = geom_boxplot(position='dodge2', size=2)
    p = ggplot(df3, aes('x', 'y', color='c')) + boxes
    assert p + _theme == 'dodge2'

コード例 #18

0

ファイルを表示

ファイル: datavis.py プロジェクト: joholley/datavis

# ggbarse.save('gse75386_gad1_barchart_stat.pdf', format='pdf',
#              height=1, width=6)

## mean bars +/- standard error using seaborn
# Close the current matplotlib figure before drawing the next one.
plt.close()
# plt.figure(figsize=(6, 1))
# NOTE(review): ci=68 presumably approximates +/- one standard error, as the
# heading above says -- confirm against the installed seaborn version.
sns.barplot(data=gse75386, y='class', x='Gad1', color='slategray', ci=68)
# plt.savefig('gse75386_gad1_barchart_stat.pdf',
#             format='pdf', bbox_inches='tight')

## -----------------------------------------------------------------
## GSE75386 boxplot + stripchart
## -----------------------------------------------------------------
plt.close()
# plotnine version: boxplot with the raw points on top, axes flipped.
# NOTE(review): `ggplot` is used unqualified here while every other plotnine
# name goes through `gg.` -- verify that `ggplot` is imported by name.
ggbox = ggplot(gse75386, gg.aes(x='class', y='Gad1')) +\
        gg.geom_boxplot(stat='boxplot', outlier_size=0.0001) +\
        gg.geom_point(alpha=0.5) +\
        gg.coord_flip()
print(ggbox)
# ggbox.save('gse75386_gad1_boxplot.pdf', format='pdf', height=1, width=6)

plt.close()
# plt.figure(figsize=(6, 1))
# seaborn version of the same figure: white boxes with black strip points.
sns.boxplot(data=gse75386, y='class', x='Gad1', color='white')
sns.stripplot(data=gse75386, y='class', x='Gad1', color='black')
# plt.savefig('gse75386_gad1_boxplot.pdf',
#             format='pdf', bbox_inches='tight')

## -----------------------------------------------------------------
## GSE75386 scatterplot
## -----------------------------------------------------------------

コード例 #19

0

ファイルを表示

# 80 evenly spaced x values spanning the observed range of `x`.
xseq_2 = np.linspace(np.min(x), np.max(x), 80)

# Linear regression of y on x; the fitted slope and intercept are used below.
results_2 = linregress(x, y)
print(results_2)
# -

# End points of the fitted line: the smallest and largest version counts ...
x_line = np.array([
    published_date_distances["version_count"].min(),
    published_date_distances["version_count"].max(),
])
# ... and the fitted values at those two points.
y_line = x_line * results_2.slope + results_2.intercept

g = (p9.ggplot(
    published_date_distances,
    p9.aes(x="factor(version_count)", y="time_to_published"),
) + p9.geom_boxplot(fill="#a6cee3") + p9.geom_line(
    mapping=p9.aes(x="version_count", y="time_to_published"),
    stat="smooth",
    method="lm",
    linetype="dashed",
    se=False,
    alpha=1,
    size=0.7,
    inherit_aes=False,
) + p9.scale_y_timedelta(labels=timedelta_format("d")) + p9.annotate(
    "text",
    x=9,
    y=timedelta(days=1470),
    label=f"Y={results_2.slope:.2f}*X+{results_2.intercept:.2f}",
) + p9.labs(x="# of Preprint Versions",
            y="Time Elapsed Until Preprint is Published") + p9.theme_seaborn(

コード例 #20

0

ファイルを表示

ファイル: 03_biorxiv_pca_category_bootstrap.py プロジェクト: greenelab/annorxiver

# Bare expression: only displays a value when run in a notebook cell.
projected_documents.shape

# One row per projected document: columns PC_1..PC_<n_components>, plus the
# category and document columns copied from document_categories_df.
# NOTE(review): assumes document_categories_df rows are in the same order as
# the rows of projected_documents -- confirm.
projected_documents_df = pd.DataFrame(
    projected_documents, columns=[f"PC_{dim+1}" for dim in range(n_components)]
).assign(
    category=document_categories_df.category.tolist(),
    document=document_categories_df.document.tolist(),
)
projected_documents_df

g = (
    p9.ggplot(projected_documents_df)
    + p9.aes(x="factor(category)", y="PC_1")
    + p9.geom_boxplot(
        fill="#a6cee3",
        outlier_size=1,
        outlier_alpha=0.65,
        fatten=1.5,
    )
    + p9.coord_flip()
    + p9.scale_x_discrete(
        limits=(
            projected_documents_df.groupby("category")
            .agg({"PC_1": "median"})
            .sort_values("PC_1", ascending=False)
            .reset_index()
            .category.tolist()[::-1]
        )
    )
    + p9.labs(x="Article Category", y="PC1")
    + p9.theme_seaborn(context="paper", style="ticks", font="Arial", font_scale=2)
    + p9.theme(figure_size=(11, 8.5))

コード例 #21

0

ファイルを表示

    best_line, age, linear_coeff, log10_coeff, ln_coeff) + zero_z_score

# Report the predicted acceptable range for the given age.
print("\n\nThe predicted acceptable range at age ", str(age), " is from ",
      str(min_acceptable_range), " to ", str(max_acceptable_range), "\n\n")

# save csv file
# The output file name is the input name with '.csv' replaced by
# '_outliers.csv'.
outlierfile = filename.replace('.csv', '_outliers.csv')

data_output.to_csv(outlierfile, index=False)

# plot overlay of IQR and mod-Z score outliers
# Jittered points are coloured by `z_outlier`; the boxplot's own outlier
# markers get size 0 and stroke 0.
# NOTE(review): `outlier_alpha=0.1` is passed inside aes() of geom_jitter, so
# it is treated as a mapping rather than a geom parameter -- presumably a
# plain `alpha=0.1` on geom_jitter was intended; confirm.
p = (
    p9.ggplot(data=data_output,
              mapping=p9.aes(x='age_rounded', y='value', group='age_rounded'))
    + p9.geom_jitter(mapping=p9.aes(color='z_outlier', outlier_alpha=0.1)) +
    p9.geom_boxplot(outlier_size=0, outlier_stroke=0) + p9.ggtitle(
        "Outliers detected via the IQR method (boxplot)\nand modified z-score method (dotplot)"
    ) + p9.ylim(-10, 175))
print(p)
# The plot file name has no extension: '.csv' is replaced by '_outlierplot'.
plotfile = filename.replace('.csv', '_outlierplot')
p9.ggsave(plot=p, filename=plotfile)

# plot regression
# Median per rounded age as points, then the three fitted curves (linear,
# log10, ln) evaluated at the same x values.
x = data_stats_regression['age_rounded']
y = data_stats_regression['median']
plt.plot(x, y, 'o')
plt.plot(x, r.func_linear(x, *linear_coeff))
plt.plot(x, r.func_log(x, *log10_coeff))
plt.plot(x, r.func_ln(x, *ln_coeff))
plt.title(
    "Regression performed on medians of age 1, 3 and 5\ndata with outliers removed"

コード例 #22

0

ファイルを表示

ファイル: biorxiv_pca_category_bootstrap.py プロジェクト: MarvinT/annorxiver

# In[8]:

# One row per projected document: columns PC_1..PC_<n_components>, plus the
# category and document columns copied from document_categories_df.
projected_documents_df = (pd.DataFrame(
    projected_documents,
    columns=[f"PC_{dim+1}" for dim in range(n_components)
             ]).assign(category=document_categories_df.category.tolist(),
                       document=document_categories_df.document.tolist()))
projected_documents_df

# In[9]:

# Categories ordered by their median PC_1 value (ascending after the final
# reversal); this fixes the order of the boxes along the discrete axis.
category_order = (
    projected_documents_df.groupby("category")
    .agg({"PC_1": "median"})
    .sort_values("PC_1", ascending=False)
    .reset_index()
    .category.tolist()[::-1]
)

g = (
    p9.ggplot(projected_documents_df)
    + p9.aes(x="factor(category)", y="PC_1")
    + p9.geom_boxplot(
        fill="#a6cee3",
        outlier_size=1,
        outlier_alpha=0.65,
        fatten=1.5,
    )
    + p9.coord_flip()
    + p9.scale_x_discrete(limits=category_order)
    + p9.labs(x="Article Category", y="PC1")
    # theme_seaborn is a complete theme and replaces whatever theme was set
    # before it, so the figure_size override must be added after it.
    + p9.theme_seaborn(
        context="paper", style="ticks", font="Arial", font_scale=1)
    + p9.theme(figure_size=(6.66, 5))
)
g.save("output/pca_plots/figures/category_box_plot_pc1.png", dpi=250)
g.save(
    "output/pca_plots/svg_files/category_box_plot/category_box_plot_pc1.svg",
    dpi=250)
print(g)

コード例 #23

0

ファイルを表示

ファイル: test_position.py プロジェクト: tr8dr/plotnine

def test_dodge2_varwidth():
    """Variable-width boxplots dodged with position_dodge2(preserve='single')."""
    dodge = position_dodge2(preserve='single')
    boxes = geom_boxplot(position=dodge, varwidth=True, size=2)
    p = ggplot(df3, aes('x', 'y', color='c')) + boxes
    assert p + _theme == 'dodge2_varwidth'

コード例 #24

0

ファイルを表示

ファイル: churn_visuals.py プロジェクト: timcashion/churn_analysis

+ geom_histogram()).save(filename="MonthlyCharges_Hist.png", dpi=300)

# Histogram of TotalCharges, saved as a PNG.
(ggplot(dat, aes(x='TotalCharges'))
+ geom_histogram()).save(filename="TotalCharges_Hist.png", dpi=300)

#Neither follow a normal distribution. Log transformation could help, but these are odd. 
# log(x + 1) versions of both charge columns.
dat["LogTotalCharges"] = np.log(dat["TotalCharges"]+1)
dat["LogMonthlyCharges"] = np.log(dat["MonthlyCharges"]+1)


# These two plots are built but neither saved nor printed.
(ggplot(dat, aes(x='LogMonthlyCharges'))
+ geom_histogram())

(ggplot(dat, aes(x='LogTotalCharges'))
+ geom_histogram())

#Doesn't really help so leave this for now. 

# Drop the temporary log columns again.
dat = dat.drop(columns = ["LogTotalCharges", "LogMonthlyCharges"])



# String copy of Churn, used as the x variable of the boxplots below.
dat["Churn_label"] = dat["Churn"].astype(str)

(ggplot(dat, aes(x="Churn_label", y='MonthlyCharges'))
+ geom_boxplot()).save(filename="MonthlyChargesChurn_Box.png", dpi=300)

(ggplot(dat, aes(x="Churn_label", y='TotalCharges'))
+ geom_boxplot()).save(filename="TotalChargesChurn_Box.png", dpi=300)

# The helper column is only needed for plotting.
dat = dat.drop(columns="Churn_label")

コード例 #25

0

ファイルを表示

ファイル: storage_throughput_vs_cpu_plots.py プロジェクト: scotthart/google-cloud-cpp

    + p9.geom_point()
    + facet
)
# The conditional expression binds loosest, so this saves either
# `plot + scale_y_log10()` or the unmodified `plot`.
(plot + p9.scale_y_log10() if use_y_log10 else plot).save(
    args.output_prefix + ".elapsed-vs-size.png"
)

# %%
# CpuNanosPerByte against MiB, one colour per ApiName.
plot = (
    p9.ggplot(data=data, mapping=p9.aes(x="MiB", y="CpuNanosPerByte", color="ApiName"))
    + p9.geom_point()
    + facet
)
(plot + p9.scale_y_log10() if use_y_log10 else plot).save(
    args.output_prefix + ".cpu-vs-size.png"
)

# %%
# MiBs against MiB; no log scale option for this plot.
(
    p9.ggplot(data=data, mapping=p9.aes(x="MiB", y="MiBs", color="ApiName"))
    + p9.geom_point()
    + facet
).save(args.output_prefix + ".tp-vs-size.png")

# %%
# Distribution of MiBs per ApiName as boxplots.
(
    p9.ggplot(data=data, mapping=p9.aes(x="ApiName", y="MiBs", color="ApiName"))
    + p9.geom_boxplot()
    + facet
).save(args.output_prefix + ".tp-vs-api.png")

コード例 #26

0

ファイルを表示

        df, separated_peaks = er.proof_artificial(
            model,
            ad_partial,
            region_length=parameters['pad_to'],
            nb_datasets=parameters['artificial_nb_datasets'],
            nb_tfs=parameters['artificial_nb_tfs'],
            n_iter=500,
            squish_factor=parameters['squish_factor'])
        arti_end = time.time()
        print('Artificial data generalisation completed in ' +
              str(arti_end - arti_start) + ' s')

        # The plots
        a = ggplot(df, aes(x="type", y="rebuilt_value", fill="tf_group"))
        a1 = a + geom_violin(position=position_dodge(1), width=1)
        a2 = a + geom_boxplot(position=position_dodge(1), width=0.5)
        b = ggplot(df, aes(
            x="brothers", y="rebuilt_value",
            group="brothers")) + scale_fill_grey() + geom_boxplot(width=0.4)

        a2.save(filename=plot_output_path +
                'artifical_data_systematisation_value_per_type.png',
                height=10,
                width=14,
                units='in',
                dpi=400,
                verbose=False)
        b.save(filename=plot_output_path +
               'artifical_data_systematisation_value_per_brothers.png',
               height=10,
               width=14,

コード例 #27

0

ファイルを表示

    treatment_replace)

# Quick look at the size and the first rows of the scores table.
print(scores_df.shape)
scores_df.head(3)

# In[ ]:

# In[6]:

# Number of rows per treatment.
scores_df.Metadata_treatment.value_counts()

# In[7]:

# "Clone A" column per Metadata_clone_number, boxes filled by data_fit,
# one facet per shuffle_label.
clone_a_gg = (
    gg.ggplot(scores_df, gg.aes(y="Clone A", x="Metadata_clone_number")) +
    gg.geom_boxplot(gg.aes(fill="data_fit")) +
    gg.facet_wrap("~shuffle_label") + gg.xlab("Cell Line") +
    gg.ylab("Clone A Probability") + gg.theme_bw() +
    gg.theme(legend_key=gg.element_rect(color="black", fill="white"),
             strip_text=gg.element_text(size=6, color="black"),
             strip_background=gg.element_rect(colour="black", fill="#fdfff4")))

# NOTE(review): `file` shadows a name that was a builtin in Python 2;
# harmless here, but consider renaming.
file = pathlib.Path("figures", "predictions", "clone_a_single_cell_proba.png")
clone_a_gg.save(file, height=3, width=6, dpi=400)

# Bare expression: displays the plot when run in a notebook cell.
clone_a_gg

# In[8]:

clone_e_gg = (
    gg.ggplot(scores_df, gg.aes(y="Clone E", x="Metadata_clone_number")) +

コード例 #28

0

ファイルを表示

    def plot(self):
        """Build the figure for ``self.figtype`` with plotnine and save it.

        Steps:
          * build a DataFrame from ``self.data`` with columns
            ``self.datacols`` and make the column names unique
          * when ``self.savedata`` is set, write the frame to
            ``self.outprefix + ".csv"``
          * log a warning and return early when the frame has no rows
          * pick the geom(s) according to ``self.figtype``
          * add the title and hand the plot and the theme elements to
            ``self.save_plot``

        Raises:
            ValueError: ``self.figtype`` is not one of the handled types.
        """
        df = pandas.DataFrame(
            self.data,
            columns=self.datacols,
        )
        with capture_c_msg("datar", prefix=f"[r]{self.title}[/r]: "):
            df.columns = make_unique(df.columns.tolist())

        if self.savedata:
            datafile = self.outprefix + ".csv"
            logger.info(
                "[r]%s[/r]: Saving data to: %r",
                self.title,
                datafile,
                extra={"markup": True},
            )
            df.to_csv(datafile, index=False)

        if df.shape[0] == 0:
            logger.warning("No data points to plot")
            return

        # Column 0 is mapped to y and column 1 to x; a third column, when
        # present, is used as the fill/color grouping.
        aes_for_geom_fill = None
        aes_for_geom_color = None
        # Default theme tweak: rotated x tick labels. Some branches below
        # replace it or reset it to None.
        theme_elems = p9.theme(axis_text_x=p9.element_text(angle=60, hjust=2))
        if df.shape[1] > 2:
            aes_for_geom_fill = p9.aes(fill=df.columns[2])
            aes_for_geom_color = p9.aes(color=df.columns[2])
        plt = p9.ggplot(df, p9.aes(y=df.columns[0], x=df.columns[1]))
        if self.figtype == "scatter":
            plt = plt + p9.geom_point(aes_for_geom_color)
            theme_elems = None
        elif self.figtype == "line":
            # NOTE(review): no geom is added for "line", so the saved plot
            # has no layer -- confirm whether this is intentional.
            pass
        elif self.figtype == "bar":
            plt = plt + p9.geom_bar(p9.aes(fill=df.columns[0]))
        elif self.figtype == "col":
            plt = plt + p9.geom_col(aes_for_geom_fill)
        elif self.figtype == "pie":
            # Pie charts fall back to a bar chart.
            logger.warning("Pie chart is not support by plotnine yet, "
                           "plotting bar chart instead.")
            col0 = df.iloc[:, 0]
            if df.shape[1] > 2:
                plt = plt + p9.geom_bar(
                    p9.aes(x=df.columns[2], y=col0.name, fill=df.columns[2]),
                    stat="identity"
                    # aes_for_geom_fill,
                    # x=df.Group,
                    # y=col0,
                    # label=paste0(round_(100 * col0 / sum_(col0), 1), "%"),
                    # show_legend=False,
                    # position=p9.position_adjust_text(),
                )
            else:
                # NOTE(review): factor/rev/levels/unique/as_character/cumsum/
                # paste0 are R-style helpers defined outside this block
                # (presumably from datar); `sums` is a lazy `map` object
                # unless `map` is also overridden -- verify the arithmetic
                # below works as intended.
                col0 = factor(col0, levels=rev(unique(as_character(col0))))
                fills = rev(levels(col0))
                sums = map(lambda x: sum(col0 == x), fills)
                # Debug output.
                print(col0)
                print(fills)
                plt = (p9.ggplot(df, p9.aes(x=df.columns[1])) +
                       p9.geom_bar(p9.aes(fill=df.columns[0])) + p9.geom_label(
                           x=1,
                           y=cumsum(sums) - sums / 2,
                           label=paste0(round(sums / sum(sums) * 100, 1), "%"),
                           show_legend=False,
                       ))
                theme_elems = p9.theme(
                    axis_title_x=p9.element_blank(),
                    axis_title_y=p9.element_blank(),
                    axis_text_y=p9.element_blank(),
                )
        elif self.figtype == "violin":
            plt = plt + p9.geom_violin(aes_for_geom_fill)
        elif self.figtype == "boxplot":
            plt = plt + p9.geom_boxplot(aes_for_geom_fill)
        elif self.figtype in ("histogram", "density"):
            # Distribution plots use column 0 on x; column 1 is a grouping
            # unless it is named "ONE".
            plt = p9.ggplot(df, p9.aes(x=df.columns[0]))
            geom = getattr(p9, f"geom_{self.figtype}")
            if df.columns[1] != "ONE":
                plt = plt + geom(p9.aes(fill=df.columns[1]), alpha=0.6)
                theme_elems = None
            else:
                plt = plt + geom(alpha=0.6)
                theme_elems = p9.theme(legend_position="none")
        elif self.figtype == "freqpoly":
            plt = p9.ggplot(df, p9.aes(x=df.columns[0]))
            if df.columns[1] != "ONE":
                plt = plt + p9.geom_freqpoly(p9.aes(fill=df.columns[1]))
            else:
                plt = plt + p9.geom_freqpoly()
            theme_elems = None
        else:
            raise ValueError(f"Unknown figure type: {self.figtype}")

        plt = plt + p9.ggtitle(self.title)
        self.save_plot(plt, theme_elems)

コード例 #29

0

ファイルを表示

    + p9.xlab("age at diagnosis (days)")
    + p9.theme_bw()
    + p9.theme(text=p9.element_text(size=16))
    )

## Challenge: create a scatterplot from smoke_complete showing
# age at diagnosis vs years smoked with points colored by gender
# and appropriate axis labels

#### Plotting distributions ####

# boxplot
# cigarettes_per_day per vital_status; the aesthetics are set on the plot
# itself. As a bare expression this only renders in an interactive session.
(p9.ggplot(smoke_complete,
           p9.aes(x="vital_status",
                          y="cigarettes_per_day"))
    + p9.geom_boxplot()
    )

# change color of boxes and move aes to geom layer
# `color` is given outside aes(), so it is a fixed value rather than a
# mapping to a column.
(p9.ggplot(smoke_complete)
    + p9.geom_boxplot(p9.aes(x="vital_status",
                   y="cigarettes_per_day"), color="tomato")
    )

# adding colored points to black box and whisker plot
# The jitter layer is added after the boxplot layer; alpha=0.2 makes the
# points semi-transparent.
(p9.ggplot(smoke_complete,
           p9.aes(x="vital_status",
                          y="cigarettes_per_day"))
    + p9.geom_boxplot()
    + p9.geom_jitter(alpha=0.2, color="blue")
    )

コード例 #30

0

ファイルを表示

ファイル: ch14_housePrice_prediction2.py プロジェクト: xiaoguozhi/Applied-Computational-Thinking-with-Python

# -*- coding: utf-8 -*-
"""Exploratory look at the house-price data stored in kc_house_data.csv.

The script loads the CSV, evaluates a few summary expressions, saves a seaborn
pair plot to ``pairplotting.png`` and builds a plotnine box plot.

The bare expressions (``describe()``, ``head()``, ``isnull().sum()`` and the
final plotnine expression) only show their result when the script is run
interactively, statement by statement; run as a plain script their values are
discarded.
"""

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# NOTE(review): `huron` is no longer referenced below; the import is kept in
# case another part of the file relies on it.
from plotnine.data import huron
from plotnine import ggplot, aes, geom_boxplot

housing_data = pd.read_csv(
    "C:\\Users\\sofia.dejesus\\Documents\\02_book\\kc_house_data.csv")

# Summary statistics of the numeric columns, then a peek at the first rows.
housing_data.describe(include=[np.number])
# `head` must be called; the bare attribute only returned the bound method.
housing_data.head()

housing_data.describe()

# Count the missing values in every column.
housing_data.isnull().sum()

# Pair plot of a few selected columns, written to disk before being shown.
coln = ['price', 'sqft_living', 'zipcode', 'sqft_above']
sns.pairplot(housing_data[coln], height=4)
plt.savefig('pairplotting.png', dpi=300)
plt.show()

# The mapped columns belong to the housing data; the `huron` example data set
# has no `sqft_living` / `sqft_above` columns, so plot `housing_data` instead.
(ggplot(housing_data) + aes(x='sqft_living', y='sqft_above') + geom_boxplot())

コード例 #31

0

ファイルを表示

ファイル: md_plot.py プロジェクト: TinoGehlert/md_plot

def MDplot(Data,
           Names=None,
           Ordering='Default',
           Scaling=None,
           Fill='darkblue',
           RobustGaussian=True,
           GaussianColor='magenta',
           Gaussian_lwd=1.5,
           BoxPlot=False,
           BoxColor='darkred',
           MDscaling='width',
           LineColor='black',
           LineSize=0.01,
           QuantityThreshold=40,
           UniqueValuesThreshold=12,
           SampleSize=500000,
           SizeOfJitteredPoints=1,
           OnlyPlotOutput=True,
           ValueColumn=None,
           ClassColumn=None):
    """
    Plots a mirrored density plot for each numeric column

    Non-numeric columns and columns whose sum is infinite are dropped (a
    message is printed for each). Column labels are converted to strings and
    labels that parse as a number get a "C_" prefix. Columns with too few
    values or too few unique values are drawn as jittered points around their
    median instead of as a density.

    Args:
        Data (dataframe): dataframe containing data. Each column is one
                          variable (wide table format, for long table format
                          see ValueColumn and ClassColumn)
        Names (list): list of column names (will be used if data is not a
                      dataframe)
        Ordering (str): 'Default', 'Columnwise', 'Alphabetical' or 'Statistics'
        Scaling (str): scaling method, one of: Percentalize, CompleteRobust,
                                               Robust, Log
        Fill (str): color of MD-Plot
        RobustGaussian (bool): draw a gaussian distribution if column is
                               gaussian (switched off when Scaling is "Log")
        GaussianColor (str): color for gaussian distribution
        Gaussian_lwd (float): line width of gaussian distribution
        BoxPlot (bool): draw box-plot
        BoxColor (str): color for box-plots
        MDscaling (str): scale of ggplot violin
        LineColor (str): outline color of the violins and color of the
                         jittered points
        LineSize (float): line width of ggplot violin
        QuantityThreshold (int): minimal number of rows
        UniqueValuesThreshold (int): minimal number of unique values per
                                         column
        SampleSize (int): number of samples used if number of rows is larger
                          than SampleSize
        SizeOfJitteredPoints (float): point size of the jitter layer
        OnlyPlotOutput (bool): if True than returning only ggplot object,
                               if False than returning dictionary containing
                               ggplot object and additional infos
        ValueColumn (str): name of the column of values to be plotted
                           (data in long table format)
        ClassColumn (str): name of the column with class identifiers for the
                           value column (data in long table format)

    Returns:
        ggplot object or dictionary containing ggplot object and additional
        infos (keys "Ordering", "DataOrdered" and "ggplotObj")

    Raises:
        ValueError: if Ordering is not one of the supported values
        Exception: if Data cannot be converted into a dataframe or if
                   ValueColumn / ClassColumn are not columns of Data
    """

    # Fail early with a clear message; an unknown ordering previously only
    # surfaced much later as an undefined-variable error.
    if Ordering not in ("Default", "Columnwise", "Alphabetical",
                        "Statistics"):
        raise ValueError("Ordering must be one of 'Default', 'Columnwise', "
                         "'Alphabetical' or 'Statistics'")

    if not isinstance(Data, pd.DataFrame):
        try:
            if Names is not None:
                Data = pd.DataFrame(Data, columns=Names)
            else:
                Data = pd.DataFrame(Data)
                # Without explicit names every column label gets a "C_" prefix
                Data = Data.rename(
                    columns={col: "C_" + str(col) for col in Data.columns})
        except Exception as err:
            raise Exception(
                "Data cannot be converted into pandas dataframe") from err
    else:
        Data = Data.reset_index(drop=True)

    # Long table format: build one column per class from the value column
    if ValueColumn is not None and ClassColumn is not None:
        lstCols = list(Data.columns)
        if ValueColumn not in lstCols:
            raise Exception("ValueColumn not contained in dataframe")
        if ClassColumn not in lstCols:
            raise Exception("ClassColumn not contained in dataframe")

        lstClasses = list(Data[ClassColumn].unique())
        DataWide = pd.DataFrame()
        for strClass in lstClasses:
            dfTemp = Data[Data[ClassColumn] == strClass]\
                .reset_index(drop=True)
            dfTemp = dfTemp.rename(columns={ValueColumn: strClass})
            dfTemp = dfTemp[[strClass]]
            if len(DataWide) == 0:
                DataWide = dfTemp
            else:
                DataWide = DataWide.join(dfTemp, how='outer')
        Data = DataWide.copy()

    # Keep only numeric columns with a finite sum
    for strCol in list(Data.columns):
        if not is_numeric_dtype(Data[strCol]):
            print("Deleting non numeric column: " + str(strCol))
            Data = Data.drop([strCol], axis=1)
        elif abs(Data[strCol].sum()) == np.inf:
            print("Deleting infinite column: " + str(strCol))
            Data = Data.drop([strCol], axis=1)

    Data = Data.rename_axis("index", axis="index")\
        .rename_axis("variable", axis="columns")
    dvariables = Data.shape[1]
    nCases = Data.shape[0]

    if nCases > SampleSize:
        print('Data has more cases than "SampleSize". Drawing a sample for '
              'faster computation. You can omit this by setting '
              '"SampleSize=len(data)".')
        sampledIndex = np.sort(
            np.random.choice(list(Data.index), size=SampleSize, replace=False))
        Data = Data.loc[sampledIndex]
        # Everything below sizes samples and arrays by the number of rows
        # actually kept, so the count has to follow the down-sampling.
        nCases = Data.shape[0]

    # renaming columns to nonumeric names
    # This happens before any per-column statistic is computed so that
    # lstCols, nPerVar and nUniquePerVar all carry the final column labels.
    dctCols = {}
    for strCol in Data.columns:
        try:
            float(strCol)
            dctCols[strCol] = "C_" + str(strCol)
        except (TypeError, ValueError, OverflowError):
            dctCols[strCol] = str(strCol)
    Data = Data.rename(columns=dctCols)
    lstCols = list(Data.columns)

    # Number of non-missing values and of distinct non-missing values
    nPerVar = Data.count()
    nUniquePerVar = Data.nunique()

    if Scaling == "Percentalize":
        Data = Data.apply(lambda x: 100 * (x - x.min()) / (x.max() - x.min()))
    elif Scaling == "CompleteRobust":
        Data = robust_normalization(Data, centered=True, capped=True)
    elif Scaling == "Robust":
        Data = robust_normalization(Data, centered=False, capped=False)
    elif Scaling == "Log":
        Data = signed_log(Data, base="Ten")
        if RobustGaussian:
            RobustGaussian = False
            print("log with robust gaussian does not work, because mean and "
                  "variance is not valid description for log normal data")

#_______________________________________________Roboust Gaussian and Statistics
    if RobustGaussian or Ordering == "Statistics":
        # Infinite values are treated as missing for the statistics
        Data = Data.replace([np.inf, -np.inf], np.nan)

        if nCases < 50:
            warnings.warn("Sample is maybe too small for statistical testing")

        # Width of the interquartile range of a standard normal distribution;
        # dividing an IQR by it gives a robust standard deviation estimate.
        factor = abs(norm.ppf(0.25)) + abs(norm.ppf(0.75))
        std = Data.std()

        dfQuartile = Data.apply(
            lambda x: mquantiles(x, [0.25, 0.75], alphap=0.5, betap=0.5))
        # Third row holds the interquartile range (DataFrame.append no longer
        # exists in pandas 2, so the row is added by label instead).
        dfQuartile.loc[2] = dfQuartile.loc[1] - dfQuartile.loc[0]
        dfQuartile.index = ["low", "hi", "iqr"]
        dfMinMax = Data.apply(
            lambda x: mquantiles(x, [0.001, 0.999], alphap=0.5, betap=0.5))
        dfMinMax.index = ["min", "max"]

        # Explicit dtype: an empty Series defaults to object dtype in recent
        # pandas versions, which breaks np.log further down.
        shat = pd.Series(dtype=float)
        mhat = pd.Series(dtype=float)
        nonunimodal = pd.Series(dtype=float)
        skewed = pd.Series(dtype=float)
        bimodalprob = pd.Series(dtype=float)
        isuniformdist = pd.Series(dtype=float)
        nSample = max([10000, nCases])
        normaldist = pd.DataFrame(np.full((nSample, dvariables), np.nan),
                                  columns=lstCols)

        for strCol in lstCols:
            shat[strCol] = min(
                [std[strCol], dfQuartile[strCol].loc["iqr"] / factor])
            mhat[strCol] = trim_mean(Data[strCol].dropna(), 0.1)

            if nPerVar[strCol] < 8:
                warnings.warn("Sample of finite values to small to calculate "
                              "agostino.test or dip.test for " + strCol)
                runTests = False
            else:
                if nCases > 45000 and nPerVar[strCol] > 8:
                    # statistical testing does not work with to many cases
                    sampledIndex = np.sort(
                        np.random.choice(list(Data.index),
                                         size=45000,
                                         replace=False))
                    vec = Data[strCol].loc[sampledIndex]
                else:
                    vec = Data[strCol]
                runTests = nUniquePerVar[strCol] > UniqueValuesThreshold
                if not runTests:
                    print("Not enough unique values for statistical testing, "
                          "thus output of testing is ignored.")

            if runTests:
                nonunimodal[strCol] = dip.diptst(vec.dropna(), numt=100)[1]
                skewed[strCol] = skewtest(vec)[1]
                # loc and scale of the uniform distribution to test against
                args = (dfMinMax[strCol].loc["min"],
                        dfMinMax[strCol].loc["max"]
                        - dfMinMax[strCol].loc["min"])
                isuniformdist[strCol] = kstest(vec, "uniform", args)[1]
                bimodalprob[strCol] = bimodal(vec)["Bimodal"]
            else:
                # Neutral values that keep the column out of the gaussian
                # overlay and give it zero effect strength
                nonunimodal[strCol] = 1
                skewed[strCol] = 1
                isuniformdist[strCol] = 0
                bimodalprob[strCol] = 0

            if isuniformdist[strCol] < 0.05 and nonunimodal[strCol] > 0.05 \
                    and skewed[strCol] > 0.05 and bimodalprob[strCol] < 0.05 \
                    and nPerVar[strCol] > QuantityThreshold \
                    and nUniquePerVar[strCol] > UniqueValuesThreshold:
                # Draw from the robustly estimated gaussian and blank out the
                # draws outside the observed range (bounds computed once, not
                # once per drawn value).
                lower = Data[strCol].min()
                upper = Data[strCol].max()
                draws = np.random.normal(mhat[strCol], shat[strCol], nSample)
                draws[(draws < lower) | (draws > upper)] = np.nan
                normaldist[strCol] = draws
        # p-values of exactly zero would give log(0)
        nonunimodal[nonunimodal == 0] = 0.0000000001
        skewed[skewed == 0] = 0.0000000001
        effectStrength = (-10 * np.log(skewed) - 10 * np.log(nonunimodal)) / 2

#______________________________________________________________________Ordering
    if Ordering == "Default":
        bimodalprob = pd.Series(dtype=float)
        for strCol in lstCols:
            if nPerVar[strCol] < 8:
                bimodalprob[strCol] = 0
            elif nCases > 45000 and nPerVar[strCol] > 8:
                sampledIndex = np.sort(
                    np.random.choice(list(Data.index),
                                     size=45000,
                                     replace=False))
                vec = Data[strCol].loc[sampledIndex]
                bimodalprob[strCol] = bimodal(vec)["Bimodal"]
            else:
                bimodalprob[strCol] = bimodal(Data[strCol])["Bimodal"]
        if len(bimodalprob.unique()) < 2 and dvariables > 1 \
                and RobustGaussian:
            # All columns tie on bimodality: fall back to the test statistics
            rangfolge = list(effectStrength.sort_values(ascending=False).index)
            print("Using statistics for ordering instead of default")
        else:
            rangfolge = list(bimodalprob.sort_values(ascending=False).index)
    elif Ordering == "Columnwise":
        rangfolge = lstCols
    elif Ordering == "Alphabetical":
        rangfolge = sorted(lstCols)
    else:  # "Statistics"
        rangfolge = list(effectStrength.sort_values(ascending=False).index)

#________________________________________________________________Data Reshaping
    needsJitter = bool(nPerVar.min() < QuantityThreshold
                       or nUniquePerVar.min() < UniqueValuesThreshold)
    if needsJitter:
        warnings.warn("Some columns have less than " + str(QuantityThreshold) +
                      " data points or less than " +
                      str(UniqueValuesThreshold) +
                      " unique values. Changing from MD-plot to Jitter-Plot "
                      "for these columns.")
        dataDensity = Data.copy()
        mm = Data.median()
        for strCol in lstCols:
            if nPerVar[strCol] < QuantityThreshold \
                    or nUniquePerVar[strCol] < UniqueValuesThreshold:
                # Replace the column by tiny noise around its median
                noise = np.random.uniform(-0.001, 0.001, nCases)
                if mm[strCol] != 0:
                    dataDensity[strCol] = mm[strCol] * noise + mm[strCol]
                else:
                    dataDensity[strCol] = noise
        # Generates in the cases where pdf cannot be estimated a scatter plot
        dataJitter = dataDensity.copy()
        # Delete all scatters for features where distributions can be estimated
        for strCol in lstCols:
            if nPerVar[strCol] >= QuantityThreshold \
                    and nUniquePerVar[strCol] >= UniqueValuesThreshold:
                dataJitter[strCol] = np.nan
        #apply ordering
        dataframe = dataDensity[rangfolge].reset_index()\
            .melt(id_vars=["index"])
    else:
        dataframe = Data[rangfolge].reset_index().melt(id_vars=["index"])

    dctCols = {"index": "ID", "variable": "Variables", "value": "Values"}
    dataframe = dataframe.rename(columns=dctCols)

    #______________________________________________________________________Plotting
    plot = p9.ggplot(dataframe, p9.aes(x="Variables", group="Variables",
                                        y="Values")) \
        + p9.scale_x_discrete(limits=rangfolge)

    plot = plot + p9.geom_violin(stat=stat_pde_density(scale=MDscaling),
                                 fill=Fill, colour=LineColor,
                                 size=LineSize, trim=True) \
        + p9.theme(axis_text_x=p9.element_text(rotation=90))

    if needsJitter:
        dataframejitter = dataJitter[rangfolge].reset_index()\
            .melt(id_vars=["index"])
        dataframejitter = dataframejitter.rename(columns=dctCols)
        plot = plot + p9.geom_jitter(
            size=SizeOfJitteredPoints,
            data=dataframejitter,
            colour=LineColor,
            mapping=p9.aes(x="Variables", group="Variables", y="Values"),
            position=p9.position_jitter(0.15))

    if RobustGaussian:
        dfTemp = normaldist[rangfolge].reset_index().melt(id_vars=["index"])
        dfTemp = dfTemp.rename(columns=dctCols)
        # Only add the overlay when at least one column received draws
        if not dfTemp["Values"].isnull().all():
            plot = plot + p9.geom_violin(
                data=dfTemp,
                mapping=p9.aes(x="Variables", group="Variables", y="Values"),
                colour=GaussianColor,
                alpha=0,
                scale=MDscaling,
                size=Gaussian_lwd,
                na_rm=True,
                trim=True,
                fill=None,
                position="identity",
                width=1)

    if BoxPlot:
        plot = plot + p9.stat_boxplot(geom="errorbar", width=0.5,
                                      color=BoxColor) \
            + p9.geom_boxplot(width=1, outlier_colour=None, alpha=0,
                              fill='#ffffff', color=BoxColor,
                              position="identity")

    if OnlyPlotOutput:
        return plot

    print(plot)
    return {
        "Ordering": rangfolge,
        "DataOrdered": Data[rangfolge],
        "ggplotObj": plot
    }