Python swarmplot Examples, seaborn.swarmplot Python Examples

Example #1

0

Show file

File: release_test_compare.py Project: cfe-lab/MiCall

def plot_distances(distance_data, filename, title, plot_variable='distance'):
    """Draw one violin-plus-swarm panel per region and save the figure.

    The rows are split by their ``region`` value; every region gets its own
    row of the figure, with ``plot_variable`` plotted against a ``cutoff_n``
    column built by applying ``format_cutoff`` to each row.

    :param distance_data: data frame with ``region`` and ``cutoff`` columns
        plus the column named by ``plot_variable``
    :param filename: destination passed to ``plt.savefig``
    :param title: title placed on the first (top) panel
    :param plot_variable: name of the column plotted on the y axis
    """
    regions = sorted(set(distance_data['region']))
    distance_data = distance_data.sort_values(['region', 'cutoff'])
    sns.set()
    # squeeze=False always yields a 2-D array of axes, even for a single row.
    _, axes_grid = plt.subplots(nrows=len(regions), ncols=1, squeeze=False)
    panels = list(chain.from_iterable(axes_grid))
    for panel, region in zip(panels, regions):
        in_region = distance_data['region'] == region
        region_data = distance_data[in_region]
        # Attach to every row the number of observations sharing its cutoff.
        region_data = region_data.assign(
            count=lambda df: df['cutoff'].map(
                df.groupby(by=['cutoff'])[plot_variable].count()))
        region_data['cutoff_n'] = region_data.apply(format_cutoff, 'columns')

        shared = dict(x='cutoff_n',
                      y=plot_variable,
                      data=region_data,
                      ax=panel)
        sns.violinplot(cut=0, alpha=0.7, **shared)
        # Raise the z-order of the violin artists before the swarm is added.
        plt.setp(panel.lines, zorder=100)
        plt.setp(panel.collections, zorder=100)
        sns.swarmplot(color='k', **shared)
        panel.set_ylabel(region + '\n' + plot_variable)
    panels[0].set_title(title)
    plt.savefig(filename)

Example #2

0

Show file

File: RecBlastFigures.py Project: Efrapoport/recblast-web

def create_swarmplot(df, path, title, colormap, genes, species):
    """
    Draw an orthologue-count swarmplot and save it as a PNG.
    :param df: pandas.DataFrame with 'Gene Name', 'Orthologues' and 'Species' columns.
    :param path: File path; only its directory part is used, as the output folder.
    :param title: Prefix of the output file name ("<title>_swarmplot.png").
    :param colormap: Palette handed to seaborn for the species hue.
    :param genes: Ordered list of genes (x-axis order).
    :param species: Ordered list of species (hue order).
    :return: The output location built by join_folder.
    """
    print("Creating swarmplot for {}".format(path))
    output_dir = os.path.dirname(path)
    png_name = "%s_swarmplot.png" % title
    output = join_folder(output_dir, png_name)

    # A fresh figure, so nothing drawn earlier leaks into the saved image.
    plt.figure(figsize=(16, 10), dpi=180)
    sns.swarmplot(
        x='Gene Name',
        y='Orthologues',
        hue='Species',
        order=genes,
        hue_order=species,
        data=df,
        palette=colormap,
    )
    plt.ylabel("# Orthologues")
    plt.xlabel("Gene Name")
    plt.ylim(0)  # lower bound only; the upper bound is left untouched
    plt.yticks(fontsize=10)
    plt.xticks(fontsize=10)
    plt.savefig(output)
    plt.close()
    return output

Example #3

0

Show file

File: plotting.py Project: dhuppenkothen/entrofy

def _plot_categorical_and_continuous(df, xlabel, ylabel, x_keys, y_keys, ax,
                                     cmap, n_cat=5, plottype="box"):
    """
    Draw a categorical variable against a continuous one as a box, violin,
    strip or swarm plot.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    xlabel : str
        The column name for the variable on the x-axis

    ylabel : str
        The column name for the variable on the y-axis

    x_keys, y_keys : object
        One of the two is expected to be the very same object as its axis
        label (``x_keys is xlabel`` or ``y_keys is ylabel``); the other one
        is handed to seaborn as the category ``order``.

    ax : matplotlib.Axes object
        The matplotlib.Axes object to plot into

    cmap : matplotlib.cm.colormap
        Passed to ``sns.color_palette`` together with ``n_cat``

    n_cat : int
        The number of categories; used for creating the colour map

    plottype : {"box" | "violin" | "strip" | "swarm"}
        The type of plot to produce; default is a box plot

    Returns
    -------
    ax : matplotlib.Axes object
        The same matplotlib.Axes object for further manipulation

    Raises
    ------
    Exception
        If neither key argument is identical to its label, or if
        ``plottype`` is not one of the supported names.

    """
    if x_keys is xlabel:
        keys = y_keys
    elif y_keys is ylabel:
        keys = x_keys
    else:
        raise Exception("Something went terribly, horribly wrong!")

    current_palette = sns.color_palette(cmap, n_cat)

    # Plot type name -> name of the seaborn function that draws it.
    seaborn_functions = (
        ("box", "boxplot"),
        ("strip", "stripplot"),
        ("swarm", "swarmplot"),
        ("violin", "violinplot"),
    )
    for kind, function_name in seaborn_functions:
        if plottype == kind:
            draw = getattr(sns, function_name)
            draw(x=xlabel, y=ylabel, data=df, order=keys,
                 palette=current_palette, ax=ax)
            return ax

    raise Exception("plottype not recognized!")

Example #4

0

Show file

File: beeswarmplot.py Project: gkunter/coquery

    def plot_facet(self, data, color,
                   x=None, y=None, levels_x=None, levels_y=None,
                   palette=None, **kwargs):
        """
        Draw a swarm plot of the corpus positions into one facet.

        The corpus position column ("coquery_invisible_corpus_id") is always
        one of the plotted variables; ``x`` and ``y`` decide the layout:

        * neither given: corpus position on x only
        * only ``x``: ``x`` on the x axis, corpus position on y (vertical)
        * only ``y``: corpus position on x, ``y`` on the y axis
        * both: corpus position on x, ``y`` on the y axis, ``x`` as hue

        As a side effect ``self.horizontal``, ``self._xlab`` and
        ``self._ylab`` are set to match the chosen layout.

        :param data: data frame handed to seaborn
        :param color: accepted for interface compatibility; not used here
        :param x: column name of the first grouping variable, if any
        :param y: column name of the second grouping variable, if any
        :param levels_x: ordered levels of ``x``
        :param levels_y: ordered levels of ``y``
        :param palette: palette handed to seaborn
        :param kwargs: may contain ``ax``, the axes to draw on; the current
            axes are used when it is missing
        :return: the axes that were drawn on
        """
        # Only fall back to (and possibly create) the current axes when the
        # caller did not supply one.
        ax = kwargs.get("ax")
        if ax is None:
            ax = plt.gca()
        corpus_id = "coquery_invisible_corpus_id"

        # The axes are passed on explicitly so that the plot always ends up
        # on the axes that is returned.
        params = {"data": data, "palette": palette, "ax": ax}
        self.horizontal = True
        if not x and not y:
            params.update({"x": corpus_id})
            self._xlab = x
            self._ylab = ""
        elif x and not y:
            params.update({"x": x, "y": corpus_id, "order": levels_x})
            self.horizontal = False
            self._xlab = x
            self._ylab = "Corpus position"
        elif y and not x:
            params.update({"y": y, "x": corpus_id, "order": levels_y})
            self._xlab = "Corpus position"
            self._ylab = y
        else:
            params.update({"x": corpus_id, "y": y, "hue": x,
                           "order": levels_y, "hue_order": levels_x})
            self._xlab = "Corpus position"
            self._ylab = y

        sns.swarmplot(**params)
        return ax

Example #5

0

Show file

File: diffimTests_OLD_multi.py Project: djreiss/diffimTests

def plotResults(tr, resultKey='resultInputPsf', doRates=False, title='', asHist=False, doPrint=True, actuallyPlot=True):
    """Collect per-method detection counts from a list of runs and plot them.

    Each usable entry of ``tr`` must provide
    ``t[resultKey][method]['FN' | 'FP' | 'TP']`` for the four methods
    'ALstack', 'ZOGY', 'SZOGY' and 'ALstack_decorr'.  Entries that are
    ``None`` or whose ``t[resultKey]`` is falsy are skipped.

    Args:
        tr: list of per-run result dicts (``None`` entries allowed).
        resultKey: key of the sub-dict holding the per-method counts.
        doRates: if True, divide FN, FP and TP by the original ``FN + TP``.
        title: title put on both panels.
        asHist: if True draw histograms (``sns.distplot``) instead of
            violin/swarm/box plots.
        doPrint: if True print the per-method means of FN, FP and TP.
        actuallyPlot: if False return right after the tables are computed;
            matplotlib and seaborn are then neither imported nor configured.

    Returns:
        Tuple ``(TP, FP, FN)`` of DataFrames with one column per method and
        one row per usable run.
    """
    methods = ['ALstack', 'ZOGY', 'SZOGY', 'ALstack_decorr']
    tr = [t for t in tr if t is not None and t[resultKey]]

    def counts(kind):
        # One column per method, one row per run.
        return pd.DataFrame({key: np.array([t[resultKey][key][kind] for t in tr]) for key in methods})

    FN = counts('FN')
    FP = counts('FP')
    TP = counts('TP')
    title_suffix = 's'
    if doRates:
        # The denominator must be taken from the raw counts once.  Dividing
        # in place, one table after the other, would reuse the already
        # normalised FN for the FP and TP denominators.
        total = FN + TP
        FN = FN.div(total)
        FP = FP.div(total)
        TP = TP.div(total)
        title_suffix = ' rate'
    if doPrint:
        # A single pre-formatted string prints the same on Python 2 and 3.
        for name, frame in (('FN', FN), ('FP', FP), ('TP', TP)):
            print('%s: \n%s' % (name, frame.mean()))

    if not actuallyPlot:
        return TP, FP, FN

    # Plotting libraries are only needed from here on.
    import matplotlib.pyplot as plt
    import matplotlib
    matplotlib.style.use('ggplot')

    import seaborn as sns
    sns.set(style="whitegrid", palette="pastel", color_codes=True)

    matplotlib.rcParams['figure.figsize'] = (18.0, 6.0)
    _, axes = plt.subplots(nrows=1, ncols=2)

    def draw_distribution(frame, axis, label, **violin_kwargs):
        # Violin, optional swarm (skipped for 500 rows or more) and a
        # transparent box plot on top of each other.
        sns.violinplot(data=frame, cut=True, linewidth=0.3, scale='width', ax=axis, **violin_kwargs)
        if frame.shape[0] < 500:
            sns.swarmplot(data=frame, color='black', size=3, alpha=0.3, ax=axis)
        sns.boxplot(data=frame, saturation=0.5, boxprops={'facecolor': 'None'},
                    whiskerprops={'linewidth': 0}, showfliers=False, ax=axis)
        plt.setp(axis, alpha=0.3)
        axis.set_ylabel(label + title_suffix)
        axis.set_title(title)

    def draw_histograms(frame, axis, label):
        # One overlaid histogram per method.
        for method in frame:
            sns.distplot(frame[method], label=method, norm_hist=False, ax=axis)
        axis.set_xlabel(label + title_suffix)
        axis.set_title(title)
        axis.legend(loc='upper left', shadow=True)

    if not asHist:
        draw_distribution(TP, axes[0], 'True positive', bw=0.25, alpha=0.5)
        draw_distribution(FP, axes[1], 'False positive', bw=0.5)
    else:
        draw_histograms(TP, axes[0], 'True positive')
        draw_histograms(FP, axes[1], 'False positive')

    return TP, FP, FN

Example #6

0

Show file

File: velocity_analysis.py Project: logyuan/NBA-player-movement

def _team_fatigue_plot(df, team, title, filename, ylim=None):
    """Draw and save the offensive-velocity swarmplot of one team."""
    plt.figure()
    # Combine both conditions into one mask aligned with df, instead of
    # indexing the already filtered frame with a mask built from the full one.
    offence = df[(df.Pos == 'Off') & (df.Tm == team)]
    sns.swarmplot(x='variable', y='value', data=offence)
    plt.title(title)
    plt.xlabel('Quarter')
    plt.ylabel('Mean Offensive Velocity (ft/sec)')
    if ylim is not None:
        plt.ylim(*ylim)
    locs, _ = plt.yticks()
    # Tick labels show the tick value multiplied by 1000; a real list is
    # needed because map() returns a one-shot iterator on Python 3.
    plt.yticks(locs, ["%.1f" % loc for loc in locs * 1000])
    plt.savefig(filename)


def fatigue_plots(df):
    """
    Makes plots showing game fatigue for SAS and IND

    Args:
        df (pd.DataFrame): dataframe of fatigue data
            Note: use extract_fatigue() to obtain this data

    Returns:
        None
        Saves plots to examples/
    """
    # Only the Indiana plot has a fixed y range.
    _team_fatigue_plot(df, 'IND', 'Indiana Pacers Fatigue',
                       'examples/INDfatige', ylim=(0.015, 0.034))
    _team_fatigue_plot(df, 'SAS', 'San Antonio Spurs Fatigue',
                       'examples/SASfatige')

Example #7

0

Show file

File: LDA.py Project: the0demiurge/python-test

def plot_prediction2(transformed, predicted, y, label_names, threshold):
    """Swarm-plot the transformed values by true label, coloured by prediction.

    A grey vertical line from y=-50 to y=50 is drawn at ``threshold`` first.
    ``predicted`` and ``y`` are sequences whose items carry a class index in
    position 0; the index is looked up in ``label_names``.
    """
    feature = 'Dimention Reduction Result After LDA Transform'

    def names_for(rows):
        return [label_names[int(row[0])] for row in rows]

    plot([threshold, threshold], [-50, 50], 'grey', label='Classify boundary')
    columns = [
        pd.DataFrame(transformed, columns=[feature]),
        pd.DataFrame(names_for(predicted), columns=['prediction']),
        pd.DataFrame(names_for(y), columns=['label']),
    ]
    data_t = pd.concat(columns, axis=1)
    sns.swarmplot(y='label', x=feature, hue='prediction', data=data_t)

Example #8

0

Show file

File: beeswarmplot.py Project: gkunter/coquery

 def plot_facet(data, color):
     """Swarm-plot the corpus positions of ``data`` by the last grouping column.

     ``color`` is accepted but not used; ``self`` comes from the enclosing
     scope.
     """
     swarm_args = {
         "x": data[self._groupby[-1]],
         "y": data["coquery_invisible_corpus_id"],
         "order": sorted(self._levels[-1]),
         "palette": self.options["color_palette_values"],
         "data": data,
     }
     sns.swarmplot(**swarm_args)

Example #9

0

Show file

File: charting.py Project: optilude/jira-cycle-extract

def ageing_wip_chart(cycle_data, start_column, end_column, done_column=None, now=None, title=None, ax=None):
    """Draw a swarm plot of the age in days of every unfinished item, by status.

    :param cycle_data: data frame with 'key' and 'summary' columns plus one
        column per status; the status columns from ``start_column`` to
        ``end_column`` are used
    :param start_column: label of the first status column; its value is the
        start timestamp from which the age is counted
    :param end_column: label of the last status column to include
    :param done_column: column whose non-null value marks an item as done;
        defaults to the last column of ``cycle_data``
    :param now: timestamp to measure ages against; defaults to
        ``pd.Timestamp.now()``
    :param title: optional title for the axes
    :param ax: axes to draw on; a new figure is created when omitted
    :return: the axes that were drawn on
    :raises UnchartableData: if ``cycle_data`` has no rows
    """
    if len(cycle_data.index) == 0:
        raise UnchartableData("Cannot draw ageing WIP chart with no data")

    if ax is None:
        _, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    if now is None:
        now = pd.Timestamp.now()

    if done_column is None:
        done_column = cycle_data.columns[-1]

    today = now.date()

    # remove items that are done
    cycle_data = cycle_data[pd.isnull(cycle_data[done_column])]
    # .loc does the label-based column slice; DataFrame.ix no longer exists
    # in current pandas.
    cycle_data = pd.concat((
        cycle_data[['key', 'summary']],
        cycle_data.loc[:, start_column:end_column]
    ), axis=1)

    def extract_status(row):
        # The status is the label of the last column holding a value.
        last_valid = row.last_valid_index()
        if last_valid is None:
            return np.nan
        return last_valid

    def extract_age(row):
        # Whole days between the start timestamp and today.
        started = row[start_column]
        if pd.isnull(started):
            return np.nan
        return (today - started.date()).days

    wip_data = cycle_data[['key', 'summary']].copy()
    wip_data['status'] = cycle_data.apply(extract_status, axis=1)
    wip_data['age'] = cycle_data.apply(extract_age, axis=1)

    wip_data.dropna(how='any', inplace=True)

    # Columns 0 and 1 are 'key' and 'summary'; the rest are the statuses.
    sns.swarmplot(x='status', y='age', order=cycle_data.columns[2:], data=wip_data, ax=ax)

    ax.set_xlabel("Status")
    ax.set_ylabel("Age (days)")

    # Rotate the existing tick labels in place rather than re-setting their
    # text, which may not have been filled in before the first draw.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(90)

    _, top = ax.get_ylim()
    ax.set_ylim(0, top)

    return ax

Example #10

0

Show file

File: solution.py Project: strongSquirell/DataScience

def dataExpl(data):
    """Draw three exploratory plots of survival by sex and age.

    ``data`` needs the columns 'Sex', 'Survived' and 'Age'.  Nothing is
    returned; the plots go to the current matplotlib axes.
    """
    # Statistical description of the data (the result is not kept)
    data.describe()

    sns.barplot(x='Sex', y='Survived', data=data)

    # Work on a copy so the AgeGroup column does not end up in `data`.
    by_age = data[['Age', 'Survived', 'Sex']].copy()
    is_child = by_age.Age < 15
    is_adult = by_age.Age >= 15
    by_age.loc[is_child, 'AgeGroup'] = 'Children'
    by_age.loc[is_adult, 'AgeGroup'] = 'Adult'
    sns.barplot(x='AgeGroup', y='Survived', hue='Sex', data=by_age)

    sns.swarmplot(x='Age', y='Sex', hue='Survived', data=data)

Example #11

0

Show file

File: plotting.py Project: tomhettinger/clustering

def swarmplot(df):
    """Swarm-plot four measurements per group and save the figure.

    ``df`` is melted to long form (one row per measurement value) keeping
    the 'group' column as hue.  The image is written to
    FIG_PATH/swarmplot.png.
    """
    measurements = ["height", "heartrate", "weight", "age"]
    tick_labels = ['height (cm)', 'heart rate (bpm)', 'weight (kg)', 'age (years)']

    fig = plt.figure(figsize=[12, 8])
    ax = fig.gca()
    long_form = pd.melt(
        frame=df,
        id_vars=["group"],
        value_vars=measurements,
        var_name="measurement",
    )
    sns.swarmplot(x="measurement", y="value", hue="group", data=long_form)
    ax.set_xticklabels(tick_labels)
    ax.set_xlabel('')
    fig.tight_layout()
    target = os.path.join(FIG_PATH, 'swarmplot.png')
    plt.savefig(target, dpi=100)

Example #12

0

Show file

File: parA_inheritance.py Project: mountainpenguin/spot_analysis

def swarm2(ax, data, xvar1, xlabel1, xvar2, xlabel2, yvar, ylabel):
    """Draw a two-column swarmplot on ``ax`` with mean lines, p-value and n.

    :param ax: axes to draw on
    :param data: data frame containing the columns ``xvar1`` and ``xvar2``
    :param xvar1: name of the first column
    :param xlabel1: tick label for the first column
    :param xvar2: name of the second column
    :param xlabel2: tick label for the second column
    :param yvar: accepted but not used
    :param ylabel: label of the y axis

    The p-value comes from a two-sample t-test without the equal-variance
    assumption (``equal_var=False``) on the non-null values.  The reported
    n is the number of non-null values of the first column only.
    """
    first = data[xvar1]
    second = data[xvar2]

    # Draw on the axes that was passed in, not on whatever axes happens to
    # be current.
    sns.swarmplot(data=data[[xvar1, xvar2]], ax=ax)

    # plot mean value: x is given in axes fractions, y in data units
    trans = matplotlib.transforms.blended_transform_factory(
        ax.transAxes, ax.transData
    )
    for x_span, column in (([0.1, 0.4], first), ([0.6, 0.9], second)):
        mean = column.mean()
        ax.plot(x_span, [mean, mean], "r-", transform=trans)

    pvalue = scipy.stats.ttest_ind(
        first.dropna(),
        second.dropna(),
        equal_var=False
    ).pvalue

    # Make sure the y axis reaches at least 20% of the data range above the
    # maximum, leaving room for the annotations.
    dataset = pd.concat([first, second])
    y_low, y_high = ax.get_ylim()
    y20 = dataset.max() + (dataset.max() - dataset.min()) * 0.2
    if y_high < y20:
        ax.set_ylim([y_low, y20])

    ax.annotate(
        r"$p = {0:.5f}$".format(pvalue),
        xy=(0.5, 0.95),
        horizontalalignment="center",
        xycoords=ax.transAxes,
    )
    ax.annotate(
        r"$n = {0}$".format(len(first.dropna())),
        xy=(0.5, 0.9),
        horizontalalignment="center",
        xycoords=ax.transAxes,
    )

    ax.set_ylabel(ylabel)
    ax.set_xticklabels([xlabel1, xlabel2])
    sns.despine()

Example #13

0

Show file

File: AnalyzeOutput.py Project: floft/cpu-scheduler

        def combPlot(x, y, data, hue=None, onlyAverage=False):
            """Swarm-plot ``y`` against ``x``, optionally split by ``hue``.

            With ``onlyAverage`` the data is first reduced to the mean per
            ``x`` value (or per ``x``/``hue`` pair), so that one point is
            drawn for each of them.
            """
            if onlyAverage:
                # Averaging averages: one value per x (and hue) value.
                #
                # Note: the reset_index() is required to make Seaborn be able
                # to plot the data for some reason.
                # http://stackoverflow.com/a/10374456
                group_keys = [x, hue] if hue else x
                averaged = data.groupby(group_keys).mean().reset_index()
                data = pd.DataFrame(averaged)

            sns.swarmplot(x=x, y=y, hue=hue, data=data)

Example #14

0

Show file

File: plotter.py Project: ari99/wiki_stats

def make_plot_file(top, file_name, time_func, rank_name, rank_max):
    """
    Creates a swarm plot of 'pageviews' depending on time
    with a combination of 'pagename' and 'projectcode' as the keys to plot.

    :param top: Pandas dataframe with pages ranked within each occurrence of the current timeframe.
    :param file_name: The name of the png plot file to create.
    :param time_func: Function applied to every row to create the time data.
    :param rank_name: The column with the rank.
    :param rank_max: The highest (least 'pageviews') rank to include; exclusive.
    :return:
    """
    # Get only the rows with rank less than rank_max
    top = top[top[rank_name] < rank_max].copy(deep=True)
    # Create time column
    top['time'] = top.apply(time_func, axis=1)
    # Replace blank page names with "main page"
    top['pagename'] = top['pagename'].fillna("main page")
    # Create key for the legend and plotting.
    top['pagename_projectcode'] = top['pagename'] + "_" + top['projectcode']
    # Create and configure seaborn plot.
    ax = sns.swarmplot(x="time", y="pageviews", hue="pagename_projectcode", data=top)
    lgd = ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.xticks(rotation=45)
    # Save the figure that was actually drawn on, rather than assuming that
    # it is figure number 1.
    fig = ax.get_figure()
    # Create file; the legend sits outside the axes, so it has to be named
    # explicitly for the tight bounding box to include it.
    fig.savefig(file_name, bbox_extra_artists=(lgd,), bbox_inches='tight')
    # Clear plot data
    fig.clf()
Example #15

0

Show file

File: plotting.py Project: agartland/utils

def swarmBox(data, x, y, hue, palette=None, order=None, hue_order=None, connect=False):
    """Draw a box plot with a dodged swarm plot on top, grouped by x and hue.

    With ``connect=True`` grey lines are drawn between the points of the
    different hue levels within each x category, pairing them in the row
    order of ``data``.

    Depends on plot order of the swarm plot which does not seem dependable at the moment.
    Better idea would be to adopt code from the actual swarm function for this, adding boxplots separately

    :param data: data frame with the columns named by x, y and hue
    :param x: name of the categorical column
    :param y: name of the value column
    :param hue: name of the column used for colouring and dodging
    :param palette: colours, one per hue level; defaults to 'Set2'
    :param order: order of the x categories; defaults to the sorted unique values
    :param hue_order: order of the hue levels; defaults to the sorted unique values
    :param connect: whether to draw the connecting lines
    """
    if palette is None:
        palette = sns.color_palette('Set2',  n_colors=data[hue].unique().shape[0])
    if hue_order is None:
        hue_order = sorted(data[hue].unique())
    if order is None:
        # Was misspelled `unqiue`, which raised AttributeError whenever the
        # caller relied on the default order.
        order = sorted(data[x].unique())

    params = dict(data=data, x=x, y=y, hue=hue, palette=palette, order=order, hue_order=hue_order)
    sns.boxplot(**params, fliersize=0, linewidth=0.5)
    swarm = sns.swarmplot(**params, linewidth=0.5, edgecolor='black', dodge=True)
    if connect:
        # Every len(hue_order)-th collection is taken as one hue level, so
        # each zipped tuple is (x category, one collection per hue level).
        zipper = [order] + [swarm.collections[i::len(hue_order)] for i in range(len(hue_order))]
        for z in zip(*zipper):
            curx = z[0]
            collections = z[1:]
            offsets = []
            for c, h in zip(collections, hue_order):
                ind = (data[x] == curx) & (data[hue] == h)
                # Rank of every data row by y, used to pick the plotted
                # points in the row order of the data.
                sortii = np.argsort(np.argsort(data.loc[ind, y]))
                offsets.append(c.get_offsets()[sortii, :])

            for zoffsets in zip(*offsets):
                xvec = [o[0] for o in zoffsets]
                yvec = [o[1] for o in zoffsets]
                plt.plot(xvec, yvec, '-', color='gray', linewidth=0.5)
    # Proxy artists for the legend; only their colour matters, the centre is
    # just a valid (x, y) point.
    plt.legend([plt.Circle((0, 0), color=c) for c in palette], hue_order, title=hue)

Example #16

0

Show file

File: seaborn_analysis.py Project: johnstinson99/JiraGraphCreator

    def plot_hours_of_day(self, my_jira_df, my_filename_without_path, my_chart_title, my_output_path, my_relative_output_path, my_png_list):
        """Swarm-plot 'HoursOfDay' per 'Project', coloured by 'StateChange'.

        The y ticks are fixed to the hours 8 to 17; the resulting axes and
        all file-related arguments are passed on to ``self.save_file``.
        """
        hour_ticks = list(range(8, 18, 1))
        g = sns.swarmplot(
            x="Project",
            y="HoursOfDay",
            hue="StateChange",
            data=my_jira_df,
        )
        g.set(yticks=hour_ticks)
        self.save_file(
            my_filename_without_path,
            my_chart_title,
            my_output_path,
            my_relative_output_path,
            my_png_list,
            g,
        )

Example #17

0

Show file

File: grid_plot.py Project: egedinc/IntelAIWorkshop

def plot_acc_grid(models=ALL_MODELS, save_path='../resources/cached_model_grid_scores.csv'):
    """Show two swarm plots of the cached 'acc' scores of the given models.

    The upper panel holds the rows whose 'data_fold' is not 'overfit' (with
    a legend), the lower panel the 'overfit' rows (legend removed).

    :param models: model names to keep (matched against 'model_names')
    :param save_path: CSV file holding the cached grid scores
    """
    grid = pd.read_csv(save_path)
    grid = grid[grid['model_names'].isin(models)]

    _, (ax1, ax2) = plt.subplots(2, figsize=(12, 12))

    def draw(axis, scores):
        sns.swarmplot(data=scores,
                      y='variable', x='value', hue='model_names', ax=axis)
        axis.set(xlabel='scores', ylabel='')

    is_accuracy = grid['score'] == 'acc'

    regular_folds = grid['data_fold'] != 'overfit'
    draw(ax1, grid[is_accuracy & regular_folds])
    ax1.legend(bbox_to_anchor=(1.05, 1), loc='lower right', borderaxespad=0.)

    overfit_fold = grid['data_fold'] == 'overfit'
    draw(ax2, grid[is_accuracy & overfit_fold])
    ax2.legend_.remove()

    plt.show()

Example #18

0

Show file

File: signals_visualize.py Project: olegs/washu

def mean_boxplots(df, title, ax):
    """Box-and-swarm plot of the row means of ``df``, one box per group.

    Every row label is mapped to a group with ``group()``; each point is
    annotated with ``donor(label)`` in the colour of its group.
    """
    signal = df.mean(axis=1).to_frame('value')
    row_groups = [group(name) for name in signal.index]
    signal['group'] = [g.name for g in row_groups]

    # Distinct groups in descending sort order.
    ordered_groups = list(reversed(sorted(set(row_groups))))
    group_names = [g.name for g in ordered_groups]

    shared = dict(x='group', y='value', data=signal,
                  order=group_names, ax=ax)
    sns.boxplot(palette='Set3', linewidth=1.0, **shared)
    sns.swarmplot(color='.25', **shared)

    for position, current in enumerate(ordered_groups):
        members = signal[signal['group'] == current.name]
        for row, label in enumerate(members.index):
            # Label sits 5 points to the right of the point's box position.
            ax.annotate(donor(label),
                        xy=(position, members.iloc[row, :]['value']),
                        xytext=(5, 0),
                        color=current.color,
                        textcoords='offset points')
    ax.set_title(title)

Example #19

0

Show file

File: peak_metrics.py Project: olegs/washu

def frip_boxplot(age_labels, df, save_to):
    """
    Draws a FRiP box plot with the individual donors on top of it.

    :param age_labels: Age labels, in plotting order; "YDS" points are
                       annotated in red, all others in blue
    :param df: Data frame with 'age' and 'frip' columns, indexed by donor label
    :param save_to: Passed on to save_plot
    """
    plt.figure()
    ax = plt.subplot()
    shared = dict(x="age", y="frip", data=df, order=age_labels, ax=ax)
    sns.boxplot(palette="Set3", linewidth=1.0, **shared)
    sns.swarmplot(color=".25", **shared)

    for position, age_label in enumerate(age_labels):
        if age_label == "YDS":
            text_color = "red"
        else:
            text_color = "blue"
        donors = df[df['age'] == age_label]
        for row, label in enumerate(donors.index):
            ax.annotate(label, xy=(position, donors.iloc[row, :]['frip']), xytext=(5, 0),
                        color=text_color, textcoords='offset points')
    ax.set_title("Signal FRiP")
    save_plot(save_to)

Example #20

0

Show file

File: data-viz.py Project: georgetown-analytics/team-buzzfeed

def sns_viz(dataframe):
    """
    Takes a merged buzzfeed data frame and shows a seaborn swarm plot of
    'max_impres' against 'pull_cc'.

    NOTE(review): the previous version passed ``data.grou`` (an attribute
    that does not exist on a GroupBy object) and called
    ``sns_plotter.plt.show()`` (Axes objects have no ``plt``), so it could
    not run.  Plotting the frame directly is the presumed intent; confirm
    whether a per-title aggregation was meant.
    """
    # matplotlib is a hard dependency of seaborn, so this import is available
    # wherever `sns` is.
    import matplotlib.pyplot as plt

    # Plot the data
    sns_plotter = sns.swarmplot(x='pull_cc', y='max_impres', data=dataframe)

    print(type(sns_plotter))
    # show the data
    plt.show()

Example #21

0

Show file

File: plot_frequencies.py Project: hbc/tumor-only-prioritization

def plot_by_genes(df, plot_dir, af_key, config):
    """Plot allele frequencies of known cancer genes in primary, relapse status

    Writes a multi-page PDF to ``plot_dir``: first one violin plot per
    cohort, then one violin-plus-swarm plot per cohort and gene, for genes
    with more than one status and more than 10 rows.  Cohort/gene pairs
    listed in ``config.driver_detailed`` are also saved as PNG files under
    ``plot_dir/detailed``.

    :param df: data frame with 'known', 'cohort' and 'status' columns plus
        the column named by ``af_key``; rows with a null 'known' are ignored
    :param plot_dir: output directory
    :param af_key: name of the allele frequency column (x axis)
    :param config: object with a ``driver_detailed`` collection of
        (cohort, gene) pairs; may be falsy
    :return: path of the PDF file
    """
    pdf_file = os.path.join(plot_dir, "driver-af-comparison.pdf")
    df = df[pd.notnull(df["known"])]
    with PdfPages(pdf_file) as pdf_out:
        # Grouping by a plain column name (not a one-element list) keeps the
        # group keys scalar on every pandas version.
        for cohort, cohort_df in df.groupby("cohort"):
            labels = sorted(cohort_df["status"].unique(), reverse=True)
            # The order is passed to seaborn; setting `.categories` on the
            # column had no effect.
            g = sns.violinplot(x=af_key, y="status", data=cohort_df, inner=None,
                               order=labels)
            g.set_title("%s -- %s cancer genes" % (cohort, len(cohort_df["known"].unique())))
            g = _af_violinplot_shared(g)
            pdf_out.savefig(g.figure)
            plt.clf()
        for cohort, cohort_df in df.groupby("cohort"):
            for gene, gene_df in cohort_df.groupby("known"):
                if len(gene_df["status"].unique()) > 1 and len(gene_df) > 10:
                    # Work on a copy: the group is a slice of the parent frame.
                    gene_df = gene_df.copy()
                    variants_per_status = gene_df["status"].value_counts().to_dict()
                    gene_df["sample_label"] = [
                        "%s\n(%s variants)" % (status, variants_per_status.get(status, 0))
                        for status in gene_df["status"]]
                    labels = list(gene_df["sample_label"].unique())
                    labels.reverse()
                    g = sns.violinplot(x=af_key, y="sample_label", data=gene_df, inner=None,
                                       bw=.1, order=labels)
                    sns.swarmplot(x=af_key, y="sample_label", data=gene_df, color="w",
                                  alpha=.5, order=labels)
                    g.set_title("%s -- %s" % (cohort, gene))
                    g = _af_violinplot_shared(g)
                    pdf_out.savefig(g.figure)
                    if config and (cohort, gene) in config.driver_detailed:
                        out_dir = utils.safe_makedir(os.path.join(plot_dir, "detailed"))
                        # A separate name, so the PDF path returned below is
                        # not overwritten by the last PNG path.
                        detail_file = os.path.join(out_dir, "driver-%s-%s.png" % (cohort, gene))
                        g.figure.savefig(detail_file)
                    plt.clf()
    return pdf_file

Example #22

0

Show file

File: parA_inheritance.py Project: mountainpenguin/spot_analysis

def swarm(ax, data, xlabel1, xlabel2, ylabel):
    """Draw a swarmplot of the columns "y1" and "y2" on ``ax`` with a p-value.

    :param ax: axes to draw on
    :param data: data frame containing the columns "y1" and "y2"
    :param xlabel1: tick label for "y1"
    :param xlabel2: tick label for "y2"
    :param ylabel: label of the y axis

    The p-value comes from a two-sample t-test without the equal-variance
    assumption (``equal_var=False``) on the non-null values.
    """
    first = data["y1"]
    second = data["y2"]

    # Draw on the axes that was passed in, not on whatever axes happens to
    # be current.
    sns.swarmplot(data=data[["y1", "y2"]], ax=ax)

    pvalue = scipy.stats.ttest_ind(
        first.dropna(),
        second.dropna(),
        equal_var=False
    ).pvalue

    # test that ymax is at least 20% higher than range
    dataset = pd.concat([first, second])
    data_max = dataset.max()
    data_range = data_max - dataset.min()
    y_low, y_high = ax.get_ylim()
    y20 = data_max + data_range * 0.2
    if y_high < y20:
        ax.set_ylim([y_low, y20])

    # The text goes between the two columns (x = 0.5 in data coordinates),
    # 15% of the data range above the highest point.
    ymax = data_max + data_range * 0.15
    ax.annotate(
        r"$p = {0:.5f}$".format(pvalue),
        xy=(0.5, ymax),
        horizontalalignment="center",
    )

    ax.set_ylabel(ylabel)
    ax.set_xticklabels([xlabel1, xlabel2])
    sns.despine()

Example #23

0

Show file

File: htbayes.py Project: ericmjl/protein-systematic-characterization

    def plot_posterior(self, rotate_xticks=False):
        """
        Plots a swarm plot of the data overlaid on top of error bars taken
        from the 'fold' trace: the 2.5-97.5 percentile range and the
        25-75 percentile range (IQR, thicker and red), both around the mean.

        :param rotate_xticks: if True, the x tick labels are drawn vertically
        :return: the figure and the axes
        """
        fig = plt.figure()
        ax = fig.add_subplot(111)

        # 1. Percentile bounds, skipping the first 500 samples.
        # NOTE(review): the mean below is taken over the whole trace,
        # including those first 500 samples - confirm this is intended.
        fold = self.trace['fold']
        lower, lower_q, upper_q, upper = np.percentile(
            fold[500:], [2.5, 25, 75, 97.5], axis=0)

        summary_stats = pd.DataFrame()
        summary_stats['mean'] = fold.mean(axis=0)
        means = summary_stats['mean']

        # Error bar lengths are distances from the mean, below and above.
        wide_bars = [means - lower, upper - means]
        iqr_bars = [means - lower_q, upper_q - means]

        # 2. Error bars first, the raw data on top.
        means.plot(ls='', ax=ax, yerr=wide_bars)
        means.plot(ls='', ax=ax, yerr=iqr_bars, elinewidth=4, color='red')
        sns.swarmplot(data=self.data, x=self.sample_col, y=self.output_col,
                      ax=ax, alpha=0.5)

        if rotate_xticks:
            logging.info('rotating xticks')
            plt.xticks(rotation='vertical')
        plt.ylabel(self.output_col)

        return fig, ax

Example #24

0

Show file

File: f_images.py Project: noveroa/DataBaseAWS

def getLine(data_frame, xaxis = 'confName', yaxis = 'counts'):
    """Render a swarm plot and return it as an HTML <img> tag.

    The current axes are cleared, the plot is drawn with the 'Blues'
    palette, and the figure is embedded as a base64 PNG data URI.

    :param data_frame: data frame holding the columns to plot
    :param xaxis: name of the column on the x axis
    :param yaxis: name of the column on the y axis
    :return: string with the <img> tag
    """
    from io import BytesIO

    plt.cla()
    sns.swarmplot(data=data_frame,
                  x=xaxis,
                  y=yaxis,
                  palette='Blues')

    # PNG data is binary, so it needs a bytes buffer; the figure is rendered
    # only once.
    buffer = BytesIO()
    plt.savefig(buffer, format='png')
    # b64encode works on Python 2 and 3 (encodestring is gone from current
    # Python 3) and does not insert line breaks into the data URI.
    data = base64.b64encode(buffer.getvalue()).decode('ascii')
    script = '''<img src="data:image/png;base64,{}";/>'''
    return script.format(data)

Example #25

0

Show file

File: asd.py Project: caporaso-lab/autism-fmt1

def plot_week_data(df, sample_type, metric, hue=None, hide_donor_baseline=False, hide_control_baseline=False, dm=None, show_legend=True, label_axes=True):
    """Plot ``metric`` per week for the autism group, with reference lines.

    The 'week' and ``metric`` columns of ``df`` are converted to numbers in
    place (unparseable values become NaN).  The autism samples of the given
    sample type are shown as a white box plot with a swarm plot on top.
    Horizontal median lines are added for the neurotypical controls, for
    the two donor groups and, when ``dm`` is given, for the distances
    between neurotypical samples.

    :param df: sample metadata; modified in place as described above
    :param sample_type: value of 'SampleType' to select
    :param metric: name of the column to plot
    :param hue: optional column used to colour the swarm points
    :param hide_donor_baseline: skip the two donor median lines
    :param hide_control_baseline: skip the neurotypical median line
    :param dm: optional distance matrix handed to inter_neurotypical_distances
    :param show_legend: draw the legend outside the axes, or remove it
    :param label_axes: if False, blank both axis labels
    :return: the figure
    """
    for column in ('week', metric):
        df[column] = pd.to_numeric(df[column], errors='coerce')

    selection = [('SampleType', sample_type), ('Group', 'autism')]
    asd_data = filter_sample_md(df, selection).sort_values(by='week')

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax = sns.boxplot(data=asd_data, x='week', y=metric, color='white', ax=ax)
    ax = sns.swarmplot(data=asd_data, x='week', y=metric, hue=hue, palette=palette, ax=ax)

    # Not used further down; still evaluated exactly as before.
    x0 = np.min(df['week']) - 1
    x1 = np.max(df['week']) + 1

    def median_line(values, colour_key, line_style, label_template):
        # Horizontal line at the median, labelled with the sample count.
        ax.axhline(np.median(values),
                   color=palette[colour_key], linestyle=line_style,
                   label=label_template % len(values))

    if not hide_control_baseline:
        control = control_metric(df, sample_type, metric=metric)
        median_line(control, 'neurotypical', '--', 'neurotypical (median; n=%d)')

    if not hide_donor_baseline:
        donor_initial = donor_metric(df, metric=metric, group='donor-initial', sample_type=sample_type)
        donor_maintenance = donor_metric(df, metric=metric, group='donor-maintenance', sample_type=sample_type)
        median_line(donor_initial, 'donor', '--', 'donor (median; n=%d)')
        median_line(donor_maintenance, 'donor', ':', 'donor (median; n=%d)')

    if dm is not None:
        inter_nt = inter_neurotypical_distances(df, dm, sample_type=sample_type).condensed_form()
        median_line(inter_nt, 'neurotypical', '-.', 'between neurotypical distance (median; n=%d)')

    if show_legend:
        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    else:
        ax.legend().remove()

    if not label_axes:
        ax.set_xlabel('')
        ax.set_ylabel('')

    return fig

Example #26

0

Show file

# Joint distribution of v1 and v2 as a scatter plot with equal axis scaling.
# x and y are passed by keyword throughout: newer seaborn releases no longer
# accept them positionally, and older ones accept the keyword form as well.
grid = sns.jointplot(x=v1, y=v2, alpha=0.4)
grid.ax_joint.set_aspect('equal')

# In[22]:

# Same data as hexagonal bins.
sns.jointplot(x=v1, y=v2, kind='hex')

# In[23]:

# set the seaborn style for all the following plots
sns.set_style('white')

# Same data as a kernel density estimate, without a gap to the marginals.
sns.jointplot(x=v1, y=v2, kind='kde', space=0)

# In[24]:

iris = pd.read_csv('iris.csv')
iris.head()

# In[25]:

# NOTE(review): newer seaborn releases call this parameter `height`;
# `size` is kept here for older versions - confirm the target version.
sns.pairplot(iris, hue='Name', diag_kind='kde', size=2)

# In[26]:

# Swarm plot and violin plot of the petal length per species, side by side.
plt.figure(figsize=(8, 6))
plt.subplot(121)
sns.swarmplot(x='Name', y='PetalLength', data=iris)
plt.subplot(122)
sns.violinplot(x='Name', y='PetalLength', data=iris)

Example #27

0

Show file

File: average_graph_metrics.py Project: NIDCD/lsnm_in_python

                 color='r',
                 yerr=std_RE[1],
                 error_kw=dict(ecolor='black', lw=2, capsize=5, capthick=2),
                 label='DMS-PF')
 
# Finish the bar chart: axis labels, one tick label per graph metric
# (positioned at index + bar_width), legend, and save to avg_RE.png.
plt.xlabel('Graph metric')
plt.ylabel('Relative Error')
plt.xticks(index + bar_width, ('GE', 'LE', 'CC', 'CP', 'EC', 'BC', 'PC', 'M'))
plt.legend(loc='best')
plt.tight_layout()
fig.savefig('avg_RE.png')

##############################################################################
# try seaborn plots using above data
##############################################################################
# New figure; RE[1] is put into a frame with one row per subject (S1-S6)
# and one column per graph metric.
fig, ax = plt.subplots()
df=pd.DataFrame(data = RE[1],
                index = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'],
                columns = ['GE', 'LE', 'CC', 'CP', 'EC', 'BC', 'PC', 'M']
)

# Violin per metric with the individual values on top as black points.
# No ax argument is given, so both calls draw on the current axes.
ax=sns.violinplot(data=df, scale='count')
ax=sns.swarmplot(data=df, color='black')


##############################################################################
# Show the plots on the screen
##############################################################################
plt.show()

Example #28

0

Show file

File: swarmplot.py Project: rhjohnstone/random

        axes[(row,col)] = plt.subplot2grid(gridshape, (row, col),sharex=axes[(2,col)],sharey=axes[(row,0)])
        
    if (col>0):
        plt.setp(axes[(row,col)].get_yticklabels(), visible=False)
    if (0<=col<=1) and (row<4):
        plt.setp(axes[(row,col)].get_xticklabels(), visible=False)
    if (2<=col<=3) and (row<3):
        plt.setp(axes[(row,col)].get_xticklabels(), visible=False)
        
    axes[(row,col)].plot(npr.randn(col+5),color=colours[i])
   
   

# Demo data: two groups ("P1", "P2") of ten random values each, the second
# group shifted by +1; "axis" numbers the points 0-9 within each group.
# range() objects cannot be concatenated with + on Python 3, hence list().
df = pd.DataFrame(
    {
        "axis": list(range(10)) + list(range(10)),
        "value": np.concatenate((npr.randn(10), npr.randn(10) + 1)),
        "other": ["P1"] * 10 + ["P2"] * 10,
    },
    columns=["axis", "value", "other"],
)

# Parenthesised single argument: prints the same on Python 2 and 3.
print(df)

# One swarm per group on ax1, each point coloured by its number within the group.
sns.swarmplot(y=df["value"],ax=ax1,hue=df["axis"],palette=colours,x=df["other"])

ax1.set_ylabel("")
ax1.set_xlabel("")
ax1.legend_.remove()

#fig.tight_layout()#pad=0.4, w_pad=0.5, h_pad=1.0)
plt.show(block=True)

Example #29

0

Show file

File: srcElements_activity_swarmplot.py Project: brguez/TEIBA

# Read the pre-sorted source-element file and remember the cytoband ids
# (first tab-separated column) in file order; that order drives the x axis.
for line in sortedSrcFile:
    colList = line.rstrip().split("\t")
    cytobandId = colList[0]
    
    sourceElementOrder.append(cytobandId)


##### Make plot
#################



# Wide, short canvas; seaborn draws on the current figure because no ax= is given.
fig = plt.figure(figsize=(25,5))
#ax = sns.swarmplot(x='cytobandId', y='nbTransductions', data=hotL1Df, size=3, edgecolor="gray", order=sourceElementOrder)
# One swarm per cytoband, categories ordered as collected above.
ax = sns.swarmplot(x='cytobandId', y='nbTransductions', data=df, size=3, edgecolor="gray", order=sourceElementOrder)

### Axis labels
ax.set_xlabel('')
ax.set_ylabel('# transductions')

# Tick label orientation: y labels horizontal, x labels vertical.
for item in ax.get_yticklabels():
    item.set_rotation(0)

for item in ax.get_xticklabels():
    item.set_rotation(90)

## Y ticks: 0, 10, ..., 90
ax.set(yticks=np.arange(0,91,10))

Example #30

0

Show file

File: plot_violin_v1.4_2BLs.py Project: SpencerEricksen/informers

# Fresh figure; `ax` is rebound below to the axes returned by seaborn.
fig, ax = plt.subplots()

#ax.set_title('PKIS1 LOTO (N=224 targets)') #, fontsize=10)

# One violin per column of df with an inner box plot. `orientation` ('v' or
# 'h', see the branches below) is defined outside this excerpt.
# No ax= is passed here, so the violins go on the current axes.
ax = sb.violinplot(data=df,
                   palette="Set3",
                   inner='box',
                   scale="count",
                   bw=0.1,
                   alpha=1.0,
                   cut=0,
                   linewidth=1.5,
                   orient=orientation,
                   zorder=0)
#sb.violinplot( data=df, palette="Set3",    inner='stick', scale="count", bw=0.1, alpha=0.5,  cut=0, linewidth=0.5, orient=orientation, ax=ax )
# Overlay the raw observations as small, mostly transparent black points.
sb.swarmplot(data=df, color='k', size=2, alpha=0.25, ax=ax, orient=orientation)

# Put the metric on the value axis and the model names on the category axis,
# shrinking the category tick labels. Any other orientation value leaves the
# axes unlabelled.
if orientation == 'v':
    ax.set_xlabel('IBR model')  #, fontsize=8)
    ax.set_ylabel(metric)
    ax.tick_params(axis='x', labelsize=8)

elif orientation == 'h':
    ax.set_xlabel(metric)
    ax.set_ylabel('IBR model')
    ax.tick_params(axis='y', labelsize=8)

#ax.grid(False)

# width * height
fig.set_size_inches(8, 5)

Example #31

0

Show file

File: playoffs_visual_eda.py Project: pjordan34/GWU_Capstone_NHL_API

    plt.xticks(rotation=60)
    # show and save off the graph
    plt.tight_layout(pad=3.0, w_pad=3.0, h_pad=2.0)
    plt.savefig('data/' + str(season) + 'Playoffs_Save_perc_SOGA.png',
                bbox_inches='tight',
                pad_inches=0.5)
    #plt.show()
    plt.clf()
    # swarm plots on blocks for and against
    fig = plt.figure(figsize=(12, 12))
    fig.suptitle('Blocks Against vs Blocks for per Game by Division' + '' +
                 str(season) + '' + 'Playoffs',
                 fontsize=14)

    plt.subplot(4, 2, 1)
    sns.swarmplot(x='Ev_Team', y='Blocks_A', data=metroDf)
    # Label the axes
    plt.xlabel('Metro Divison')
    plt.ylabel('Blocks Against')
    plt.xticks(rotation=60)

    plt.subplot(4, 2, 2)
    sns.swarmplot(x='Ev_Team', y='Blocks_for', data=metroDf)
    plt.xlabel('Atlantic Divison')
    plt.ylabel('Blocks for')
    plt.xticks(rotation=60)

    plt.subplot(4, 2, 3)
    sns.swarmplot(x='Ev_Team', y='Blocks_A', data=atlanticDf)
    plt.xlabel('Atlantic Divison')
    plt.ylabel('Blocks Against')

Example #32

0

Show file

sns.pointplot(data=df, x="연령대코드(5세단위)", y="신장(5Cm 단위)", hue="음주여부", ci="sd")
sns.pointplot(data=df, x="연령대코드(5세단위)", y="혈색소", ci=None)

■■■■■ boxplot 그래프
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="성별코드")
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")

■■■■■ violinplot 그래프
sns.violinplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부", split=True)
sns.violinplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부", split=True)

■■■■■ swarm plot 그래프
sns.swarmplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.swarmplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.swarmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부")

■■■■■ lmplot 그래프  # 회귀선을 볼 수 있다  # col로 구분하여 그래프를 나눠서 그릴 수 있다
sns.lmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부")
sns.lmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부", col="성별코드")

★★★★★★★★★수치형 데이터★★★★★★★★★★
■■■■■ scatterplot 그래프 X,Y 수치형 데이터
sns.scatterplot(data=df, x="(혈청지오티)AST", y="(혈청지오티)ALT")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="음주여부")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="허리둘레")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="음주여부", size="체중(5Kg 단위)") # Size를 구분하여 작성가능

Example #33

0

Show file

# importing packages
import seaborn as sns
import matplotlib.pyplot as plt

# Fetch seaborn's "tips" example dataset.
tips = sns.load_dataset("tips")

# Swarm of bill totals for each day, drawn with marker size 5.
sns.swarmplot(data=tips, x="day", y="total_bill", size=5)
plt.show()

Example #34

0

Show file

        'pct_assigned_GRCh38', 'pct_remain_after_dedupe_1',
        'pct_remain_after_dedupe_2'
    ]

    meta_all = []
    for k, v in meta.items():
        this = v[cols_to_keep]
        this.insert(1, 'batch', k)
        meta_all.append(this)
    meta_all = pd.concat(meta_all, axis=0)
    meta_all.insert(5, 'assigned_GRCh38',
                    meta_all.read_count * meta_all.pct_assigned_GRCh38 / 100.)
    meta_all.to_excel(os.path.join(outdir, "metadata_all.xlsx"))

    # compare raw read counts
    ax = sns.swarmplot(data=meta_all, x='batch', y='read_count')
    ax.figure.savefig(os.path.join(outdir, 'raw_read_counts.png'), dpi=200)
    ax.cla()

    # mapped read counts
    ax = sns.swarmplot(data=meta_all, x='batch', y='uniquely_mapped_GRCh38')
    ax.figure.savefig(os.path.join(outdir, 'uniquely_mapped_read_counts.png'),
                      dpi=200)
    ax.cla()

    # assigned read counts
    ax = sns.swarmplot(data=meta_all, x='batch', y='assigned_GRCh38')
    ax.figure.savefig(os.path.join(outdir, 'assigned_read_counts.png'),
                      dpi=200)
    ax.cla()

Example #35

0

Show file

File: horizontal_boxplot.py Project: zar92/seaborn

import matplotlib.pyplot as plt

sns.set(style="ticks")

# Canvas whose x axis is log-scaled before anything is drawn on it.
f, ax = plt.subplots(figsize=(7, 6))
ax.set_xscale("log")

# Example "planets" dataset shipped via seaborn's loader.
planets = sns.load_dataset("planets")

# Horizontal boxes of distance per detection method; whiskers span the
# full 0-100 percentile range.
sns.boxplot(data=planets, x="distance", y="method",
            whis=[0, 100], palette="vlag")

# Every observation as a small grey dot on top of the boxes.
sns.swarmplot(data=planets, x="distance", y="method",
              size=2, color=".3", linewidth=0)

# Cosmetic clean-up: vertical grid lines, no y label, trimmed spines.
ax.xaxis.grid(True)
ax.set_ylabel("")
sns.despine(trim=True, left=True)

Example #36

0

Show file

File: 2019_KBO_best_player.py Project: seunnnc1129/python_ml_data_analysis

matplotlib.rcParams['axes.unicode_minus'] = False

# In[47]:

# 팀별 선수 출루율 분포 boxplot이용하여 알아보자

import seaborn as sns

sns.boxplot(data=player_stat, x='팀', y='출루율')

# In[48]:

# 팀별 선수 출루율 swarmplot과 boxplot같이 사용하여 살펴보기

sns.boxplot(data=player_stat, x='팀', y='출루율')
sns.swarmplot(data=player_stat, x='팀', y='출루율')

# In[59]:

# swarmplot과 boxplot같이 사용하면 색상이 겹쳐 구분하기 어려움
# 이럴경우 boxplot 색상 제거하고 간단하게 표시하면 깔끔하게 표현가능
# showcaps = False --> 박스 상/하단 가로라인 보이지 않게 하기
# whiskerprops{ 'linewidth' : 0}  --> 박스 상/하단 세로라인 보이지 않게 하기
# showfliers = False  --> 박스 범위 벗어난 아웃라이어 표시하지 않기
# boxprops = { 'facecolor' : 'None' }  --> 박스 색상 지우기

sns.boxplot(data=player_stat,
            x='팀',
            y='출루율',
            showcaps=False,
            whiskerprops={'linewidth': 0},

Example #37

0

Show file

File: pdm04_st_mid_exam.py Project: kimchaeyoung-student/pdm04

def main():
    """Streamlit page: exploratory data analysis of the Pima diabetes CSV.

    Downloads the dataset from GitHub, shows it, and then renders a series of
    optional sections (each guarded by a checkbox): summary statistics, class
    balance, univariate plots, correlation views, pair plots, and finally a
    violin plot and swarm plot of the standardised features.

    Several statements below are bare expressions (for example ``df.shape``).
    NOTE(review): these appear to rely on Streamlit's "magic" rendering of
    bare expressions -- keep them as expression statements.

    Takes no arguments and returns None; all output goes to the Streamlit page.
    """
    # Sidebar
    st.sidebar.header("About -")
    st.sidebar.info("pdm04_st_Mid_exam")

    # Title
    st.title("Mid exam - pdm04, 김채영")
    st.header("- EDA of Pima diabetes data -")

    # Silence the warning Streamlit emits for argument-less st.pyplot() calls.
    st.set_option('deprecation.showPyplotGlobalUse', False)

    # Get the data from github
    df = pd.read_csv("https://github.com/Redwoods/Py/raw/master/pdm2020/my-note/py-pandas/data/diabetes.csv")

    st.dataframe(df)

    # Summary statistics and null-value clean-up. Rows with missing values are
    # only dropped when this checkbox is ticked.
    if st.checkbox("Show Data description"):
        st.dataframe(df.describe())
        # shape
        st.subheader("shape")
        df.shape

        #
        st.subheader("Check & cleaning data")
        df.isnull().values.any(), df.isna().sum()

        # Named `columns` rather than `vars` so the builtin is not shadowed.
        columns = df.columns
        st.write(columns)
        df = df[columns].dropna()
        df.shape

    if st.checkbox("Skew of attribute distributions"):
        skew = df.skew()
        st.write(skew)
        st.markdown('- 데이터 왜곡도')

    st.markdown("* * *")

    #
    st.header("- Visualizing data -")

    #
    st.subheader("Check the balance of classes in the data through plot")
    if st.checkbox("Outcome plot"):
        classes=df.Outcome
        # Pass the series by keyword: recent seaborn releases make x/y
        # keyword-only, so a positional series is no longer treated as x.
        sns.countplot(x=classes, label='count')
        st.pyplot()
        # Look the counts up by class label instead of unpacking
        # value_counts(), which is ordered by frequency and fails to unpack
        # unless exactly two classes are present.
        counts = classes.value_counts()
        nDB = counts.get(0, 0)
        DB = counts.get(1, 0)
        st.write('False: non-diabetes',nDB)
        st.write('True: diabetes',DB)

        classes.value_counts(), type(classes)
        st.text("0 : 정상인, 1 : 당뇨병 환자")

    st.markdown("* * *")

    #
    st.subheader("Show the data as a chart")
    if st.checkbox("chart"):
        st.line_chart(df)
    
    st.markdown("* * *")

    #
    st.subheader("Univariate plots:")

    #
    if st.checkbox("Histograms"):
        st.subheader("Histograms")
        plt.rcParams['figure.figsize'] = [12, 10] # set the figure size 
        st.write(df.hist())
        st.pyplot()
    
    if st.checkbox("Density Plots"):
        st.subheader("Density Plots")
        st.write(df.plot(kind='density', subplots=True, layout=(3,3), sharex=False))
        st.pyplot()

    if st.checkbox("Box and Whisker Plots"):
        st.subheader("Box and Whisker Plots")
        st.write(df.plot(kind= 'box', subplots=True, layout=(3,3), sharex=False, sharey=False))
        st.pyplot()

    st.markdown("* * *")

    #
    st.subheader("Multivariate Plots:")

    #
    if st.checkbox("Correlation plot"):
        st.subheader("Correlation plot")
        df.corr()
        plt.figure(figsize=(12,10))
        sns.heatmap(df.corr(),annot=True, cmap= "RdYlGn", vmin=-1, vmax=1)
        st.pyplot()
    
    if st.checkbox("Compute correlation matrix"):
        st.subheader("Correlations of attributes in the data")
        correlations = df.corr(method = 'pearson')
        st.write(correlations)
        st.markdown('- 값이 1에 가까울수록 상관성이 있음!')
    
    if st.checkbox("result"):
        st.markdown('- 상관성 분석 결과\n'
            '   * Age vs. Pregnancies : 0.54\n'
            '   * Glucose vs. Outcome : 0.47\n'
            '   * SkinThickness vs. Insulin : 0.44\n'
            '   * SkinThickness vs. BMI : 0.39\n')
        st.markdown('- 상관성이 높은 변수들에 대한 좀 더 자세한 시각화가 필요하다.')

    st.markdown("* * *")

    #
    # Import required package 
    from pandas.plotting import scatter_matrix
    plt.rcParams['figure.figsize'] = [12, 12]

    if st.checkbox("Scatter Plot Matrix"):
        st.subheader("Scatter Plot Matrix")
        scatter_matrix(df)
        # No plt.show() here: the figure is handed to Streamlit by st.pyplot().
        st.pyplot()

    if st.checkbox("Scatter Plot_1"):
        st.subheader("Scatter Plot")
        sns.pairplot(df, hue="Outcome", markers=["o", "s"],palette="husl")
        st.pyplot()

    if st.checkbox("Scatter Plot_2"):
        st.subheader("0, 1을 noDM, DM으로 변경")
        # Work on a copy so the numeric Outcome column of df stays intact.
        df_temp = df.copy()
        df_temp['Outcome'] = df_temp['Outcome'].replace([0, 1],['noDM', 'DM'])
        sns.pairplot(df_temp, hue='Outcome', markers=["o", "s"],palette="husl")
        st.pyplot()

    st.markdown("* * *")

    #
    if st.checkbox("6 high correlation"):
        st.subheader("상관성이 높은 6개의 특성에 대한 산포도")
        high_corr = ['Pregnancies', 'Glucose', 'SkinThickness', 'Insulin', 'BMI','Age', 'Outcome']
        df_temp2 = df.copy()
        df_temp2['Outcome'] = df_temp2['Outcome'].replace([0, 1],['noDM', 'DM'])
        sns.pairplot(df_temp2[high_corr], hue='Outcome')
        st.pyplot()
    
    if st.checkbox("3 high correlation"):
        st.subheader("상관성이 높은 3개의 특성에 대한 산포도")
        highest_corr = ['Pregnancies', 'Age', 'Outcome']
        df_temp3 = df.copy()
        df_temp3['Outcome'] = df_temp3['Outcome'].replace([0, 1],['noDM', 'DM'])
        sns.pairplot(df_temp3[highest_corr], hue='Outcome')
        st.pyplot()

    st.markdown("* * *")

    #
    st.subheader("Advanced plots:")

    #
    if st.checkbox("Standarization of data and Violinplot"):
        st.markdown('- Standarization of data (Normalization)')
        # Column-wise z-score of every column (Outcome included).
        df_n = (df - df.mean())/df.std()
        df_n

        # Re-attach the raw Outcome to the first eight standardised columns.
        y=df.Outcome
        df2=pd.concat([y, df_n.iloc[:,0:8]], axis=1)
        y.shape,df2.shape

        # Long format: one row per (Outcome, feature, value).
        df3=pd.melt(df2,id_vars='Outcome', var_name='features',value_name='values')
        df3.head(), df3.shape
        
        st.subheader("Violinplot")
        plt.figure(figsize=(10,10))
        sns.violinplot(x='features', y='values', hue='Outcome', data=df3, split=True, inner='quart')
        plt.xticks(rotation=45)
        st.pyplot()

        # Nested under the violin section because it reuses df3.
        if st.checkbox("Customizing seaborn plot"):
            st.subheader("Customizing seaborn plot")
            sns.set(style='whitegrid', palette='muted')
            plt.figure(figsize=(10,10))
            sns.swarmplot(x='features', y='values', hue='Outcome', data=df3)
            plt.xticks(rotation=45)
            st.pyplot()

    st.markdown("* * *")

Example #38

0

Show file

File: 6_analysis_of_individual_variables.py Project: rpplayground/CS982

    "Life expectancy at birth, total (years)",
    "Development of Life Expectancy by Region\nby Year since 1960",
    y_scale="linear")

#%%
f, ax = plt.subplots(figsize=(10, 9))
sns.set_style("ticks", {
    'axes.grid': True,
    'grid.color': '.8',
    'grid.linestyle': '-'
})
plt.rcParams.update({'axes.titlesize' : 18, 'lines.linewidth' : 3,\
    'axes.labelsize' : 16, 'xtick.labelsize' : 16, 'ytick.labelsize' : 16})
plt.title("Development of Life Expectancy by Country\nby Decade since 1960",
          fontdict={"fontsize": 20})
sns.swarmplot(x="Decade", y="Life expectancy at birth, total (years)", hue="Region",\
    palette=region_palette, data=mean_by_country_and_decade)

#%% [markdown]
#### Conclusions - Development of Life Expectancy Over Time
#The following observations can be made from the data above:
# - The gap in life expectancy has closed (more than halved) between 6 of the 7 regions;
# - Meanwhile life expectancy for the Sub-Saharan Africa region has not improved at the same rate, mainly as a result of a plateau in the 1990s;
# - The net result is that the gap between those countries with the worst and best record for life expectancy has not closed appreciably since 1960.
#
#%% [markdown]
### Stage 6.5 - Analysing Gross Domestic Product (GDP)
#Using a number of techniques to get a feel for the GDP data:
# - Looking at top 10 and bottom 10 countries in 2018;
# - Distribution of data by region in 2018;
# - Analysing how it has developed over time since 1960.
#

Example #39

0

Show file

def plot():
    """Draw the SPADE 4-unit motif summary figures from ``spade_stats.p``.

    Loads the pickled statistics (keys ``congru_stats``, ``incongru_stats``
    and ``candidate_per_sess``) from the current working directory and writes
    these PDFs there:

    * ``spade_4su_pattern_density.pdf`` -- normalised motif density bars
    * ``4su_candi_count.pdf`` -- candidate counts per session
    * ``spade_4su_pattern_correct_error.pdf`` and
      ``spade_4su_raw_pattern_correct_error.pdf`` -- correct vs. error trials
    * ``spade_4su_pattern_prefered_nonprefered.pdf`` and
      ``spade_4su_raw_pattern_prefered_nonprefered.pdf`` -- preferred vs.
      non-preferred sample
    * ``spade_4su_pattern_selectivity_index.pdf`` -- selectivity index swarm

    Takes no arguments and returns None. It prints the Wilcoxon signed-rank
    result for the final comparison and calls ``plt.show()`` once.
    """
    ###################

    # Font type 42 embeds TrueType fonts in PDF/PS output.
    rcParams['pdf.fonttype'] = 42
    rcParams['ps.fonttype'] = 42
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Arial']

    ######################

    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    # The context manager closes the file handle once the data is loaded.
    with open('spade_stats.p', 'rb') as stats_file:
        r = pickle.load(stats_file)
    congru_stats = r['congru_stats']
    incongru_stats = r['incongru_stats']
    candidate_per_sess = r['candidate_per_sess']
    ######################

    # Per-session motif density: number of motifs recorded for the session
    # divided by its candidate count. Sessions with 1000 or fewer candidates
    # are left out. Each entry reads as cs[0] = session id, cs[1] = congruent
    # candidates, cs[2] = incongruent candidates.
    congru_dens = []
    incongru_dens = []
    for cs in candidate_per_sess:
        if cs[1] > 1000:
            congru_dens.append(
                np.sum(np.array(congru_stats['sess_ids']) == cs[0]) / cs[1])
        if cs[2] > 1000:
            incongru_dens.append(
                np.sum(np.array(incongru_stats['sess_ids']) == cs[0]) / cs[2])

    # NOTE(review): the bootstrap intervals are computed but not used below.
    congru_boot = boot.ci(congru_dens, np.mean, n_samples=1000)
    incongru_boot = boot.ci(incongru_dens, np.mean, n_samples=1000)

    congru_sem = np.std(congru_dens) / np.sqrt(len(congru_dens))
    incongru_sem = np.std(incongru_dens) / np.sqrt(len(incongru_dens))

    # p_value = permutation_test(congru_dens,incongru_dens,method='approximate',num_rounds=10000)

    # Bars are normalised to the incongruent mean, so mm[0] is always 1.
    (fh, ax) = plt.subplots(1, 1, figsize=(1.5 / 2.54, 4 / 2.54), dpi=300)
    mm = [np.mean(incongru_dens), np.mean(congru_dens)
          ] / np.mean(incongru_dens)
    ax.bar(1, mm[0], color='k', edgecolor='k')
    ax.bar(2, mm[1], color='w', edgecolor='k')
    ax.errorbar([1, 2],
                mm,
                np.hstack((incongru_sem, congru_sem)) / np.mean(incongru_dens),
                color='none',
                ecolor='grey',
                capsize=3)
    ax.set_yscale('log')
    ax.set_ylim([1e-1 * 2, 1e2])
    ax.set_ylabel('Norm. motif density')
    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Incongru.', 'Congruent'], rotation=45, ha='right')
    # plt.close('all')
    fh.savefig('spade_4su_pattern_density.pdf', bbox_inches='tight')

    ### candidates
    congru_candi = []
    incongru_candi = []
    for cs in candidate_per_sess:
        if cs[1] > 1000:
            congru_candi.append(cs[1])
        if cs[2] > 1000:
            incongru_candi.append(cs[2])

    rcParams['pdf.fonttype'] = 42
    rcParams['ps.fonttype'] = 42
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Arial']

    (fh, ax) = plt.subplots(1, 1, figsize=(1 / 2.54, 4 / 2.54), dpi=300)

    # Jittered raw counts: incongruent around x=1, congruent around x=3.
    ax.scatter(np.random.random(len(incongru_candi)) * 0.2 + 0.9,
               incongru_candi,
               s=4,
               c='k',
               alpha=0.5,
               edgecolors='none')

    ax.scatter(np.random.random(len(congru_candi)) * 0.2 + 2.9,
               congru_candi,
               s=4,
               c='k',
               alpha=0.5,
               edgecolors='none')
    # Mean +/- SEM markers placed just outside each point cloud (x=4 and x=0).
    ax.errorbar(4,np.mean(congru_candi),\
                    np.std(congru_candi)/np.sqrt(len(congru_candi)),
                    fmt='ro',ecolor='r',elinewidth=0.5,capsize=2,ms=4,mfc='none')

    ax.errorbar(0,np.mean(incongru_candi),\
                    np.std(incongru_candi)/np.sqrt(len(incongru_candi)),
                    fmt='ro',ecolor='r',elinewidth=0.5,capsize=2,ms=4,mfc='none')

    ax.set_yscale('log')
    ax.set_xlim([-1, 5])
    ax.set_xticks([])
    ax.set_yticks([1000, 100000, 10000000])
    plt.show()

    fh.savefig('4su_candi_count.pdf', bbox_inches='tight')
    # NOTE(review): the rank-sum result is computed but neither stored nor printed.
    stats.ranksums(incongru_candi, congru_candi)

    # for patt in congru_stats['pertrial']:
    #     [np.mean(x) for x in patt]
    #     pass

    # breakpoint()
    ####################

    # Correct vs. error trials, per-spike rates. Columns 0/1 and 2/3 of
    # 'perHz_mm' are plotted as (y, x) pairs; points with p < 0.05 in the
    # matching p-value column are red, all others silver.
    # NOTE(review): presumably the two pairs are sample 1 and sample 2 -- confirm.
    s1sigsel = np.array(congru_stats['perHz_pvalues'])[:, 1] < 0.05
    s2sigsel = np.array(congru_stats['perHz_pvalues'])[:, 2] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    # error on yaxis
    ax.scatter(np.array(congru_stats['perHz_mm'])[np.logical_not(s1sigsel), 1],
               np.array(congru_stats['perHz_mm'])[np.logical_not(s1sigsel), 0],
               s=1,
               c='silver',
               marker='.',
               alpha=0.4)

    ax.scatter(np.array(congru_stats['perHz_mm'])[np.logical_not(s2sigsel), 3],
               np.array(congru_stats['perHz_mm'])[np.logical_not(s2sigsel), 2],
               s=1,
               c='silver',
               marker='.',
               alpha=0.4)

    ax.scatter(np.array(congru_stats['perHz_mm'])[s1sigsel, 1],
               np.array(congru_stats['perHz_mm'])[s1sigsel, 0],
               s=1,
               c='r',
               marker='.',
               alpha=0.4)

    ax.scatter(np.array(congru_stats['perHz_mm'])[s2sigsel, 3],
               np.array(congru_stats['perHz_mm'])[s2sigsel, 2],
               s=1,
               c='r',
               marker='.',
               alpha=0.4)
    # Identity line for reference.
    ax.plot([0, 0.26], [0, 0.26], '--k')

    ax.set_yticks([0, 0.1, 0.2])
    ax.set_xticks([0, 0.1, 0.2])
    ax.set_xlabel('patterns / spike / s, error trial')
    ax.set_ylabel('patterns / spike / s, correct trial')
    ax.set_xlim((0, 0.26))
    ax.set_ylim((0, 0.26))
    fh.savefig('spade_4su_pattern_correct_error.pdf', bbox_inches='tight')
    #############################

    # Same comparison on the raw counts in 'mm', divided by 6.
    # NOTE(review): the /6 presumably converts a 6 s window to a per-second
    # rate (axis labels say "patterns / s") -- confirm.
    s1sigsel = np.array(congru_stats['motif_pvalues'])[:, 1] < 0.05
    s2sigsel = np.array(congru_stats['motif_pvalues'])[:, 2] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    # error on yaxis
    ax.scatter(np.array(congru_stats['mm'])[np.logical_not(s1sigsel), 1] / 6,
               np.array(congru_stats['mm'])[np.logical_not(s1sigsel), 0] / 6,
               s=1,
               c='silver',
               marker='.',
               alpha=0.4)

    ax.scatter(np.array(congru_stats['mm'])[np.logical_not(s2sigsel), 3] / 6,
               np.array(congru_stats['mm'])[np.logical_not(s2sigsel), 2] / 6,
               s=1,
               c='silver',
               marker='.',
               alpha=0.4)

    ax.scatter(np.array(congru_stats['mm'])[s1sigsel, 1] / 6,
               np.array(congru_stats['mm'])[s1sigsel, 0] / 6,
               s=1,
               c='r',
               marker='.',
               alpha=0.4)

    ax.scatter(np.array(congru_stats['mm'])[s2sigsel, 3] / 6,
               np.array(congru_stats['mm'])[s2sigsel, 2] / 6,
               s=1,
               c='r',
               marker='.',
               alpha=0.4)
    ax.plot([0, 3.6], [0, 3.6], '--k')

    ax.set_yticks(np.arange(0, 4))
    ax.set_xticks(np.arange(0, 4))
    ax.set_xlabel('patterns / s, error trial')
    ax.set_ylabel('patterns / s, correct trial')
    ax.set_xlim((0, 3.6))
    ax.set_ylim((0, 3.6))
    fh.savefig('spade_4su_raw_pattern_correct_error.pdf', bbox_inches='tight')

    #############################
    # Preferred vs. non-preferred sample, per-spike rates. For patterns with
    # prefered_samp == 1, column 0 goes on y and column 2 on x; for
    # prefered_samp == 2 the two columns swap.
    s1sel = np.array(congru_stats['prefered_samp']) == 1
    s2sel = np.array(congru_stats['prefered_samp']) == 2
    sigsel = np.array(congru_stats['perHz_pvalues'])[:, 0] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    #prefer 1, 1 on yaxis
    ax.scatter(np.array(congru_stats['perHz_mm'])[
        np.logical_and(s1sel, np.logical_not(sigsel)), 2],
               np.array(congru_stats['perHz_mm'])[
                   np.logical_and(s1sel, np.logical_not(sigsel)), 0],
               s=1,
               c='silver',
               marker='.',
               alpha=0.4)

    ax.scatter(np.array(congru_stats['perHz_mm'])[
        np.logical_and(s2sel, np.logical_not(sigsel)), 0],
               np.array(congru_stats['perHz_mm'])[
                   np.logical_and(s2sel, np.logical_not(sigsel)), 2],
               s=1,
               c='silver',
               marker='.',
               alpha=0.4)

    ax.scatter(
        np.array(congru_stats['perHz_mm'])[np.logical_and(s1sel, sigsel), 2],
        np.array(congru_stats['perHz_mm'])[np.logical_and(s1sel, sigsel), 0],
        s=1,
        c='r',
        marker='.',
        alpha=0.4)

    ax.scatter(
        np.array(congru_stats['perHz_mm'])[np.logical_and(s2sel, sigsel), 0],
        np.array(congru_stats['perHz_mm'])[np.logical_and(s2sel, sigsel), 2],
        s=1,
        c='r',
        marker='.',
        alpha=0.4)
    ax.plot([0, 0.26], [0, 0.26], '--k')

    ax.set_yticks([0, 0.1, 0.2])
    ax.set_xticks([0, 0.1, 0.2])
    ax.set_xlabel('patterns / spike / s, non-prefered')
    ax.set_ylabel('patterns / spike / s, prefered')
    ax.set_xlim((0, 0.26))
    ax.set_ylim((0, 0.26))
    fh.savefig('spade_4su_pattern_prefered_nonprefered.pdf',
               bbox_inches='tight')

    ###########selectivity
    s1sel = np.array(congru_stats['prefered_samp']) == 1
    s2sel = np.array(congru_stats['prefered_samp']) == 2
    sigsel = np.array(congru_stats['perHz_pvalues'])[:, 0] < 0.05

    # Selectivity index = (preferred - non-preferred) / (preferred + non-preferred),
    # computed once from the raw counts and once from the per-spike rates.
    prefered_raw = np.hstack(
        (np.array(congru_stats['mm'])[s1sel, 0], np.array(
            congru_stats['mm'])[s2sel, 2])) / 6
    nonpref_raw = np.hstack(
        (np.array(congru_stats['mm'])[s1sel, 2], np.array(
            congru_stats['mm'])[s2sel, 0])) / 6
    # mm=(np.mean(nonpref),np.mean(prefered))
    # pref_boot=boot.ci(prefered, np.mean,n_samples=1000)
    # npref_boot=boot.ci(nonpref, np.mean,n_samples=1000)
    selec_idx_raw = ((prefered_raw - nonpref_raw) /
                     (prefered_raw + nonpref_raw))

    prefered = np.hstack((np.array(congru_stats['perHz_mm'])[s1sel, 0],
                          np.array(congru_stats['perHz_mm'])[s2sel, 2])) / 6
    nonpref = np.hstack((np.array(congru_stats['perHz_mm'])[s1sel, 2],
                         np.array(congru_stats['perHz_mm'])[s2sel, 0])) / 6
    # perHz_mm=(np.mean(nonpref),np.mean(prefered))
    # pref_boot=boot.ci(prefered, np.mean,n_samples=1000)
    # npref_boot=boot.ci(nonpref, np.mean,n_samples=1000)
    selec_idx = ((prefered - nonpref) / (prefered + nonpref))

    # Stack both index sets; the x value (1 or 2) labels which set a point is from.
    swmy = np.hstack((selec_idx_raw, selec_idx))
    swmx = np.hstack(
        (np.ones_like(selec_idx_raw), np.ones_like(selec_idx) * 2))

    (fh, ax) = plt.subplots(1, 1, figsize=(15 / 2.54, 15 / 2.54), dpi=300)
    # ax.scatter(np.ones_like(selec_idx),selec_idx)
    ax = sns.swarmplot(x=swmx, y=swmy, size=1, ax=ax, color='silver')
    # Unfilled box outline with caps, fliers and whiskers hidden.
    ax = sns.boxplot(x=swmx,
                     y=swmy,
                     showcaps=False,
                     boxprops={'facecolor': 'None'},
                     showfliers=False,
                     whiskerprops={'linewidth': 0},
                     ax=ax)

    ax.set_ylabel('Selectivity index')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Patterns / s', 'Patterns / spike'],
                       rotation=45,
                       ha='right')

    fh.savefig('spade_4su_pattern_selectivity_index.pdf', bbox_inches='tight')

    #####################################

    # Preferred vs. non-preferred on the raw counts, coloured by significance.
    s1sel = np.array(congru_stats['prefered_samp']) == 1
    s2sel = np.array(congru_stats['prefered_samp']) == 2
    sigsel = np.array(congru_stats['motif_pvalues'])[:, 0] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    #prefer 1, 1 on yaxis
    ax.scatter(np.array(
        congru_stats['mm'])[np.logical_and(s1sel, np.logical_not(sigsel)), 2] /
               6,
               np.array(congru_stats['mm'])[
                   np.logical_and(s1sel, np.logical_not(sigsel)), 0] / 6,
               s=1,
               c='silver',
               marker='.',
               alpha=0.4)

    ax.scatter(np.array(
        congru_stats['mm'])[np.logical_and(s2sel, np.logical_not(sigsel)), 0] /
               6,
               np.array(congru_stats['mm'])[
                   np.logical_and(s2sel, np.logical_not(sigsel)), 2] / 6,
               s=1,
               c='silver',
               marker='.',
               alpha=0.4)

    ax.scatter(
        np.array(congru_stats['mm'])[np.logical_and(s1sel, sigsel), 2] / 6,
        np.array(congru_stats['mm'])[np.logical_and(s1sel, sigsel), 0] / 6,
        s=1,
        c='r',
        marker='.',
        alpha=0.4)

    ax.scatter(
        np.array(congru_stats['mm'])[np.logical_and(s2sel, sigsel), 0] / 6,
        np.array(congru_stats['mm'])[np.logical_and(s2sel, sigsel), 2] / 6,
        s=1,
        c='r',
        marker='.',
        alpha=0.4)
    ax.plot([0, 3.6], [0, 3.6], '--k')

    ax.set_yticks(np.arange(0, 4))
    ax.set_xticks(np.arange(0, 4))
    ax.set_xlabel('patterns / s, non-prefered')
    ax.set_ylabel('patterns / s, prefered')
    ax.set_xlim((0, 3.6))
    ax.set_ylim((0, 3.6))
    fh.savefig('spade_4su_raw_pattern_prefered_nonprefered.pdf',
               bbox_inches='tight')

    ### for comparison of r rather than fr

    # Same axes as the previous figure, but every point is drawn in black
    # regardless of significance (sigsel is computed but not used here).
    s1sel = np.array(congru_stats['prefered_samp']) == 1
    s2sel = np.array(congru_stats['prefered_samp']) == 2
    sigsel = np.array(congru_stats['motif_pvalues'])[:, 0] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    #prefer 1, 1 on yaxis
    ax.scatter(np.array(congru_stats['mm'])[s1sel, 2] / 6,
               np.array(congru_stats['mm'])[s1sel, 0] / 6,
               s=1,
               c='k',
               marker='.',
               alpha=1)

    ax.scatter(np.array(congru_stats['mm'])[s2sel, 0] / 6,
               np.array(congru_stats['mm'])[s2sel, 2] / 6,
               s=1,
               c='k',
               marker='.',
               alpha=1)

    ax.plot([0, 3.6], [0, 3.6], '--', color='silver')

    # Paired non-preferred (xx) and preferred (yy) raw counts for the test.
    xx = np.hstack([
        np.array(congru_stats['mm'])[s1sel, 2],
        np.array(congru_stats['mm'])[s2sel, 0]
    ])
    yy = np.hstack([
        np.array(congru_stats['mm'])[s1sel, 0],
        np.array(congru_stats['mm'])[s2sel, 2]
    ])
    # print() rejects unknown keyword arguments, so bind the result first.
    signedstat = stats.wilcoxon(xx, yy)
    print(signedstat)

    # (slope, intercept,rvalue,pvalue,stderr)=stats.linregress(xx,yy)
    # ax.plot([0,3.6],[intercept/6,slope*3.6+intercept/6],'--r')

    ax.set_yticks(np.arange(0, 4))
    ax.set_xticks(np.arange(0, 4))
    ax.set_xlabel('patterns / s, non-prefered')
    ax.set_ylabel('patterns / s, prefered')
    ax.set_xlim((0, 3.6))
    ax.set_ylim((0, 3.6))
    # NOTE(review): same filename as the significance-coloured figure saved
    # above, so this overwrites it -- confirm that is intended.
    fh.savefig('spade_4su_raw_pattern_prefered_nonprefered.pdf',
               bbox_inches='tight')

Example #40

0

Show file

# z-scored pupil signal from the project helper.
# NOTE(review): assumes pupil_z has one value per row of pupil_filt (it is
# stacked next to the row means below) -- confirm against the helper.
pupil_z = pup.ztransform_pupil_size(pupil_filt)
# Long-format values: row-wise means of the filtered pupil trace, followed by
# the z-scored values.
pup_dat = np.hstack((np.mean(pupil_filt, axis=1), pupil_z))
# Parallel label columns marking which half each value came from.
label = ['pupil'] * len(pupil_z) + ['pupil_z'] * len(pupil_z)
label2 = [0] * len(pupil_z) + [1] * len(pupil_z)
# `performance` is repeated so each half carries the same 'correct' flags.
df = pd.DataFrame({
    'pupil': pup_dat,
    'type': label,
    'label': label2,
    'correct': np.hstack((performance, performance))
})

sns.set_context('talk')
# Two panels side by side: raw means on the left, z-scores on the right.
fig, ax = plt.subplots(1, 2, figsize=(15, 8))
# Left panel: points coloured by 'correct' ...
sns.swarmplot(y='pupil',
              x='type',
              hue='correct',
              data=df[df['type'] == 'pupil'],
              ax=ax[0],
              alpha=0.7)
# ... with a grey box on the same axes; whiskers at the 20th/80th percentiles,
# fliers hidden.
sns.boxplot(y='pupil',
            x='type',
            data=df[df['type'] == 'pupil'],
            ax=ax[0],
            showfliers=False,
            color='gray',
            whis=[20, 80])
# Right panel: the z-scored values.
sns.swarmplot(y='pupil',
              x='type',
              hue='correct',
              data=df[df['type'] == 'pupil_z'],
              ax=ax[1],
              alpha=0.7)

Example #41

0

Show file

# Optionally swap the two biotype labels (0 <-> 1).
if flip:
    input_df['biotype'] = np.abs(input_df['biotype']-1) # works because we only ever have 2 biotypes

# z-score each column with the project helper.
# NOTE(review): presumably relative to the healthy group -- confirm in zscore_by_group.
for col in cols:
    db[col] = zscore_by_group(input_df[col], labels, healthy_group)

# Long format: one row per (id, biotype, diagnosis, variable, value).
db = pd.melt(db, id_vars=['id', 'biotype', 'diagnosis'], value_vars=cols)

# show diagnostic distributions for each biotype separately
sns.set_style('white')
fig, (ax1, ax2) = plt.subplots(figsize=(10, 7), nrows=2, sharex=True)
plt.subplots_adjust(left=0.125, bottom=0.15, right=0.9, top=0.85, wspace=0.25, hspace=0.25)
plt.suptitle('Diagnosis distribution per biotype')

# Top panel: biotype 0, tick labels hidden because the x axis is shared.
sns.swarmplot(x="variable", y="value", hue="diagnosis", data=db.loc[db['biotype'] == 0], ax=ax1)
ax1.set_ylim([-4, 4])
ax1.set_title('Average-performing biotype')
ax1.set_xticklabels([], rotation=45, ha='right')
# Zero reference line spanning the first to the last major tick.
ax1.hlines(0,  ax1.xaxis.get_majorticklocs()[0],  ax1.xaxis.get_majorticklocs()[-1])

# Bottom panel: biotype 1, with the variable names as tick labels.
sns.swarmplot(x="variable", y="value", hue="diagnosis", data=db.loc[db['biotype'] == 1], ax=ax2)
ax2.set_ylim([-4, 4])
ax2.set_title('Poor-performing biotype')
ax2.set_xticklabels(names, rotation=45, ha='right')
# The tick locations are read from ax1; the x axis is shared between panels.
ax2.hlines(0,  ax1.xaxis.get_majorticklocs()[0],  ax1.xaxis.get_majorticklocs()[-1])

# Use pyplot directly: `sns.plt` is not part of seaborn's API and raises
# AttributeError on current releases.
plt.savefig('biotype_yscores_per_diagnosis_and_biotype.pdf')
plt.close()

Example #42

0

Show file

File: scatterplots.py Project: ilyakolb/protein-ML-benchmarking

    # box plots of rank differences (ML vs naive) of top hits
    fig = plt.figure()
    fig.suptitle(x_to_plot_gt + " and " + y_to_plot_gt)
    top_percents = [10, 20, 50, 100]  # percent above which to do cutoff
    for i, top_percent in enumerate(top_percents):

        ax = fig.add_subplot(2, 2, i + 1)
        top_ranknum = int(len(df_ranked) * (1 - top_percent / 100))

        df_ranked_top = df_ranked[df_ranked[x_to_plot_gt] > top_ranknum].copy()

        df_ranked_top['ML vs GT'] = np.abs(
            df_ranked_top[x_to_plot_gt] -
            df_ranked_top[x_to_plot_ML]) + np.abs(df_ranked_top[y_to_plot_gt] -
                                                  df_ranked_top[y_to_plot_ML])
        df_ranked_top['naive vs GT'] = np.abs(
            df_ranked_top[x_to_plot_gt] -
            df_ranked_top[x_to_plot_naive]) + np.abs(
                df_ranked_top[y_to_plot_gt] - df_ranked_top[y_to_plot_naive])
        df_comp = df_ranked_top.melt(value_vars=['ML vs GT', 'naive vs GT'],
                                     var_name='model',
                                     value_name='rank difference')
        ax = sns.swarmplot(x='model',
                           y='rank difference',
                           data=df_comp,
                           color=".25",
                           alpha=0.5)
        ax = sns.boxplot(x='model', y='rank difference', data=df_comp)
        ax.set_title("top {}%".format(top_percent))
        ax.set_xlabel('')
        plt.tight_layout()

Example #43

0

Show file

# Strip plot of Award_Amount (x axis) against Model Selected (y axis), with
# jitter enabled.
sns.stripplot(data=df,
              x='Award_Amount',
              y='Model Selected',
              jitter=True)

plt.show()



# Swarm plot of the same two columns, with the points coloured by Region.
sns.swarmplot(data=df,
              x='Award_Amount',
              y='Model Selected',
              hue='Region')

plt.show()




# Box plot of Award_Amount (x axis) for each Model Selected value (y axis).
sns.boxplot(data=df,
            x='Award_Amount',
            y='Model Selected')

plt.show()
# Clear the current figure after it has been shown.
plt.clf()

Example #44

0

Show file

File: Program_2.py Project: AnkusManish/Machine-Learning

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 15 22:15:25 2019

@author: ankusmanish
"""

#Write a program to draw swarm plot of “total bill” against day for a dataset given in url

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sea

# Load the tips data from a CSV file in the working directory; the columns
# 'day' and 'total_bill' are read below.
data = pd.read_csv('tips.csv')

# Categorical variable for the x axis, numeric variable for the y axis.
x = data['day']
y = data['total_bill']

plt.figure(figsize=(8, 8))
# Pass the vectors by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the positional form `swarmplot(x, y)` raises a TypeError.
sea.swarmplot(x=x, y=y)
plt.xlabel('Day', fontsize=20)
plt.ylabel('Total Bill', fontsize=20)
plt.show()

Example #45

0

Show file

File: AnovaOneWay.py Project: Mpretel/Python-san

    "https://reneshbedre.github.io/assets/posts/anova/onewayanova.txt",
    sep="\t")
# Reshape the wide dataframe (one column per treatment A-D) into long format,
# one row per observation, which is the layout the statsmodels formula below
# works on.
df_melt = pd.melt(df.reset_index(),
                  id_vars=['index'],
                  value_vars=['A', 'B', 'C', 'D'])
# Rename the melted columns to 'index', 'treatments' and 'value'.
df_melt.columns = ['index', 'treatments', 'value']

# %%
# Box plot of the value distribution per treatment, with the individual
# observations overlaid as a swarm, to eyeball differences between treatments.
import matplotlib.pyplot as plt
import seaborn as sns
ax = sns.boxplot(x='treatments', y='value', data=df_melt, color='#99c2a2')
ax = sns.swarmplot(x="treatments", y="value", data=df_melt, color='#7d0013')
plt.show()
# %%
import scipy.stats as stats
# stats.f_oneway takes one sample per group and returns the ANOVA F statistic
# and p-value.
fvalue, pvalue = stats.f_oneway(df['A'], df['B'], df['C'], df['D'])
print(fvalue, pvalue)
# 17.492810457516338 2.639241146210922e-05
# %%
# Get the ANOVA table in an R-like layout.
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Ordinary Least Squares (OLS) model with treatments as a categorical factor.
model = ols('value ~ C(treatments)', data=df_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

Example #46

0

Show file

# Number of paintings per nationality (boxen plot).
plt.figure(figsize=(20, 10))
sns.boxenplot(x="nationality",
              y="paintings",
              color="y",
              scale="linear",
              data=df1)
plt.title('Number of Painting in each nation')
# The x axis shows nationality, so label it like the nationality swarm plot
# below (it was previously labelled 'Age Group').
plt.xlabel('Nation')
plt.ylabel('Number of Painting')
plt.xticks(rotation=60)
plt.show()

# Number of paintings per genre (swarm plot).
plt.figure(figsize=(20, 10))
sns.swarmplot(x=df1['genre'], y=df1['paintings'], color="black")
plt.xticks(rotation=60)
plt.title('Number of Painting by Genre')
plt.xlabel('Genre')
plt.ylabel('Number of Painting')
plt.show()

# Number of paintings per nationality (swarm plot).
plt.figure(figsize=(20, 10))
sns.swarmplot(x=df1['nationality'], y=df1['paintings'], color="red")
plt.xticks(rotation=60)
plt.title('Number of Painting by Nation')
plt.xlabel('Nation')
plt.ylabel('Number of Painting')
plt.show()

Example #47

0

Show file

File: regular_vs_iono_treatments.py Project: ilyakolb/fastGCaMP_analysis

    ax1.legend(construct_legend)
    # Dashed black reference line at y = 100 across the whole time axis.
    ax1.plot(t, 100*np.ones_like(t), 'k--')
    
    # Limits are set through pyplot, i.e. on whichever axes is current here
    # (presumably ax1 -- confirm against the code above this fragment).
    plt.ylim([80, 105])
    plt.xlim([-0.2, 2])
    ax1.set_xlabel('Time (s)')
    
    
    # Percent-change plots, drawn in an inset axes placed inside ax1.
    ax2 = inset_axes(ax1, width="30%", height="40%", loc=4, borderpad=3)
    
    # Rows of df_percents belonging to the current construct.
    df_barplot = df_percents[df_percents['index']==construct]

    # Neither call below passes ax=, so both draw on the current axes --
    # presumably the inset just created; confirm.
    # The category order follows the key order of `colors`.
    sns.swarmplot(x = 'exp', 
              y='mean percent', 
              color='black',
              data=df_barplot,
              order=colors.keys())
    
    # Box plot of the same data; hue is the x variable itself with
    # dodge=False, and outliers are hidden (showfliers=False).
    sns.boxplot(x='exp', 
                y='mean percent', 
                data=df_barplot, 
                # palette=colors,
                color='white',
                whis=1.5,
                showfliers=False,
                dodge=False,
                hue='exp',
                palette=colors,
                order=colors.keys(),
                width=0.5)

Example #48

0

Show file

           value='100-percentile2',
           estimator=np.median)
sns.tsplot(dfd,
           'percentile1',
           'subj',
           condition='condition',
           value='100-percentile2',
           err_style="unit_traces",
           estimator=np.median)
plt.title('Rest before VS. Motor before')
plt.ylim(-1, 1.5)
plt.plot([0, 100], [100, 0], 'k--')
plt.show()

# Box plot of AUC per condition with the individual observations overlaid.
sns.boxplot(x='condition', y='auc', data=aucs)
sns.swarmplot(x='condition', y='auc', data=aucs, color='k', alpha=0.5)
print(aucs.loc[aucs['condition'] == 'exp'])
print(aucs.loc[aucs['condition'] == 'control'])
# Element-wise exp - control difference on the raw arrays (positional pairing,
# ignoring the index). `.values` replaces `.as_matrix()`, which was removed in
# pandas 1.0.
# NOTE(review): assumes both groups have the same length and row order.
print(aucs.loc[aucs['condition'] == 'exp', 'auc'].values -
      aucs.loc[aucs['condition'] == 'control', 'auc'].values)
from scipy.stats import ttest_ind, ttest_1samp, ttest_rel, wilcoxon, ranksums
# Independent-samples t-test between the two conditions.
print(
    ttest_ind(aucs.loc[aucs['condition'] == 'exp', 'auc'],
              aucs.loc[aucs['condition'] == 'control', 'auc']))
# Paired t-test on the same two samples.
print(
    ttest_rel(aucs.loc[aucs['condition'] == 'exp', 'auc'],
              aucs.loc[aucs['condition'] == 'control', 'auc']))
# Wilcoxon signed-rank test on the same two samples.
print(
    wilcoxon(aucs.loc[aucs['condition'] == 'exp', 'auc'],
             aucs.loc[aucs['condition'] == 'control', 'auc']))
print(

Example #49

0

Show file

import seaborn as sns
import matplotlib.pyplot as plt

#load iris data
# Load the "iris" example dataset through seaborn.
iris = sns.load_dataset("iris")

# One swarm of petal_length values per species.
sns.swarmplot(x="species", y="petal_length", data=iris)

# Display the figure.
plt.show()

Example #50

0

Show file

# coding=utf-8

# %matplotlib inline jupyter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats, integrate
import pandas as pd

# Global seaborn style, and a NumPy seed derived from the string "categorical".
sns.set(style="whitegrid", color_codes=True)
np.random.seed(sum(map(ord, "categorical")))
titanic = sns.load_dataset("titanic")
tips = sns.load_dataset("tips")
iris = sns.load_dataset("iris")

# Strip plots: default call, then with jitter requested explicitly.
sns.stripplot(x="day", y="total_bill", data=tips)
sns.stripplot(x="day", y="total_bill", data=tips, jitter=True)

# Swarm plots: plain, then coloured by sex.
sns.swarmplot(x="day", y="total_bill", data=tips)
sns.swarmplot(x="day", y="total_bill", hue="sex", data=tips)

# Box plot
sns.boxplot(x="day", y="total_bill", hue="time", data=tips)

# Violin plot
sns.violinplot(x="day", y="total_bill", hue="time", data=tips)

# Horizontal violins (numeric variable on x) with the two hue levels split.
sns.violinplot(x="total_bill", y="day", hue="time", data=tips, split=True)

Example #51

0

Show file

# In[ ]:

# Sales by week of year, one line per year.
pivoted = pd.pivot_table(df_raw, values='sales', columns='Year', index='Week')
pivoted.plot(figsize=(12, 12))

# In[ ]:

# Sales by day, one line per month.
pivoted = pd.pivot_table(df_raw, values='sales', columns='Month', index='Day')
pivoted.plot(figsize=(12, 12))

# In[ ]:

# Mean sales per (Year, Month, item), shown as a swarm per item coloured by
# month.
temp_1 = df_raw.groupby(['Year', 'Month',
                         'item'])['sales'].mean().reset_index()
plt.figure(figsize=(12, 8))
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.swarmplot(x='item', y='sales', data=temp_1, hue='Month')
# Place legend to the right
plt.legend(bbox_to_anchor=(1, 1), loc=2)

# In[ ]:

# In case the above plot is cluttered (which it is), try this (it creates a
# grid of Year vs Month):
#sns.factorplot('item', 'sales', data=temp_1, hue = 'Month', col='Year',row='Month', kind='swarm', size = 5);

# In[ ]:

# Mean sales per (Year, Month) as a scatter without a regression fit.
temp_1 = df_raw.groupby(['Year', 'Month'])['sales'].mean().reset_index()
# NOTE(review): lmplot is a figure-level function that creates its own figure,
# so this plt.figure() call presumably leaves an empty figure behind -- confirm
# before removing.
plt.figure(figsize=(12, 8))
sns.lmplot(x='Month', y='sales', data=temp_1, hue='Year', fit_reg=False)

# In[ ]:

Example #52

0

Show file

File: categorical_plots..py Project: ayoubBouziane/100_Days_of_ML_Code

# All calls pass x/y by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the positional form raises a TypeError there.

# Box plot
sns.boxplot(x='day', y='total_bill', data=tips)
sns.boxplot(x='day', y='total_bill', data=tips, hue='smoker')

# Violin plot
sns.violinplot(x='day', y='total_bill', data=tips)
sns.violinplot(x='day', y='total_bill', data=tips, hue='sex')
sns.violinplot(x='day', y='total_bill', data=tips, hue='sex', split=True)

# Strip plot
sns.stripplot(x='day', y='total_bill', data=tips)
sns.stripplot(x='day', y='total_bill', data=tips, jitter=True)
sns.stripplot(x='day', y='total_bill', data=tips, jitter=True, hue='sex')
# `dodge` is the current name of stripplot's former `split` parameter: it
# separates the hue levels along the categorical axis.
sns.stripplot(x='day',
              y='total_bill',
              data=tips,
              jitter=True,
              hue='sex',
              dodge=True)

# Swarm plot
sns.swarmplot(x='day', y='total_bill', data=tips)

# Swarm and violin plot
sns.violinplot(x='day', y='total_bill', data=tips)
sns.swarmplot(x='day', y='total_bill', data=tips, color='black')

# Categorical figure-level plot: `catplot` is the current name of the former
# `factorplot`, which is no longer available in recent seaborn releases.
sns.catplot(x='day', y='total_bill', data=tips, kind='bar')
sns.catplot(x='day', y='total_bill', data=tips, kind='violin')

Example #53

0

Show file

File: stats_slopes_an.py Project: nikolaims/nfb

# Grid of slope histograms: one row per group (row 0 = 'Real', row 1 = 'Mock')
# and one column per subject. The grid has 10 columns, so a group with more
# than 10 subjects would index past the last column.
fig, axes = plt.subplots(2, 10)
# Collects one row per (group, subject, day): the standard deviation of the
# slopes of that day.
stds = pd.DataFrame(columns=['std', 'group'])
for g, group in enumerate(['Real', 'Mock']):
    for s, subj in enumerate(df.loc[df.group == group, 'subj'].unique()):
        # Column titles are always written on the top row.
        axes[0, s].set_title('S'+str(s))
        for d, day in enumerate(df.loc[(df.group == group) & (df.subj == subj), 'day'].unique()):
            stds.loc[len(stds)] = {'std': df.loc[(df.group == group) & (df.subj == subj) & (df.day == day), 'slope'].std(), 'group': group}
        # Density-normalised histogram of all slopes of this subject, using
        # 50 bin edges evenly spaced between -0.7 and 0.7.
        axes[g, s].hist(df.loc[(df.group == group) & (df.subj == subj), 'slope'], np.linspace(-0.7, 0.7, 50), density=True)

axes[0, 0].set_ylabel('Real')
axes[1, 0].set_ylabel('Mock')

#sns.pairplot(df, 'subj', vars=['slope'])
plt.show()

# Median per-day standard deviation per group as bars, with the individual
# values overlaid in red.
sns.barplot(x='group', y='std', data=stds, estimator=np.median)
sns.swarmplot(x='group', y='std', data=stds, color='r')
plt.show()

# Kernel density estimate of the slopes of each group.
sns.kdeplot(df.loc[(df.group == 'Real'), 'slope'])
sns.kdeplot(df.loc[(df.group == 'Mock'), 'slope'])
plt.show()

from scipy.stats import *
# Bartlett and Levene tests on the slopes, followed by a normality test per
# group.
# NOTE(review): the second bartlett call and the second levene call compare
# 'Real' with itself -- presumably a sanity check or a copy-paste slip; confirm.
print(bartlett(df.loc[(df.group == 'Real'), 'slope'], df.loc[(df.group == 'Mock'), 'slope']))
print(bartlett(df.loc[(df.group == 'Real'), 'slope'], df.loc[(df.group == 'Real'), 'slope']))
print(levene(df.loc[(df.group == 'Real'), 'slope'], df.loc[(df.group == 'Mock'), 'slope']))
print(levene(df.loc[(df.group == 'Real'), 'slope'], df.loc[(df.group == 'Real'), 'slope']))
print(normaltest(df.loc[(df.group == 'Real'), 'slope']))
print(normaltest(df.loc[(df.group == 'Mock'), 'slope']))

Example #54

0

Show file

File: sb_01.py Project: sohan0310grd/Data_Visualization_Using_Python

# Load the titanic example dataset, keep a CSV copy and print it.
titanic=sb.load_dataset("titanic")
titanic.to_csv("db_titanic.csv",index=False)
print(titanic)
# Horizontal bars: age per embark town.
sb.barplot(x="age",y="embark_town",orient='h',data=titanic)
mpl.show()

# Vertical bars for the same two columns.
sb.barplot(x="embark_town",y="age",data=titanic)
mpl.show()

mpl.scatter("survived","age",data=titanic)
mpl.show()

sb.pointplot(x="sex",y="age",data=titanic)
mpl.show()

sb.swarmplot(x="pclass",y="age",data=titanic)
mpl.show()

# Sepal-length densities of two iris species on the same axes.
iris=sb.load_dataset("iris")
sb.set_style("darkgrid")
# The keyword must be lowercase `label`: matplotlib property names are
# case-sensitive in current releases, so `Label=` is rejected as an unknown
# property.
sb.kdeplot(iris.loc[(iris['species']=='setosa'),'sepal_length'],color='b',shade=True,label='setosa')
sb.kdeplot(iris.loc[(iris['species']=='virginica'),'sepal_length'],color='r',shade=True,label='virginica')
mpl.show()

# Passenger counts per class, split by the 'who' column.
sb.countplot(x='class',hue='who',data=titanic)
mpl.show()


# Passenger counts per sex, split by 'who', using the "PuRd" palette.
sb.countplot(x='sex',hue='who',data=titanic,palette="PuRd")
mpl.show()

Example #55

0

Show file

# Semicolon-separated file with decimal commas; the first column is read as
# the index and then moved back into a regular column.
df = pd.read_csv(filepath, sep=";", decimal=',', index_col=0)

df = df.reset_index()
#df["Normalized intensity"] =df.groupby(["Experiment", "Genotype"])["Mean intensity"].apply(lambda x: x/x.mean())
# Per-experiment mean of "Mean intensity", merged back onto every row so each
# measurement can be divided by the mean of its own experiment.
means_stds = df.groupby(['Experiment'
                         ])['Mean intensity'].agg('mean').reset_index()

means_stds = means_stds.rename(columns={"Mean intensity": "mean_norm"})
df = df.merge(means_stds, on=(["Experiment"]))
df["Normalized intensity"] = df["Mean intensity"] / df["mean_norm"]

# Box plot per genotype with the single measurements overlaid, coloured by
# experiment.
pal = sns.color_palette("viridis", 4)
g = sns.boxplot(y="Mean intensity", x="Genotype", data=df, order=["WT", "KO"])
g = sns.swarmplot(y="Mean intensity",
                  x="Genotype",
                  hue="Experiment",
                  data=df,
                  order=["WT", "KO"],
                  palette=pal)

plt.show()

# Independent-samples t-test between the WT and KO measurements.
cat1_wt = df[df['Genotype'] == 'WT']
cat1_KO = df[df['Genotype'] == 'KO']
print(ttest_ind(cat1_wt['Mean intensity'], cat1_KO['Mean intensity']))

# Means per (Experiment, Genotype). df2 keeps the grouped index and df3 is the
# flat version; the former bare `df2.reset_index()` call discarded its result
# and has been dropped.
df2 = df.groupby([df["Experiment"], df["Genotype"]]).mean()
df3 = df2.reset_index()
print("Plotting means ")
# `pal` from above is reused; it was previously rebuilt here with identical
# arguments.
g = sns.pointplot(y="Mean intensity",

Example #56

0

Show file

File: _Clustering_DMR_clustermap.py Project: ryansohny/STAD

# Diffusion pseudotime on dmr_t, then a regression plot of the resulting
# 'dpt_pseudotime' column against 'EpiBurden'.
sc.tl.dpt(dmr_t, n_branchings=0, n_dcs=15)

sns.lmplot(data=dmr_t.obs, x='dpt_pseudotime', y='EpiBurden')

# Reorder the DMR_leiden categories into sorted order and mark the
# categorical as ordered.
lin = tuple(sorted(list(dmr_t.obs['DMR_leiden'].values.unique())))
dmr_t.obs['DMR_leiden'] = dmr_t.obs['DMR_leiden'].cat.reorder_categories(list(lin), ordered=True)

# Fixed colour for each leiden cluster.
color_dict = {
    "leiden_A": "#d62728",
    "leiden_B": "#ff7f0e",
    "leiden_C": "#1f77b4",
    "leiden_D": "#2ca02c"
} # equivalent to dict(zip(list(dmr_t.obs['DMR_leiden'].value_counts().index), dmr_t.uns['DMR_leiden_colors']))

# EpiBurden per cluster: coloured boxes with the individual samples overlaid
# in dark grey.
sns.boxplot(data=dmr_t.obs, x='DMR_leiden', y='EpiBurden', palette=color_dict)
sns.swarmplot(data=dmr_t.obs, x='DMR_leiden', y='EpiBurden', color=".2")

# Expression table, space-separated, first column as index.
# NOTE(review): `rna` is assigned twice in a row, so the second read overwrites
# the first -- presumably alternative paths for different machines; confirm
# which one is wanted.
rna = pd.read_table("/mnt/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/STAD_SNUH_vst.txt", index_col=0, sep=' ')
rna = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/GENCODE_V24/STAD_SNUH_vst.txt", index_col=0, sep=' ')
# Prefix every column name with "X".
rna.columns = list(map(lambda x: "X" + x, rna.columns))

# DEG table indexed by its "ID" column, with the same "X" prefix on columns.
deg_tn_protein = pd.read_table("/mnt/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/STAD_SNUH_Tumor_leiden_vst_DEG_Leiden_A_D_protein.txt", index_col="ID")
deg_tn_protein.columns = list(map(lambda x: "X" + x, deg_tn_protein.columns))



# Promoter table indexed by "ID".
# NOTE(review): `pro_met` is assigned three times in a row, so only the last
# read takes effect -- confirm that the earlier paths can go.
pro_met = pd.read_table("Promoter_up500down500_ALL.txt", index_col="ID")
pro_met = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/03.WGBS/Promoter_cCRE_ALL.txt", index_col="ID")
pro_met = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/03.WGBS/NEW/Promoter_cCRE_ALL.txt", index_col="ID")
pro_met.columns = list(map(lambda x: "X" + x, pro_met.columns))
# Split each index label on '/' into four fields: the first three parts and
# the last part, stored as Loc, GeneID, EnsemblID and CpG.
pro_met_info = pd.DataFrame(list(zip(list(map(lambda x: x.split('/')[0], pro_met.index)), list(map(lambda x: x.split('/')[1], pro_met.index)), list(map(lambda x: x.split('/')[2], pro_met.index)), list(map(lambda x: x.split('/')[-1], pro_met.index)))), columns=['Loc', 'GeneID', 'EnsemblID', 'CpG'], index=pro_met.index)

Example #57

0

Show file

                              "shade": True,
                              "cumulative": cdf
                          },
                          hist=False,
                          color='cyan')
        plt.plot([me['Time']] * 2, [0, ax.get_ylim()[1]], color='black')
        plt.xlabel('Overall')
        ax.xaxis.set_major_formatter(formatter)

        plt.tight_layout()
        txt = 'cdf' if cdf else 'pdf'
        plt.savefig('stages_' + txt + '.svg')

    # Total time per division: box plot with the individual results overlaid
    # as small grey points; saved as an SVG.
    plt.figure(figsize=[10, 10])
    ax = sns.boxplot(x='Time', y='Div', data=df)
    sns.swarmplot(x='Time', y='Div', data=df, size=2, color=".3", linewidth=0)
    plt.title('Total Time by Division')
    plt.xlabel('Total Time')
    ax.xaxis.set_major_formatter(formatter)
    plt.savefig('time_by_div.svg')

    # Boolean flag: True when the division name contains 'F' or 'ATH'.
    # The legend below labels the True group 'Women'.
    df['Sex'] = df['Div'].map(lambda x: 'F' in x or 'ATH' in x)

    # Shaded density curves of Time for the two groups (histograms disabled),
    # drawn in the same order as the legend entries.
    plt.figure(figsize=[10, 6])
    ax = sns.distplot(df.loc[df['Sex'] == 1, 'Time'],
                      kde_kws={"shade": True},
                      hist=False)
    ax = sns.distplot(df.loc[df['Sex'] == 0, 'Time'],
                      kde_kws={"shade": True},
                      hist=False)
    plt.legend(['Women', 'Men'])

Example #58

0

Show file

sb.violinplot(x='day', y='total_bill', data=tips, hue='sex',
              split=True)  # Graph on each side of plot

# Strip Plots
sb.stripplot(x='day', y='total_bill', data=tips)
sb.stripplot(x='day', y='total_bill', data=tips, jitter=True)
sb.stripplot(x='day', y='total_bill', data=tips, jitter=True, hue='sex')
# `dodge` is the current name of stripplot's former `split` parameter: it
# separates the hue levels along the categorical axis.
sb.stripplot(x='day',
             y='total_bill',
             data=tips,
             jitter=True,
             hue='sex',
             dodge=True)

# Swarm Plots
sb.swarmplot(x='day', y='total_bill', data=tips)

# Violin & Swarm Plots
sb.violinplot(x='day', y='total_bill', data=tips)
sb.swarmplot(x='day', y='total_bill', data=tips, color='black')

# Categorical Plots (General-Purpose with Kind Specification)
# `catplot` is the current name of the former `factorplot`. factorplot drew a
# point plot by default whereas catplot defaults to a strip plot, so the first
# call names kind='point' explicitly to keep the same figure.
sb.catplot(x='day', y='total_bill', data=tips, kind='point')
sb.catplot(x='day', y='total_bill', data=tips, kind='box')
sb.catplot(x='day', y='total_bill', data=tips, kind='bar')
sb.catplot(x='day', y='total_bill', data=tips, kind='violin')
sb.catplot(x='day', y='total_bill', data=tips, kind='strip')
sb.catplot(x='day', y='total_bill', data=tips, kind='swarm')

# Matrix Plots ---------------------------------------------------------------

Example #59

0

Show file

File: API_weather.py Project: hdavis/Thinkful_Data_Science

    # print('Just finished collecting and storing data for ' + city_var)

    # end of city loop - all cities should have been processed

# Calculate summary statistics over the 30-day period and store them in a new
# dataframe, "df_summary"; separate the latitude and longitude into two
# columns, converting them from strings to floats; then write the DataFrame
# to a .csv file called "summary.csv".
df_summary = pd.DataFrame(columns=('city', 'long', 'lat', 'max_tmax',
                                   'min_tmax', 'range_tmax', 'mean_tmax',
                                   'sd_tmax'))

# Each assignment is a per-city aggregate of tmax, indexed by city name.
df_summary['max_tmax'] = df.groupby('city')['tmax'].max()
df_summary['min_tmax'] = df.groupby('city')['tmax'].min()
df_summary['mean_tmax'] = df.groupby('city')['tmax'].mean()
df_summary['sd_tmax'] = df.groupby('city')['tmax'].std()
df_summary['range_tmax'] = df_summary['max_tmax'] - df_summary['min_tmax']
df_summary['city'] = df_summary.index

# `cities` maps a city name to a "lat,long" string. `.items()` works on both
# Python 2 and 3 (`iteritems()` is Python 2 only), and `.at[...]` replaces
# `DataFrame.set_value`, which was removed in pandas 1.0.
for k, v in cities.items():
    location = tuple(float(x) for x in v.split(','))
    df_summary.at[k, 'lat'] = location[0]
    df_summary.at[k, 'long'] = location[1]

df_summary.to_csv('summary.csv', index=False)

# Daily tmax per city, cities in alphabetical order: box plot with the
# individual days overlaid in dark grey.
sns.set_style("whitegrid")
ax = sns.boxplot(x="city", y="tmax", data=df.sort_values(by='city'))
ax = sns.swarmplot(x="city", y="tmax", data=df.sort_values(by='city'),
                   color=".25")

Example #60

0

Show file

sns.set_style("ticks")
sns.set_context("talk")

df = pd.read_csv("VRTag_days.csv")

#df['daycond'] = df.day + df.cond.astype(str)

## Pointplot for simple, easy mean/sem visualization



# Collapse to one mean row per (subject, day, condition) before plotting.
df = df.groupby(['subject','day','condition']).mean().reset_index()

# NOTE(review): `ax` is not created in this part of the script -- presumably
# it is set up earlier in the file; confirm.
#sns.pointplot(x="day",y="dist", hue = "condition", ax=ax,palette = p1,data=df, dodge= True,ci=68)
sns.swarmplot(x="day",y="dist", hue = "condition", dodge= True, ax=ax,data=df, hue_order = ["video","vr"])


#ax.legend_.remove()
sns.despine(ax=ax)
# A stray "<<<<<<< HEAD" merge-conflict marker stood here and made the file a
# syntax error; it has been removed. Both candidate label calls are kept in
# their original order, so the second one determines the final labels.
# NOTE(review): decide which y label is intended and delete the other call.
ax.set(xlabel="Condition",ylabel="Day 1 minus Day 2 distance (pixels)")

ax.set(xlabel="Condition",ylabel="Distance from Correct (pixels)")

## box and swarm for specific data-point visualization

#sns.boxplot(x="day",y="dist",ax=ax,palette = p1,data=df, dodge= True)
#sns.swarmplot(x="day",y="dist",ax=ax,color = "black",data=df, dodge= True)