Example #1
0
def plot_distances(distance_data, filename, title, plot_variable='distance'):
    """Plot ``plot_variable`` per cutoff, one subplot row per region, and save it.

    For every distinct value of ``distance_data['region']`` a violin plot with
    a black swarm plot on the same axes is drawn, grouped by the ``cutoff_n``
    label that ``format_cutoff`` produces for each row.

    :param distance_data: DataFrame with at least the columns ``region``,
        ``cutoff`` and ``plot_variable``.
    :param filename: Target passed to ``plt.savefig``.
    :param title: Title placed on the first (top) subplot.
    :param plot_variable: Name of the column plotted on the y-axis.
    :return: None
    """
    regions = sorted(set(distance_data['region']))
    ordered = distance_data.sort_values(['region', 'cutoff'])
    sns.set()
    _, axes_grid = plt.subplots(nrows=len(regions), ncols=1, squeeze=False)
    # squeeze=False always yields a 2-D array; flatten it to a plain list.
    panels = list(chain(*axes_grid))

    for panel, region in zip(panels, regions):
        region_rows = ordered[ordered['region'] == region]
        # Number of plot_variable observations per cutoff, mapped back on rows.
        per_cutoff = region_rows.groupby(by=['cutoff'])[plot_variable].count()
        region_rows = region_rows.assign(
            count=region_rows['cutoff'].map(per_cutoff))
        region_rows['cutoff_n'] = region_rows.apply(format_cutoff, 'columns')

        common = dict(x='cutoff_n', y=plot_variable, data=region_rows, ax=panel)
        sns.violinplot(cut=0, alpha=0.7, **common)
        # Bump the z-order of everything drawn so far on this panel.
        for artists in (panel.lines, panel.collections):
            plt.setp(artists, zorder=100)
        sns.swarmplot(color='k', **common)
        panel.set_ylabel(seed_label(region, plot_variable)
                         if False else region + '\n' + plot_variable)

    panels[0].set_title(title)
    plt.savefig(filename)
def create_swarmplot(df, path, title, colormap, genes, species):
    """
    Draw a seaborn swarmplot of orthologue counts per gene and save it as PNG.

    The image is named ``<title>_swarmplot.png`` and its location is built by
    ``join_folder`` from the directory part of ``path``.

    :param df: pandas.DataFrame with the columns 'Gene Name', 'Orthologues'
        and 'Species'.
    :param path: File path whose directory receives the image.
    :param title: Used as the prefix of the image file name.
    :param colormap: Palette handed to seaborn.
    :param genes: Ordered list of genes (x-axis order).
    :param species: Ordered list of species (hue order).
    :return: Whatever ``join_folder`` returned for the image location.
    """
    print("Creating swarmplot for {}".format(path))
    image_path = join_folder(os.path.dirname(path), "%s_swarmplot.png" % title)

    plt.figure(figsize=(16, 10), dpi=180)  # fresh figure for this plot
    sns.swarmplot(
        data=df,
        x='Gene Name',
        y='Orthologues',
        hue='Species',
        order=genes,
        hue_order=species,
        palette=colormap,
    )
    plt.xlabel("Gene Name")
    plt.ylabel("# Orthologues")
    plt.ylim(0)  # pin the lower bound at zero, leave the upper bound automatic
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)

    plt.savefig(image_path)
    plt.close()
    return image_path
Example #3
0
def _plot_categorical_and_continuous(df, xlabel, ylabel, x_keys, y_keys, ax,
                                     cmap, n_cat=5, plottype="box"):
    """
    Plot a categorical variable and a continuous variable against each
    other. Types of plots include box plot, violin plot, strip plot and swarm
    plot.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    xlabel : str
        The column name for the variable on the x-axis

    ylabel : str
        The column name for the variable on the y-axis

    ax : matplotlib.Axes object
        The matplotlib.Axes object to plot the bubble plot into

    cmap : matplotlib.cm.colormap
        A matplotlib colormap to use for shading the bubbles

    n_cat : int
        The number of categories; used for creating the colour map

    plottype : {"box" | "violin" | "strip" | "swarm"}
        The type of plot to produce; default is a box plot

    Returns
    -------
    ax : matplotlib.Axes object
        The same matplotlib.Axes object for further manipulation

    """
    if x_keys is xlabel:
        keys = y_keys
    elif y_keys is ylabel:
        keys = x_keys
    else:
        raise Exception("Something went terribly, horribly wrong!")

    current_palette = sns.color_palette(cmap, n_cat)
    if plottype == "box":
        sns.boxplot(x=xlabel, y=ylabel, data=df, order=keys,
                    palette=current_palette, ax=ax)
    elif plottype == "strip":
        sns.stripplot(x=xlabel, y=ylabel, data=df, order=keys,
                      palette=current_palette, ax=ax)
    elif plottype == "swarm":
        sns.swarmplot(x=xlabel, y=ylabel, data=df, order=keys,
                      palette=current_palette, ax=ax)
    elif plottype == "violin":
        sns.violinplot(x=xlabel, y=ylabel, data=df, order=keys,
                       palette=current_palette, ax=ax)
    else:
        raise Exception("plottype not recognized!")

    return ax
Example #4
0
    def plot_facet(self, data, color,
                   x=None, y=None, levels_x=None, levels_y=None,
                   palette=None, **kwargs):
        """Draw a swarm plot of corpus positions for one facet.

        Depending on which of ``x`` and ``y`` are given, the corpus id column
        is plotted alone, against ``x``, against ``y``, or against ``y`` with
        ``x`` as hue. Also sets ``self.horizontal``, ``self._xlab`` and
        ``self._ylab``.

        :param data: DataFrame containing "coquery_invisible_corpus_id" and
            the columns named by ``x``/``y``.
        :param color: Accepted but not used by this method.
        :param x: Optional column name for the first grouping variable.
        :param y: Optional column name for the second grouping variable.
        :param levels_x: Ordering of the ``x`` levels.
        :param levels_y: Ordering of the ``y`` levels.
        :param palette: Palette handed to seaborn.
        :param kwargs: May contain ``ax``, the axes to draw on.
        :return: The axes that were drawn on.
        """
        # Only fall back to the current axes when the caller gave none;
        # calling plt.gca() unconditionally would create or activate a
        # figure even though an explicit axes was supplied.
        ax = kwargs.get("ax")
        if ax is None:
            ax = plt.gca()
        corpus_id = "coquery_invisible_corpus_id"

        # Pass the axes on so the plot lands on the axes that is returned.
        params = {"data": data, "palette": palette, "ax": ax}
        self.horizontal = True
        if not x and not y:
            params.update({"x": corpus_id})
            # NOTE(review): x is falsy in this branch, so the x label ends up
            # empty/None - confirm whether "Corpus position" was intended.
            self._xlab = x
            self._ylab = ""
        elif x and not y:
            params.update({"x": x, "y": corpus_id, "order": levels_x})
            self.horizontal = False
            self._xlab = x
            self._ylab = "Corpus position"
        elif y and not x:
            params.update({"y": y, "x": corpus_id, "order": levels_y})
            self._xlab = "Corpus position"
            self._ylab = y
        elif x and y:
            params.update({"x": corpus_id, "y": y, "hue": x,
                           "order": levels_y, "hue_order": levels_x})
            self._xlab = "Corpus position"
            self._ylab = y

        sns.swarmplot(**params)
        return ax
def plotResults(tr, resultKey='resultInputPsf', doRates=False, title='', asHist=False, doPrint=True, actuallyPlot=True):
    """Summarise and optionally plot TP/FP/FN counts of several methods.

    Each usable entry of ``tr`` must map ``resultKey`` to a dict keyed by
    method name ('ALstack', 'ZOGY', 'SZOGY', 'ALstack_decorr'), each holding
    'TP', 'FP' and 'FN' values. Entries that are ``None`` or whose
    ``resultKey`` value is falsy are ignored.

    :param tr: Iterable of per-run result dicts (or ``None``).
    :param resultKey: Key selecting the result set inside each run.
    :param doRates: If true, divide TP, FP and FN by the original ``FN + TP``.
    :param title: Title put on both subplots.
    :param asHist: If true draw ``sns.distplot`` histograms instead of
        violin/swarm/box plots.
    :param doPrint: If true print the per-method means.
    :param actuallyPlot: If false, return right after the summary. In that
        case matplotlib/seaborn are neither imported nor restyled.
    :return: Tuple ``(TP, FP, FN)`` of DataFrames, one column per method and
        one row per usable run.
    """
    methods = ['ALstack', 'ZOGY', 'SZOGY', 'ALstack_decorr']
    tr = [t for t in tr if t is not None and t[resultKey]]

    def _collect(kind):
        # One column per method, one row per run.
        return pd.DataFrame({key: np.array([t[resultKey][key][kind] for t in tr])
                             for key in methods})

    FN = _collect('FN')
    FP = _collect('FP')
    TP = _collect('TP')
    title_suffix = 's'
    if doRates:
        # Compute the denominator once, from the raw counts. Dividing FN in
        # place first would corrupt the denominator used for FP and TP.
        total = FN + TP
        FN = FN / total
        FP = FP / total
        TP = TP / total
        title_suffix = ' rate'
    if doPrint:
        # Single-argument print() behaves the same on Python 2 and 3.
        print('FN: \n%s' % (FN.mean(),))
        print('FP: \n%s' % (FP.mean(),))
        print('TP: \n%s' % (TP.mean(),))

    if not actuallyPlot:
        return TP, FP, FN

    # Plotting libraries are only needed (and global styles only changed)
    # when a figure is actually produced.
    import matplotlib.pyplot as plt
    import matplotlib
    matplotlib.style.use('ggplot')

    import seaborn as sns
    sns.set(style="whitegrid", palette="pastel", color_codes=True)

    matplotlib.rcParams['figure.figsize'] = (18.0, 6.0)
    fig, axes = plt.subplots(nrows=1, ncols=2)

    if not asHist:
        sns.violinplot(data=TP, cut=True, linewidth=0.3, bw=0.25, scale='width', alpha=0.5, ax=axes[0])
        # Swarm plots get unreadable (and slow) with many points.
        if TP.shape[0] < 500:
            sns.swarmplot(data=TP, color='black', size=3, alpha=0.3, ax=axes[0])
        sns.boxplot(data=TP, saturation=0.5, boxprops={'facecolor': 'None'},
                    whiskerprops={'linewidth': 0}, showfliers=False, ax=axes[0])
        plt.setp(axes[0], alpha=0.3)
        axes[0].set_ylabel('True positive' + title_suffix)
        axes[0].set_title(title)
        sns.violinplot(data=FP, cut=True, linewidth=0.3, bw=0.5, scale='width', ax=axes[1])
        if FP.shape[0] < 500:
            sns.swarmplot(data=FP, color='black', size=3, alpha=0.3, ax=axes[1])
        sns.boxplot(data=FP, saturation=0.5, boxprops={'facecolor': 'None'},
                    whiskerprops={'linewidth': 0}, showfliers=False, ax=axes[1])
        plt.setp(axes[1], alpha=0.3)
        axes[1].set_ylabel('False positive' + title_suffix)
        axes[1].set_title(title)
    else:
        for t in TP:
            sns.distplot(TP[t], label=t, norm_hist=False, ax=axes[0])
        axes[0].set_xlabel('True positive' + title_suffix)
        axes[0].set_title(title)
        axes[0].legend(loc='upper left', shadow=True)
        for t in FP:
            sns.distplot(FP[t], label=t, norm_hist=False, ax=axes[1])
        axes[1].set_xlabel('False positive' + title_suffix)
        axes[1].set_title(title)
        axes[1].legend(loc='upper left', shadow=True)

    return TP, FP, FN
def fatigue_plots(df):
    """
    Makes plots showing game fatigue for SAS and IND

    Args:
        df (pd.DataFrame): dataframe of fatigue data with the columns
            'Pos', 'Tm', 'variable' and 'value'
            Note: use extract_fatigue() to obtain this data

    Returns:
        None
        Saves plots to examples/
    """
    # Only the Indiana plot uses a fixed y-range.
    _fatigue_swarm(df, 'IND', 'Indiana Pacers Fatigue',
                   'examples/INDfatige', ylim=(0.015, 0.034))
    _fatigue_swarm(df, 'SAS', 'San Antonio Spurs Fatigue',
                   'examples/SASfatige')


def _fatigue_swarm(df, team, title, out_path, ylim=None):
    """Draw and save the offensive swarm plot of one team on a new figure."""
    plt.figure()
    # One combined mask instead of chained indexing: chaining applies a mask
    # built on the full frame to an already filtered frame.
    offense = df[(df.Pos == 'Off') & (df.Tm == team)]
    sns.swarmplot(x='variable', y='value', data=offense)
    plt.title(title)
    plt.xlabel('Quarter')
    plt.ylabel('Mean Offensive Velocity (ft/sec)')
    if ylim is not None:
        plt.ylim(*ylim)
    locs, _ = plt.yticks()
    # Tick labels show the values scaled by 1000; a real list is required
    # because a lazy map object is not a sequence on Python 3.
    plt.yticks(locs, ["%.1f" % tick for tick in locs * 1000])
    plt.savefig(out_path)
Example #7
0
def plot_prediction2(transformed, predicted, y, label_names, threshold):
    """Swarm-plot transformed values by true label, coloured by prediction.

    A grey vertical line at ``threshold`` (from y=-50 to y=50) is drawn
    first. ``predicted`` and ``y`` are sequences whose items are indexed with
    ``[0]`` and converted to ``int`` to look up a name in ``label_names``.

    :param transformed: Data for the single value column.
    :param predicted: Predicted class indices, one indexable item per sample.
    :param y: True class indices, one indexable item per sample.
    :param label_names: Lookup from class index to display name.
    :param threshold: x position of the boundary line.
    :return: None
    """
    value_column = 'Dimention Reduction Result After LDA Transform'

    def named_frame(rows, column):
        # Translate each row's first entry into its label name.
        names = [label_names[int(row[0])] for row in rows]
        return pd.DataFrame(names, columns=[column])

    plot([threshold, threshold], [-50, 50], 'grey', label='Classify boundary')

    values = pd.DataFrame(transformed, columns=[value_column])
    predictions = named_frame(predicted, 'prediction')
    truths = named_frame(y, 'label')
    combined = pd.concat([values, predictions, truths], axis=1)

    sns.swarmplot(y='label', x=value_column, hue='prediction', data=combined)
Example #8
0
 def plot_facet(data, color):
     """Swarm-plot the corpus ids of ``data`` grouped by the last group column.

     ``self`` is not a parameter; it is taken from the enclosing scope.
     ``color`` is accepted but not used.
     """
     group_values = data[self._groupby[-1]]
     corpus_ids = data["coquery_invisible_corpus_id"]
     level_order = sorted(self._levels[-1])
     colors = self.options["color_palette_values"]
     sns.swarmplot(x=group_values, y=corpus_ids, order=level_order,
                   palette=colors, data=data)
Example #9
0
def ageing_wip_chart(cycle_data, start_column, end_column, done_column=None, now=None, title=None, ax=None):
    """Draw a swarm plot of the age (in days) of unfinished items per status.

    Items whose ``done_column`` is set are dropped. For the rest, the status
    is the label of the last non-null column of the row and the age is the
    number of days between the date in ``start_column`` and ``now``.

    :param cycle_data: DataFrame with 'key' and 'summary' columns plus the
        status date columns from ``start_column`` to ``end_column``.
    :param start_column: Label of the first status column; also the column
        the age is measured from.
    :param end_column: Label of the last status column (inclusive).
    :param done_column: Column marking completion; defaults to the last
        column of ``cycle_data``.
    :param now: Reference timestamp; defaults to ``pd.Timestamp.now()``.
    :param title: Optional axes title.
    :param ax: Axes to draw on; a new figure is created when omitted.
    :return: The axes that were drawn on.
    :raises UnchartableData: If ``cycle_data`` has no rows.
    """
    if len(cycle_data.index) == 0:
        raise UnchartableData("Cannot draw ageing WIP chart with no data")

    if ax is None:
        _, ax = plt.subplots()

    if title is not None:
        ax.set_title(title)

    if now is None:
        now = pd.Timestamp.now()

    if done_column is None:
        done_column = cycle_data.columns[-1]

    today = now.date()

    # remove items that are done
    cycle_data = cycle_data[pd.isnull(cycle_data[done_column])]
    # Label-based slice; DataFrame.ix was removed from pandas, .loc is the
    # label-based replacement and is inclusive of end_column as well.
    cycle_data = pd.concat((
        cycle_data[['key', 'summary']],
        cycle_data.loc[:, start_column:end_column]
    ), axis=1)

    def extract_status(row):
        # Label of the last column holding a value, NaN if the row is empty.
        last_valid = row.last_valid_index()
        if last_valid is None:
            return np.nan
        return last_valid

    def extract_age(row):
        # Whole days since the item entered start_column, NaN if it never did.
        started = row[start_column]
        if pd.isnull(started):
            return np.nan
        return (today - started.date()).days

    wip_data = cycle_data[['key', 'summary']].copy()
    wip_data['status'] = cycle_data.apply(extract_status, axis=1)
    wip_data['age'] = cycle_data.apply(extract_age, axis=1)

    wip_data.dropna(how='any', inplace=True)

    # Columns 0 and 1 are 'key' and 'summary'; the rest are the statuses.
    sns.swarmplot(x='status', y='age', order=cycle_data.columns[2:], data=wip_data, ax=ax)

    ax.set_xlabel("Status")
    ax.set_ylabel("Age (days)")

    ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90)

    # Anchor the y-axis at zero while keeping the automatic upper limit.
    _, top = ax.get_ylim()
    ax.set_ylim(0, top)

    return ax
Example #10
0
def dataExpl(data):
    """Draw three exploratory seaborn plots of survival data.

    ``data`` needs the columns 'Sex', 'Survived' and 'Age'. All plots are
    issued without an explicit axes argument. Returns None.
    """
    # Statistical description of the data (the result is not kept)
    data.describe()

    # Survival by sex
    sns.barplot(x='Sex', y='Survived', data=data)

    # Survival by age group and sex; rows without an age get no group
    age_df = data[['Age', 'Survived', 'Sex']].copy()
    group_masks = (
        ('Children', age_df['Age'] < 15),
        ('Adult', age_df['Age'] >= 15),
    )
    for group_name, members in group_masks:
        age_df.loc[members, 'AgeGroup'] = group_name
    sns.barplot(x='AgeGroup', y='Survived', hue='Sex', data=age_df)

    # Individual passengers: age against sex, coloured by survival
    sns.swarmplot(x='Age', y='Sex', hue='Survived', data=data)
Example #11
0
def swarmplot(df):
    """Swarm-plot four measurements per group and save 'swarmplot.png'.

    ``df`` must contain the columns 'group', 'height', 'heartrate', 'weight'
    and 'age'. The frame is melted to long form so that each measurement
    becomes one x-axis category. The image is written into ``FIG_PATH``.
    """
    fig = plt.figure(figsize=[12, 8])
    ax = fig.gca()

    measurements = ["height", "heartrate", "weight", "age"]
    long_form = pd.melt(frame=df,
                        id_vars=["group"],
                        value_vars=measurements,
                        var_name="measurement")
    sns.swarmplot(data=long_form, x="measurement", y="value", hue="group")

    # Replace the raw column names with labels that carry units.
    ax.set_xlabel('')
    ax.set_xticklabels(['height (cm)', 'heart rate (bpm)', 'weight (kg)', 'age (years)'])

    fig.tight_layout()
    target = os.path.join(FIG_PATH, 'swarmplot.png')
    plt.savefig(target, dpi=100)
def swarm2(ax, data, xvar1, xlabel1, xvar2, xlabel2, yvar, ylabel):
    """Swarm-plot two columns side by side on ``ax`` with means and a p-value.

    Draws the swarm plot, a red horizontal line at each column's mean, and
    annotates the axes with the p-value of Welch's t-test between the two
    columns and with the number of non-null values of ``xvar1``.

    :param ax: Axes that receives the plot and the annotations.
    :param data: DataFrame containing the columns ``xvar1`` and ``xvar2``.
    :param xvar1: Name of the first column.
    :param xlabel1: Tick label for the first column.
    :param xvar2: Name of the second column.
    :param xlabel2: Tick label for the second column.
    :param yvar: Accepted but not used.
    :param ylabel: Label of the y-axis.
    :return: None
    """
    # Draw on the axes that was passed in, not on whatever axes happens to
    # be current; all annotations below target ``ax`` as well.
    sns.swarmplot(data=data[[xvar1, xvar2]], ax=ax)

    # plot mean value: x in axes fractions, y in data units
    trans = matplotlib.transforms.blended_transform_factory(
        ax.transAxes, ax.transData
    )
    for column, x_span in ((xvar1, [0.1, 0.4]), (xvar2, [0.6, 0.9])):
        mean_value = data[column].mean()
        ax.plot(x_span, [mean_value, mean_value], "r-", transform=trans)

    pvalue = scipy.stats.ttest_ind(
        data[xvar1].dropna(),
        data[xvar2].dropna(),
        equal_var=False
    ).pvalue

    # Make sure there is 20% of the data range as head room for the text.
    dataset = pd.concat([data[xvar1], data[xvar2]])
    curr_ylim = ax.get_ylim()
    y20 = dataset.max() + (dataset.max() - dataset.min()) * 0.2
    if curr_ylim[1] < y20:
        ax.set_ylim([curr_ylim[0], y20])

    ax.annotate(
        r"$p = {0:.5f}$".format(pvalue),
        xy=(0.5, 0.95),
        horizontalalignment="center",
        xycoords=ax.transAxes,
    )
    ax.annotate(
        r"$n = {0}$".format(len(data[xvar1].dropna())),
        xy=(0.5, 0.9),
        horizontalalignment="center",
        xycoords=ax.transAxes,
    )

    ax.set_ylabel(ylabel)
    ax.set_xticklabels([xlabel1, xlabel2])
    # Despine the figure that owns ``ax`` (same as the default when that
    # figure is the current one).
    sns.despine(fig=ax.figure)
Example #13
0
        def combPlot(x, y, data, hue=None, onlyAverage=False):
            """Swarm-plot ``y`` against ``x`` (optionally split by ``hue``).

            With ``onlyAverage`` the data is first reduced to the mean per
            ``x`` value (or per ``x``/``hue`` pair), so only one point per
            group is shown, i.e. an average of averages.
            """
            if onlyAverage:
                group_keys = [x, hue] if hue else x
                # reset_index() turns the group keys back into ordinary
                # columns, which seaborn needs in order to plot them.
                # http://stackoverflow.com/a/10374456
                averaged = data.groupby(group_keys).mean().reset_index()
                data = pd.DataFrame(averaged)

            # Disabled alternative: violin plot with a white swarm overlay.
            #sns.violinplot(x=x, y=y, data=data, hue=hue, inner=None)
            #sns.swarmplot(x=x, y=y, data=data, hue=hue, color="w", alpha=.5)
            sns.swarmplot(data=data, x=x, y=y, hue=hue)
Example #14
0
def make_plot_file(top, file_name, time_func, rank_name, rank_max):
    """
    Creates a scatter plot of 'pageviews' depending on time
    with a combination of 'pagename' and 'projectcode' as the keys to plot.

    :param top: Pandas dataframe with pages ranked within each occurrence of the current timeframe.
    :param file_name: The name of the png plot file to create.
    :param time_func: The lambda to create the time data; applied to every row.
    :param rank_name: The column with the rank.
    :param rank_max: The highest (least 'pageviews') rank to include; exclusive.
    :return: None
    """
    # Get only the rows with rank less than rank_max (copy: columns are added)
    top = top[top[rank_name] < rank_max].copy(deep=True)
    # Create time column
    top['time'] = top.apply(time_func, axis=1)
    # Replace blank page names with "main page"
    top['pagename'] = top['pagename'].fillna("main page")
    # Create key for the legend and plotting.
    top['pagename_projectcode'] = top['pagename'] + "_" + top['projectcode']
    # Create and configure seaborn plot.
    g = sns.swarmplot(x="time", y="pageviews", hue="pagename_projectcode", data=top)
    lgd = plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.xticks(rotation=45)
    # Save the figure seaborn actually drew on. Looking the figure up by
    # number (plt.figure(1)) picks the wrong one whenever other figures exist.
    fig = g.get_figure()
    # Create file; the legend sits outside the axes, so include it explicitly.
    fig.savefig(file_name, bbox_extra_artists=(lgd,), bbox_inches='tight')
    # Clear plot data
    plt.clf()
Example #15
0
def swarmBox(data, x, y, hue, palette=None, order=None, hue_order=None, connect=False):
    """Draw a box plot with a dodged swarm plot on top, optionally connected.

    Depends on plot order of the swarm plot which does not seem dependable at the moment.
    Better idea would be to adopt code from the actual swarm function for this, adding boxplots separately

    :param data: DataFrame holding the columns named by ``x``, ``y``, ``hue``.
    :param x: Column used for the categories on the x-axis.
    :param y: Column with the plotted values.
    :param hue: Column used for the colour grouping.
    :param palette: Colours per hue level; defaults to 'Set2' with one colour
        per unique hue value.
    :param order: Order of the x categories; defaults to the sorted unique
        values of ``data[x]``.
    :param hue_order: Order of the hue levels; defaults to the sorted unique
        values of ``data[hue]``.
    :param connect: If true, draw grey lines between the swarm points of the
        hue levels inside each x category.
    :return: None
    """
    if palette is None:
        palette = sns.color_palette('Set2',  n_colors=data[hue].unique().shape[0])
    if hue_order is None:
        hue_order = sorted(data[hue].unique())
    if order is None:
        # Was misspelled "unqiue", which raised AttributeError whenever the
        # caller relied on the default order.
        order = sorted(data[x].unique())

    params = dict(data=data, x=x, y=y, hue=hue, palette=palette, order=order, hue_order=hue_order)
    sns.boxplot(**params, fliersize=0, linewidth=0.5)
    swarm = sns.swarmplot(**params, linewidth=0.5, edgecolor='black', dodge=True)
    if connect:
        # Group the swarm collections per x category: every len(hue_order)-th
        # collection is taken to belong to the same hue level.
        zipper = [order] + [swarm.collections[i::len(hue_order)] for i in range(len(hue_order))]
        for z in zip(*zipper):
            curx = z[0]
            collections = z[1:]
            offsets = []
            for c,h in zip(collections, hue_order):
                ind = (data[x] == curx) & (data[hue] == h)
                # Rank of every row's y value, used to index the point offsets.
                sortii = np.argsort(np.argsort(data.loc[ind, y]))
                offsets.append(c.get_offsets()[sortii,:])

            # One line per row position, running across the hue levels.
            for zoffsets in zip(*offsets):
                xvec = [o[0] for o in zoffsets]
                yvec = [o[1] for o in zoffsets]
                plt.plot(xvec, yvec, '-', color='gray', linewidth=0.5)
    plt.legend([plt.Circle(1, color=c) for c in palette], hue_order, title=hue)
    def plot_hours_of_day(self, my_jira_df, my_filename_without_path, my_chart_title, my_output_path, my_relative_output_path, my_png_list):
        """Swarm-plot 'HoursOfDay' per 'Project', coloured by 'StateChange'.

        The y ticks are fixed to the integers 8 through 17 and the resulting
        axes is handed to ``self.save_file`` together with the file name,
        title, path and png-list arguments.
        """
        axes = sns.swarmplot(data=my_jira_df,
                             x="Project",
                             y="HoursOfDay",
                             hue="StateChange")
        hour_ticks = list(range(8, 18))
        axes.set(yticks=hour_ticks)
        self.save_file(my_filename_without_path, my_chart_title, my_output_path,
                       my_relative_output_path, my_png_list, axes)
Example #17
0
def plot_acc_grid(models = ALL_MODELS, save_path='../resources/cached_model_grid_scores.csv'):
    """Show swarm plots of cached 'acc' scores for the selected models.

    Reads the CSV at ``save_path``, keeps the rows whose 'model_names' is in
    ``models`` and draws two stacked panels: the rows whose 'data_fold' is
    not 'overfit' on top (with legend) and the 'overfit' rows below (legend
    removed). Finally calls ``plt.show()``.
    """
    scores = pd.read_csv(save_path)
    scores = scores[scores['model_names'].isin(models)]

    f, (ax1, ax2) = plt.subplots(2, figsize=(12, 12))

    is_accuracy = scores['score'] == 'acc'
    panels = (
        (ax1, scores['data_fold'] != 'overfit'),
        (ax2, scores['data_fold'] == 'overfit'),
    )
    for panel, fold_rows in panels:
        sns.swarmplot(data=scores[is_accuracy & fold_rows],
                      y='variable', x='value', hue='model_names', ax=panel)
        panel.set(xlabel='scores', ylabel='')

    ax1.legend(bbox_to_anchor=(1.05, 1), loc='lower right', borderaxespad=0.)
    ax2.legend_.remove()
    plt.show()
Example #18
0
def mean_boxplots(df, title, ax):
    """Box and swarm plot of per-row mean values, grouped and annotated.

    Each row of ``df`` is averaged over its columns; ``group`` maps the row
    label to a group object (its ``name`` and ``color`` attributes are used)
    and ``donor`` maps the row label to the text written next to its point.
    Groups appear in descending sort order.
    """
    signal = df.mean(axis=1).to_frame('value')
    row_groups = [group(label) for label in signal.index]
    signal['group'] = [grp.name for grp in row_groups]

    ordered_groups = sorted(set(row_groups))[::-1]
    ordered_names = [grp.name for grp in ordered_groups]

    shared = dict(x='group', y='value', data=signal, order=ordered_names, ax=ax)
    sns.boxplot(palette='Set3', linewidth=1.0, **shared)
    sns.swarmplot(color='.25', **shared)

    # Category i sits at x position i; write the donor next to every point.
    for position, grp in enumerate(ordered_groups):
        members = signal[signal['group'] == grp.name]
        for label, value in zip(members.index, members['value']):
            ax.annotate(donor(label),
                        xy=(position, value),
                        xytext=(5, 0),
                        color=grp.color,
                        textcoords='offset points')
    ax.set_title(title)
Example #19
0
def frip_boxplot(age_labels, df, save_to):
    """
    Draws a FRiP box plot with a swarm overlay and labels every point.

    :param age_labels: Age labels, in the order they appear on the x-axis;
        annotations of the "YDS" label are red, all others blue
    :param df: Data frame with the columns 'age' and 'frip'; its index values
        are used as the annotation texts
    :param save_to: Passed on to ``save_plot``
    """
    plt.figure()
    ax = plt.subplot()

    shared = dict(x="age", y="frip", data=df, order=age_labels, ax=ax)
    sns.boxplot(palette="Set3", linewidth=1.0, **shared)
    sns.swarmplot(color=".25", **shared)

    # Category i sits at x position i; write the index label next to each dot.
    for position, age_label in enumerate(age_labels):
        text_color = "red" if age_label == "YDS" else "blue"
        members = df[df['age'] == age_label]
        for label, frip in zip(members.index, members['frip']):
            ax.annotate(label, xy=(position, frip), xytext=(5, 0),
                        color=text_color, textcoords='offset points')

    ax.set_title("Signal FRiP")
    save_plot(save_to)
def sns_viz(dataframe):
    """
    Takes a merged buzzfeed data frame and shows a seaborn swarm plot of
    'max_impres' against 'pull_cc'.

    The previous version could never run: it passed ``data.grou`` (a
    non-existent attribute of the groupby object) to seaborn and called
    ``.plt.show()`` on the returned axes, which has no ``plt`` attribute.
    NOTE(review): the frame is now plotted directly; if a per-title
    aggregation was intended, it has to be added before plotting.
    """
    # Local import so the function does not depend on a module-level alias.
    import matplotlib.pyplot as plt

    # Plot the data
    sns_plotter = sns.swarmplot(x='pull_cc', y='max_impres', data=dataframe)

    print(type(sns_plotter))
    # show the data
    plt.show()
def plot_by_genes(df, plot_dir, af_key, config):
    """Plot allele frequencies of known cancer genes in primary, relapse status

    Writes a multi-page PDF ("driver-af-comparison.pdf" in ``plot_dir``):
    first one violin plot per cohort, then one violin plus swarm plot per
    cohort and gene for genes with more than one status and more than 10
    rows. Cohort/gene pairs listed in ``config.driver_detailed`` are also
    saved as PNG files in a "detailed" sub-directory.

    :param df: DataFrame with the columns 'known', 'cohort', 'status' and
        the column named by ``af_key``.
    :param plot_dir: Output directory.
    :param af_key: Name of the column plotted on the x-axis.
    :param config: Object with a ``driver_detailed`` container, or a falsy
        value to skip the detailed PNG files.
    :return: Path of the PDF file.
    """
    out_file = os.path.join(plot_dir, "driver-af-comparison.pdf")
    df = df[pd.notnull(df["known"])]
    with PdfPages(out_file) as pdf_out:
        # Group by a scalar key: a one-element list makes newer pandas yield
        # 1-tuples as group keys, which would break the titles and the
        # (cohort, gene) lookup below.
        for cohort, cohort_df in df.groupby("cohort"):
            labels = sorted(list(cohort_df["status"].unique()))
            labels.reverse()
            # NOTE(review): this only sets an attribute on a temporary
            # Series; it does not appear to influence the plot - confirm
            # whether an explicit plotting order was intended.
            cohort_df["status"].categories = labels
            g = sns.violinplot(x=af_key, y="status", data=cohort_df, inner=None)
            g.set_title("%s -- %s cancer genes" % (cohort, len(cohort_df["known"].unique())))
            g = _af_violinplot_shared(g)
            pdf_out.savefig(g.figure)
            plt.clf()
        for cohort, cohort_df in df.groupby("cohort"):
            for gene, gene_df in cohort_df.groupby("known"):
                if len(gene_df["status"].unique()) > 1 and len(gene_df) > 10:
                    # Label every row with its status and the number of rows
                    # sharing that status.
                    gene_df["sample_label"] = gene_df.apply(
                        lambda row: "%s\n(%s variants)" %
                        (row["status"],
                         len(gene_df[gene_df["status"] == row["status"]])),
                        axis=1)
                    labels = list(gene_df["sample_label"].unique())
                    labels.reverse()
                    # NOTE(review): same attribute assignment as above.
                    gene_df["sample_label"].categories = labels
                    g = sns.violinplot(x=af_key, y="sample_label", data=gene_df, inner=None, bw=.1)
                    sns.swarmplot(x=af_key, y="sample_label", data=gene_df, color="w", alpha=.5)
                    g.set_title("%s -- %s" % (cohort, gene))
                    g = _af_violinplot_shared(g)
                    pdf_out.savefig(g.figure)
                    if config and (cohort, gene) in config.driver_detailed:
                        out_dir = utils.safe_makedir(os.path.join(plot_dir, "detailed"))
                        # Separate name: reusing out_file here made the
                        # function return a PNG path instead of the PDF.
                        detail_file = os.path.join(out_dir, "driver-%s-%s.png" % (cohort, gene))
                        g.figure.savefig(detail_file)
                    plt.clf()
    return out_file
def swarm(ax, data, xlabel1, xlabel2, ylabel):
    """Swarm-plot the columns 'y1' and 'y2' on ``ax`` with a t-test p-value.

    The p-value of Welch's t-test between the non-null values of both columns
    is written between the two swarms, above the data.

    :param ax: Axes that receives the plot and the annotation.
    :param data: DataFrame containing the columns 'y1' and 'y2'.
    :param xlabel1: Tick label for 'y1'.
    :param xlabel2: Tick label for 'y2'.
    :param ylabel: Label of the y-axis.
    :return: None
    """
    # Draw on the axes that was passed in, not on whatever axes happens to
    # be current; the limits and annotation below target ``ax`` as well.
    sns.swarmplot(data=data[["y1", "y2"]], ax=ax)

    pvalue = scipy.stats.ttest_ind(
        data["y1"].dropna(),
        data["y2"].dropna(),
        equal_var=False
    ).pvalue

    # Make sure the upper limit leaves 20% of the data range as head room.
    dataset = pd.concat([data["y1"], data["y2"]])
    data_range = dataset.max() - dataset.min()
    curr_ylim = ax.get_ylim()
    y20 = dataset.max() + data_range * 0.2
    if curr_ylim[1] < y20:
        ax.set_ylim([curr_ylim[0], y20])

    # Text goes 15% of the data range above the maximum, at x=0.5 in data
    # coordinates, i.e. halfway between the two categories.
    ymax = dataset.max() + data_range * 0.15
    ax.annotate(
        r"$p = {0:.5f}$".format(pvalue),
        xy=(0.5, ymax),
        horizontalalignment="center",
    )

    ax.set_ylabel(ylabel)
    ax.set_xticklabels([xlabel1, xlabel2])
    # Despine the figure that owns ``ax`` (same as the default when that
    # figure is the current one).
    sns.despine(fig=ax.figure)
    def plot_posterior(self, rotate_xticks=False):
        """
        Plots a swarm plot of the data on top of two sets of error bars taken
        from the 'fold' trace: the 2.5-97.5 percentile range and, thicker and
        in red, the 25-75 percentile range.

        :param rotate_xticks: If true, x tick labels are drawn vertically.
        :return: Tuple ``(fig, ax)`` of the new figure and its axes.
        """
        fig = plt.figure()
        ax = fig.add_subplot(111)

        fold_trace = self.trace['fold']

        # Percentiles skip the first 500 samples of the trace.
        # NOTE(review): the mean below uses the whole trace, including those
        # first 500 samples - confirm this asymmetry is intended.
        bounds = np.percentile(fold_trace[500:], [2.5, 25, 75, 97.5], axis=0)
        lower, lower_q, upper_q, upper = bounds

        summary_stats = pd.DataFrame()
        summary_stats['mean'] = fold_trace.mean(axis=0)
        centre = summary_stats['mean']

        # Error bars are given as distances below/above the mean.
        wide_err = [centre - lower, upper - centre]
        iqr_err = [centre - lower_q, upper_q - centre]

        centre.plot(ls='', ax=ax, yerr=wide_err)
        centre.plot(ls='', ax=ax, yerr=iqr_err, elinewidth=4, color='red')
        sns.swarmplot(data=self.data, x=self.sample_col, y=self.output_col,
                      ax=ax, alpha=0.5)

        if rotate_xticks:
            logging.info('rotating xticks')
            plt.xticks(rotation='vertical')
        plt.ylabel(self.output_col)

        return fig, ax
Example #24
0
def getLine(data_frame, xaxis = 'confName', yaxis = 'counts'):
    """Render a swarm plot and return it as an HTML ``<img>`` data-URI tag.

    The current axes is cleared, the plot is drawn with the 'Blues' palette,
    saved once as PNG into memory and base64-encoded. Unlike the old
    ``base64.encodestring`` output, the encoded text contains no line breaks.

    :param data_frame: DataFrame with the columns named by ``xaxis``/``yaxis``.
    :param xaxis: Column for the x-axis.
    :param yaxis: Column for the y-axis.
    :return: HTML string embedding the image.
    """
    # PNG output is binary, so it needs a bytes buffer (not a text StringIO).
    from io import BytesIO

    plt.cla()
    sns.swarmplot(data=data_frame,
                  x=xaxis,
                  y=yaxis,
                  palette='Blues')

    # Render once; b64encode replaces encodestring, which no longer exists
    # in current Python versions.
    buffer = BytesIO()
    plt.savefig(buffer, format='png')
    data = base64.b64encode(buffer.getvalue()).decode('ascii')

    script = '''<img src="data:image/png;base64,{}";/>'''
    return script.format(data)
Example #25
0
def plot_week_data(df, sample_type, metric, hue=None, hide_donor_baseline=False, hide_control_baseline=False, dm=None, show_legend=True, label_axes=True):
    """Plot ``metric`` per week for the autism group with reference lines.

    The 'week' and ``metric`` columns of ``df`` are converted to numbers in
    place (unparseable values become NaN). A white box plot with a swarm
    overlay is drawn for the rows selected by ``filter_sample_md``; optional
    horizontal median lines are added for the control group, the two donor
    groups and, if ``dm`` is given, the between-neurotypical distances.
    Colours come from the module-level ``palette``.

    :param df: Sample metadata frame; modified in place as described above.
    :param sample_type: Value of 'SampleType' to select.
    :param metric: Name of the column plotted on the y-axis.
    :param hue: Optional column for colouring the swarm points.
    :param hide_donor_baseline: Skip the two donor median lines.
    :param hide_control_baseline: Skip the control median line.
    :param dm: Optional distance matrix passed to
        ``inter_neurotypical_distances``.
    :param show_legend: Place a legend to the right of the axes, or remove it.
    :param label_axes: If false, both axis labels are blanked.
    :return: The new figure.
    """
    for column in ('week', metric):
        df[column] = pd.to_numeric(df[column], errors='coerce')

    selection = [('SampleType', sample_type), ('Group', 'autism')]
    asd_data = filter_sample_md(df, selection).sort_values(by='week')

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax = sns.boxplot(data=asd_data, x='week', y=metric, color='white', ax=ax)
    ax = sns.swarmplot(data=asd_data, x='week', y=metric, hue=hue, palette=palette, ax=ax)

    def median_line(values, color_key, linestyle, label_template):
        # Horizontal line at the median; the label reports the sample count.
        ax.axhline(np.median(values),
                   color=palette[color_key], linestyle=linestyle,
                   label=label_template % len(values))

    if not hide_control_baseline:
        control = control_metric(df, sample_type, metric=metric)
        median_line(control, 'neurotypical', '--', 'neurotypical (median; n=%d)')

    if not hide_donor_baseline:
        donor_initial = donor_metric(df, metric=metric, group='donor-initial', sample_type=sample_type)
        donor_maintenance = donor_metric(df, metric=metric, group='donor-maintenance', sample_type=sample_type)
        median_line(donor_initial, 'donor', '--', 'donor (median; n=%d)')
        median_line(donor_maintenance, 'donor', ':', 'donor (median; n=%d)')

    if dm is not None:
        inter_nt = inter_neurotypical_distances(df, dm, sample_type=sample_type).condensed_form()
        median_line(inter_nt, 'neurotypical', '-.', 'between neurotypical distance (median; n=%d)')

    if show_legend:
        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    else:
        ax.legend().remove()

    if not label_axes:
        ax.set_xlabel('')
        ax.set_ylabel('')
    return fig
Example #26
0
# Joint distribution of v1 and v2 as a scatter plot with equal axis scaling.
# The data vectors are passed by keyword: newer seaborn releases no longer
# accept them positionally.
grid = sns.jointplot(x=v1, y=v2, alpha=0.4)
grid.ax_joint.set_aspect('equal')

# In[22]:

# Same data as a hexbin plot.
sns.jointplot(x=v1, y=v2, kind='hex')

# In[23]:

# set the seaborn style for all the following plots
sns.set_style('white')

# Same data as a kernel density estimate without gap to the marginal plots.
sns.jointplot(x=v1, y=v2, kind='kde', space=0)

# In[24]:

iris = pd.read_csv('iris.csv')
iris.head()

# In[25]:

# NOTE(review): newer seaborn calls this parameter `height`; `size` is kept
# because it also works on the older releases this notebook was written for.
sns.pairplot(iris, hue='Name', diag_kind='kde', size=2)

# In[26]:

# Swarm plot (left) and violin plot (right) of petal length per species.
plt.figure(figsize=(8, 6))
plt.subplot(121)
sns.swarmplot(x='Name', y='PetalLength', data=iris)
plt.subplot(122)
sns.violinplot(x='Name', y='PetalLength', data=iris)
                 color='r',
                 yerr=std_RE[1],
                 error_kw=dict(ecolor='black', lw=2, capsize=5, capthick=2),
                 label='DMS-PF')
 
# Finish the bar chart started above: label the axes, put one tick per graph
# metric at `index + bar_width`, add the legend and save it as 'avg_RE.png'.
plt.xlabel('Graph metric')
plt.ylabel('Relative Error')
plt.xticks(index + bar_width, ('GE', 'LE', 'CC', 'CP', 'EC', 'BC', 'PC', 'M'))
plt.legend(loc='best')
plt.tight_layout()
fig.savefig('avg_RE.png')

##############################################################################
# try seaborn plots using above data
##############################################################################
# New figure; `fig` and `ax` are rebound here.
fig, ax = plt.subplots()
# RE[1] is labelled with six row names (S1..S6) and eight metric columns.
df=pd.DataFrame(data = RE[1],                
                index = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'],
                columns = ['GE', 'LE', 'CC', 'CP', 'EC', 'BC', 'PC', 'M']
)

# One violin per metric column with the individual values as black dots.
# Neither call is given an explicit axes; `ax` is rebound to their result.
ax=sns.violinplot(data=df, scale='count')
ax=sns.swarmplot(data=df, color='black')


##############################################################################
# Show the plots on the screen
##############################################################################
plt.show()

Example #28
0
        axes[(row,col)] = plt.subplot2grid(gridshape, (row, col),sharex=axes[(2,col)],sharey=axes[(row,0)])
        
    if (col>0):
        plt.setp(axes[(row,col)].get_yticklabels(), visible=False)
    if (0<=col<=1) and (row<4):
        plt.setp(axes[(row,col)].get_xticklabels(), visible=False)
    if (2<=col<=3) and (row<3):
        plt.setp(axes[(row,col)].get_xticklabels(), visible=False)
        
    axes[(row,col)].plot(npr.randn(col+5),color=colours[i])
   
   

# Small demo frame: two groups ("P1", "P2") of ten random values each, with
# "axis" numbering the values 0-9 inside every group.
df = pd.DataFrame(columns=["axis","value","other"])
# range objects cannot be concatenated with "+" on Python 3; build lists.
df["axis"] = list(range(10)) + list(range(10))
df["value"] = np.concatenate((npr.randn(10),npr.randn(10)+1))
df["other"] = ["P1"]*10+["P2"]*10

# Single-argument print() behaves the same on Python 2 and 3.
print(df)

# One swarm per group on ax1, coloured by the position inside the group.
sns.swarmplot(y=df["value"],ax=ax1,hue=df["axis"],palette=colours,x=df["other"])

ax1.set_ylabel("")
ax1.set_xlabel("")
ax1.legend_.remove()

#fig.tight_layout()#pad=0.4, w_pad=0.5, h_pad=1.0)
plt.show(block=True)


# Collect the first tab-separated field of every line of sortedSrcFile;
# the resulting list fixes the category order on the x-axis below.
for line in sortedSrcFile:
    colList = line.rstrip().split("\t")
    cytobandId = colList[0]
    
    sourceElementOrder.append(cytobandId)


##### Make plot
#################



# Wide, short figure for the swarm plot.
fig = plt.figure(figsize=(25,5))
# Earlier variant that plotted hotL1Df instead of df:
#ax = sns.swarmplot(x='cytobandId', y='nbTransductions', data=hotL1Df, size=3, edgecolor="gray", order=sourceElementOrder)
ax = sns.swarmplot(x='cytobandId', y='nbTransductions', data=df, size=3, edgecolor="gray", order=sourceElementOrder)

### Axis labels
ax.set_xlabel('')
ax.set_ylabel('# transductions')

# Rotate the tick labels: y labels horizontal, x labels vertical
for item in ax.get_yticklabels():
    item.set_rotation(0)

for item in ax.get_xticklabels():
    item.set_rotation(90)

## Y ticks: every 10 from 0 to 90
ax.set(yticks=np.arange(0,91,10))
fig, ax = plt.subplots()

# Violin layer: widths scaled by observation count, box-plot interior.
violin_options = dict(
    palette="Set3",
    inner='box',
    scale="count",
    bw=0.1,
    alpha=1.0,
    cut=0,
    linewidth=1.5,
    orient=orientation,
    zorder=0,
)
ax = sb.violinplot(data=df, **violin_options)

# Faint individual observations on the same axes.
sb.swarmplot(data=df, color='k', size=2, alpha=0.25, ax=ax, orient=orientation)

# The categorical axis is labelled 'IBR model' and the value axis carries
# the metric name; which is which depends on the orientation. Any other
# orientation value leaves the labels untouched.
is_vertical = orientation == 'v'
if is_vertical or orientation == 'h':
    if is_vertical:
        x_text, y_text, category_axis = 'IBR model', metric, 'x'
    else:
        x_text, y_text, category_axis = metric, 'IBR model', 'y'
    ax.set_xlabel(x_text)
    ax.set_ylabel(y_text)
    ax.tick_params(axis=category_axis, labelsize=8)

# width * height
fig.set_size_inches(8, 5)
    plt.xticks(rotation=60)
    # show and save off the graph
    plt.tight_layout(pad=3.0, w_pad=3.0, h_pad=2.0)
    plt.savefig('data/' + str(season) + 'Playoffs_Save_perc_SOGA.png',
                bbox_inches='tight',
                pad_inches=0.5)
    #plt.show()
    plt.clf()
    # swarm plots on blocks for and against
    fig = plt.figure(figsize=(12, 12))
    fig.suptitle('Blocks Against vs Blocks for per Game by Division' + '' +
                 str(season) + '' + 'Playoffs',
                 fontsize=14)

    plt.subplot(4, 2, 1)
    sns.swarmplot(x='Ev_Team', y='Blocks_A', data=metroDf)
    # Label the axes
    plt.xlabel('Metro Divison')
    plt.ylabel('Blocks Against')
    plt.xticks(rotation=60)

    plt.subplot(4, 2, 2)
    sns.swarmplot(x='Ev_Team', y='Blocks_for', data=metroDf)
    plt.xlabel('Atlantic Divison')
    plt.ylabel('Blocks for')
    plt.xticks(rotation=60)

    plt.subplot(4, 2, 3)
    sns.swarmplot(x='Ev_Team', y='Blocks_A', data=atlanticDf)
    plt.xlabel('Atlantic Divison')
    plt.ylabel('Blocks Against')
Example #32
0
sns.pointplot(data=df, x="연령대코드(5세단위)", y="신장(5Cm 단위)", hue="음주여부", ci="sd")
sns.pointplot(data=df, x="연령대코드(5세단위)", y="혈색소", ci=None)

■■■■■ boxplot 그래프
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="성별코드")
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")

■■■■■ violinplot 그래프
sns.violinplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부", split=True)
sns.violinplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부", split=True)

■■■■■ swarmplot 그래프
sns.swarmplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.swarmplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.swarmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부")

■■■■■ lmplot 그래프  # 회귀선을 볼 수 있다  # col로 구분하여 표를 나눠서 그릴 수 있다
sns.lmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부")
sns.lmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부", col="성별코드")

★★★★★★★★★수치형 데이터★★★★★★★★★★
■■■■■ scatterplot 그래프 X,Y 수치형 데이터
sns.scatterplot(data=df, x="(혈청지오티)AST", y="(혈청지오티)ALT")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="음주여부")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="허리둘레")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="음주여부", size="체중(5Kg 단위)") # Size를 구분하여 작성가능
Example #33
0
# importing packages
import seaborn as sns
import matplotlib.pyplot as plt

# Fetch seaborn's bundled "tips" example table.
data = sns.load_dataset("tips")

# One swarm of total_bill per day, drawn with marker size 5.
swarm_options = {"x": "day", "y": "total_bill", "size": 5}
sns.swarmplot(data=data, **swarm_options)
plt.show()
Example #34
0
        'pct_assigned_GRCh38', 'pct_remain_after_dedupe_1',
        'pct_remain_after_dedupe_2'
    ]

    meta_all = []
    for k, v in meta.items():
        this = v[cols_to_keep]
        this.insert(1, 'batch', k)
        meta_all.append(this)
    meta_all = pd.concat(meta_all, axis=0)
    meta_all.insert(5, 'assigned_GRCh38',
                    meta_all.read_count * meta_all.pct_assigned_GRCh38 / 100.)
    meta_all.to_excel(os.path.join(outdir, "metadata_all.xlsx"))

    # compare raw read counts
    ax = sns.swarmplot(data=meta_all, x='batch', y='read_count')
    ax.figure.savefig(os.path.join(outdir, 'raw_read_counts.png'), dpi=200)
    ax.cla()

    # mapped read counts
    ax = sns.swarmplot(data=meta_all, x='batch', y='uniquely_mapped_GRCh38')
    ax.figure.savefig(os.path.join(outdir, 'uniquely_mapped_read_counts.png'),
                      dpi=200)
    ax.cla()

    # assigned read counts
    ax = sns.swarmplot(data=meta_all, x='batch', y='assigned_GRCh38')
    ax.figure.savefig(os.path.join(outdir, 'assigned_read_counts.png'),
                      dpi=200)
    ax.cla()
Example #35
0
import matplotlib.pyplot as plt

sns.set(style="ticks")

# Single figure whose x axis is logarithmic.
f, ax = plt.subplots(figsize=(7, 6))
ax.set_xscale("log")

# Example "planets" table shipped with seaborn.
planets = sns.load_dataset("planets")

# Both layers plot distance (x) against discovery method (y).
shared_mapping = dict(x="distance", y="method", data=planets)

# Horizontal boxes with whiskers spanning the full 0-100 percentile range.
sns.boxplot(whis=[0, 100], palette="vlag", **shared_mapping)

# One small dark dot per observation on top of the boxes.
sns.swarmplot(size=2, color=".3", linewidth=0, **shared_mapping)

# Cosmetic clean-up: vertical grid lines, no y label, trimmed spines.
ax.xaxis.grid(True)
ax.set(ylabel="")
sns.despine(trim=True, left=True)
# Use the ASCII hyphen for minus signs on tick labels -- presumably needed
# because the font in use lacks the Unicode minus glyph; TODO confirm.
matplotlib.rcParams['axes.unicode_minus'] = False

# In[47]:

# Use a boxplot to look at the distribution of player on-base percentage per team

import seaborn as sns

sns.boxplot(data=player_stat, x='팀', y='출루율')

# In[48]:

# Look at per-team on-base percentage using a swarmplot together with a boxplot

sns.boxplot(data=player_stat, x='팀', y='출루율')
sns.swarmplot(data=player_stat, x='팀', y='출루율')

# In[59]:

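# When a swarmplot and a boxplot are drawn together their colours overlap and are hard to tell apart.
# In that case, remove the boxplot colours and draw it minimally for a cleaner result.
# showcaps=False --> hide the horizontal cap lines above/below the box
# whiskerprops={'linewidth': 0} --> hide the vertical whisker lines above/below the box
# showfliers=False --> do not draw outliers that fall outside the box range
# boxprops={'facecolor': 'None'} --> remove the box fill colour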

sns.boxplot(data=player_stat,
            x='팀',
            y='출루율',
            showcaps=False,
            whiskerprops={'linewidth': 0},
def main():
    """Render the Streamlit EDA page for the Pima diabetes dataset.

    The page downloads the diabetes CSV, shows it as a table, and then offers
    one checkbox per optional view: summary statistics, skew, class balance,
    a line chart, univariate plots, correlation views, pair plots, and
    standardized violin/swarm plots.  Nothing is returned; all output goes
    to the Streamlit page.

    NOTE(review): several bare expression statements below (``df.shape``,
    ``df_n``, tuples of expressions) are kept on purpose.  They appear to
    rely on Streamlit "magic", which writes a standalone variable or
    expression to the page -- confirm magic is enabled for this app.
    """
    # Sidebar
    st.sidebar.header("About -")
    st.sidebar.info("pdm04_st_Mid_exam")

    # Title
    st.title("Mid exam - pdm04, 김채영")
    st.header("- EDA of Pima diabetes data -")

    # Every st.pyplot() call below is argument-less (uses the global pyplot
    # figure); this silences the corresponding deprecation warning.
    st.set_option('deprecation.showPyplotGlobalUse', False)

    # Get the data from github
    df = pd.read_csv("https://github.com/Redwoods/Py/raw/master/pdm2020/my-note/py-pandas/data/diabetes.csv")

    st.dataframe(df)

    # Summary statistics, shape, and a missing-value check / clean-up
    if st.checkbox("Show Data description"):
        st.dataframe(df.describe())
        # shape
        st.subheader("shape")
        df.shape

        st.subheader("Check & cleaning data")
        df.isnull().values.any(), df.isna().sum()

        # Named `columns` rather than `vars` to avoid shadowing the builtin.
        columns = df.columns
        st.write(columns)
        # Rows with missing values are dropped only while this box is ticked.
        df = df[columns].dropna()
        df.shape

    if st.checkbox("Skew of attribute distributions"):
        skew = df.skew()
        st.write(skew)
        st.markdown('- 데이터 왜곡도')

    st.markdown("* * *")

    st.header("- Visualizing data -")

    # Class balance of the Outcome column
    st.subheader("Check the balance of classes in the data through plot")
    if st.checkbox("Outcome plot"):
        classes=df.Outcome
        # Pass the series as x=: recent seaborn releases treat the first
        # positional argument as `data`, not as the variable to count.
        sns.countplot(x=classes, label='count')
        st.pyplot()
        # Count per label explicitly; unpacking value_counts() depends on
        # frequency order and on there being exactly two classes.
        nDB = (classes == 0).sum()
        DB = (classes == 1).sum()
        st.write('False: non-diabetes',nDB)
        st.write('True: diabetes',DB)

        classes.value_counts(), type(classes)
        st.text("0 : 정상인, 1 : 당뇨병 환자")

    st.markdown("* * *")

    st.subheader("Show the data as a chart")
    if st.checkbox("chart"):
        st.line_chart(df)

    st.markdown("* * *")

    st.subheader("Univariate plots:")

    if st.checkbox("Histograms"):
        st.subheader("Histograms")
        plt.rcParams['figure.figsize'] = [12, 10] # set the figure size
        st.write(df.hist())
        st.pyplot()

    if st.checkbox("Density Plots"):
        st.subheader("Density Plots")
        st.write(df.plot(kind='density', subplots=True, layout=(3,3), sharex=False))
        st.pyplot()

    if st.checkbox("Box and Whisker Plots"):
        st.subheader("Box and Whisker Plots")
        st.write(df.plot(kind= 'box', subplots=True, layout=(3,3), sharex=False, sharey=False))
        st.pyplot()

    st.markdown("* * *")

    st.subheader("Multivariate Plots:")

    if st.checkbox("Correlation plot"):
        st.subheader("Correlation plot")
        plt.figure(figsize=(12,10))
        sns.heatmap(df.corr(),annot=True, cmap= "RdYlGn", vmin=-1, vmax=1)
        st.pyplot()

    if st.checkbox("Compute correlation matrix"):
        st.subheader("Correlations of attributes in the data")
        correlations = df.corr(method = 'pearson')
        st.write(correlations)
        st.markdown('- 값이 1에 가까울수록 상관성이 있음!')

    if st.checkbox("result"):
        st.markdown('- 상관성 분석 결과\n'
            '   * Age vs. Pregnancies : 0.54\n'
            '   * Glucose vs. Outcome : 0.47\n'
            '   * SkinThickness vs. Insulin : 0.44\n'
            '   * SkinThickness vs. BMI : 0.39\n')
        st.markdown('- 상관성이 높은 변수들에 대한 좀 더 자세한 시각화가 필요하다.')

    st.markdown("* * *")

    # Import required package
    from pandas.plotting import scatter_matrix
    plt.rcParams['figure.figsize'] = [12, 12]

    if st.checkbox("Scatter Plot Matrix"):
        st.subheader("Scatter Plot Matrix")
        scatter_matrix(df)
        # No plt.show() here: the figure is handed to the page by st.pyplot().
        st.pyplot()

    if st.checkbox("Scatter Plot_1"):
        st.subheader("Scatter Plot")
        sns.pairplot(df, hue="Outcome", markers=["o", "s"],palette="husl")
        st.pyplot()

    if st.checkbox("Scatter Plot_2"):
        st.subheader("0, 1을 noDM, DM으로 변경")
        # Work on a copy so the numeric Outcome column of df stays intact.
        df_temp = df.copy()
        df_temp['Outcome'] = df_temp['Outcome'].replace([0, 1],['noDM', 'DM'])
        sns.pairplot(df_temp, hue='Outcome', markers=["o", "s"],palette="husl")
        st.pyplot()

    st.markdown("* * *")

    # Pair plots restricted to hand-picked column subsets
    if st.checkbox("6 high correlation"):
        st.subheader("상관성이 높은 6개의 특성에 대한 산포도")
        high_corr = ['Pregnancies', 'Glucose', 'SkinThickness', 'Insulin', 'BMI','Age', 'Outcome']
        df_temp2 = df.copy()
        df_temp2['Outcome'] = df_temp2['Outcome'].replace([0, 1],['noDM', 'DM'])
        sns.pairplot(df_temp2[high_corr], hue='Outcome')
        st.pyplot()

    if st.checkbox("3 high correlation"):
        st.subheader("상관성이 높은 3개의 특성에 대한 산포도")
        highest_corr = ['Pregnancies', 'Age', 'Outcome']
        df_temp3 = df.copy()
        df_temp3['Outcome'] = df_temp3['Outcome'].replace([0, 1],['noDM', 'DM'])
        sns.pairplot(df_temp3[highest_corr], hue='Outcome')
        st.pyplot()

    st.markdown("* * *")

    st.subheader("Advanced plots:")

    if st.checkbox("Standarization of data and Violinplot"):
        st.markdown('- Standarization of data (Normalization)')
        # Column-wise z-score: subtract the mean, divide by the std.
        df_n = (df - df.mean())/df.std()
        df_n

        # Original (unscaled) Outcome next to the first eight scaled columns.
        y=df.Outcome
        df2=pd.concat([y, df_n.iloc[:,0:8]], axis=1)
        y.shape,df2.shape

        # Long format: one row per (Outcome, feature name, scaled value).
        df3=pd.melt(df2,id_vars='Outcome', var_name='features',value_name='values')
        df3.head(), df3.shape

        st.subheader("Violinplot")
        plt.figure(figsize=(10,10))
        sns.violinplot(x='features', y='values', hue='Outcome', data=df3, split=True, inner='quart')
        plt.xticks(rotation=45)
        st.pyplot()

        # Nested inside the violin-plot branch because it reuses df3.
        if st.checkbox("Customizing seaborn plot"):
            st.subheader("Customizing seaborn plot")
            sns.set(style='whitegrid', palette='muted')
            plt.figure(figsize=(10,10))
            sns.swarmplot(x='features', y='values', hue='Outcome', data=df3)
            plt.xticks(rotation=45)
            st.pyplot()

    st.markdown("* * *")
    "Life expectancy at birth, total (years)",
    "Development of Life Expectancy by Region\nby Year since 1960",
    y_scale="linear")

#%%
f, ax = plt.subplots(figsize=(10, 9))

# "ticks" theme with light-grey solid grid lines switched on.
grid_style = {
    'axes.grid': True,
    'grid.color': '.8',
    'grid.linestyle': '-',
}
sns.set_style("ticks", grid_style)

# Larger fonts and thicker lines for this figure.
font_and_line_sizes = {
    'axes.titlesize': 18,
    'lines.linewidth': 3,
    'axes.labelsize': 16,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
}
plt.rcParams.update(font_and_line_sizes)

plt.title("Development of Life Expectancy by Country\nby Decade since 1960",
          fontdict={"fontsize": 20})

# One swarm per decade, points coloured by region.
sns.swarmplot(
    data=mean_by_country_and_decade,
    x="Decade",
    y="Life expectancy at birth, total (years)",
    hue="Region",
    palette=region_palette,
)

#%% [markdown]
#### Conclusions - Development of Life Expectancy Over Time
#The following observations can be made from the data above:
# - The gap in life expectancy has closed (more than halved) between 6 of the 7 regions;
# - Meanwhile life expectancy for the Sub-Saharan Africa region has not improved at the same rate, mainly as a result of a plateau in the 1990s;
# - The net result is that the gap between those countries with the worst and best record for life expectancy has not closed appreciably since 1960.
#
#%% [markdown]
### Stage 6.5 - Analysing Gross Domestic Product (GDP)
#Using a number of techniques to get a feel for the GDP data:
# - Looking at top 10 and bottom 10 countries in 2018;
# - Distribution of data by region in 2018;
# - Analysing how it has developed over time since 1960.
#
Example #39
0
def _scatter_by_significance(ax, series):
    """Scatter point sets in silver (non-significant) and red (significant).

    :param ax: Matplotlib axes to draw on.
    :param series: Sequence of ``(x, y, group, significant)`` tuples.  ``x``
        and ``y`` are equal-length 1-D arrays, ``significant`` is a boolean
        mask of the same length, and ``group`` is either another boolean
        mask that restricts the points or ``None`` for "all points".

    Every silver set is drawn before any red set, so red points end up on
    top of the silver ones.
    """
    series = list(series)
    for colour, want_significant in (('silver', False), ('r', True)):
        for x, y, group, significant in series:
            chosen = (significant
                      if want_significant else np.logical_not(significant))
            if group is not None:
                chosen = np.logical_and(group, chosen)
            ax.scatter(x[chosen],
                       y[chosen],
                       s=1,
                       c=colour,
                       marker='.',
                       alpha=0.4)


def _format_square_axes(ax, upper, ticks, xlabel, ylabel):
    """Give both axes the same ticks and the range ``0..upper``, then label them."""
    ax.set_yticks(ticks)
    ax.set_xticks(ticks)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xlim((0, upper))
    ax.set_ylim((0, upper))


def plot():
    """Draw the figures derived from ``spade_stats.p`` and save them as PDFs.

    Reads the pickled dict ``spade_stats.p`` from the working directory
    (keys used: ``congru_stats``, ``incongru_stats``, ``candidate_per_sess``)
    and writes, in order:

    * ``spade_4su_pattern_density.pdf``
    * ``4su_candi_count.pdf`` (also shown on screen via ``plt.show()``)
    * ``spade_4su_pattern_correct_error.pdf``
    * ``spade_4su_raw_pattern_correct_error.pdf``
    * ``spade_4su_pattern_prefered_nonprefered.pdf``
    * ``spade_4su_pattern_selectivity_index.pdf``
    * ``spade_4su_raw_pattern_prefered_nonprefered.pdf`` (written twice, see
      the note at the end of the function)

    Returns nothing.  The Wilcoxon signed-rank result for the last panel is
    printed to stdout.
    """
    # Font type 42 and Arial, applied once for every figure below.
    rcParams['pdf.fonttype'] = 42
    rcParams['ps.fonttype'] = 42
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Arial']

    # Context manager so the file handle is closed right after loading.
    with open('spade_stats.p', 'rb') as stats_file:
        r = pickle.load(stats_file)
    congru_stats = r['congru_stats']
    incongru_stats = r['incongru_stats']
    candidate_per_sess = r['candidate_per_sess']

    ###################### pattern density per session
    # Each entry cs is indexed as cs[0] (compared against 'sess_ids'), cs[1]
    # and cs[2] (the denominators for the congruent / incongruent density).
    # Entries with a denominator of 1000 or less are skipped.
    congru_sess_ids = np.array(congru_stats['sess_ids'])
    incongru_sess_ids = np.array(incongru_stats['sess_ids'])
    congru_dens = []
    incongru_dens = []
    for cs in candidate_per_sess:
        if cs[1] > 1000:
            congru_dens.append(np.sum(congru_sess_ids == cs[0]) / cs[1])
        if cs[2] > 1000:
            incongru_dens.append(np.sum(incongru_sess_ids == cs[0]) / cs[2])

    # NOTE(review): the bootstrap intervals are computed but not used by any
    # figure below; kept so the function performs the same work as before.
    congru_boot = boot.ci(congru_dens, np.mean, n_samples=1000)
    incongru_boot = boot.ci(incongru_dens, np.mean, n_samples=1000)

    congru_sem = np.std(congru_dens) / np.sqrt(len(congru_dens))
    incongru_sem = np.std(incongru_dens) / np.sqrt(len(incongru_dens))

    # p_value = permutation_test(congru_dens,incongru_dens,method='approximate',num_rounds=10000)

    (fh, ax) = plt.subplots(1, 1, figsize=(1.5 / 2.54, 4 / 2.54), dpi=300)
    # Both means are expressed relative to the incongruent mean.
    incongru_mean = np.mean(incongru_dens)
    mm = np.array([incongru_mean, np.mean(congru_dens)]) / incongru_mean
    ax.bar(1, mm[0], color='k', edgecolor='k')
    ax.bar(2, mm[1], color='w', edgecolor='k')
    ax.errorbar([1, 2],
                mm,
                np.hstack((incongru_sem, congru_sem)) / incongru_mean,
                color='none',
                ecolor='grey',
                capsize=3)
    ax.set_yscale('log')
    ax.set_ylim([1e-1 * 2, 1e2])
    ax.set_ylabel('Norm. motif density')
    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Incongru.', 'Congruent'], rotation=45, ha='right')
    # plt.close('all')
    fh.savefig('spade_4su_pattern_density.pdf', bbox_inches='tight')

    ### candidates: the cs[1] / cs[2] values above the same 1000 threshold
    congru_candi = [cs[1] for cs in candidate_per_sess if cs[1] > 1000]
    incongru_candi = [cs[2] for cs in candidate_per_sess if cs[2] > 1000]

    (fh, ax) = plt.subplots(1, 1, figsize=(1 / 2.54, 4 / 2.54), dpi=300)

    # Random horizontal jitter in [0.9, 1.1) and [2.9, 3.1).
    ax.scatter(np.random.random(len(incongru_candi)) * 0.2 + 0.9,
               incongru_candi,
               s=4,
               c='k',
               alpha=0.5,
               edgecolors='none')

    ax.scatter(np.random.random(len(congru_candi)) * 0.2 + 2.9,
               congru_candi,
               s=4,
               c='k',
               alpha=0.5,
               edgecolors='none')
    # Mean +/- SEM markers beside each cloud.
    ax.errorbar(4,
                np.mean(congru_candi),
                np.std(congru_candi) / np.sqrt(len(congru_candi)),
                fmt='ro',
                ecolor='r',
                elinewidth=0.5,
                capsize=2,
                ms=4,
                mfc='none')

    ax.errorbar(0,
                np.mean(incongru_candi),
                np.std(incongru_candi) / np.sqrt(len(incongru_candi)),
                fmt='ro',
                ecolor='r',
                elinewidth=0.5,
                capsize=2,
                ms=4,
                mfc='none')

    ax.set_yscale('log')
    ax.set_xlim([-1, 5])
    ax.set_xticks([])
    ax.set_yticks([1000, 100000, 10000000])
    plt.show()

    fh.savefig('4su_candi_count.pdf', bbox_inches='tight')
    # NOTE(review): the rank-sum result is discarded -- print or return it
    # if it is meant to be reported.
    stats.ranksums(incongru_candi, congru_candi)

    #################### correct vs. error trials, per-spike rates
    perhz_pvalues = np.array(congru_stats['perHz_pvalues'])
    perhz_mm = np.array(congru_stats['perHz_mm'])
    s1sigsel = perhz_pvalues[:, 1] < 0.05
    s2sigsel = perhz_pvalues[:, 2] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    # Columns 1 and 3 go on x, columns 0 and 2 on y.
    _scatter_by_significance(ax, [
        (perhz_mm[:, 1], perhz_mm[:, 0], None, s1sigsel),
        (perhz_mm[:, 3], perhz_mm[:, 2], None, s2sigsel),
    ])
    ax.plot([0, 0.26], [0, 0.26], '--k')
    _format_square_axes(ax, 0.26, [0, 0.1, 0.2],
                        'patterns / spike / s, error trial',
                        'patterns / spike / s, correct trial')
    fh.savefig('spade_4su_pattern_correct_error.pdf', bbox_inches='tight')

    ############################# correct vs. error trials, raw rates
    motif_pvalues = np.array(congru_stats['motif_pvalues'])
    raw_mm = np.array(congru_stats['mm'])
    raw_rate = raw_mm / 6  # every raw-rate panel plots 'mm' divided by 6
    s1sigsel = motif_pvalues[:, 1] < 0.05
    s2sigsel = motif_pvalues[:, 2] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    _scatter_by_significance(ax, [
        (raw_rate[:, 1], raw_rate[:, 0], None, s1sigsel),
        (raw_rate[:, 3], raw_rate[:, 2], None, s2sigsel),
    ])
    ax.plot([0, 3.6], [0, 3.6], '--k')
    _format_square_axes(ax, 3.6, np.arange(0, 4),
                        'patterns / s, error trial',
                        'patterns / s, correct trial')
    fh.savefig('spade_4su_raw_pattern_correct_error.pdf', bbox_inches='tight')

    ############################# preferred vs. non-preferred, per-spike rates
    prefered_samp = np.array(congru_stats['prefered_samp'])
    s1sel = prefered_samp == 1
    s2sel = prefered_samp == 2
    sigsel = perhz_pvalues[:, 0] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    # Rows with prefered_samp == 1 put column 0 on y and column 2 on x;
    # rows with prefered_samp == 2 use the opposite assignment.
    _scatter_by_significance(ax, [
        (perhz_mm[:, 2], perhz_mm[:, 0], s1sel, sigsel),
        (perhz_mm[:, 0], perhz_mm[:, 2], s2sel, sigsel),
    ])
    ax.plot([0, 0.26], [0, 0.26], '--k')
    _format_square_axes(ax, 0.26, [0, 0.1, 0.2],
                        'patterns / spike / s, non-prefered',
                        'patterns / spike / s, prefered')
    fh.savefig('spade_4su_pattern_prefered_nonprefered.pdf',
               bbox_inches='tight')

    ########### selectivity index: (pref - nonpref) / (pref + nonpref)
    prefered_raw = np.hstack((raw_mm[s1sel, 0], raw_mm[s2sel, 2])) / 6
    nonpref_raw = np.hstack((raw_mm[s1sel, 2], raw_mm[s2sel, 0])) / 6
    selec_idx_raw = ((prefered_raw - nonpref_raw) /
                     (prefered_raw + nonpref_raw))

    prefered = np.hstack((perhz_mm[s1sel, 0], perhz_mm[s2sel, 2])) / 6
    nonpref = np.hstack((perhz_mm[s1sel, 2], perhz_mm[s2sel, 0])) / 6
    selec_idx = ((prefered - nonpref) / (prefered + nonpref))

    # Category 1 holds the raw-rate indices, category 2 the per-spike ones.
    swmy = np.hstack((selec_idx_raw, selec_idx))
    swmx = np.hstack(
        (np.ones_like(selec_idx_raw), np.ones_like(selec_idx) * 2))

    (fh, ax) = plt.subplots(1, 1, figsize=(15 / 2.54, 15 / 2.54), dpi=300)
    ax = sns.swarmplot(x=swmx, y=swmy, size=1, ax=ax, color='silver')
    # Box overlay without caps, whiskers, fliers, or fill.
    ax = sns.boxplot(x=swmx,
                     y=swmy,
                     showcaps=False,
                     boxprops={'facecolor': 'None'},
                     showfliers=False,
                     whiskerprops={'linewidth': 0},
                     ax=ax)

    ax.set_ylabel('Selectivity index')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Patterns / s', 'Patterns / spike'],
                       rotation=45,
                       ha='right')

    fh.savefig('spade_4su_pattern_selectivity_index.pdf', bbox_inches='tight')

    ##################################### preferred vs. non-preferred, raw rates
    sigsel = motif_pvalues[:, 0] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    _scatter_by_significance(ax, [
        (raw_rate[:, 2], raw_rate[:, 0], s1sel, sigsel),
        (raw_rate[:, 0], raw_rate[:, 2], s2sel, sigsel),
    ])
    ax.plot([0, 3.6], [0, 3.6], '--k')
    _format_square_axes(ax, 3.6, np.arange(0, 4),
                        'patterns / s, non-prefered',
                        'patterns / s, prefered')
    fh.savefig('spade_4su_raw_pattern_prefered_nonprefered.pdf',
               bbox_inches='tight')

    ### for comparison of r rather than fr
    # Same axes as the previous panel, but every point in black with no
    # significance colouring.
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    ax.scatter(raw_rate[s1sel, 2],
               raw_rate[s1sel, 0],
               s=1,
               c='k',
               marker='.',
               alpha=1)

    ax.scatter(raw_rate[s2sel, 0],
               raw_rate[s2sel, 2],
               s=1,
               c='k',
               marker='.',
               alpha=1)

    ax.plot([0, 3.6], [0, 3.6], '--', color='silver')

    # Paired test on the undivided 'mm' values (x values vs. y values).
    xx = np.hstack([raw_mm[s1sel, 2], raw_mm[s2sel, 0]])
    yy = np.hstack([raw_mm[s1sel, 0], raw_mm[s2sel, 2]])
    # print() accepts no arbitrary keyword arguments; pass the label as text.
    print('signedstat =', stats.wilcoxon(xx, yy))

    # (slope, intercept,rvalue,pvalue,stderr)=stats.linregress(xx,yy)
    # ax.plot([0,3.6],[intercept/6,slope*3.6+intercept/6],'--r')

    _format_square_axes(ax, 3.6, np.arange(0, 4),
                        'patterns / s, non-prefered',
                        'patterns / s, prefered')
    # NOTE(review): same filename as the previous panel, so this save
    # overwrites that PDF -- confirm whether a distinct name was intended.
    fh.savefig('spade_4su_raw_pattern_prefered_nonprefered.pdf',
               bbox_inches='tight')
Example #40
0
# Stack the per-row mean of the filtered pupil trace and its z-transformed
# counterpart into one long-format table, tagged by type and numeric label.
pupil_z = pup.ztransform_pupil_size(pupil_filt)
n_values = len(pupil_z)
pup_dat = np.hstack((np.mean(pupil_filt, axis=1), pupil_z))
label = ['pupil'] * n_values + ['pupil_z'] * n_values
label2 = [0] * n_values + [1] * n_values
df = pd.DataFrame({
    'pupil': pup_dat,
    'type': label,
    'label': label2,
    'correct': np.hstack((performance, performance))
})

sns.set_context('talk')
fig, ax = plt.subplots(1, 2, figsize=(15, 8))

raw_rows = df[df['type'] == 'pupil']
z_rows = df[df['type'] == 'pupil_z']

# Left panel: raw values, points coloured by 'correct', plus a grey box
# whose whiskers span the 20th-80th percentiles.
sns.swarmplot(data=raw_rows,
              x='type',
              y='pupil',
              hue='correct',
              alpha=0.7,
              ax=ax[0])
sns.boxplot(data=raw_rows,
            x='type',
            y='pupil',
            color='gray',
            whis=[20, 80],
            showfliers=False,
            ax=ax[0])

# Right panel: z-transformed values.
sns.swarmplot(data=z_rows,
              x='type',
              y='pupil',
              hue='correct',
              alpha=0.7,
              ax=ax[1])
Example #41
0
# Optionally swap the two biotype labels (0 <-> 1).
if flip:
    input_df['biotype'] = np.abs(input_df['biotype']-1) # works because we only ever have 2 biotypes

# Fill db with the output of zscore_by_group (defined elsewhere) per column.
for col in cols:
    db[col] = zscore_by_group(input_df[col], labels, healthy_group)

# Long format: one row per (id, biotype, diagnosis, variable, value).
db = pd.melt(db, id_vars=['id', 'biotype', 'diagnosis'], value_vars=cols)

# show diagnostic distributions for each biotype separately
sns.set_style('white')
fig, (ax1, ax2) = plt.subplots(figsize=(10, 7), nrows=2, sharex=True)
plt.subplots_adjust(left=0.125, bottom=0.15, right=0.9, top=0.85, wspace=0.25, hspace=0.25)
plt.suptitle('Diagnosis distribution per biotype')

# Top panel: biotype 0, tick labels blanked, zero line across the ticks.
sns.swarmplot(x="variable", y="value", hue="diagnosis", data=db.loc[db['biotype'] == 0], ax=ax1)
ax1.set_ylim([-4, 4])
ax1.set_title('Average-performing biotype')
ax1.set_xticklabels([], rotation=45, ha='right')
ax1.hlines(0,  ax1.xaxis.get_majorticklocs()[0],  ax1.xaxis.get_majorticklocs()[-1])

# Bottom panel: biotype 1, labelled with `names`.
sns.swarmplot(x="variable", y="value", hue="diagnosis", data=db.loc[db['biotype'] == 1], ax=ax2)
ax2.set_ylim([-4, 4])
ax2.set_title('Poor-performing biotype')
ax2.set_xticklabels(names, rotation=45, ha='right')
ax2.hlines(0,  ax1.xaxis.get_majorticklocs()[0],  ax1.xaxis.get_majorticklocs()[-1])

# Use pyplot directly: the `sns.plt` alias is not available in current
# seaborn releases, and `plt` is already in use above.
plt.savefig('biotype_yscores_per_diagnosis_and_biotype.pdf')
plt.close()

    # box plots of rank differences (ML vs naive) of top hits
    fig = plt.figure()
    fig.suptitle(x_to_plot_gt + " and " + y_to_plot_gt)
    top_percents = [10, 20, 50, 100]  # percent above which to do cutoff
    for i, top_percent in enumerate(top_percents):

        ax = fig.add_subplot(2, 2, i + 1)
        top_ranknum = int(len(df_ranked) * (1 - top_percent / 100))

        df_ranked_top = df_ranked[df_ranked[x_to_plot_gt] > top_ranknum].copy()

        df_ranked_top['ML vs GT'] = np.abs(
            df_ranked_top[x_to_plot_gt] -
            df_ranked_top[x_to_plot_ML]) + np.abs(df_ranked_top[y_to_plot_gt] -
                                                  df_ranked_top[y_to_plot_ML])
        df_ranked_top['naive vs GT'] = np.abs(
            df_ranked_top[x_to_plot_gt] -
            df_ranked_top[x_to_plot_naive]) + np.abs(
                df_ranked_top[y_to_plot_gt] - df_ranked_top[y_to_plot_naive])
        df_comp = df_ranked_top.melt(value_vars=['ML vs GT', 'naive vs GT'],
                                     var_name='model',
                                     value_name='rank difference')
        ax = sns.swarmplot(x='model',
                           y='rank difference',
                           data=df_comp,
                           color=".25",
                           alpha=0.5)
        ax = sns.boxplot(x='model', y='rank difference', data=df_comp)
        ax.set_title("top {}%".format(top_percent))
        ax.set_xlabel('')
        plt.tight_layout()
Example #43
0
# All three plots put Award_Amount on the x axis and Model Selected on the y axis.
axis_mapping = {'x': 'Award_Amount', 'y': 'Model Selected'}

# Strip plot with jitter enabled.
sns.stripplot(data=df, jitter=True, **axis_mapping)
plt.show()

# Swarm plot of the same columns, with the hue set to the Region.
sns.swarmplot(data=df, hue='Region', **axis_mapping)
plt.show()

# Box plot of the same columns; display it, then clear the current figure.
sns.boxplot(data=df, **axis_mapping)
plt.show()
plt.clf()
Example #44
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 15 22:15:25 2019

@author: ankusmanish
"""

#Write a program to draw swarm plot of “total bill” against day for a dataset given in url

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sea

# Read the tips table from tips.csv in the working directory.
data = pd.read_csv('tips.csv')

# Columns plotted against each other: day on x, total bill on y.
x = data['day']
y = data['total_bill']

plt.figure(figsize=(8, 8))
# Pass x and y by keyword: recent seaborn releases accept only `data`
# positionally, and keywords behave the same on older releases.
sea.swarmplot(x=x, y=y)
plt.xlabel('Day', fontsize=20)
plt.ylabel('Total Bill', fontsize=20)
plt.show()
Example #45
0
    "https://reneshbedre.github.io/assets/posts/anova/onewayanova.txt",
    sep="\t")
# Melt df into long format for statsmodels: one row per (index, treatment) pair.
treatment_columns = ['A', 'B', 'C', 'D']
df_melt = pd.melt(df.reset_index(),
                  id_vars=['index'],
                  value_vars=treatment_columns)
# Give the melted columns their final names.
df_melt.columns = ['index', 'treatments', 'value']

# %%
# Box plot of value per treatment with the individual observations swarmed on
# top, to eyeball how the treatments differ.
import matplotlib.pyplot as plt
import seaborn as sns
ax = sns.boxplot(data=df_melt, x='treatments', y='value', color='#99c2a2')
ax = sns.swarmplot(data=df_melt, x="treatments", y="value", color='#7d0013')
plt.show()
# %%
import scipy.stats as stats
# f_oneway takes one sample per group and returns the ANOVA F and p value.
fvalue, pvalue = stats.f_oneway(*(df[name] for name in treatment_columns))
print(fvalue, pvalue)
# 17.492810457516338 2.639241146210922e-05
# %%
# ANOVA table in an R-like layout.
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Fit an Ordinary Least Squares (OLS) model, then build the type-2 ANOVA table.
model = ols('value ~ C(treatments)', data=df_melt).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
Example #46
0
# Number of paintings per nationality (letter-value plot)
plt.figure(figsize=(20, 10))
sns.boxenplot(x="nationality",
              y="paintings",
              color="y",
              scale="linear",
              data=df1)
plt.title('Number of Painting in each nation')
# The x axis holds nationalities, so it is labelled 'Nation' like the swarm
# plot of the same column further down (it previously read 'Age Group').
plt.xlabel('Nation')
plt.ylabel('Number of Painting')
plt.xticks(rotation=60)
plt.show()

# Number of Painting by Genre
plt.figure(figsize=(20, 10))
sns.swarmplot(x=df1['genre'], y=df1['paintings'], color="black")
plt.xticks(rotation=60)
plt.title('Number of Painting by Genre')
plt.xlabel('Genre')
plt.ylabel('Number of Painting')
plt.show()

# Number of Painting by Nation
plt.figure(figsize=(20, 10))
sns.swarmplot(x=df1['nationality'], y=df1['paintings'], color="red")
plt.xticks(rotation=60)
plt.title('Number of Painting by Nation')
plt.xlabel('Nation')
plt.ylabel('Number of Painting')
plt.show()
    # Legend for the main axes; `construct_legend` is defined outside this excerpt.
    ax1.legend(construct_legend)
    # Dashed black reference line at 100 across every value of `t`.
    ax1.plot(t, 100*np.ones_like(t), 'k--')
    
    plt.ylim([80, 105])
    plt.xlim([-0.2, 2])
    ax1.set_xlabel('Time (s)')
    
    
    # percent change bar plots    
    # Inset axes inside ax1, 30% wide and 40% tall, placed with loc=4.
    ax2 = inset_axes(ax1, width="30%", height="40%", loc=4, borderpad=3)
    
    # Rows of df_percents whose 'index' column equals the current `construct`.
    df_barplot = df_percents[df_percents['index']==construct]

    # NOTE(review): neither seaborn call passes ax=ax2, so both draw on the
    # current axes; presumably that is the inset just created. Confirm.
    sns.swarmplot(x = 'exp', 
              y='mean percent', 
              color='black',
              data=df_barplot,
              order=colors.keys())
    
    # NOTE(review): both color='white' and palette=colors are given; confirm
    # which one is meant to determine the box fill.
    sns.boxplot(x='exp', 
                y='mean percent', 
                data=df_barplot, 
                # palette=colors,
                color='white',
                whis=1.5,
                showfliers=False,
                dodge=False,
                hue='exp',
                palette=colors,
                order=colors.keys(),
                width=0.5)
Example #48
0
           value='100-percentile2',
           estimator=np.median)
# Time-series plot of '100-percentile2' from `dfd`, split by 'condition', using
# the median as estimator and drawing each unit's trace (err_style="unit_traces").
# 'percentile1' and 'subj' are passed by position; presumably they are the time
# and unit columns. Verify against the tsplot signature.
# NOTE(review): sns.tsplot was deprecated and later dropped from seaborn;
# confirm the installed seaborn version still provides it.
sns.tsplot(dfd,
           'percentile1',
           'subj',
           condition='condition',
           value='100-percentile2',
           err_style="unit_traces",
           estimator=np.median)
plt.title('Rest before VS. Motor before')
plt.ylim(-1, 1.5)
# Dashed black diagonal from (0, 100) to (100, 0).
# NOTE(review): with the y-limits set to (-1, 1.5) just above, most of this
# line falls outside the visible range; confirm the intended scale.
plt.plot([0, 100], [100, 0], 'k--')
plt.show()

# Box plot of AUC per condition with the individual values overlaid.
sns.boxplot(x='condition', y='auc', data=aucs)
sns.swarmplot(x='condition', y='auc', data=aucs, color='k', alpha=0.5)
# Dump the rows of each condition.
print(aucs.loc[aucs['condition'] == 'exp'])
print(aucs.loc[aucs['condition'] == 'control'])
# Position-wise exp - control difference on the raw arrays, so the two
# selections are not aligned on their index labels. `.values` replaces
# `Series.as_matrix()`, which no longer exists in current pandas.
print(aucs.loc[aucs['condition'] == 'exp', 'auc'].values -
      aucs.loc[aucs['condition'] == 'control', 'auc'].values)
from scipy.stats import ttest_ind, ttest_1samp, ttest_rel, wilcoxon, ranksums
# Independent-samples t-test, paired t-test and Wilcoxon signed-rank test of
# exp against control AUCs.
print(
    ttest_ind(aucs.loc[aucs['condition'] == 'exp', 'auc'],
              aucs.loc[aucs['condition'] == 'control', 'auc']))
print(
    ttest_rel(aucs.loc[aucs['condition'] == 'exp', 'auc'],
              aucs.loc[aucs['condition'] == 'control', 'auc']))
print(
    wilcoxon(aucs.loc[aucs['condition'] == 'exp', 'auc'],
             aucs.loc[aucs['condition'] == 'control', 'auc']))
print(
Example #49
0
import seaborn as sns
import matplotlib.pyplot as plt

# Fetch the iris dataset through seaborn's dataset loader.
iris = sns.load_dataset("iris")

# One swarm of petal lengths per species.
sns.swarmplot(data=iris, x="species", y="petal_length")

# Display the figure.
plt.show()
Example #50
0
# coding=utf-8

# %matplotlib inline jupyter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats, integrate
import pandas as pd

# Global seaborn style, a fixed RNG seed derived from the word "categorical",
# and the three example datasets.
sns.set(style="whitegrid", color_codes=True)
np.random.seed(sum(ord(ch) for ch in "categorical"))
titanic = sns.load_dataset("titanic")
tips = sns.load_dataset("tips")
iris = sns.load_dataset("iris")

# Strip plots of total bill per day, second one with jitter requested explicitly.
sns.stripplot(data=tips, x="day", y="total_bill")
sns.stripplot(data=tips, x="day", y="total_bill", jitter=True)

# Swarm plots of the same columns, second one colored by sex.
sns.swarmplot(data=tips, x="day", y="total_bill")
sns.swarmplot(data=tips, x="day", y="total_bill", hue="sex")

# Box plot
sns.boxplot(data=tips, x="day", y="total_bill", hue="time")

# Violin plots
sns.violinplot(data=tips, x="day", y="total_bill", hue="time")

sns.violinplot(data=tips, x="total_bill", y="day", hue="time", split=True)
Example #51
0
# In[ ]:

# Mean sales with one line per Year, indexed by Week.
pivoted = pd.pivot_table(df_raw, values='sales', columns='Year', index='Week')
pivoted.plot(figsize=(12, 12))

# In[ ]:

# Mean sales with one line per Month, indexed by Day.
pivoted = pd.pivot_table(df_raw, values='sales', columns='Month', index='Day')
pivoted.plot(figsize=(12, 12))

# In[ ]:

# Mean sales per (Year, Month, item), swarmed per item and colored by Month.
temp_1 = df_raw.groupby(['Year', 'Month',
                         'item'])['sales'].mean().reset_index()
plt.figure(figsize=(12, 8))
# x and y are passed by keyword: recent seaborn releases reject positional
# x/y, and keywords behave the same on older releases.
sns.swarmplot(x='item', y='sales', data=temp_1, hue='Month')
# Place legend to the right
plt.legend(bbox_to_anchor=(1, 1), loc=2)

# In[ ]:

#In case the above plot is cluttered (which it is), try this (will create a grid for Year vs Month)
#sns.factorplot('item', 'sales', data=temp_1, hue = 'Month', col='Year',row='Month', kind='swarm', size = 5);

# In[ ]:

# Mean sales per (Year, Month) as a scatter without a regression fit.
temp_1 = df_raw.groupby(['Year', 'Month'])['sales'].mean().reset_index()
# NOTE(review): lmplot builds its own figure, so this plt.figure call
# presumably leaves an empty extra figure behind; confirm before removing.
plt.figure(figsize=(12, 8))
sns.lmplot(x='Month', y='sales', data=temp_1, hue='Year', fit_reg=False)

# In[ ]:
# Every call below passes x and y by keyword: recent seaborn releases reject
# positional x/y, and keywords behave the same on older releases.

# Box plot
sns.boxplot(x='day', y='total_bill', data=tips)
sns.boxplot(x='day', y='total_bill', data=tips, hue='smoker')

# violin plot
sns.violinplot(x='day', y='total_bill', data=tips)
sns.violinplot(x='day', y='total_bill', data=tips, hue='sex')
sns.violinplot(x='day', y='total_bill', data=tips, hue='sex', split=True)

# Strip plot
sns.stripplot(x='day', y='total_bill', data=tips)
sns.stripplot(x='day', y='total_bill', data=tips, jitter=True)
sns.stripplot(x='day', y='total_bill', data=tips, jitter=True, hue='sex')
# NOTE(review): newer seaborn spells stripplot's `split` as `dodge`; switch
# once the minimum supported seaborn version is known.
sns.stripplot(x='day',
              y='total_bill',
              data=tips,
              jitter=True,
              hue='sex',
              split=True)

# Swarm Plot
sns.swarmplot(x='day', y='total_bill', data=tips)

# Swarm and Violin Plot
sns.violinplot(x='day', y='total_bill', data=tips)
sns.swarmplot(x='day', y='total_bill', data=tips, color='black')

# Factor Plot
# NOTE(review): factorplot was renamed catplot in newer seaborn; migrate once
# the minimum supported seaborn version is known.
sns.factorplot(x='day', y='total_bill', data=tips, kind='bar')
sns.factorplot(x='day', y='total_bill', data=tips, kind='violin')
Example #53
0
# Grid of histograms: row 0 is the 'Real' group, row 1 the 'Mock' group, one
# column per subject in the order the subjects appear within the group.
# NOTE(review): the 2 x 10 grid assumes at most 10 subjects per group; confirm.
fig, axes = plt.subplots(2, 10)
# Collects one slope standard deviation per (group, subject, day).
stds = pd.DataFrame(columns=['std', 'group'])
for g, group in enumerate(['Real', 'Mock']):
    for s, subj in enumerate(df.loc[df.group == group, 'subj'].unique()):
        # Titles are set on the top row only, numbered by column position.
        axes[0, s].set_title('S'+str(s))
        for d, day in enumerate(df.loc[(df.group == group) & (df.subj == subj), 'day'].unique()):
            # Append the std of this subject's slopes on this day as a new row.
            stds.loc[len(stds)] = {'std': df.loc[(df.group == group) & (df.subj == subj) & (df.day == day), 'slope'].std(), 'group': group}
        # Density-normalized histogram of all of this subject's slopes, using
        # 50 evenly spaced bin edges between -0.7 and 0.7.
        axes[g, s].hist(df.loc[(df.group == group) & (df.subj == subj), 'slope'], np.linspace(-0.7, 0.7, 50), density=True)

# Label each row with its group name.
axes[0, 0].set_ylabel('Real')
axes[1, 0].set_ylabel('Mock')

#sns.pairplot(df, 'subj', vars=['slope'])
plt.show()

# Median std per group as bars, with every individual std swarmed on top in red.
sns.barplot(data=stds, x='group', y='std', estimator=np.median)
sns.swarmplot(data=stds, x='group', y='std', color='r')
plt.show()

# Slope samples of each group, shared by the density plots and the tests below.
real_slopes = df.loc[(df.group == 'Real'), 'slope']
mock_slopes = df.loc[(df.group == 'Mock'), 'slope']

# Kernel density estimate of the slopes, one curve per group.
sns.kdeplot(real_slopes)
sns.kdeplot(mock_slopes)
plt.show()

from scipy.stats import *
# Equal-variance tests (Bartlett, Levene): Real against Mock, then Real
# against itself.
print(bartlett(real_slopes, mock_slopes))
print(bartlett(real_slopes, real_slopes))
print(levene(real_slopes, mock_slopes))
print(levene(real_slopes, real_slopes))
# Normality test per group.
print(normaltest(real_slopes))
print(normaltest(mock_slopes))
# Load the titanic dataset through seaborn and keep a CSV copy of it.
titanic = sb.load_dataset("titanic")
titanic.to_csv("db_titanic.csv", index=False)
print(titanic)

# Horizontal bars: age per embarkation town.
sb.barplot(x="age", y="embark_town", orient='h', data=titanic)
mpl.show()

# Same data as vertical bars.
sb.barplot(x="embark_town", y="age", data=titanic)
mpl.show()

# Scatter of the survived column against age.
mpl.scatter("survived", "age", data=titanic)
mpl.show()

sb.pointplot(x="sex", y="age", data=titanic)
mpl.show()

sb.swarmplot(x="pclass", y="age", data=titanic)
mpl.show()

# Sepal-length densities for two iris species on a dark grid.
iris = sb.load_dataset("iris")
sb.set_style("darkgrid")
# The keyword is lowercase `label`: matplotlib property names are
# case-sensitive, so the previous `Label=` was not a recognised property.
sb.kdeplot(iris.loc[(iris['species'] == 'setosa'), 'sepal_length'], color='b', shade=True, label='setosa')
sb.kdeplot(iris.loc[(iris['species'] == 'virginica'), 'sepal_length'], color='r', shade=True, label='virginica')
mpl.show()

# Passenger counts per class, split by the 'who' column.
sb.countplot(x='class', hue='who', data=titanic)
mpl.show()


# Passenger counts per sex, split by the 'who' column, with the PuRd palette.
sb.countplot(x='sex', hue='who', data=titanic, palette="PuRd")
mpl.show()
Example #55
0
# Read a semicolon-separated table that uses ',' as the decimal mark; the first
# column becomes the index and is turned back into a regular column right after.
df = pd.read_csv(filepath, sep=";", decimal=',', index_col=0)

df = df.reset_index()
#df["Normalized intensity"] =df.groupby(["Experiment", "Genotype"])["Mean intensity"].apply(lambda x: x/x.mean())
# Per-experiment mean of 'Mean intensity' (despite the name, no standard
# deviation is computed here).
means_stds = df.groupby(['Experiment'
                         ])['Mean intensity'].agg('mean').reset_index()

# Attach the experiment mean to every row and divide by it to normalize.
means_stds = means_stds.rename(columns={"Mean intensity": "mean_norm"})
df = df.merge(means_stds, on=(["Experiment"]))
df["Normalized intensity"] = df["Mean intensity"] / df["mean_norm"]

# Box plot per genotype with the raw points overlaid, colored by experiment.
# NOTE(review): the plots and the t-test below use the raw 'Mean intensity',
# not the normalized column computed above; confirm this is intended.
pal = sns.color_palette("viridis", 4)
g = sns.boxplot(y="Mean intensity", x="Genotype", data=df, order=["WT", "KO"])
g = sns.swarmplot(y="Mean intensity",
                  x="Genotype",
                  hue="Experiment",
                  data=df,
                  order=["WT", "KO"],
                  palette=pal)

plt.show()

# Independent-samples t-test of WT against KO intensities.
cat1_wt = df[df['Genotype'] == 'WT']
cat1_KO = df[df['Genotype'] == 'KO']
print(ttest_ind(cat1_wt['Mean intensity'], cat1_KO['Mean intensity']))

# Means per (Experiment, Genotype).
df2 = df.groupby([df["Experiment"], df["Genotype"]]).mean()
# The result of this call is discarded; df3 below holds the reset frame.
df2.reset_index()
df3 = df2.reset_index()
print("Plotting means ")
pal = sns.color_palette("viridis", 4)
g = sns.pointplot(y="Mean intensity",
# Run sc.tl.dpt on dmr_t, then plot the 'dpt_pseudotime' column of dmr_t.obs
# against 'EpiBurden' (presumably dpt is what writes that column; confirm).
sc.tl.dpt(dmr_t, n_branchings=0, n_dcs=15)

sns.lmplot(data=dmr_t.obs, x='dpt_pseudotime', y='EpiBurden')

# Reorder the DMR_leiden categories into sorted order and mark them as ordered.
lin = tuple(sorted(list(dmr_t.obs['DMR_leiden'].values.unique())))
dmr_t.obs['DMR_leiden'] = dmr_t.obs['DMR_leiden'].cat.reorder_categories(list(lin), ordered=True)

# Fixed color per cluster label.
color_dict = {
    "leiden_A": "#d62728",
    "leiden_B": "#ff7f0e",
    "leiden_C": "#1f77b4",
    "leiden_D": "#2ca02c"
} # equivalent to dict(zip(list(dmr_t.obs['DMR_leiden'].value_counts().index), dmr_t.uns['DMR_leiden_colors']))

# EpiBurden per cluster: colored boxes with the individual points on top.
sns.boxplot(data=dmr_t.obs, x='DMR_leiden', y='EpiBurden', palette=color_dict)
sns.swarmplot(data=dmr_t.obs, x='DMR_leiden', y='EpiBurden', color=".2")

# NOTE(review): `rna` is read twice from different absolute paths; when the
# script runs top to bottom the second read replaces the first, and either
# call fails if its path does not exist. These look like per-machine
# alternatives; confirm which one is current.
rna = pd.read_table("/mnt/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/STAD_SNUH_vst.txt", index_col=0, sep=' ')
rna = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/GENCODE_V24/STAD_SNUH_vst.txt", index_col=0, sep=' ')
# Prefix every column name with "X".
rna.columns = list(map(lambda x: "X" + x, rna.columns))

deg_tn_protein = pd.read_table("/mnt/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/STAD_SNUH_Tumor_leiden_vst_DEG_Leiden_A_D_protein.txt", index_col="ID")
deg_tn_protein.columns = list(map(lambda x: "X" + x, deg_tn_protein.columns))



# NOTE(review): `pro_met` is read three times; only the last assignment is
# kept when run top to bottom. Confirm which source file is intended.
pro_met = pd.read_table("Promoter_up500down500_ALL.txt", index_col="ID")
pro_met = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/03.WGBS/Promoter_cCRE_ALL.txt", index_col="ID")
pro_met = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/03.WGBS/NEW/Promoter_cCRE_ALL.txt", index_col="ID")
pro_met.columns = list(map(lambda x: "X" + x, pro_met.columns))
# Split each '/'-separated index entry into its first, second, third and last
# fields, stored as the Loc, GeneID, EnsemblID and CpG columns.
pro_met_info = pd.DataFrame(list(zip(list(map(lambda x: x.split('/')[0], pro_met.index)), list(map(lambda x: x.split('/')[1], pro_met.index)), list(map(lambda x: x.split('/')[2], pro_met.index)), list(map(lambda x: x.split('/')[-1], pro_met.index)))), columns=['Loc', 'GeneID', 'EnsemblID', 'CpG'], index=pro_met.index)
Example #57
0
                              "shade": True,
                              "cumulative": cdf
                          },
                          hist=False,
                          color='cyan')
        # Vertical black line at x = me['Time'], from 0 up to the current
        # upper y-limit of `ax`.
        plt.plot([me['Time']] * 2, [0, ax.get_ylim()[1]], color='black')
        plt.xlabel('Overall')
        ax.xaxis.set_major_formatter(formatter)

        plt.tight_layout()
        # The output file name records whether the cumulative variant was drawn.
        txt = 'cdf' if cdf else 'pdf'
        plt.savefig('stages_' + txt + '.svg')

    # Total time per division: box plot with small grey points overlaid.
    plt.figure(figsize=[10, 10])
    ax = sns.boxplot(x='Time', y='Div', data=df)
    sns.swarmplot(x='Time', y='Div', data=df, size=2, color=".3", linewidth=0)
    plt.title('Total Time by Division')
    plt.xlabel('Total Time')
    ax.xaxis.set_major_formatter(formatter)
    plt.savefig('time_by_div.svg')

    # Boolean flag: True when the division name contains 'F' or 'ATH'.
    # NOTE(review): the legend below treats True as 'Women'; confirm that
    # 'ATH' divisions belong in that group.
    df['Sex'] = df['Div'].map(lambda x: 'F' in x or 'ATH' in x)

    # Time density per flag value; the True group is drawn first so that it
    # matches the first legend entry.
    plt.figure(figsize=[10, 6])
    ax = sns.distplot(df.loc[df['Sex'] == 1, 'Time'],
                      kde_kws={"shade": True},
                      hist=False)
    ax = sns.distplot(df.loc[df['Sex'] == 0, 'Time'],
                      kde_kws={"shade": True},
                      hist=False)
    plt.legend(['Women', 'Men'])
Example #58
0
# Every plot in this section draws total_bill per day from the tips table.
day_bill = dict(x='day', y='total_bill', data=tips)

# Split violin: one half per sex on each side of the plot.
sb.violinplot(hue='sex', split=True, **day_bill)

# Strip Plots
sb.stripplot(**day_bill)
sb.stripplot(jitter=True, **day_bill)
sb.stripplot(jitter=True, hue='sex', **day_bill)
sb.stripplot(jitter=True, hue='sex', split=True, **day_bill)

# Swarm Plots
sb.swarmplot(**day_bill)

# Violin & Swarm Plots
sb.violinplot(**day_bill)
sb.swarmplot(color='black', **day_bill)

# Factor Plots (General-Purpose with Kind Specification)
sb.factorplot(**day_bill)
for plot_kind in ('box', 'bar', 'violin', 'strip', 'swarm'):
    sb.factorplot(kind=plot_kind, **day_bill)

# Matrix Plots ---------------------------------------------------------------
    # print('Just finished collecting and storing data for ' + city_var)

    # end of city loop - all cities should have been processed

# Calculate summary statistics of tmax per city and store them in a new
# dataframe, "df_summary"; split each city's coordinate string into separate
# float latitude and longitude columns; then write the DataFrame to a .csv
# file called "summary.csv".
df_summary = pd.DataFrame(columns=('city', 'long', 'lat', 'max_tmax',
                                   'min_tmax', 'range_tmax', 'mean_tmax',
                                   'sd_tmax'))

# Group once and reuse the grouping for every statistic; the first assignment
# gives df_summary its index (one row per city).
tmax_by_city = df.groupby('city')['tmax']
df_summary['max_tmax'] = tmax_by_city.max()
df_summary['min_tmax'] = tmax_by_city.min()
df_summary['mean_tmax'] = tmax_by_city.mean()
df_summary['sd_tmax'] = tmax_by_city.std()
df_summary['range_tmax'] = df_summary['max_tmax'] - df_summary['min_tmax']
df_summary['city'] = df_summary.index

# `.items()` and `.at` replace `iteritems()` and `DataFrame.set_value`, which
# are unavailable on Python 3 dicts and current pandas respectively.
for k, v in cities.items():
    # Each value is a "first,second" coordinate string; the first number is
    # stored as the latitude and the second as the longitude.
    location = tuple(float(x) for x in v.split(','))
    df_summary.at[k, 'lat'] = location[0]
    df_summary.at[k, 'long'] = location[1]

df_summary.to_csv('summary.csv', index=False)

# Box plot of tmax per city (cities in sorted order) with the individual
# observations overlaid in dark grey.
sns.set_style("whitegrid")
df_by_city = df.sort_values(by='city')
ax = sns.boxplot(x="city", y="tmax", data=df_by_city)
ax = sns.swarmplot(x="city", y="tmax", data=df_by_city,
                   color=".25")
Example #60
0
sns.set_style("ticks")
sns.set_context("talk")

df = pd.read_csv("VRTag_days.csv")

#df['daycond'] = df.day + df.cond.astype(str)

## Pointplot for simple, easy mean/sem visualization

# Collapse to one mean row per (subject, day, condition).
df = df.groupby(['subject','day','condition']).mean().reset_index()

# NOTE(review): `ax` is not created anywhere in this excerpt; presumably it
# comes from an earlier plt.subplots() call. Confirm.
#sns.pointplot(x="day",y="dist", hue = "condition", ax=ax,palette = p1,data=df, dodge= True,ci=68)
sns.swarmplot(x="day",y="dist", hue = "condition", dodge= True, ax=ax,data=df, hue_order = ["video","vr"])


#ax.legend_.remove()
sns.despine(ax=ax)
# A stray "<<<<<<< HEAD" merge-conflict marker used to sit here and made the
# script a syntax error. Both label calls from the conflict are kept in their
# original order, so the second one still determines the final labels.
# NOTE(review): pick the intended y label and delete the other call.
ax.set(xlabel="Condition",ylabel="Day 1 minus Day 2 distance (pixels)")

ax.set(xlabel="Condition",ylabel="Distance from Correct (pixels)")

## box and swarm for specific data-point visualization

#sns.boxplot(x="day",y="dist",ax=ax,palette = p1,data=df, dodge= True)
#sns.swarmplot(x="day",y="dist",ax=ax,color = "black",data=df, dodge= True)