def plot_distances(distance_data, filename, title, plot_variable='distance'):
    """Draw one violin + swarm panel per region and save the figure.

    Parameters
    ----------
    distance_data : pandas.DataFrame
        Must contain the columns ``'region'`` and ``'cutoff'`` as well as
        the column named by ``plot_variable``.
    filename : str
        Destination passed to ``plt.savefig``.
    title : str
        Title shown above the first (topmost) panel.
    plot_variable : str
        Name of the column plotted on the y-axis of every panel.

    Raises
    ------
    ValueError
        If ``distance_data`` has no regions (nothing can be plotted).
    """
    seeds = sorted(set(distance_data['region']))
    if not seeds:
        # plt.subplots(nrows=0) would fail with a much less helpful message.
        raise ValueError('distance_data contains no regions to plot')

    distance_data = distance_data.sort_values(['region', 'cutoff'])
    sns.set()

    _, axes_grid = plt.subplots(nrows=len(seeds), ncols=1, squeeze=False)
    axes_sets = list(chain(*axes_grid))  # 2-dim array -> 1-dim list

    for ax, seed in zip(axes_sets, seeds):
        seed_data = distance_data[distance_data['region'] == seed]
        # Number of observations per cutoff, broadcast back onto each row.
        seed_data = seed_data.assign(
            count=lambda df: df['cutoff'].map(
                df.groupby(by=['cutoff'])[plot_variable].count()))
        seed_data['cutoff_n'] = seed_data.apply(format_cutoff, axis='columns')

        sns.violinplot(x='cutoff_n', y=plot_variable, data=seed_data,
                       cut=0, alpha=0.7, ax=ax)
        # Raise the z-order of the violin artists created so far.
        plt.setp(ax.lines, zorder=100)
        plt.setp(ax.collections, zorder=100)
        sns.swarmplot(x='cutoff_n', y=plot_variable, data=seed_data,
                      color='k', ax=ax)
        # format() instead of "+" so non-string region ids do not raise.
        ax.set_ylabel('{}\n{}'.format(seed, plot_variable))

    axes_sets[0].set_title(title)
    plt.savefig(filename)
def create_swarmplot(df, path, title, colormap, genes, species):
    """Render a seaborn swarmplot of orthologue counts and save it as PNG.

    The image is written next to ``path`` as ``<title>_swarmplot.png``.

    :param df: pandas.DataFrame with the columns 'Gene Name', 'Orthologues'
        and 'Species'.
    :param path: The CSV file path; only its directory is used.
    :param title: Used as the stem of the output file name.
    :param colormap: Palette handed to seaborn.
    :param genes: Ordered list of genes (x-axis order).
    :param species: Ordered list of species (hue order).
    :return: Path of the image file that was written.
    """
    print("Creating swarmplot for {}".format(path))
    target = join_folder(os.path.dirname(path), "%s_swarmplot.png" % title)

    # Fresh figure; seaborn draws onto its current axes.
    plt.figure(figsize=(16, 10), dpi=180)
    sns.swarmplot(
        data=df,
        x='Gene Name',
        y='Orthologues',
        hue='Species',
        order=genes,
        hue_order=species,
        palette=colormap,
    )

    plt.ylabel("# Orthologues")
    plt.xlabel("Gene Name")
    plt.ylim(0)
    plt.yticks(fontsize=10)
    plt.xticks(fontsize=10)

    plt.savefig(target)
    plt.close()
    return target
def _plot_categorical_and_continuous(df, xlabel, ylabel, x_keys, y_keys, ax, cmap, n_cat=5, plottype="box"): """ Plot a categorical variable and a continuous variable against each other. Types of plots include box plot, violin plot, strip plot and swarm plot. Parameters ---------- df : pd.DataFrame A pandas DataFrame with the data xlabel : str The column name for the variable on the x-axis ylabel : str The column name for the variable on the y-axis ax : matplotlib.Axes object The matplotlib.Axes object to plot the bubble plot into cmap : matplotlib.cm.colormap A matplotlib colormap to use for shading the bubbles n_cat : int The number of categories; used for creating the colour map plottype : {"box" | "violin" | "strip" | "swarm"} The type of plot to produce; default is a box plot Returns ------- ax : matplotlib.Axes object The same matplotlib.Axes object for further manipulation """ if x_keys is xlabel: keys = y_keys elif y_keys is ylabel: keys = x_keys else: raise Exception("Something went terribly, horribly wrong!") current_palette = sns.color_palette(cmap, n_cat) if plottype == "box": sns.boxplot(x=xlabel, y=ylabel, data=df, order=keys, palette=current_palette, ax=ax) elif plottype == "strip": sns.stripplot(x=xlabel, y=ylabel, data=df, order=keys, palette=current_palette, ax=ax) elif plottype == "swarm": sns.swarmplot(x=xlabel, y=ylabel, data=df, order=keys, palette=current_palette, ax=ax) elif plottype == "violin": sns.violinplot(x=xlabel, y=ylabel, data=df, order=keys, palette=current_palette, ax=ax) else: raise Exception("plottype not recognized!") return ax
def plot_facet(self, data, color, x=None, y=None, levels_x=None, levels_y=None, palette=None, **kwargs):
    """Draw a swarm plot of corpus positions for one facet.

    The corpus id column is placed on whichever axis is not taken by a
    grouping variable; if both ``x`` and ``y`` are given, ``x`` becomes the
    hue. Sets ``self.horizontal``, ``self._xlab`` and ``self._ylab`` as a
    side effect.

    :param data: data frame containing the ``coquery_invisible_corpus_id``
        column and the columns named by ``x``/``y``
    :param color: unused; accepted for signature compatibility
    :param x: optional column name for the x grouping
    :param y: optional column name for the y grouping
    :param levels_x: category order for ``x``
    :param levels_y: category order for ``y``
    :param palette: palette passed to seaborn
    :param kwargs: may contain ``ax``, the axes to draw on; defaults to the
        current axes
    :return: the axes the plot was drawn on
    """
    # Only ask pyplot for the current axes when the caller gave none, and
    # make sure seaborn really draws on the axes that is returned.
    ax = kwargs.get("ax")
    if ax is None:
        ax = plt.gca()

    corpus_id = "coquery_invisible_corpus_id"
    params = {"data": data, "palette": palette, "ax": ax}
    self.horizontal = True

    if not x and not y:
        params.update({"x": corpus_id})
        self._xlab = x
        self._ylab = ""
    elif x and not y:
        params.update({"x": x, "y": corpus_id, "order": levels_x})
        self.horizontal = False
        self._xlab = x
        self._ylab = "Corpus position"
    elif y and not x:
        params.update({"y": y, "x": corpus_id, "order": levels_y})
        self._xlab = "Corpus position"
        self._ylab = y
    elif x and y:
        params.update({"x": corpus_id, "y": y, "hue": x,
                       "order": levels_y, "hue_order": levels_x})
        self._xlab = "Corpus position"
        self._ylab = y

    sns.swarmplot(**params)
    return ax
def plotResults(tr, resultKey='resultInputPsf', doRates=False, title='', asHist=False, doPrint=True, actuallyPlot=True):
    """Summarise and optionally plot detection results per method.

    Parameters
    ----------
    tr : list
        Test results. Entries that are ``None`` or whose ``resultKey``
        value is falsy are ignored. Each remaining entry must provide
        ``entry[resultKey][method]['FN' | 'FP' | 'TP']`` for the methods
        'ALstack', 'ZOGY', 'SZOGY' and 'ALstack_decorr'.
    resultKey : str
        Key selecting which result set of every entry is used.
    doRates : bool
        If True, divide FN, FP and TP by the per-run total ``FN + TP``.
    title : str
        Title placed on both panels.
    asHist : bool
        If True draw histograms, otherwise violin/swarm/box plots.
    doPrint : bool
        If True print the per-method means of FN, FP and TP.
    actuallyPlot : bool
        If False return the tables without importing or touching the
        plotting libraries.

    Returns
    -------
    (TP, FP, FN) : tuple of pandas.DataFrame
        One column per method, one row per usable entry of ``tr``.
    """
    methods = ['ALstack', 'ZOGY', 'SZOGY', 'ALstack_decorr']
    tr = [t for t in tr if t is not None and t[resultKey]]

    def collect(kind):
        return pd.DataFrame({key: np.array([t[resultKey][key][kind] for t in tr])
                             for key in methods})

    FN = collect('FN')
    FP = collect('FP')
    TP = collect('TP')

    title_suffix = 's'
    if doRates:
        # Compute the denominator once, BEFORE any table is overwritten;
        # dividing in sequence would reuse the already-normalised FN.
        total = (FN + TP).astype(float)
        FN = FN / total
        FP = FP / total
        TP = TP / total
        title_suffix = ' rate'

    if doPrint:
        # Single-argument print() produces the same text on Python 2 and 3.
        print('FN: \n%s' % (FN.mean(),))
        print('FP: \n%s' % (FP.mean(),))
        print('TP: \n%s' % (TP.mean(),))

    if not actuallyPlot:
        return TP, FP, FN

    # Plotting stack is imported (and global styles changed) only when a
    # plot is really requested.
    import matplotlib.pyplot as plt
    import matplotlib
    matplotlib.style.use('ggplot')
    import seaborn as sns
    sns.set(style="whitegrid", palette="pastel", color_codes=True)

    matplotlib.rcParams['figure.figsize'] = (18.0, 6.0)
    _, axes = plt.subplots(nrows=1, ncols=2)

    # (table, axes, axis label, extra violin options)
    panels = (
        (TP, axes[0], 'True positive', dict(bw=0.25, alpha=0.5)),
        (FP, axes[1], 'False positive', dict(bw=0.5)),
    )

    if not asHist:
        for frame, ax, label, violin_kwargs in panels:
            sns.violinplot(data=frame, cut=True, linewidth=0.3, scale='width',
                           ax=ax, **violin_kwargs)
            if frame.shape[0] < 500:
                # Individual points are only drawn for small samples.
                sns.swarmplot(data=frame, color='black', size=3, alpha=0.3, ax=ax)
            sns.boxplot(data=frame, saturation=0.5, boxprops={'facecolor': 'None'},
                        whiskerprops={'linewidth': 0}, showfliers=False, ax=ax)
            plt.setp(ax, alpha=0.3)
            ax.set_ylabel(label + title_suffix)
            ax.set_title(title)
    else:
        for frame, ax, label, _ in panels:
            for t in frame:
                sns.distplot(frame[t], label=t, norm_hist=False, ax=ax)
            ax.set_xlabel(label + title_suffix)
            ax.set_title(title)
            ax.legend(loc='upper left', shadow=True)

    return TP, FP, FN
def _fatigue_team_plot(df, team, title, filename, ylim=None):
    """Draw and save the offensive-fatigue swarm plot of a single team."""
    # One combined mask: chaining two boolean selections applies a mask
    # that no longer matches the filtered frame's index.
    offense = df[(df.Pos == 'Off') & (df.Tm == team)]

    plt.figure()
    sns.swarmplot(x='variable', y='value', data=offense)
    plt.title(title)
    plt.xlabel('Quarter')
    plt.ylabel('Mean Offensive Velocity (ft/sec)')
    if ylim is not None:
        plt.ylim(*ylim)

    # Relabel the ticks with the values scaled by 1000; a real list is
    # required because map() is a one-shot iterator on Python 3.
    locs, _ = plt.yticks()
    plt.yticks(locs, ["%.1f" % value for value in locs * 1000])
    plt.savefig(filename)


def fatigue_plots(df):
    """
    Makes plots showing game fatigue for SAS and IND

    Args:
        df (pd.DataFrame): dataframe of fatigue data with the columns
            ``Pos``, ``Tm``, ``variable`` and ``value``

    Note:
        use extract_fatigue() to obtain this data

    Returns:
        None
        Saves plots to examples/
    """
    _fatigue_team_plot(df, 'IND', 'Indiana Pacers Fatigue',
                       'examples/INDfatige', ylim=(0.015, 0.034))
    _fatigue_team_plot(df, 'SAS', 'San Antonio Spurs Fatigue',
                       'examples/SASfatige')
def plot_prediction2(transformed, predicted, y, label_names, threshold):
    """Swarm plot of LDA-transformed samples, coloured by prediction.

    A grey vertical line marks the classification ``threshold``. The true
    label is on the y-axis and the transformed value on the x-axis.
    ``predicted`` and ``y`` are sequences whose items carry the class index
    in position 0; ``label_names`` maps that index to a display name.
    """
    value_column = 'Dimention Reduction Result After LDA Transform'

    def to_names(rows):
        # First element of each row is the class index.
        return [label_names[int(row[0])] for row in rows]

    plot([threshold, threshold], [-50, 50], 'grey', label='Classify boundary')

    frames = [
        pd.DataFrame(transformed, columns=[value_column]),
        pd.DataFrame(to_names(predicted), columns=['prediction']),
        pd.DataFrame(to_names(y), columns=['label']),
    ]
    data_t = pd.concat(frames, axis=1)

    sns.swarmplot(y='label', x=value_column, hue='prediction', data=data_t)
def plot_facet(data, color):
    """Swarm plot of corpus ids grouped by the last group-by column.

    ``color`` is accepted but not used. ``self`` is taken from the
    enclosing scope.
    """
    group_values = data[self._groupby[-1]]
    corpus_ids = data["coquery_invisible_corpus_id"]
    level_order = sorted(self._levels[-1])
    colors = self.options["color_palette_values"]

    sns.swarmplot(x=group_values, y=corpus_ids, order=level_order,
                  palette=colors, data=data)
def ageing_wip_chart(cycle_data, start_column, end_column, done_column=None, now=None, title=None, ax=None):
    """Draw a swarm plot of the age of unfinished items per workflow status.

    :param cycle_data: data frame with the columns 'key' and 'summary' plus
        one timestamp column per workflow status
    :param start_column: first status column; an item's age is counted from
        the timestamp in this column
    :param end_column: last status column included in the chart
    :param done_column: column marking completion; rows with a value here
        are excluded. Defaults to the last column of ``cycle_data``
    :param now: reference timestamp; defaults to ``pd.Timestamp.now()``
    :param title: optional axes title
    :param ax: axes to draw on; a new figure is created if omitted
    :return: the axes that was drawn on
    :raises UnchartableData: if ``cycle_data`` has no rows
    """
    if len(cycle_data.index) == 0:
        raise UnchartableData("Cannot draw ageing WIP chart with no data")

    if ax is None:
        _, ax = plt.subplots()
    if title is not None:
        ax.set_title(title)
    if now is None:
        now = pd.Timestamp.now()
    if done_column is None:
        done_column = cycle_data.columns[-1]

    today = now.date()

    # remove items that are done
    cycle_data = cycle_data[pd.isnull(cycle_data[done_column])]
    # .loc is the label-based replacement for the removed .ix indexer
    cycle_data = pd.concat((
        cycle_data[['key', 'summary']],
        cycle_data.loc[:, start_column:end_column]
    ), axis=1)

    def extract_status(row):
        # The last non-null column of the row is the current status.
        last_valid = row.last_valid_index()
        if last_valid is None:
            return np.nan
        return last_valid

    def extract_age(row):
        # Age in whole days since the start column's date.
        started = row[start_column]
        if pd.isnull(started):
            return np.nan
        return (today - started.date()).days

    wip_data = cycle_data[['key', 'summary']].copy()
    wip_data['status'] = cycle_data.apply(extract_status, axis=1)
    wip_data['age'] = cycle_data.apply(extract_age, axis=1)
    wip_data.dropna(how='any', inplace=True)

    # Columns after 'key' and 'summary' are the statuses, in workflow order.
    sns.swarmplot(x='status', y='age', order=cycle_data.columns[2:], data=wip_data, ax=ax)

    ax.set_xlabel("Status")
    ax.set_ylabel("Age (days)")
    ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90)

    _, top = ax.get_ylim()
    ax.set_ylim(0, top)

    return ax
def dataExpl(data):
    """Exploratory plots of survival by sex, age group and age.

    Expects the columns 'Sex', 'Survived' and 'Age'. All three plots are
    issued without an explicit axes argument.
    """
    # Statistical description of the data (the result is not used here)
    data.describe()

    sns.barplot(x='Sex', y='Survived', data=data)

    # Split passengers into children (< 15) and adults (>= 15); rows
    # matching neither comparison keep a missing AgeGroup.
    age_df = data[['Age', 'Survived', 'Sex']].copy()
    is_child = age_df.Age < 15
    age_df.loc[is_child, 'AgeGroup'] = 'Children'
    is_adult = age_df.Age >= 15
    age_df.loc[is_adult, 'AgeGroup'] = 'Adult'
    sns.barplot(x='AgeGroup', y='Survived', hue='Sex', data=age_df)

    sns.swarmplot(x='Age', y='Sex', hue='Survived', data=data)
def swarmplot(df):
    """Save a swarm plot of four measurements, split by group.

    ``df`` must provide the columns 'group', 'height', 'heartrate',
    'weight' and 'age'. The image is written to ``FIG_PATH/swarmplot.png``.
    """
    measurements = ["height", "heartrate", "weight", "age"]
    tick_labels = ['height (cm)', 'heart rate (bpm)', 'weight (kg)', 'age (years)']

    fig = plt.figure(figsize=[12, 8])
    ax = fig.gca()

    # Long format: one row per (group, measurement, value)
    df_melted = pd.melt(frame=df, id_vars=["group"], value_vars=measurements,
                        var_name="measurement")
    sns.swarmplot(data=df_melted, x="measurement", y="value", hue="group")

    ax.set_xticklabels(tick_labels)
    ax.set_xlabel('')
    fig.tight_layout()

    target = os.path.join(FIG_PATH, 'swarmplot.png')
    plt.savefig(target, dpi=100)
def swarm2(ax, data, xvar1, xlabel1, xvar2, xlabel2, yvar, ylabel):
    """Swarm plot of two columns on ``ax`` with mean bars, p-value and n.

    :param ax: axes to draw on
    :param data: data frame containing the columns ``xvar1`` and ``xvar2``
    :param xvar1: name of the first column (left swarm)
    :param xlabel1: tick label for the first swarm
    :param xvar2: name of the second column (right swarm)
    :param xlabel2: tick label for the second swarm
    :param yvar: unused; kept for signature compatibility
    :param ylabel: y-axis label
    """
    # Draw on the axes that was passed in, not on pyplot's current axes.
    sns.swarmplot(data=data[[xvar1, xvar2]], ax=ax)

    # Mean bars: x in axes fractions, y in data units.
    trans = matplotlib.transforms.blended_transform_factory(
        ax.transAxes, ax.transData
    )
    mean1 = data[xvar1].mean()
    mean2 = data[xvar2].mean()
    ax.plot([0.1, 0.4], [mean1, mean1], "r-", transform=trans)
    ax.plot([0.6, 0.9], [mean2, mean2], "r-", transform=trans)

    # Welch's t-test (unequal variances) on the non-missing values.
    pvalue = scipy.stats.ttest_ind(
        data[xvar1].dropna(), data[xvar2].dropna(), equal_var=False
    ).pvalue

    # Make sure the upper limit leaves 20% of the data range as headroom
    # for the annotations.
    dataset = pd.concat([data[xvar1], data[xvar2]])
    curr_ylim = ax.get_ylim()
    y20 = dataset.max() + (dataset.max() - dataset.min()) * 0.2
    if curr_ylim[1] < y20:
        ax.set_ylim([curr_ylim[0], y20])

    ax.annotate(
        r"$p = {0:.5f}$".format(pvalue),
        xy=(0.5, 0.95),
        horizontalalignment="center",
        xycoords=ax.transAxes,
    )
    # n counts the non-missing values of the first column only.
    ax.annotate(
        r"$n = {0}$".format(len(data[xvar1].dropna())),
        xy=(0.5, 0.9),
        horizontalalignment="center",
        xycoords=ax.transAxes,
    )

    ax.set_ylabel(ylabel)
    ax.set_xticklabels([xlabel1, xlabel2])
    sns.despine()
def combPlot(x, y, data, hue=None, onlyAverage=False):
    """Swarm plot of ``y`` against ``x``, optionally split by ``hue``.

    With ``onlyAverage`` the data is first reduced to the mean per ``x``
    (or per ``x``/``hue`` combination), i.e. an average of averages, so
    that only one value is shown for each group.
    """
    if onlyAverage:
        group_keys = [x, hue] if hue else x
        # reset_index() turns the group keys back into ordinary columns,
        # which seaborn needs in order to plot them:
        # http://stackoverflow.com/a/10374456
        averaged = data.groupby(group_keys).mean().reset_index()
        data = pd.DataFrame(averaged)

    sns.swarmplot(x=x, y=y, hue=hue, data=data)
def make_plot_file(top, file_name, time_func, rank_name, rank_max):
    """
    Creates a swarm plot of 'pageviews' depending on time with a combination of 'pagename' and 'projectcode'
    as the keys to plot.

    :param top: Pandas dataframe with pages ranked within each occurrence of the current timeframe.
    :param file_name: The name of the png plot file to create.
    :param time_func: Callable applied to every row to create the time value.
    :param rank_name: The column with the rank.
    :param rank_max: The highest (least 'pageviews') rank to include; exclusive.
    :return: None
    """
    # Get only the rows with rank less than rank_max
    top = top[top[rank_name] < rank_max].copy(deep=True)

    # Create time column
    top['time'] = top.apply(time_func, axis=1)

    # Replace blank page names with "main page"
    top['pagename'] = top['pagename'].fillna("main page")

    # Create key for the legend and plotting.
    top['pagename_projectcode'] = top['pagename'] + "_" + top['projectcode']

    # Create and configure seaborn plot.
    ax = sns.swarmplot(x="time", y="pageviews", hue="pagename_projectcode", data=top)
    lgd = ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.setp(ax.get_xticklabels(), rotation=45)

    # Save the figure the plot was drawn on; figure number 1 is not
    # necessarily the current one when other figures are open.
    fig = ax.figure
    fig.savefig(file_name, bbox_extra_artists=(lgd,), bbox_inches='tight')

    # Clear plot data
    fig.clf()
def swarmBox(data, x, y, hue, palette=None, order=None, hue_order=None, connect=False):
    """Overlay a dodged swarm plot on a box plot, optionally connecting points.

    Depends on plot order of the swarm plot which does not seem dependable
    at the moment. Better idea would be to adopt code from the actual swarm
    function for this, adding boxplots separately.

    :param data: data frame holding the columns ``x``, ``y`` and ``hue``
    :param x: categorical column for the x-axis
    :param y: numeric column for the y-axis
    :param hue: column used to split every x category
    :param palette: colours, one per hue level; defaults to 'Set2'
    :param order: order of the x categories; defaults to the sorted unique
        values of ``data[x]``
    :param hue_order: order of the hue levels; defaults to the sorted
        unique values of ``data[hue]``
    :param connect: if True, draw grey lines between the hue levels within
        each x category, pairing points by their rank in ``y``
    """
    if palette is None:
        palette = sns.color_palette('Set2', n_colors=data[hue].unique().shape[0])
    if hue_order is None:
        hue_order = sorted(data[hue].unique())
    if order is None:
        order = sorted(data[x].unique())

    params = dict(data=data, x=x, y=y, hue=hue, palette=palette,
                  order=order, hue_order=hue_order)
    sns.boxplot(**params, fliersize=0, linewidth=0.5)
    swarm = sns.swarmplot(**params, linewidth=0.5, edgecolor='black', dodge=True)

    if connect:
        # Collections are assumed to be laid out x-category major, hue
        # minor, so every len(hue_order)-th collection shares a hue level.
        n_hues = len(hue_order)
        zipper = [order] + [swarm.collections[i::n_hues] for i in range(n_hues)]
        for z in zip(*zipper):
            curx = z[0]
            collections = z[1:]
            offsets = []
            for c, h in zip(collections, hue_order):
                ind = (data[x] == curx) & (data[hue] == h)
                # Rank of every observation within its group
                sortii = np.argsort(np.argsort(data.loc[ind, y]))
                offsets.append(c.get_offsets()[sortii, :])
            for zoffsets in zip(*offsets):
                xvec = [o[0] for o in zoffsets]
                yvec = [o[1] for o in zoffsets]
                plt.plot(xvec, yvec, '-', color='gray', linewidth=0.5)

    plt.legend([plt.Circle(1, color=c) for c in palette], hue_order, title=hue)
def plot_hours_of_day(self, my_jira_df, my_filename_without_path, my_chart_title, my_output_path, my_relative_output_path, my_png_list):
    """Swarm plot of state changes by hour of day per project, then save it.

    The y-axis ticks are fixed to the hours 8 through 17. The resulting
    axes is handed to ``self.save_file`` together with the file arguments.
    """
    hour_ticks = list(range(8, 18))

    g = sns.swarmplot(data=my_jira_df, x="Project", y="HoursOfDay", hue="StateChange")
    g.set(yticks=hour_ticks)

    self.save_file(my_filename_without_path, my_chart_title, my_output_path,
                   my_relative_output_path, my_png_list, g)
def plot_acc_grid(models = ALL_MODELS, save_path='../resources/cached_model_grid_scores.csv'):
    """Show accuracy scores of the selected models as two swarm plots.

    Reads the cached score grid from ``save_path``. The upper panel shows
    the accuracy rows of every data fold except 'overfit', the lower panel
    only the 'overfit' fold.
    """
    grid = pd.read_csv(save_path)
    grid = grid[grid['model_names'].isin(models)]

    is_accuracy = grid['score'] == 'acc'
    is_overfit = grid['data_fold'] == 'overfit'

    f, (ax1, ax2) = plt.subplots(2, figsize=(12, 12))

    grid_acc = grid[is_accuracy & ~is_overfit]
    sns.swarmplot(data=grid_acc, y='variable', x='value', hue='model_names', ax=ax1)
    ax1.set(xlabel='scores', ylabel='')
    ax1.legend(bbox_to_anchor=(1.05, 1), loc='lower right', borderaxespad=0.)

    grid_overfit = grid[is_accuracy & is_overfit]
    sns.swarmplot(data=grid_overfit, y='variable', x='value', hue='model_names', ax=ax2)
    ax2.set(xlabel='scores', ylabel='')
    # The upper panel's legend serves both plots.
    ax2.legend_.remove()

    plt.show()
def mean_boxplots(df, title, ax):
    """Plot per-donor row means as box + swarm plot, grouped and annotated.

    Each row of ``df`` is averaged across its columns. ``group`` maps a row
    label to a group object (with ``name`` and ``color``) and ``donor``
    maps a row label to the annotation text.
    """
    signal = df.mean(axis=1).to_frame('value')
    groups = [group(n) for n in signal.index]
    signal['group'] = [g.name for g in groups]

    # Distinct groups in descending sort order
    all_groups = list(reversed(sorted(set(groups))))
    all_groups_names = [g.name for g in all_groups]

    shared = dict(x='group', y='value', data=signal, order=all_groups_names, ax=ax)
    sns.boxplot(palette='Set3', linewidth=1.0, **shared)
    sns.swarmplot(color='.25', **shared)

    # Label every point with its donor, coloured by group.
    for position, g in enumerate(all_groups):
        members = signal[signal['group'] == g.name]
        for row_number, label in enumerate(members.index):
            value = members.iloc[row_number, :]['value']
            ax.annotate(donor(label), xy=(position, value), xytext=(5, 0),
                        color=g.color, textcoords='offset points')

    ax.set_title(title)
def frip_boxplot(age_labels, df, save_to):
    """
    Draw the FRiP box + swarm plot for the given donors and save it.

    :param age_labels: Age labels, in plotting order; "YDS" points are
        annotated in red, all others in blue
    :param df: Data frame with the columns "age" and "frip", indexed by donor
    :param save_to: Object for plots saving
    """
    plt.figure()
    ax = plt.subplot()

    shared = dict(x="age", y="frip", data=df, order=age_labels, ax=ax)
    sns.boxplot(palette="Set3", linewidth=1.0, **shared)
    sns.swarmplot(color=".25", **shared)

    # Annotate each point with its index label.
    for position, age_label in enumerate(age_labels):
        label_color = "red" if age_label == "YDS" else "blue"
        age_data = df[df['age'] == age_label]
        for row_number, label in enumerate(age_data.index):
            frip = age_data.iloc[row_number, :]['frip']
            ax.annotate(label, xy=(position, frip), xytext=(5, 0),
                        color=label_color, textcoords='offset points')

    ax.set_title("Signal FRiP")
    save_plot(save_to)
def sns_viz(dataframe):
    """
    Takes a merged buzzfeed data frame and shows a seaborn swarm plot of
    'max_impres' against 'pull_cc'.

    The previous implementation always failed: it passed a non-existent
    attribute of a group-by object as data and called ``.plt`` on the
    returned axes.
    """
    # Local import: pyplot is needed only to display the figure.
    import matplotlib.pyplot as plt

    # Plot the data
    sns_plotter = sns.swarmplot(x='pull_cc', y='max_impres', data=dataframe)
    print(type(sns_plotter))

    # show the data
    plt.show()
def plot_by_genes(df, plot_dir, af_key, config):
    """Plot allele frequencies of known cancer genes in primary, relapse status

    Writes a multi-page PDF: one page per cohort with all known genes,
    followed by one page per cohort/gene that has more than one status and
    more than 10 variants. Genes listed in ``config.driver_detailed`` are
    additionally saved as PNG below ``<plot_dir>/detailed``.

    :param df: data frame with the columns "known", "cohort", "status" and
        the column named by ``af_key``
    :param plot_dir: output directory
    :param af_key: column holding the allele frequency
    :param config: optional object with a ``driver_detailed`` collection of
        ``(cohort, gene)`` pairs
    :return: path of the PDF file
    """
    out_file = os.path.join(plot_dir, "driver-af-comparison.pdf")
    df = df[pd.notnull(df["known"])]
    with PdfPages(out_file) as pdf_out:
        # Scalar group keys: a list key makes newer pandas yield 1-tuples.
        for cohort, cohort_df in df.groupby("cohort"):
            labels = sorted(cohort_df["status"].unique(), reverse=True)
            # Pass the intended order to seaborn; assigning ``.categories``
            # on a plain column has no effect on the plot.
            g = sns.violinplot(x=af_key, y="status", data=cohort_df, inner=None,
                               order=labels)
            g.set_title("%s -- %s cancer genes" % (cohort, len(cohort_df["known"].unique())))
            g = _af_violinplot_shared(g)
            pdf_out.savefig(g.figure)
            plt.clf()
        for cohort, cohort_df in df.groupby("cohort"):
            for gene, gene_df in cohort_df.groupby("known"):
                if len(gene_df["status"].unique()) > 1 and len(gene_df) > 10:
                    # Work on a copy so the label column is not written
                    # into a slice of the grouped frame.
                    gene_df = gene_df.copy()
                    status_counts = gene_df["status"].value_counts()
                    gene_df["sample_label"] = gene_df["status"].map(
                        lambda status: "%s\n(%s variants)" % (status, status_counts.get(status, 0)))
                    labels = list(gene_df["sample_label"].unique())
                    labels.reverse()
                    g = sns.violinplot(x=af_key, y="sample_label", data=gene_df, inner=None,
                                       bw=.1, order=labels)
                    sns.swarmplot(x=af_key, y="sample_label", data=gene_df, color="w",
                                  alpha=.5, order=labels)
                    g.set_title("%s -- %s" % (cohort, gene))
                    g = _af_violinplot_shared(g)
                    pdf_out.savefig(g.figure)
                    if config and (cohort, gene) in config.driver_detailed:
                        # Separate names so the PDF path returned below is
                        # not overwritten by the PNG path.
                        detail_dir = utils.safe_makedir(os.path.join(plot_dir, "detailed"))
                        detail_file = os.path.join(detail_dir, "driver-%s-%s.png" % (cohort, gene))
                        g.figure.savefig(detail_file)
                    plt.clf()
    return out_file
def swarm(ax, data, xlabel1, xlabel2, ylabel):
    """Swarm plot of the columns "y1" and "y2" on ``ax`` with a p-value.

    :param ax: axes to draw on
    :param data: data frame containing the columns "y1" and "y2"
    :param xlabel1: tick label for the "y1" swarm
    :param xlabel2: tick label for the "y2" swarm
    :param ylabel: y-axis label
    """
    # Draw on the axes that was passed in, not on pyplot's current axes.
    sns.swarmplot(data=data[["y1", "y2"]], ax=ax)

    # Welch's t-test (unequal variances) on the non-missing values.
    pvalue = scipy.stats.ttest_ind(
        data["y1"].dropna(), data["y2"].dropna(), equal_var=False
    ).pvalue

    dataset = pd.concat([data["y1"], data["y2"]])
    data_range = dataset.max() - dataset.min()

    # test that ymax is at least 20% higher than range
    curr_ylim = ax.get_ylim()
    y20 = dataset.max() + data_range * 0.2
    if curr_ylim[1] < y20:
        ax.set_ylim([curr_ylim[0], y20])

    # The p-value goes 15% of the range above the largest value, in data
    # coordinates.
    ymax = dataset.max() + data_range * 0.15
    ax.annotate(
        r"$p = {0:.5f}$".format(pvalue),
        xy=(0.5, ymax),
        horizontalalignment="center",
    )

    ax.set_ylabel(ylabel)
    ax.set_xticklabels([xlabel1, xlabel2])
    sns.despine()
def plot_posterior(self, rotate_xticks=False, burn_in=500):
    """
    Plots a swarm plot of the data overlaid on top of the central 95%
    interval and the interquartile range of the posterior 'fold' samples.

    :param rotate_xticks: if True, draw the x tick labels vertically
    :param burn_in: number of leading trace samples to discard before
        computing the summary statistics
    :return: tuple ``(fig, ax)``
    """
    # Make summary plot #
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # 1. Get the lower and upper errorbars for the 95% interval and IQR.
    # The same post-burn-in samples feed both the percentiles and the
    # mean, so the error bars are measured from a consistent centre.
    samples = self.trace['fold'][burn_in:]
    lower, lower_q, upper_q, upper = np.percentile(
        samples, [2.5, 25, 75, 97.5], axis=0)
    summary_stats = pd.DataFrame()
    summary_stats['mean'] = samples.mean(axis=0)
    err_low = summary_stats['mean'] - lower
    err_high = upper - summary_stats['mean']
    iqr_low = summary_stats['mean'] - lower_q
    iqr_high = upper_q - summary_stats['mean']

    # 2. Plot the swarmplot and errorbars.
    summary_stats['mean'].plot(ls='', ax=ax, yerr=[err_low, err_high])
    summary_stats['mean'].plot(ls='', ax=ax, yerr=[iqr_low, iqr_high],
                               elinewidth=4, color='red')
    sns.swarmplot(data=self.data, x=self.sample_col, y=self.output_col,
                  ax=ax, alpha=0.5)

    if rotate_xticks:
        logging.info('rotating xticks')
        plt.xticks(rotation='vertical')
    plt.ylabel(self.output_col)

    return fig, ax
def getLine(data_frame, xaxis = 'confName', yaxis = 'counts'):
    """Render a swarm plot and return it as an HTML ``<img>`` data URI.

    :param data_frame: data to plot
    :param xaxis: column for the x-axis
    :param yaxis: column for the y-axis
    :return: HTML snippet embedding the PNG as base64
    """
    # Local import: binary buffer for the PNG bytes (works on Python 2 and 3).
    import io

    plt.cla()
    sns.swarmplot(data=data_frame, x=xaxis, y=yaxis, palette='Blues')

    # Render once; the figure used to be saved twice with the first result
    # thrown away.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')

    # b64encode replaces the removed base64.encodestring and does not insert
    # line breaks into the payload.
    data = base64.b64encode(buf.getvalue()).decode('ascii')

    script = '''<img src="data:image/png;base64,{}";/>'''
    return script.format(data)
def plot_week_data(df, sample_type, metric, hue=None, hide_donor_baseline=False, hide_control_baseline=False, dm=None, show_legend=True, label_axes=True):
    """Box + swarm plot of ``metric`` per week for the autism group.

    Optional horizontal lines show the medians of the neurotypical
    controls, of the two donor groups and, if a distance matrix is given,
    of the distances between neurotypical samples.

    Note: the 'week' and ``metric`` columns of ``df`` are converted to
    numeric IN PLACE (unparseable values become NaN).

    :param df: sample metadata with the columns 'week' and ``metric``
    :param sample_type: value of 'SampleType' to plot
    :param metric: column plotted on the y-axis
    :param hue: optional column used to colour the swarm points
    :param hide_donor_baseline: skip the donor median lines
    :param hide_control_baseline: skip the neurotypical median line
    :param dm: optional distance matrix for the between-neurotypical median
    :param show_legend: draw the legend outside the axes, else remove it
    :param label_axes: if False, clear both axis labels
    :return: the figure
    """
    df['week'] = pd.to_numeric(df['week'], errors='coerce')
    df[metric] = pd.to_numeric(df[metric], errors='coerce')

    asd_data = filter_sample_md(df, [('SampleType', sample_type), ('Group', 'autism')])
    asd_data = asd_data.sort_values(by='week')

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax = sns.boxplot(data=asd_data, x='week', y=metric, color='white', ax=ax)
    ax = sns.swarmplot(data=asd_data, x='week', y=metric, hue=hue, palette=palette, ax=ax)

    if not hide_control_baseline:
        control = control_metric(df, sample_type, metric=metric)
        ax.axhline(np.median(control), color=palette['neurotypical'], linestyle='--',
                   label='neurotypical (median; n=%d)' % len(control))

    if not hide_donor_baseline:
        donor_initial = donor_metric(df, metric=metric, group='donor-initial',
                                     sample_type=sample_type)
        donor_initial_y = np.median(donor_initial)
        donor_maintenance = donor_metric(df, metric=metric, group='donor-maintenance',
                                         sample_type=sample_type)
        donor_maintenance_y = np.median(donor_maintenance)
        # Dashed: initial donor samples; dotted: maintenance donor samples.
        ax.axhline(donor_initial_y, color=palette['donor'], linestyle='--',
                   label='donor (median; n=%d)' % len(donor_initial))
        ax.axhline(donor_maintenance_y, color=palette['donor'], linestyle=':',
                   label='donor (median; n=%d)' % len(donor_maintenance))

    if dm is not None:
        inter_nt_dm = inter_neurotypical_distances(df, dm, sample_type=sample_type)
        inter_nt = inter_nt_dm.condensed_form()
        ax.axhline(np.median(inter_nt), color=palette['neurotypical'], linestyle='-.',
                   label='between neurotypical distance (median; n=%d)' % len(inter_nt))

    if show_legend:
        ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    else:
        ax.legend().remove()

    if not label_axes:
        ax.set_xlabel('')
        ax.set_ylabel('')

    return fig
# x/y are passed by keyword throughout: recent seaborn releases no longer
# accept the data vectors or column names as positional arguments.
grid = sns.jointplot(x=v1, y=v2, alpha=0.4)
grid.ax_joint.set_aspect('equal')


# In[22]:

sns.jointplot(x=v1, y=v2, kind='hex')


# In[23]:

# set the seaborn style for all the following plots
sns.set_style('white')
sns.jointplot(x=v1, y=v2, kind='kde', space=0)


# In[24]:

iris = pd.read_csv('iris.csv')
iris.head()


# In[25]:

# NOTE(review): newer seaborn versions call this parameter `height` --
# confirm against the seaborn version this notebook targets.
sns.pairplot(iris, hue='Name', diag_kind='kde', size=2)


# In[26]:

# Left: swarm plot, right: violin plot of the same petal-length data
plt.figure(figsize=(8, 6))
plt.subplot(121)
sns.swarmplot(x='Name', y='PetalLength', data=iris)
plt.subplot(122)
sns.violinplot(x='Name', y='PetalLength', data=iris)
color='r', yerr=std_RE[1], error_kw=dict(ecolor='black', lw=2, capsize=5, capthick=2), label='DMS-PF') plt.xlabel('Graph metric') plt.ylabel('Relative Error') plt.xticks(index + bar_width, ('GE', 'LE', 'CC', 'CP', 'EC', 'BC', 'PC', 'M')) plt.legend(loc='best') plt.tight_layout() fig.savefig('avg_RE.png') ############################################################################## # try seaborn plots using above data ############################################################################## fig, ax = plt.subplots() df=pd.DataFrame(data = RE[1], index = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6'], columns = ['GE', 'LE', 'CC', 'CP', 'EC', 'BC', 'PC', 'M'] ) ax=sns.violinplot(data=df, scale='count') ax=sns.swarmplot(data=df, color='black') ############################################################################## # Show the plots on the screen ############################################################################## plt.show()
axes[(row,col)] = plt.subplot2grid(gridshape, (row, col),sharex=axes[(2,col)],sharey=axes[(row,0)]) if (col>0): plt.setp(axes[(row,col)].get_yticklabels(), visible=False) if (0<=col<=1) and (row<4): plt.setp(axes[(row,col)].get_xticklabels(), visible=False) if (2<=col<=3) and (row<3): plt.setp(axes[(row,col)].get_xticklabels(), visible=False) axes[(row,col)].plot(npr.randn(col+5),color=colours[i]) df = pd.DataFrame(columns=["axis","value","other"]) df["axis"] = range(10)+range(10) df["value"] = np.concatenate((npr.randn(10),npr.randn(10)+1)) df["other"] = ["P1"]*10+["P2"]*10 print df sns.swarmplot(y=df["value"],ax=ax1,hue=df["axis"],palette=colours,x=df["other"]) ax1.set_ylabel("") ax1.set_xlabel("") ax1.legend_.remove() #fig.tight_layout()#pad=0.4, w_pad=0.5, h_pad=1.0) plt.show(block=True)
# Collect the plotting order: the first tab-separated field of every line
# is the cytoband identifier.
for line in sortedSrcFile:
    colList = line.rstrip().split("\t")
    cytobandId = colList[0]
    sourceElementOrder.append(cytobandId)

##### Make plot
#################
fig = plt.figure(figsize=(25, 5))

ax = sns.swarmplot(
    data=df,
    x='cytobandId',
    y='nbTransductions',
    order=sourceElementOrder,
    size=3,
    edgecolor="gray",
)

### Axis labels
ax.set_xlabel('')
ax.set_ylabel('# transductions')

# turn the axis labels: y labels horizontal, x labels vertical
plt.setp(ax.get_yticklabels(), rotation=0)
plt.setp(ax.get_xticklabels(), rotation=90)

## Y ticks
ax.set(yticks=np.arange(0, 91, 10))
fig, ax = plt.subplots()

# Violins first (zorder 0), then the individual observations on top.
ax = sb.violinplot(data=df, palette="Set3", inner='box', scale="count",
                   bw=0.1, alpha=1.0, cut=0, linewidth=1.5,
                   orient=orientation, zorder=0)
sb.swarmplot(data=df, color='k', size=2, alpha=0.25, ax=ax, orient=orientation)

# The axis that carries the model names gets the smaller tick font.
if orientation == 'v':
    ax.set(xlabel='IBR model', ylabel=metric)
    ax.tick_params(axis='x', labelsize=8)
elif orientation == 'h':
    ax.set(xlabel=metric, ylabel='IBR model')
    ax.tick_params(axis='y', labelsize=8)

# width * height
fig.set_size_inches(8, 5)
plt.xticks(rotation=60)

# show and save off the graph
plt.tight_layout(pad=3.0, w_pad=3.0, h_pad=2.0)
plt.savefig('data/' + str(season) + 'Playoffs_Save_perc_SOGA.png', bbox_inches='tight', pad_inches=0.5)
#plt.show()
plt.clf()

# swarm plots on blocks for and against
fig = plt.figure(figsize=(12, 12))
# Spaces around the season: the words used to be joined with empty strings
# and ran together in the title.
fig.suptitle('Blocks Against vs Blocks for per Game by Division ' + str(season) + ' Playoffs', fontsize=14)

plt.subplot(4, 2, 1)
sns.swarmplot(x='Ev_Team', y='Blocks_A', data=metroDf)
# Label the axes
plt.xlabel('Metro Divison')
plt.ylabel('Blocks Against')
plt.xticks(rotation=60)

plt.subplot(4, 2, 2)
sns.swarmplot(x='Ev_Team', y='Blocks_for', data=metroDf)
# This panel shows the Metro data frame, so it is labelled Metro as well.
plt.xlabel('Metro Divison')
plt.ylabel('Blocks for')
plt.xticks(rotation=60)

plt.subplot(4, 2, 3)
sns.swarmplot(x='Ev_Team', y='Blocks_A', data=atlanticDf)
plt.xlabel('Atlantic Divison')
plt.ylabel('Blocks Against')
sns.pointplot(data=df, x="연령대코드(5세단위)", y="신장(5Cm 단위)", hue="음주여부", ci="sd")
sns.pointplot(data=df, x="연령대코드(5세단위)", y="혈색소", ci=None)

■■■■■ boxplot 그래프
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="성별코드")
sns.boxplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")

■■■■■ violinplot 그래프
sns.violinplot(data=df, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부", split=True)
sns.violinplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부", split=True)

■■■■■ swarm plot 그래프
sns.swarmplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.swarmplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)", hue="음주여부")
sns.violinplot(data=df_sample, x="신장(5Cm단위)", y="체중(5Kg 단위)")
sns.swarmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부")

■■■■■ lmplot 그래프
# 회귀선을 볼 수 있다
# col로 구분하여 그래프를 나눠서 그릴 수 있다
sns.lmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부")
sns.lmplot(data=df_sample, x="연령대코드(5세단위)", y="혈색소", hue="음주여부", col="성별코드")

★★★★★★★★★ 수치형 데이터 ★★★★★★★★★★

■■■■■ scatterplot 그래프 (X, Y 모두 수치형 데이터)
sns.scatterplot(data=df, x="(혈청지오티)AST", y="(혈청지오티)ALT")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="음주여부")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="허리둘레")
sns.scatterplot(data=df_sample, x="(혈청지오티)AST", y="(혈청지오티)ALT", hue="음주여부", size="체중(5Kg 단위)")  # size로 구분하여 작성 가능
# Third-party plotting packages
import seaborn as sns
import matplotlib.pyplot as plt

# Example "tips" dataset shipped with seaborn
data = sns.load_dataset("tips")

# Swarm plot of the bill total per day, drawn with marker size 5
sns.swarmplot(
    data=data,
    x="day",
    y="total_bill",
    size=5,
)
plt.show()
'pct_assigned_GRCh38', 'pct_remain_after_dedupe_1', 'pct_remain_after_dedupe_2' ] meta_all = [] for k, v in meta.items(): this = v[cols_to_keep] this.insert(1, 'batch', k) meta_all.append(this) meta_all = pd.concat(meta_all, axis=0) meta_all.insert(5, 'assigned_GRCh38', meta_all.read_count * meta_all.pct_assigned_GRCh38 / 100.) meta_all.to_excel(os.path.join(outdir, "metadata_all.xlsx")) # compare raw read counts ax = sns.swarmplot(data=meta_all, x='batch', y='read_count') ax.figure.savefig(os.path.join(outdir, 'raw_read_counts.png'), dpi=200) ax.cla() # mapped read counts ax = sns.swarmplot(data=meta_all, x='batch', y='uniquely_mapped_GRCh38') ax.figure.savefig(os.path.join(outdir, 'uniquely_mapped_read_counts.png'), dpi=200) ax.cla() # assigned read counts ax = sns.swarmplot(data=meta_all, x='batch', y='assigned_GRCh38') ax.figure.savefig(os.path.join(outdir, 'assigned_read_counts.png'), dpi=200) ax.cla()
import matplotlib.pyplot as plt

sns.set(style="ticks")

# Figure whose x axis is logarithmic
f, ax = plt.subplots(figsize=(7, 6))
ax.set_xscale("log")

# Example planets dataset shipped with seaborn
planets = sns.load_dataset("planets")

# Horizontal boxes of the distance per discovery method; the whiskers
# span the full 0-100 percentile range
sns.boxplot(
    data=planets,
    x="distance",
    y="method",
    whis=[0, 100],
    palette="vlag",
)

# One small grey point per observation on top of the boxes
sns.swarmplot(
    data=planets,
    x="distance",
    y="method",
    size=2,
    color=".3",
    linewidth=0,
)

# Final presentation tweaks: x grid, no y label, trimmed spines
ax.xaxis.grid(True)
ax.set(ylabel="")
sns.despine(trim=True, left=True)
matplotlib.rcParams['axes.unicode_minus'] = False # In[47]: # 팀별 선수 출루율 분포 boxplot이용하여 알아보자 import seaborn as sns sns.boxplot(data=player_stat, x='팀', y='출루율') # In[48]: # 팀별 선수 출루율 swarmplot과 boxplot같이 사용하여 살펴보기 sns.boxplot(data=player_stat, x='팀', y='출루율') sns.swarmplot(data=player_stat, x='팀', y='출루율') # In[59]: # swarmplot과 boxplot같이 사용하면 색상이 겺쳐 구분하기 어려움 # 이럴경우 boxplot 색상 제거하고 간단하게 표시하면 깔끔하게 표현가능 # showcaps = False --> 박스 상/하단 가로라인 보이지 않게 하기 # whiskerprops{ 'linewidth' : 0} --> 박스 상/하단 세로라인 보이지 않게 하기 # showfliers = False --> 박스 범위 벗어난 아웃라이어 표시하지 않기 # boxprops = { 'facecolor' : 'None' } --> 박스 색상 지우기 sns.boxplot(data=player_stat, x='팀', y='출루율', showcaps=False, whiskerprops={'linewidth': 0},
def main(): # Sidebar st.sidebar.header("About -") st.sidebar.info("pdm04_st_Mid_exam") # Title st.title("Mid exam - pdm04, 김채영") st.header("- EDA of Pima diabetes data -") st.set_option('deprecation.showPyplotGlobalUse', False) # Get the data from github df = pd.read_csv("https://github.com/Redwoods/Py/raw/master/pdm2020/my-note/py-pandas/data/diabetes.csv") st.dataframe(df) # Return dataframe if st.checkbox("Show Data description"): st.dataframe(df.describe()) # shape st.subheader("shape") df.shape # st.subheader("Check & cleaning data") df.isnull().values.any(), df.isna().sum() vars = df.columns st.write(vars) df = df[vars].dropna() df.shape if st.checkbox("Skew of attribute distributions"): skew = df.skew() st.write(skew) st.markdown('- 데이터 왜곡도') st.markdown("* * *") # st.header("- Visualizing data -") # st.subheader("Check the balance of classes in the data through plot") if st.checkbox("Outcome plot"): classes=df.Outcome sns.countplot(classes, label='count') st.pyplot() nDB,DB=classes.value_counts() st.write('False: non-diabetes',nDB) st.write('True: diabetes',DB) classes.value_counts(), type(classes) st.text("0 : 정상인, 1 : 당뇨병 환자") st.markdown("* * *") # st.subheader("Show the data as a chart") if st.checkbox("chart"): st.line_chart(df) st.markdown("* * *") # st.subheader("Univariate plots:") # if st.checkbox("Histograms"): st.subheader("Histograms") plt.rcParams['figure.figsize'] = [12, 10] # set the figure size st.write(df.hist()) st.pyplot() if st.checkbox("Density Plots"): st.subheader("Density Plots") st.write(df.plot(kind='density', subplots=True, layout=(3,3), sharex=False)) st.pyplot() if st.checkbox("Box and Whisker Plots"): st.subheader("Box and Whisker Plots") st.write(df.plot(kind= 'box', subplots=True, layout=(3,3), sharex=False, sharey=False)) st.pyplot() st.markdown("* * *") # st.subheader("Multivariate Plots:") # if st.checkbox("Correlation plot"): st.subheader("Correlation plot") df.corr() plt.figure(figsize=(12,10)) 
sns.heatmap(df.corr(),annot=True, cmap= "RdYlGn", vmin=-1, vmax=1) st.pyplot() if st.checkbox("Compute correlation matrix"): st.subheader("Correlations of attributes in the data") correlations = df.corr(method = 'pearson') st.write(correlations) st.markdown('- 값이 1에 가까울수록 상관성이 있음!') if st.checkbox("result"): st.markdown('- 상관성 분석 결과\n' ' * Age vs. Pregnancies : 0.54\n' ' * Glucose vs. Outcome : 0.47\n' ' * SkinThickness vs. Insulin : 0.44\n' ' * SkinThickness vs. BMI : 0.39\n') st.markdown('- 상관성이 높은 변수들에 대한 좀 더 자세한 시각화가 필요하다.') st.markdown("* * *") # # Import required package from pandas.plotting import scatter_matrix plt.rcParams['figure.figsize'] = [12, 12] if st.checkbox("Scatter Plot Matrix"): st.subheader("Scatter Plot Matrix") scatter_matrix(df) plt.show() st.pyplot() if st.checkbox("Scatter Plot_1"): st.subheader("Scatter Plot") sns.pairplot(df, hue="Outcome", markers=["o", "s"],palette="husl") st.pyplot() if st.checkbox("Scatter Plot_2"): st.subheader("0, 1을 noDM, DM으로 변경") df_temp = df.copy() df_temp['Outcome'] = df_temp['Outcome'].replace([0, 1],['noDM', 'DM']) sns.pairplot(df_temp, hue='Outcome', markers=["o", "s"],palette="husl") st.pyplot() st.markdown("* * *") # if st.checkbox("6 high correlation"): st.subheader("상관성이 높은 6개의 특성에 대한 산포도") high_corr = ['Pregnancies', 'Glucose', 'SkinThickness', 'Insulin', 'BMI','Age', 'Outcome'] df_temp2 = df.copy() df_temp2['Outcome'] = df_temp2['Outcome'].replace([0, 1],['noDM', 'DM']) sns.pairplot(df_temp2[high_corr], hue='Outcome') st.pyplot() if st.checkbox("3 high correlation"): st.subheader("상관성이 높은 3개의 특성에 대한 산포도") highest_corr = ['Pregnancies', 'Age', 'Outcome'] df_temp3 = df.copy() df_temp3['Outcome'] = df_temp3['Outcome'].replace([0, 1],['noDM', 'DM']) sns.pairplot(df_temp3[highest_corr], hue='Outcome') st.pyplot() st.markdown("* * *") # st.subheader("Advanced plots:") # if st.checkbox("Standarization of data and Violinplot"): st.markdown('- Standarization of data (Normalization)') df_n = (df - 
df.mean())/df.std() df_n y=df.Outcome df2=pd.concat([y, df_n.iloc[:,0:8]], axis=1) y.shape,df2.shape df3=pd.melt(df2,id_vars='Outcome', var_name='features',value_name='values') df3.head(), df3.shape st.subheader("Violinplot") plt.figure(figsize=(10,10)) sns.violinplot(x='features', y='values', hue='Outcome', data=df3, split=True, inner='quart') plt.xticks(rotation=45) st.pyplot() # if st.checkbox("Customizing seaborn plot"): st.subheader("Customizing seaborn plot") sns.set(style='whitegrid', palette='muted') plt.figure(figsize=(10,10)) sns.swarmplot(x='features', y='values', hue='Outcome', data=df3) plt.xticks(rotation=45) st.pyplot() st.markdown("* * *")
"Life expectancy at birth, total (years)", "Development of Life Expectancy by Region\nby Year since 1960", y_scale="linear") #%% f, ax = plt.subplots(figsize=(10, 9)) sns.set_style("ticks", { 'axes.grid': True, 'grid.color': '.8', 'grid.linestyle': '-' }) plt.rcParams.update({'axes.titlesize' : 18, 'lines.linewidth' : 3,\ 'axes.labelsize' : 16, 'xtick.labelsize' : 16, 'ytick.labelsize' : 16}) plt.title("Development of Life Expectancy by Country\nby Decade since 1960", fontdict={"fontsize": 20}) sns.swarmplot(x="Decade", y="Life expectancy at birth, total (years)", hue="Region",\ palette=region_palette, data=mean_by_country_and_decade) #%% [markdown] #### Conclusions - Development of Life Expectancy Over Time #The following observations can be made from the data above: # - The gap in life expectancy has closed (more than halved) between 6 of te 7 regions; # - Meanwhile life expectancy for the Sub-Saharan Africa region has not improved at the same rate, mainly as a result of a plateau in the 1990s; # - The net result is that the gap between those countries with the worst and best record for life expectancy has not closed appreciably since 1960. # #%% [markdown] ### Stage 6.5 - Analysing Gross Domestic Product (GDP) #Using a number of techniques to get a feel for the life expectany data: # - Looking at top 10 and bottom 10 countries in 2018; # - Distribution of data by region in 2018; # - Analysing how it has developed over time since 1960. #
def _scatter_dots(ax, x, y, color, alpha=0.4):
    """Draw one group of size-1 '.' markers in a single color on *ax*."""
    ax.scatter(x, y, s=1, c=color, marker='.', alpha=alpha)


def _format_square_axes(ax, upper, ticks, xlabel, ylabel):
    """Give both axes the same ticks and the range [0, upper], then label them."""
    ax.set_yticks(ticks)
    ax.set_xticks(ticks)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_xlim((0, upper))
    ax.set_ylim((0, upper))


def plot():
    """Render the SPADE pattern-statistics figures from 'spade_stats.p'.

    Loads the pickled dict (keys 'congru_stats', 'incongru_stats',
    'candidate_per_sess') from the working directory and writes these PDFs:

    * spade_4su_pattern_density.pdf
    * 4su_candi_count.pdf (also shown interactively via ``plt.show()``)
    * spade_4su_pattern_correct_error.pdf
    * spade_4su_raw_pattern_correct_error.pdf
    * spade_4su_pattern_prefered_nonprefered.pdf
    * spade_4su_pattern_selectivity_index.pdf
    * spade_4su_raw_pattern_prefered_nonprefered.pdf
    * spade_4su_raw_pattern_prefered_nonprefered_all.pdf

    It also prints the Wilcoxon signed-rank result comparing non-preferred
    with preferred raw pattern counts. Returns None.
    """
    # Type-42 (TrueType) fonts keep text editable in the exported PDF/PS.
    rcParams['pdf.fonttype'] = 42
    rcParams['ps.fonttype'] = 42
    rcParams['font.family'] = 'sans-serif'
    rcParams['font.sans-serif'] = ['Arial']

    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open('spade_stats.p', 'rb') as stats_file:
        r = pickle.load(stats_file)
    congru_stats = r['congru_stats']
    incongru_stats = r['incongru_stats']
    candidate_per_sess = r['candidate_per_sess']

    # ---- pattern density per session ---------------------------------
    # Each entry cs is indexed as (session id, congruent candidate count,
    # incongruent candidate count); sessions with <= 1000 candidates of a
    # kind are left out of that kind's density list.
    congru_sess_ids = np.array(congru_stats['sess_ids'])
    incongru_sess_ids = np.array(incongru_stats['sess_ids'])
    congru_dens = []
    incongru_dens = []
    for cs in candidate_per_sess:
        if cs[1] > 1000:
            congru_dens.append(np.sum(congru_sess_ids == cs[0]) / cs[1])
        if cs[2] > 1000:
            incongru_dens.append(np.sum(incongru_sess_ids == cs[0]) / cs[2])

    # Bootstrap CIs are computed but not used by any figure below; the
    # calls are kept so the function's behavior is otherwise unchanged.
    congru_boot = boot.ci(congru_dens, np.mean, n_samples=1000)
    incongru_boot = boot.ci(incongru_dens, np.mean, n_samples=1000)

    congru_sem = np.std(congru_dens) / np.sqrt(len(congru_dens))
    incongru_sem = np.std(incongru_dens) / np.sqrt(len(incongru_dens))
    # p_value = permutation_test(congru_dens,incongru_dens,method='approximate',num_rounds=10000)

    (fh, ax) = plt.subplots(1, 1, figsize=(1.5 / 2.54, 4 / 2.54), dpi=300)
    # Both means are normalized to the incongruent mean.
    mm = np.array([np.mean(incongru_dens),
                   np.mean(congru_dens)]) / np.mean(incongru_dens)
    ax.bar(1, mm[0], color='k', edgecolor='k')
    ax.bar(2, mm[1], color='w', edgecolor='k')
    ax.errorbar([1, 2],
                mm,
                np.hstack((incongru_sem, congru_sem)) / np.mean(incongru_dens),
                color='none',
                ecolor='grey',
                capsize=3)
    ax.set_yscale('log')
    ax.set_ylim([1e-1 * 2, 1e2])
    ax.set_ylabel('Norm. motif density')
    ax.set_xticks([1, 2])
    ax.set_xticklabels(['Incongru.', 'Congruent'], rotation=45, ha='right')
    # plt.close('all')
    fh.savefig('spade_4su_pattern_density.pdf', bbox_inches='tight')

    # ---- candidate counts per session --------------------------------
    congru_candi = [cs[1] for cs in candidate_per_sess if cs[1] > 1000]
    incongru_candi = [cs[2] for cs in candidate_per_sess if cs[2] > 1000]

    (fh, ax) = plt.subplots(1, 1, figsize=(1 / 2.54, 4 / 2.54), dpi=300)
    # Uniform horizontal jitter of width 0.2 keeps the points apart.
    ax.scatter(np.random.random(len(incongru_candi)) * 0.2 + 0.9,
               incongru_candi,
               s=4,
               c='k',
               alpha=0.5,
               edgecolors='none')
    ax.scatter(np.random.random(len(congru_candi)) * 0.2 + 2.9,
               congru_candi,
               s=4,
               c='k',
               alpha=0.5,
               edgecolors='none')
    # Mean +/- SEM markers sit beside each jittered cloud.
    ax.errorbar(4, np.mean(congru_candi),
                np.std(congru_candi) / np.sqrt(len(congru_candi)),
                fmt='ro', ecolor='r', elinewidth=0.5, capsize=2, ms=4,
                mfc='none')
    ax.errorbar(0, np.mean(incongru_candi),
                np.std(incongru_candi) / np.sqrt(len(incongru_candi)),
                fmt='ro', ecolor='r', elinewidth=0.5, capsize=2, ms=4,
                mfc='none')
    ax.set_yscale('log')
    ax.set_xlim([-1, 5])
    ax.set_xticks([])
    ax.set_yticks([1000, 100000, 10000000])
    plt.show()
    fh.savefig('4su_candi_count.pdf', bbox_inches='tight')
    # NOTE(review): the rank-sum result is computed but discarded.
    stats.ranksums(incongru_candi, congru_candi)

    # for patt in congru_stats['pertrial']:
    #     [np.mean(x) for x in patt]
    #     pass
    # breakpoint()

    # Arrays shared by all scatter figures below.
    perhz_mm = np.array(congru_stats['perHz_mm'])
    perhz_pvalues = np.array(congru_stats['perHz_pvalues'])
    raw_mm = np.array(congru_stats['mm'])
    # Raw values are divided by 6 and labelled 'patterns / s' --
    # presumably a 6 s window; TODO confirm.
    raw_rate = raw_mm / 6
    motif_pvalues = np.array(congru_stats['motif_pvalues'])
    s1sel = np.array(congru_stats['prefered_samp']) == 1
    s2sel = np.array(congru_stats['prefered_samp']) == 2

    # ---- correct vs. error trials, per-spike rate --------------------
    s1sigsel = perhz_pvalues[:, 1] < 0.05
    s2sigsel = perhz_pvalues[:, 2] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    # error on the x axis, correct on the y axis (per the axis labels)
    _scatter_dots(ax, perhz_mm[~s1sigsel, 1], perhz_mm[~s1sigsel, 0], 'silver')
    _scatter_dots(ax, perhz_mm[~s2sigsel, 3], perhz_mm[~s2sigsel, 2], 'silver')
    _scatter_dots(ax, perhz_mm[s1sigsel, 1], perhz_mm[s1sigsel, 0], 'r')
    _scatter_dots(ax, perhz_mm[s2sigsel, 3], perhz_mm[s2sigsel, 2], 'r')
    ax.plot([0, 0.26], [0, 0.26], '--k')
    _format_square_axes(ax, 0.26, [0, 0.1, 0.2],
                        'patterns / spike / s, error trial',
                        'patterns / spike / s, correct trial')
    fh.savefig('spade_4su_pattern_correct_error.pdf', bbox_inches='tight')

    # ---- correct vs. error trials, raw rate --------------------------
    s1sigsel = motif_pvalues[:, 1] < 0.05
    s2sigsel = motif_pvalues[:, 2] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    _scatter_dots(ax, raw_rate[~s1sigsel, 1], raw_rate[~s1sigsel, 0], 'silver')
    _scatter_dots(ax, raw_rate[~s2sigsel, 3], raw_rate[~s2sigsel, 2], 'silver')
    _scatter_dots(ax, raw_rate[s1sigsel, 1], raw_rate[s1sigsel, 0], 'r')
    _scatter_dots(ax, raw_rate[s2sigsel, 3], raw_rate[s2sigsel, 2], 'r')
    ax.plot([0, 3.6], [0, 3.6], '--k')
    _format_square_axes(ax, 3.6, np.arange(0, 4),
                        'patterns / s, error trial',
                        'patterns / s, correct trial')
    fh.savefig('spade_4su_raw_pattern_correct_error.pdf', bbox_inches='tight')

    # ---- preferred vs. non-preferred, per-spike rate -----------------
    sigsel = perhz_pvalues[:, 0] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    # The preferred column goes on the y axis: column 0 when the preferred
    # sample is 1, column 2 when it is 2.
    _scatter_dots(ax, perhz_mm[s1sel & ~sigsel, 2],
                  perhz_mm[s1sel & ~sigsel, 0], 'silver')
    _scatter_dots(ax, perhz_mm[s2sel & ~sigsel, 0],
                  perhz_mm[s2sel & ~sigsel, 2], 'silver')
    _scatter_dots(ax, perhz_mm[s1sel & sigsel, 2],
                  perhz_mm[s1sel & sigsel, 0], 'r')
    _scatter_dots(ax, perhz_mm[s2sel & sigsel, 0],
                  perhz_mm[s2sel & sigsel, 2], 'r')
    ax.plot([0, 0.26], [0, 0.26], '--k')
    _format_square_axes(ax, 0.26, [0, 0.1, 0.2],
                        'patterns / spike / s, non-prefered',
                        'patterns / spike / s, prefered')
    fh.savefig('spade_4su_pattern_prefered_nonprefered.pdf',
               bbox_inches='tight')

    # ---- selectivity index -------------------------------------------
    prefered_raw = np.hstack((raw_rate[s1sel, 0], raw_rate[s2sel, 2]))
    nonpref_raw = np.hstack((raw_rate[s1sel, 2], raw_rate[s2sel, 0]))
    # mm=(np.mean(nonpref),np.mean(prefered))
    # pref_boot=boot.ci(prefered, np.mean,n_samples=1000)
    # npref_boot=boot.ci(nonpref, np.mean,n_samples=1000)
    selec_idx_raw = ((prefered_raw - nonpref_raw) /
                     (prefered_raw + nonpref_raw))

    prefered = np.hstack((perhz_mm[s1sel, 0], perhz_mm[s2sel, 2])) / 6
    nonpref = np.hstack((perhz_mm[s1sel, 2], perhz_mm[s2sel, 0])) / 6
    selec_idx = ((prefered - nonpref) / (prefered + nonpref))

    # Group 1 = raw index, group 2 = per-spike index.
    swmy = np.hstack((selec_idx_raw, selec_idx))
    swmx = np.hstack(
        (np.ones_like(selec_idx_raw), np.ones_like(selec_idx) * 2))
    (fh, ax) = plt.subplots(1, 1, figsize=(15 / 2.54, 15 / 2.54), dpi=300)
    # ax.scatter(np.ones_like(selec_idx),selec_idx)
    ax = sns.swarmplot(x=swmx, y=swmy, size=1, ax=ax, color='silver')
    # Box outline only: no fill, caps, whiskers or fliers.
    ax = sns.boxplot(x=swmx,
                     y=swmy,
                     showcaps=False,
                     boxprops={'facecolor': 'None'},
                     showfliers=False,
                     whiskerprops={'linewidth': 0},
                     ax=ax)
    ax.set_ylabel('Selectivity index')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(['Patterns / s', 'Patterns / spike'],
                       rotation=45,
                       ha='right')
    fh.savefig('spade_4su_pattern_selectivity_index.pdf', bbox_inches='tight')

    # ---- preferred vs. non-preferred, raw rate -----------------------
    sigsel = motif_pvalues[:, 0] < 0.05
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    _scatter_dots(ax, raw_rate[s1sel & ~sigsel, 2],
                  raw_rate[s1sel & ~sigsel, 0], 'silver')
    _scatter_dots(ax, raw_rate[s2sel & ~sigsel, 0],
                  raw_rate[s2sel & ~sigsel, 2], 'silver')
    _scatter_dots(ax, raw_rate[s1sel & sigsel, 2],
                  raw_rate[s1sel & sigsel, 0], 'r')
    _scatter_dots(ax, raw_rate[s2sel & sigsel, 0],
                  raw_rate[s2sel & sigsel, 2], 'r')
    ax.plot([0, 3.6], [0, 3.6], '--k')
    _format_square_axes(ax, 3.6, np.arange(0, 4),
                        'patterns / s, non-prefered',
                        'patterns / s, prefered')
    fh.savefig('spade_4su_raw_pattern_prefered_nonprefered.pdf',
               bbox_inches='tight')

    # ---- same comparison, every pattern in black ---------------------
    ### for comparison of r rather than fr
    (fh, ax) = plt.subplots(1, 1, figsize=(5 / 2.54, 5 / 2.54), dpi=300)
    _scatter_dots(ax, raw_rate[s1sel, 2], raw_rate[s1sel, 0], 'k', alpha=1)
    _scatter_dots(ax, raw_rate[s2sel, 0], raw_rate[s2sel, 2], 'k', alpha=1)
    ax.plot([0, 3.6], [0, 3.6], '--', color='silver')

    # The signed-rank test runs on the undivided values.
    xx = np.hstack([raw_mm[s1sel, 2], raw_mm[s2sel, 0]])
    yy = np.hstack([raw_mm[s1sel, 0], raw_mm[s2sel, 2]])
    # print() accepts no `signedstat` keyword (the old call raised
    # TypeError), so bind the result first and print it.
    signedstat = stats.wilcoxon(xx, yy)
    print(signedstat)
    # (slope, intercept,rvalue,pvalue,stderr)=stats.linregress(xx,yy)
    # ax.plot([0,3.6],[intercept/6,slope*3.6+intercept/6],'--r')
    _format_square_axes(ax, 3.6, np.arange(0, 4),
                        'patterns / s, non-prefered',
                        'patterns / s, prefered')
    # Distinct file name: reusing the previous figure's name would
    # silently overwrite the significance-colored version saved above.
    fh.savefig('spade_4su_raw_pattern_prefered_nonprefered_all.pdf',
               bbox_inches='tight')
# Z-scored pupil size next to the per-row mean of the filtered trace.
pupil_z = pup.ztransform_pupil_size(pupil_filt)
pup_dat = np.hstack((np.mean(pupil_filt, axis=1), pupil_z))

# The first half of every column describes the raw means, the second
# half the z-scored values.
n_rows = len(pupil_z)
label = n_rows * ['pupil'] + n_rows * ['pupil_z']
label2 = n_rows * [0] + n_rows * [1]
df = pd.DataFrame({
    'pupil': pup_dat,
    'type': label,
    'label': label2,
    'correct': np.hstack((performance, performance))
})

sns.set_context('talk')
fig, ax = plt.subplots(1, 2, figsize=(15, 8))

# Left panel: raw means as a swarm colored by `correct`, with a gray box
# (whiskers at the 20th/80th percentiles, no fliers) drawn over it.
sns.swarmplot(data=df[df['type'] == 'pupil'],
              x='type',
              y='pupil',
              hue='correct',
              alpha=0.7,
              ax=ax[0])
sns.boxplot(data=df[df['type'] == 'pupil'],
            x='type',
            y='pupil',
            color='gray',
            whis=[20, 80],
            showfliers=False,
            ax=ax[0])

# Right panel: swarm of the z-scored values.
sns.swarmplot(data=df[df['type'] == 'pupil_z'],
              x='type',
              y='pupil',
              hue='correct',
              alpha=0.7,
              ax=ax[1])
if flip: input_df['biotype'] = np.abs(input_df['biotype']-1) # works because we only ever have 2 biotypes for col in cols: db[col] = zscore_by_group(input_df[col], labels, healthy_group) db = pd.melt(db, id_vars=['id', 'biotype', 'diagnosis'], value_vars=cols) # show diagnostic distributions for each biotype seperarely sns.set_style('white') fig, (ax1, ax2) = plt.subplots(figsize=(10, 7), nrows=2, sharex=True) plt.subplots_adjust(left=0.125, bottom=0.15, right=0.9, top=0.85, wspace=0.25, hspace=0.25) plt.suptitle('Diagnosis distribution per biotype') sns.swarmplot(x="variable", y="value", hue="diagnosis", data=db.loc[db['biotype'] == 0], ax=ax1) ax1.set_ylim([-4, 4]) ax1.set_title('Average-performing biotype') ax1.set_xticklabels([], rotation=45, ha='right') ax1.hlines(0, ax1.xaxis.get_majorticklocs()[0], ax1.xaxis.get_majorticklocs()[-1]) sns.swarmplot(x="variable", y="value", hue="diagnosis", data=db.loc[db['biotype'] == 1], ax=ax2) ax2.set_ylim([-4, 4]) ax2.set_title('Poor-performing biotype') ax2.set_xticklabels(names, rotation=45, ha='right') ax2.hlines(0, ax1.xaxis.get_majorticklocs()[0], ax1.xaxis.get_majorticklocs()[-1]) sns.plt.savefig('biotype_yscores_per_diagnosis_and_biotype.pdf') sns.plt.close()
# box plots of rank differences (ML vs naive) of top hits fig = plt.figure() fig.suptitle(x_to_plot_gt + " and " + y_to_plot_gt) top_percents = [10, 20, 50, 100] # percent above which to do cutoff for i, top_percent in enumerate(top_percents): ax = fig.add_subplot(2, 2, i + 1) top_ranknum = int(len(df_ranked) * (1 - top_percent / 100)) df_ranked_top = df_ranked[df_ranked[x_to_plot_gt] > top_ranknum].copy() df_ranked_top['ML vs GT'] = np.abs( df_ranked_top[x_to_plot_gt] - df_ranked_top[x_to_plot_ML]) + np.abs(df_ranked_top[y_to_plot_gt] - df_ranked_top[y_to_plot_ML]) df_ranked_top['naive vs GT'] = np.abs( df_ranked_top[x_to_plot_gt] - df_ranked_top[x_to_plot_naive]) + np.abs( df_ranked_top[y_to_plot_gt] - df_ranked_top[y_to_plot_naive]) df_comp = df_ranked_top.melt(value_vars=['ML vs GT', 'naive vs GT'], var_name='model', value_name='rank difference') ax = sns.swarmplot(x='model', y='rank difference', data=df_comp, color=".25", alpha=0.5) ax = sns.boxplot(x='model', y='rank difference', data=df_comp) ax.set_title("top {}%".format(top_percent)) ax.set_xlabel('') plt.tight_layout()
# All three plots put Award_Amount on x and Model Selected on y.
award_axes = dict(data=df, x='Award_Amount', y='Model Selected')

# 1) Strip plot with jitter enabled.
sns.stripplot(jitter=True, **award_axes)
plt.show()

# 2) Swarm plot of the same data, colored by Region.
sns.swarmplot(hue='Region', **award_axes)
plt.show()

# 3) Box plot of the same data; the current figure is cleared afterwards.
sns.boxplot(**award_axes)
plt.show()
plt.clf()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 15 22:15:25 2019

@author: ankusmanish
"""
# Draw a swarm plot of "total bill" against day for the tips dataset,
# read from 'tips.csv' in the working directory.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sea

data = pd.read_csv('tips.csv')
x = data['day']
y = data['total_bill']

plt.figure(figsize=(8, 8))
# x and y are passed by keyword: current seaborn releases reject positional
# data arguments, while keywords work on every release.
sea.swarmplot(x=x, y=y)
plt.xlabel('Day', fontsize=20)
plt.ylabel('Total Bill', fontsize=20)
plt.show()
"https://reneshbedre.github.io/assets/posts/anova/onewayanova.txt", sep="\t") # reshape the d dataframe suitable for statsmodels package df_melt = pd.melt(df.reset_index(), id_vars=['index'], value_vars=['A', 'B', 'C', 'D']) # replace column names df_melt.columns = ['index', 'treatments', 'value'] # %% # generate a boxplot to see the data distribution by treatments. Using boxplot, we can # easily detect the differences between different treatments import matplotlib.pyplot as plt import seaborn as sns ax = sns.boxplot(x='treatments', y='value', data=df_melt, color='#99c2a2') ax = sns.swarmplot(x="treatments", y="value", data=df_melt, color='#7d0013') plt.show() # %% import scipy.stats as stats # stats f_oneway functions takes the groups as input and returns ANOVA F and p value fvalue, pvalue = stats.f_oneway(df['A'], df['B'], df['C'], df['D']) print(fvalue, pvalue) # 17.492810457516338 2.639241146210922e-05 # %% # get ANOVA table as R like output import statsmodels.api as sm from statsmodels.formula.api import ols # Ordinary Least Squares (OLS) model model = ols('value ~ C(treatments)', data=df_melt).fit() anova_table = sm.stats.anova_lm(model, typ=2)
# Number of paintings per nationality (boxen plot).
plt.figure(figsize=(20, 10))
sns.boxenplot(x="nationality",
              y="paintings",
              color="y",
              scale="linear",
              data=df1)
plt.title('Number of Painting in each nation')
# The x axis shows nationality; the former 'Age Group' label was a
# copy-paste leftover that contradicted the title and the data.
plt.xlabel('Nation')
plt.ylabel('Number of Painting')
plt.xticks(rotation=60)
plt.show()

# Number of paintings by genre (swarm plot).
plt.figure(figsize=(20, 10))
sns.swarmplot(x=df1['genre'], y=df1['paintings'], color="black")
plt.xticks(rotation=60)
plt.title('Number of Painting by Genre')
plt.xlabel('Genre')
plt.ylabel('Number of Painting')
plt.show()

# Number of paintings by nationality (swarm plot).
plt.figure(figsize=(20, 10))
sns.swarmplot(x=df1['nationality'], y=df1['paintings'], color="red")
plt.xticks(rotation=60)
plt.title('Number of Painting by Nation')
plt.xlabel('Nation')
plt.ylabel('Number of Painting')
plt.show()
ax1.legend(construct_legend) ax1.plot(t, 100*np.ones_like(t), 'k--') plt.ylim([80, 105]) plt.xlim([-0.2, 2]) ax1.set_xlabel('Time (s)') # percent change bar plots ax2 = inset_axes(ax1, width="30%", height="40%", loc=4, borderpad=3) df_barplot = df_percents[df_percents['index']==construct] sns.swarmplot(x = 'exp', y='mean percent', color='black', data=df_barplot, order=colors.keys()) sns.boxplot(x='exp', y='mean percent', data=df_barplot, # palette=colors, color='white', whis=1.5, showfliers=False, dodge=False, hue='exp', palette=colors, order=colors.keys(), width=0.5)
value='100-percentile2', estimator=np.median) sns.tsplot(dfd, 'percentile1', 'subj', condition='condition', value='100-percentile2', err_style="unit_traces", estimator=np.median) plt.title('Rest before VS. Motor before') plt.ylim(-1, 1.5) plt.plot([0, 100], [100, 0], 'k--') plt.show() sns.boxplot(x='condition', y='auc', data=aucs) sns.swarmplot(x='condition', y='auc', data=aucs, color='k', alpha=0.5) print(aucs.loc[aucs['condition'] == 'exp']) print(aucs.loc[aucs['condition'] == 'control']) print(aucs.loc[aucs['condition'] == 'exp', 'auc'].as_matrix() - aucs.loc[aucs['condition'] == 'control', 'auc'].as_matrix()) from scipy.stats import ttest_ind, ttest_1samp, ttest_rel, wilcoxon, ranksums print( ttest_ind(aucs.loc[aucs['condition'] == 'exp', 'auc'], aucs.loc[aucs['condition'] == 'control', 'auc'])) print( ttest_rel(aucs.loc[aucs['condition'] == 'exp', 'auc'], aucs.loc[aucs['condition'] == 'control', 'auc'])) print( wilcoxon(aucs.loc[aucs['condition'] == 'exp', 'auc'], aucs.loc[aucs['condition'] == 'control', 'auc'])) print(
import seaborn as sns
import matplotlib.pyplot as plt

# Example iris dataset, fetched through seaborn's dataset loader.
iris = sns.load_dataset("iris")

# One swarm of petal lengths per species.
sns.swarmplot(data=iris, x="species", y="petal_length")

# Display the figure.
plt.show()
# coding=utf-8
# %matplotlib inline jupyter
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats, integrate
import pandas as pd

sns.set(style="whitegrid", color_codes=True)
# Seed numpy's global RNG from the character codes of "categorical".
np.random.seed(sum(map(ord, "categorical")))

titanic = sns.load_dataset("titanic")
tips = sns.load_dataset("tips")
iris = sns.load_dataset("iris")

# Most plots below show total_bill per day from the tips data.
bill_by_day = dict(x="day", y="total_bill", data=tips)

# Strip plots, without and with explicit jitter.
sns.stripplot(**bill_by_day)
sns.stripplot(jitter=True, **bill_by_day)

# Swarm plots, plain and colored by sex.
sns.swarmplot(**bill_by_day)
sns.swarmplot(hue="sex", **bill_by_day)

# Box plot
sns.boxplot(hue="time", **bill_by_day)

# Violin plots; the second one swaps the axes and splits each violin by
# the hue variable.
sns.violinplot(hue="time", **bill_by_day)
sns.violinplot(x="total_bill", y="day", hue="time", data=tips, split=True)
# In[ ]:

# Sales by week, one line per year.
pivoted = pd.pivot_table(df_raw, values='sales', columns='Year', index='Week')
pivoted.plot(figsize=(12, 12))

# In[ ]:

# Sales by day, one line per month.
pivoted = pd.pivot_table(df_raw, values='sales', columns='Month', index='Day')
pivoted.plot(figsize=(12, 12))

# In[ ]:

# Mean sales per (Year, Month, item), drawn as one swarm per item.
temp_1 = df_raw.groupby(['Year', 'Month',
                         'item'])['sales'].mean().reset_index()
plt.figure(figsize=(12, 8))
# x/y are passed by keyword: current seaborn releases reject positional
# data arguments, while keywords work on every release.
sns.swarmplot(x='item', y='sales', data=temp_1, hue='Month')
# Place legend to the right
plt.legend(bbox_to_anchor=(1, 1), loc=2)

# In[ ]:

# In case the above plot is cluttered (which it is), try this
# (will create a grid for Year vs Month):
#sns.factorplot('item', 'sales', data=temp_1, hue = 'Month', col='Year',row='Month', kind='swarm', size = 5);

# In[ ]:

# Mean sales per (Year, Month) as a scatter without a regression line.
temp_1 = df_raw.groupby(['Year', 'Month'])['sales'].mean().reset_index()
# No plt.figure() here: lmplot is figure-level and always creates its own
# figure, so a preceding plt.figure() only left an empty one behind.
sns.lmplot(x='Month', y='sales', data=temp_1, hue='Year', fit_reg=False)

# In[ ]:
# NOTE: x/y are passed by keyword throughout; current seaborn releases
# reject positional data arguments.

# Box plot
sns.boxplot(x='day', y='total_bill', data=tips)
sns.boxplot(x='day', y='total_bill', data=tips, hue='smoker')

# Violin plot
sns.violinplot(x='day', y='total_bill', data=tips)
sns.violinplot(x='day', y='total_bill', data=tips, hue='sex')
sns.violinplot(x='day', y='total_bill', data=tips, hue='sex', split=True)

# Strip plot
sns.stripplot(x='day', y='total_bill', data=tips)
sns.stripplot(x='day', y='total_bill', data=tips, jitter=True)
sns.stripplot(x='day', y='total_bill', data=tips, jitter=True, hue='sex')
# stripplot's old `split` option is called `dodge` in current seaborn.
sns.stripplot(x='day',
              y='total_bill',
              data=tips,
              jitter=True,
              hue='sex',
              dodge=True)

# Swarm plot
sns.swarmplot(x='day', y='total_bill', data=tips)

# Swarm and violin plot on the same axes
sns.violinplot(x='day', y='total_bill', data=tips)
sns.swarmplot(x='day', y='total_bill', data=tips, color='black')

# Factor plot: `factorplot` was renamed `catplot` (seaborn >= 0.9) and the
# old name has since been removed.
sns.catplot(x='day', y='total_bill', data=tips, kind='bar')
sns.catplot(x='day', y='total_bill', data=tips, kind='violin')
fig, axes = plt.subplots(2, 10) stds = pd.DataFrame(columns=['std', 'group']) for g, group in enumerate(['Real', 'Mock']): for s, subj in enumerate(df.loc[df.group == group, 'subj'].unique()): axes[0, s].set_title('S'+str(s)) for d, day in enumerate(df.loc[(df.group == group) & (df.subj == subj), 'day'].unique()): stds.loc[len(stds)] = {'std': df.loc[(df.group == group) & (df.subj == subj) & (df.day == day), 'slope'].std(), 'group': group} axes[g, s].hist(df.loc[(df.group == group) & (df.subj == subj), 'slope'], np.linspace(-0.7, 0.7, 50), density=True) axes[0, 0].set_ylabel('Real') axes[1, 0].set_ylabel('Mock') #sns.pairplot(df, 'subj', vars=['slope']) plt.show() sns.barplot(x='group', y='std', data=stds, estimator=np.median) sns.swarmplot(x='group', y='std', data=stds, color='r') plt.show() sns.kdeplot(df.loc[(df.group == 'Real'), 'slope']) sns.kdeplot(df.loc[(df.group == 'Mock'), 'slope']) plt.show() from scipy.stats import * print(bartlett(df.loc[(df.group == 'Real'), 'slope'], df.loc[(df.group == 'Mock'), 'slope'])) print(bartlett(df.loc[(df.group == 'Real'), 'slope'], df.loc[(df.group == 'Real'), 'slope'])) print(levene(df.loc[(df.group == 'Real'), 'slope'], df.loc[(df.group == 'Mock'), 'slope'])) print(levene(df.loc[(df.group == 'Real'), 'slope'], df.loc[(df.group == 'Real'), 'slope'])) print(normaltest(df.loc[(df.group == 'Real'), 'slope'])) print(normaltest(df.loc[(df.group == 'Mock'), 'slope']))
# Load the example titanic data, keep a CSV copy, and echo it.
titanic = sb.load_dataset("titanic")
titanic.to_csv("db_titanic.csv", index=False)
print(titanic)

# Age per embarkation town, horizontal then vertical bars.
# (`mpl` is used as the pyplot interface throughout this script.)
sb.barplot(x="age", y="embark_town", orient='h', data=titanic)
mpl.show()
sb.barplot(x="embark_town", y="age", data=titanic)
mpl.show()

mpl.scatter("survived", "age", data=titanic)
mpl.show()

sb.pointplot(x="sex", y="age", data=titanic)
mpl.show()

sb.swarmplot(x="pclass", y="age", data=titanic)
mpl.show()

# Sepal-length densities for two iris species on shared axes.
iris = sb.load_dataset("iris")
sb.set_style("darkgrid")
# The keyword is lowercase `label`; the former `Label=` is not a valid
# property name and was rejected when forwarded to matplotlib.
sb.kdeplot(iris.loc[(iris['species'] == 'setosa'), 'sepal_length'],
           color='b',
           shade=True,
           label='setosa')
sb.kdeplot(iris.loc[(iris['species'] == 'virginica'), 'sepal_length'],
           color='r',
           shade=True,
           label='virginica')
mpl.show()

# Passenger counts by class and by sex, split by the `who` column.
sb.countplot(x='class', hue='who', data=titanic)
mpl.show()
sb.countplot(x='sex', hue='who', data=titanic, palette="PuRd")
mpl.show()
df = pd.read_csv(filepath, sep=";", decimal=',', index_col=0) df = df.reset_index() #df["Normalized intensity"] =df.groupby(["Experiment", "Genotype"])["Mean intensity"].apply(lambda x: x/x.mean()) means_stds = df.groupby(['Experiment' ])['Mean intensity'].agg('mean').reset_index() means_stds = means_stds.rename(columns={"Mean intensity": "mean_norm"}) df = df.merge(means_stds, on=(["Experiment"])) df["Normalized intensity"] = df["Mean intensity"] / df["mean_norm"] pal = sns.color_palette("viridis", 4) g = sns.boxplot(y="Mean intensity", x="Genotype", data=df, order=["WT", "KO"]) g = sns.swarmplot(y="Mean intensity", x="Genotype", hue="Experiment", data=df, order=["WT", "KO"], palette=pal) plt.show() cat1_wt = df[df['Genotype'] == 'WT'] cat1_KO = df[df['Genotype'] == 'KO'] print(ttest_ind(cat1_wt['Mean intensity'], cat1_KO['Mean intensity'])) df2 = df.groupby([df["Experiment"], df["Genotype"]]).mean() df2.reset_index() df3 = df2.reset_index() print("Plotting means ") pal = sns.color_palette("viridis", 4) g = sns.pointplot(y="Mean intensity",
# Diffusion pseudotime on dmr_t, then EpiBurden plotted against it.
sc.tl.dpt(dmr_t, n_branchings=0, n_dcs=15)
sns.lmplot(data=dmr_t.obs, x='dpt_pseudotime', y='EpiBurden')

# Put the DMR_leiden categories in sorted order and mark them as ordered,
# which fixes the group order on the categorical axis below.
lin = tuple(sorted(list(dmr_t.obs['DMR_leiden'].values.unique())))
dmr_t.obs['DMR_leiden'] = dmr_t.obs['DMR_leiden'].cat.reorder_categories(list(lin), ordered=True)

# Fixed color for each leiden cluster.
color_dict = {
    "leiden_A": "#d62728",
    "leiden_B": "#ff7f0e",
    "leiden_C": "#1f77b4",
    "leiden_D": "#2ca02c"
}  # equivalent to dict(zip(list(dmr_t.obs['DMR_leiden'].value_counts().index), dmr_t.uns['DMR_leiden_colors']))

# EpiBurden per cluster: colored boxes with the single points on top.
sns.boxplot(data=dmr_t.obs, x='DMR_leiden', y='EpiBurden', palette=color_dict)
sns.swarmplot(data=dmr_t.obs, x='DMR_leiden', y='EpiBurden', color=".2")

# NOTE(review): `rna` is read twice from different paths and only the
# second result survives -- presumably alternative locations of the same
# table; confirm and drop the one that is not needed.
rna = pd.read_table("/mnt/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/STAD_SNUH_vst.txt", index_col=0, sep=' ')
rna = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/GENCODE_V24/STAD_SNUH_vst.txt", index_col=0, sep=' ')
# Prefix every column name with "X".
rna.columns = list(map(lambda x: "X" + x, rna.columns))

deg_tn_protein = pd.read_table("/mnt/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/02.RNA-seq/STAD_SNUH_Tumor_leiden_vst_DEG_Leiden_A_D_protein.txt", index_col="ID")
deg_tn_protein.columns = list(map(lambda x: "X" + x, deg_tn_protein.columns))

# NOTE(review): `pro_met` is assigned three times in a row; only the last
# read (the .../03.WGBS/NEW/ file) is used afterwards.
pro_met = pd.read_table("Promoter_up500down500_ALL.txt", index_col="ID")
pro_met = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/03.WGBS/Promoter_cCRE_ALL.txt", index_col="ID")
pro_met = pd.read_table("/data/Projects/phenomata/01.Projects/08.StomachCancer_backup/03.WGBS/NEW/Promoter_cCRE_ALL.txt", index_col="ID")
pro_met.columns = list(map(lambda x: "X" + x, pro_met.columns))

# Split each '/'-separated index label into its fields: field 0 -> Loc,
# field 1 -> GeneID, field 2 -> EnsemblID, last field -> CpG.
pro_met_info = pd.DataFrame(list(zip(list(map(lambda x: x.split('/')[0], pro_met.index)), list(map(lambda x: x.split('/')[1], pro_met.index)), list(map(lambda x: x.split('/')[2], pro_met.index)), list(map(lambda x: x.split('/')[-1], pro_met.index)))), columns=['Loc', 'GeneID', 'EnsemblID', 'CpG'], index=pro_met.index)
# NOTE(review): this excerpt starts in the middle of a call; the opening of
# the call (and the dict literal closed below) is outside the visible source.
"shade": True, "cumulative": cdf }, hist=False, color='cyan')
# Vertical black line at me['Time'], from 0 up to the current top of the y axis.
plt.plot([me['Time']] * 2, [0, ax.get_ylim()[1]], color='black')
plt.xlabel('Overall')
ax.xaxis.set_major_formatter(formatter)
plt.tight_layout()
# The output file name records which variant was drawn.
txt = 'cdf' if cdf else 'pdf'
plt.savefig('stages_' + txt + '.svg')
# Total time per division: box plot with the individual points on top.
plt.figure(figsize=[10, 10])
ax = sns.boxplot(x='Time', y='Div', data=df)
sns.swarmplot(x='Time', y='Div', data=df, size=2, color=".3", linewidth=0)
plt.title('Total Time by Division')
plt.xlabel('Total Time')
ax.xaxis.set_major_formatter(formatter)
plt.savefig('time_by_div.svg')
# Boolean flag: True when the division name contains 'F' or 'ATH'.
# NOTE(review): presumably these mark the women's divisions (the legend below
# labels the True group 'Women') - verify against the division names.
df['Sex'] = df['Div'].map(lambda x: 'F' in x or 'ATH' in x)
plt.figure(figsize=[10, 6])
# One shaded density curve per group; the True group is drawn first so the
# legend order below matches.
# NOTE(review): distplot is deprecated in recent seaborn releases - confirm
# the targeted version.
ax = sns.distplot(df.loc[df['Sex'] == 1, 'Time'], kde_kws={"shade": True}, hist=False)
ax = sns.distplot(df.loc[df['Sex'] == 0, 'Time'], kde_kws={"shade": True}, hist=False)
plt.legend(['Women', 'Men'])
# Every plot in this stretch draws total_bill against day from the tips data,
# so the shared keyword arguments are collected once.
bill_by_day = {'x': 'day', 'y': 'total_bill', 'data': tips}

sb.violinplot(hue='sex', split=True, **bill_by_day)  # Graph on each side of plot

# Strip Plots
# Plain, then with jitter, then coloured by sex, then with split=True.
# NOTE(review): newer seaborn releases spell stripplot's split as dodge -
# confirm the targeted version before changing it.
sb.stripplot(**bill_by_day)
sb.stripplot(jitter=True, **bill_by_day)
sb.stripplot(jitter=True, hue='sex', **bill_by_day)
sb.stripplot(jitter=True, hue='sex', split=True, **bill_by_day)

# Swarm Plots
sb.swarmplot(**bill_by_day)

# Violin & Swarm Plots
# The black swarm is drawn after the violins, so it lands on the same axes.
sb.violinplot(**bill_by_day)
sb.swarmplot(color='black', **bill_by_day)

# Factor Plots (General-Purpose with Kind Specification)
# First the default kind, then one figure per explicitly requested kind.
# NOTE(review): factorplot was renamed catplot in later seaborn releases -
# confirm the targeted version before changing it.
sb.factorplot(**bill_by_day)
for plot_kind in ('box', 'bar', 'violin', 'strip', 'swarm'):
    sb.factorplot(kind=plot_kind, **bill_by_day)

# Matrix Plots ---------------------------------------------------------------
# print('Just finished collecting and storing data for ' + city_var)
# end of city loop - all cities should have been processed

# Calculate summary statistics over the 30-day period and store them in a new
# DataFrame, "df_summary"; separate the latitude and longitude into two
# columns, converting them from strings to floats; then write the DataFrame
# to a .csv file called "summary.csv".
df_summary = pd.DataFrame(columns=('city', 'long', 'lat', 'max_tmax', 'min_tmax',
                                   'range_tmax', 'mean_tmax', 'sd_tmax'))

# Group once and reuse the grouping for every statistic. Assigning the
# aggregated series indexes df_summary by city.
tmax_by_city = df.groupby('city')['tmax']
df_summary['max_tmax'] = tmax_by_city.max()
df_summary['min_tmax'] = tmax_by_city.min()
df_summary['mean_tmax'] = tmax_by_city.mean()
df_summary['sd_tmax'] = tmax_by_city.std()
df_summary['range_tmax'] = df_summary['max_tmax'] - df_summary['min_tmax']
df_summary['city'] = df_summary.index

# Each value in `cities` is a comma-separated coordinate string; its first
# number is stored as 'lat' and its second as 'long'.
# .items() and .at replace dict.iteritems() (Python 2 only) and
# DataFrame.set_value (removed from pandas).
for k, v in cities.items():
    location = tuple(float(x) for x in v.split(','))
    df_summary.at[k, 'lat'] = location[0]
    df_summary.at[k, 'long'] = location[1]

df_summary.to_csv('summary.csv', index=False)

# Box plot of tmax per city with the individual observations on top; the
# frame is sorted by city once and shared by both layers.
sns.set_style("whitegrid")
df_by_city = df.sort_values(by='city')
ax = sns.boxplot(x="city", y="tmax", data=df_by_city)
ax = sns.swarmplot(x="city", y="tmax", data=df_by_city, color=".25")
sns.set_style("ticks")
sns.set_context("talk")

df = pd.read_csv("VRTag_days.csv")
#df['daycond'] = df.day + df.cond.astype(str)

## Pointplot for simple, easy mean/sem visualization
# Average within each subject/day/condition so every plotted point is one
# such mean.
df = df.groupby(['subject','day','condition']).mean().reset_index()
#sns.pointplot(x="day",y="dist", hue = "condition", ax=ax,palette = p1,data=df, dodge= True,ci=68)
# NOTE(review): `ax` is not created in this excerpt; it must already exist.
sns.swarmplot(x="day",y="dist", hue = "condition", dodge= True, ax=ax,data=df, hue_order = ["video","vr"])
#ax.legend_.remove()
sns.despine(ax=ax)

# An unresolved merge-conflict marker ("<<<<<<< HEAD") stood here and made the
# file unparseable. It is removed, together with the ax.set(...) call that
# followed it (ylabel "Day 1 minus Day 2 distance (pixels)"), because the call
# below set the same two labels right after it and so already determined the
# final axis text.
ax.set(xlabel="Condition",ylabel="Distance from Correct (pixels)")

## box and swarm for specific data-point visualization
#sns.boxplot(x="day",y="dist",ax=ax,palette = p1,data=df, dodge= True)
#sns.swarmplot(x="day",y="dist",ax=ax,color = "black",data=df, dodge= True)