def startProcessing(self):
    """
    Create plots using python's `matplotlib <http://matplotlib.org/index.html>`_.

    Currently, it will make:

    1. A `violin plot <https://en.wikipedia.org/wiki/Violin_plot>`_ of the
       distribution of transfer speeds (derived from the recorded download
       durations) for each site given in :ref:`sitesData <sitesData-label>`.

    A violin plot example:

    .. image:: images/matploblib-violinplot.png
       :width: 300pt

    Each record's ``'duration'`` is converted to a speed with
    ``(raw_testsize * 8 / 1024**2) / duration``.  The figure is written to
    ``violinplot.png`` and the current figure is cleared afterwards.
    """
    logging.debug("Starting processing with matplotlib...")

    # The test size does not depend on the site, so read the option once.
    testsize = get_option("raw_testsize")
    megabits = float(testsize * 8) / (1024 * 1024)

    # Make a violin plot
    downloadTimes = {}
    for site in self.sitesData:
        durations = [float(record['duration']) for record in self.sitesData[site]]
        downloadTimes[site] = megabits / numpy.array(durations)

    # Materialise the dict views: on Python 3 ``dict.values()``/``dict.keys()``
    # are not sequences, and matplotlib expects a sequence of vectors/labels.
    sites = list(downloadTimes.keys())
    plt.violinplot([downloadTimes[site] for site in sites])
    plt.xticks(range(1, len(sites) + 1), sites)
    plt.ylabel("Mb per second")
    plt.xlabel("Site")
    plt.title("Violin Plot of StashCache Transfer Speeds per Site")
    plt.savefig("violinplot.png")
    plt.clf()
def _violin_plotting(self, data, xticklabels = None, showmeans = True, xlabel = '', ylabel = '', ylim = None):
    """
    Draw one violin per column of a 2-D array on a fresh subplot.

    Parameters
    ----------
    data : array with ``ndim == 2``
        Shape ``M*N``; the last axis (``N``) is the number of categories,
        one violin is drawn for each.
    xticklabels : sequence or None
        Tick labels for the categories; left untouched when ``None``.
        Drawn with font size ``self._labelsize``.
    showmeans : bool
        Forwarded to ``plt.violinplot``.
    xlabel, ylabel : str
        Axis labels.
    ylim : pair or None
        Passed to ``plt.ylim`` when given.

    The figure is shown only when ``self._isshow`` is exactly ``True``.
    """
    assert data.ndim == 2, 'A two-dimension data should be inputted'

    n_categories = data.shape[-1]
    positions = np.arange(1, n_categories + 1)

    ax = plt.subplot()
    plt.violinplot(data, positions, showmeans=showmeans)
    ax.set_xticks(positions)
    if xticklabels is not None:
        ax.set_xticklabels(xticklabels, fontsize=self._labelsize)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if ylim is not None:
        plt.ylim(ylim)

    if self._isshow is True:
        plt.show()
def plot_gat_boxplots(data, labels, title="", showviolin=False):
    """
    Draw a box plot of ``data`` with median confidence-interval error bars.

    :param data: the groups to plot, one entry per label; nothing is drawn
        (a message is printed instead) when it is empty
    :param labels: tick labels handed to ``plt.boxplot``
    :param title: the title of the plot
    :param showviolin: when true, overlay a violin plot plus green
        error bars for the mean confidence intervals
    :return: None
    """
    if not data:  # empty
        print("No data provided to plotutils.plot_gat_boxplots")
        return

    samples = data
    medians, median_ci_low, median_ci_high = median_confidence_intervals(samples)

    fig, ax = plt.subplots()
    plt.boxplot(samples, notch=False, labels=labels)

    # Error bars only (no connecting line) for the median confidence intervals
    positions = np.arange(1, len(samples) + 1)
    plt.errorbar(positions, medians,
                 yerr=(median_ci_low, median_ci_high),
                 fmt='none', linewidth=3)

    if showviolin:
        means, mean_ci_low, mean_ci_high = mean_confidence_intervals(samples)
        plt.violinplot(samples, showmeans=True, showmedians=True)
        plt.errorbar(positions, means,
                     yerr=(mean_ci_low, mean_ci_high),
                     fmt='none', linewidth=3, color='g')

    plt.title(title)
    plt.ylabel("Goal Achievement Time (ms)")
    plt.xlabel("Algorithms")
    fig.autofmt_xdate()
    ax.autoscale(tight=True)
    plt.gcf().tight_layout()
    return None
def plotViolin(self,data,labels_to_plot='all',series='all',transformations=None,save=True):
    """
    Draw one violin figure per label, with one violin per series.

    Parameters
    ----------
    data : mapping
        ``data[series_name][label]`` is an iterable of values; ``None``
        entries are dropped before plotting.
    labels_to_plot : 'all' or iterable
        Labels to draw.  ``'all'`` uses the keys of the first series.
    series : 'all' or iterable
        Series names to draw.  ``'all'`` uses every key of ``data``.
    transformations : list of (callable, label) pairs or None
        Each callable is applied to the raw value array of every series;
        defaults to a single identity transformation.
    save : bool
        When true, each figure is saved as a PNG into a timestamped
        sub-folder of ``self._default_figure_folder``; otherwise it is shown.

    Raises
    ------
    ValueError
        If a requested label is missing from one of the series.
    """
    if transformations is None:
        transformations = [(lambda x: x, '')]

    timestamp = utils.s_timestamp()
    figure_folder = os.path.join(self._default_figure_folder, timestamp)
    if save:
        # create a separate folder
        if not os.path.exists(figure_folder):
            os.makedirs(figure_folder)

    # Materialise as lists: dict views cannot be indexed on Python 3, and
    # ``series[0]`` is needed just below.
    if series == 'all':
        series = list(data.keys())
    else:
        series = list(series)
    if labels_to_plot == 'all':
        labels_to_plot = list(data[series[0]].keys())

    for label_to_plot in labels_to_plot:
        plt.figure(figsize=(10, 15))
        data_to_plot = []
        for transformation_fun, transformation_label in transformations:
            for serie_name in series:
                series_data = data[serie_name]
                if label_to_plot not in series_data:
                    raise ValueError('Label "{0}" not in series "{1}"'.format(label_to_plot,serie_name))
                raw_data = np.array([val for val in series_data[label_to_plot] if val is not None])
                data_to_plot.append(transformation_fun(raw_data))

        plt.ylabel(label_to_plot)
        plt.title(label_to_plot)
        plt.violinplot(data_to_plot, showmedians=True)
        plt.xticks(range(1, 1 + len(series)), series)

        # Flatten once and pad the y-range by 3 on each side.
        all_values = [value for group in data_to_plot for value in group]
        plt.ylim([min(all_values) - 3, max(all_values) + 3])

        if save:
            plt.savefig(os.path.join(figure_folder, self.getDescr(label_to_plot)) + '.png')
            # Release the figure; otherwise one stays open per label.
            plt.close()
        else:
            plt.show()
def main(args):
    """
    Plot Dice-coefficient violins for a set of pickled result tables.

    ``args.input`` is a list of pickle files, each holding a DataFrame with
    at least ``status`` and ``dice`` columns; ``args.label`` gives one tick
    label per file.  For every file the rows with ``status`` other than 200
    and 201 and ``dice > 0`` are plotted, and a PASS/FAIL/CRASH summary
    (status 0 / 99 / 1 / 2) is written inside the figure.  The result is
    saved as ``violin_plot.png``.

    :raises RuntimeError: if the number of inputs and labels differ.
    """
    import io
    # Unused in this function; kept because the original imported it.
    # NOTE(review): drop if sct_utils has no import-time side effects.
    import sct_utils as sct  # noqa: F401
    import pickle
    import numpy as np
    import matplotlib.pyplot as plt

    # make sure number of inputs and labels are the same
    # (the parameter is ``args``; the original read an undefined ``arguments``)
    if len(args.input) != len(args.label):
        raise RuntimeError("Mismatch between # of files and labels")

    nb_plots = len(args.input)
    list_data = []
    text_results = []  # numerical results to display inside the figure
    for fname_pickle in args.input:
        # NOTE: unpickling can execute arbitrary code -- only use trusted files.
        with io.open(fname_pickle, "rb") as pickle_file:
            df = pickle.load(pickle_file)

        # filter lines based on status. For status definition, see sct_pipeline
        # Note: the > 0 test is to filter out NaN
        df_dice = df.query("(status != 200) & (status != 201) & (dice > 0 )")["dice"]
        # ``.values`` replaces ``get_values()``, which was removed in pandas 1.0.
        list_data.append(df_dice.values)

        # compute statistics
        count_passed = df.status[df.status == 0].count()
        count_failed = df.status[df.status == 99].count()
        count_crashed_run = df.status[df.status == 1].count()
        count_crashed_integrity = df.status[df.status == 2].count()
        count_total = count_passed + count_failed + count_crashed_run + count_crashed_integrity
        text_results.append('\n'.join(["PASS: {}/{}".format(count_passed, count_total),
                                       "FAIL: {}".format(count_failed),
                                       "CRASH_RUN: " + str(count_crashed_run),
                                       "CRASH_INTEGRITY: " + str(count_crashed_integrity)]))

    pos = np.arange(nb_plots)

    # plot fig
    fig, ax = plt.subplots(1)
    plt.violinplot(list_data, pos, points=100, widths=0.8, showmeans=True, showextrema=True, showmedians=True,
                   bw_method=0.5)
    plt.grid(axis='y')
    plt.ylabel('Dice coefficient')
    plt.xticks(pos, args.label)
    for side in ('right', 'left', 'top'):
        ax.spines[side].set_visible(False)

    # Summary text sits just above the lower y-limit of each violin.
    ylim = ax.get_ylim()
    for i, summary in enumerate(text_results):
        plt.text(i + 0.02, ylim[0] + 0.01, summary, horizontalalignment='left', verticalalignment='bottom')
    plt.savefig('violin_plot.png')
def plot_gat_boxplots(data, labels, title="", showviolin=False):
    """
    Generate a box plot from the given data.

    The data should be a list of lists where each inner list is associated
    with a different label.  Error bars for the median confidence intervals
    are overlaid on the box plot.

    :param data: the data to plot
    :param labels: the labels of the data
    :param title: the title of the plot
    :param showviolin: whether to overlay a violin plot (with mean
        confidence-interval error bars) showing the distribution of the data
    :return: None
    """
    if not data:  # empty
        print("No data provided to plotutils.plot_gat_boxplots")
        return None

    groups = data
    positions = np.arange(1, len(groups) + 1)

    # Calculate stats
    medians, median_ci_low, median_ci_high = median_confidence_intervals(groups)

    # Format axes to remove unnecessary borders and tick marks
    fig, ax = plt.subplots()
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.tick_params(direction='out')
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()

    # Do the actual plotting
    plt.boxplot(groups, notch=False, labels=labels)

    # Separate error bars (no line) for the median confidence intervals
    plt.errorbar(positions, medians,
                 yerr=(median_ci_low, median_ci_high),
                 fmt='none', linewidth=3)

    # Plot violin plot if specified
    if showviolin:
        means, mean_ci_low, mean_ci_high = mean_confidence_intervals(groups)
        plt.violinplot(groups, showmeans=True, showmedians=True)
        plt.errorbar(positions, means,
                     yerr=(mean_ci_low, mean_ci_high),
                     fmt='none', linewidth=3, color='g')

    # Set labels and ticks
    plt.title(title)
    plt.ylabel("Goal Achievement Time (ms)")
    plt.xlabel("Algorithms")
    fig.autofmt_xdate()  # auto-rotate x labels if needed
    ax.autoscale(tight=True)  # remove whitespace from plot
    plt.gcf().tight_layout()  # further whitespace removal
    return None  # legend is handled by boxplot function
def add_models_bests_scores_to_plot(hyp_dict, plt, change_color, ch_c2):
    """
    Add one violin per entry of ``hyp_dict["all_models"]`` to ``plt``.

    Each entry contributes the ``"best_score"`` of every item in its
    ``"models"`` list; violins are placed at positions 1, 2, 3, ...

    :param hyp_dict: mapping with an ``"all_models"`` list as described above
    :param plt: object exposing ``violinplot`` (e.g. ``matplotlib.pyplot``)
    :param change_color: when true, paint the violin bodies blue
    :param ch_c2: when true, paint the violin bodies red (applied after
        ``change_color``, so red wins if both are set)
    """
    from copy import deepcopy

    positions = []
    score_groups = []
    for position, cv_dict in enumerate(hyp_dict["all_models"], start=1):
        scores = [model_dict["best_score"] for model_dict in cv_dict["models"]]
        positions.append(position)
        score_groups.append(deepcopy(scores))

    violins = plt.violinplot(dataset=tuple(score_groups), positions=positions,
                             widths=1, showextrema=True, showmeans=True)

    def _paint(colour):
        # Same three setters, in the same order, for every body.
        for body in violins["bodies"]:
            body.set_facecolor(colour)
            body.set_color(colour)
            body.set_edgecolor(colour)

    if change_color:
        _paint("blue")
    if ch_c2:
        _paint("red")
def violin_plot_data(data, x_key, y_key, color, name,
                     violin_widths=.35,
                     violin_showextrema=False,
                     violin_showmedians=False,
                     violin_showmeans=False,
                     reg_poly=0,
                     reg_color="red",
                     reg_linstyle="-"):
    """
    Draw violins of ``data[y_key]`` grouped by ``data[x_key]``.

    The grouping comes from ``sort_for_violin_plot``; the violins are then
    coloured/labelled through ``set_violin_color_label``.  When ``reg_poly``
    is non-zero a polynomial of that degree is fitted to the raw
    ``(data[x_key], data[y_key])`` points and drawn on top.

    :param data: indexable by ``x_key`` and ``y_key``
    :param color: passed to ``set_violin_color_label``
    :param name: passed to ``set_violin_color_label``
    :param violin_widths: ``widths`` for ``plt.violinplot``
    :param violin_showextrema: ``showextrema`` for ``plt.violinplot``
    :param violin_showmedians: ``showmedians`` for ``plt.violinplot``
    :param violin_showmeans: ``showmeans`` for ``plt.violinplot``
    :param reg_poly: degree of the fitted polynomial; ``0`` disables the fit
    :param reg_color: colour of the fitted line
    :param reg_linstyle: line style of the fitted line
    :return: the value returned by ``set_violin_color_label`` when
        ``reg_poly == 0``, otherwise a tuple of that value and a ``Line2D``
        proxy for the fitted line
    """
    x_vals, y_vals = sort_for_violin_plot(data, key=x_key, value=y_key)

    # Plot the grouped values computed just above (the original referenced an
    # undefined ``tdata_times`` and left ``y_vals`` unused).
    vplt = plt.violinplot(y_vals,
                          positions=x_vals,
                          widths=violin_widths,
                          showextrema=violin_showextrema,
                          showmedians=violin_showmedians,
                          showmeans=violin_showmeans)
    vplt_fake = set_violin_color_label(vplt, color, name)

    if 0 == reg_poly:
        return vplt_fake

    reg = np.polyfit(data[x_key], data[y_key], reg_poly)
    # Evaluate the fit on the same x values it is drawn against, and use the
    # ``reg_linstyle`` parameter (the original read the undefined names
    # ``loop_data`` and ``reg_linestyle``).
    plt.plot(data[x_key],
             np.polyval(reg, data[x_key]),
             linestyle=reg_linstyle,
             color=reg_color)

    # Equation text built from the first two coefficients.
    if reg[-1] < 0:
        eq = "%.2f$A$ - %.2f\t" % (reg[0], (-1.0 * reg[1]))
    else:
        eq = "%.2f$A$ + %.2f\t" % (reg[0], reg[1])

    # NOTE(review): ``reg_label``, ``reg_text``, ``cdata_lplt_eq`` and
    # ``cdata_lplt_fake`` are not defined in this function; they must exist at
    # module level for this branch to run.  ``eq`` above is computed but never
    # used -- presumably it was meant to be the annotation text.  Confirm the
    # intent before relying on the regression branch.
    reg_fake = mlines.Line2D([], [], color=reg_color, label=reg_label)
    if reg_text:
        plt.text(reg_label, 8, cdata_lplt_eq, color=cdata_lplt_fake.get_color())
    return vplt_fake, reg_fake
def draw_violin(dataset, posns, alpha=1, color='royalblue', meanmarker="*"):
    """
    Draw violins for ``dataset`` on the current plot and mark each mean.

    (Shared helper for `violin_plot` and `double_violin`)

    For every row the raw points are drawn as red ``+`` markers, the mean as
    a white ``meanmarker`` with a black edge, and an error bar is drawn from
    ``util.stats_of_row``.  Returns ``None``.
    """
    violins = plt.violinplot(dataset, positions=posns,
                             showmeans=True, showextrema=True, showmedians=True)

    # Bodies: black outline, caller-chosen fill and transparency
    for body in violins['bodies']:
        body.set_edgecolors('k')
        body.set_facecolors(color)
        body.set_alpha(alpha)

    # Hide the built-in mean line; a marker is drawn for it below
    violins['cmeans'].set_alpha(0)

    row_indices = range(len(dataset))

    # Raw data points, stacked vertically at the violin's position
    for idx in row_indices:
        row = dataset[idx]
        plt.plot([posns[idx]] * len(row), row, "r+")

    # Median / min / max / bar lines in black
    for part in ('cmaxes', 'cmins', 'cbars', 'cmedians'):
        violins[part].set_color('k')

    # Mean markers
    for idx in row_indices:
        plt.plot(posns[idx], [np.average(dataset[idx])],
                 color='w', marker=meanmarker, markeredgecolor='k')

    # Confidence interval (should be optional)
    for idx in row_indices:
        stat = util.stats_of_row(dataset[idx])
        plt.errorbar(posns[idx], stat["mean"],
                     yerr=stat["ci"][1] - stat["mean"],
                     ecolor="magenta", capthick=4)
    return
def plot_helper(self, ax, data, tit, typ='hi'):
    """
    Draw one panel on the current axes; the layout depends on ``typ``.

    * ``'hi'``: ``data['xs'][subj]`` / ``data['pdfs'][subj]`` curves, one per
      subject, plotted against ``log(x + 1)``; y label ``'Density'``.
    * ``'se'``: one curve per subject of ``data[subj]`` against 1..len;
      y label ``'Eigenvalue'`` when ``tit == 'Spectrum'``, otherwise
      ``'Portion of Total Variance'``.
    * ``'sc'``: the values of ``data`` as a single scatter point (one value
      or fewer) or a violin; y label ``'Count'``.

    Curves use ``self.color`` with an alpha of ``min(1, 2 / n_subjects)``.
    Axis limits and ticks are then pinned to the data extremes and ``tit``
    is set as the title.
    """
    maxy = float('-inf')
    miny = float('inf')

    if typ == 'hi':
        alpha = np.min((1, 2.0/len(data['pdfs'])))
        for subj in data['pdfs']:
            x = data['xs'][subj]
            y = data['pdfs'][subj]
            maxy = max(maxy, np.max(y))
            miny = min(miny, np.min(y))
            plt.plot(np.log(x+1), y, color=self.color, alpha=alpha)
        modif = self.set_tick_labels(ax, miny, maxy)
        plt.ylabel('Density'+modif)
    elif typ == 'se':
        alpha = np.min((1, 2.0/len(data)))
        for subj in data:
            x = np.linspace(1, len(data[subj]), len(data[subj]))
            y = data[subj]
            maxy = max(maxy, np.max(y))
            miny = min(miny, np.min(y))
            plt.plot(x, y, color=self.color, alpha=alpha)
        modif = self.set_tick_labels(ax, miny, maxy)
        # Compare by value: ``is`` on a string literal tests identity and is
        # not guaranteed to match an equal string.
        if tit == 'Spectrum':
            plt.ylabel('Eigenvalue'+modif)
        else:
            plt.ylabel('Portion of Total Variance')
    elif typ == 'sc':
        x = 0
        # A list, not a dict view: numpy/matplotlib need a real sequence.
        y = list(data.values())
        if len(y) <= 1:
            plt.scatter(0, y, color=self.color)
            plt.xlim([-0.5, 0.5])
        else:
            voil = plt.violinplot(y)
            voil['bodies'][0].set_color(self.color)
        plt.ylabel('Count')

    if typ == 'sc':
        plt.ylim([np.min(y)*0.9, np.max(y)*1.1])
        plt.xticks([])
        plt.yticks([np.min(y), np.max(y)])
    else:
        # ``x`` here is the x data of the last subject drawn above.
        if typ == 'se':
            plt.xlim([np.min(x), np.max(x)])
            plt.xticks([np.min(x), np.max(x)])
        else:
            logx = np.log(x+1)
            plt.xlim([np.min(logx), np.max(logx)])
            plt.xticks([np.min(logx), np.max(logx)])
        plt.ylim([miny, maxy])
        # NOTE(review): the middle tick is half the range, not the midpoint
        # ``(maxy + miny) / 2``; they only coincide when miny == 0 -- confirm.
        plt.yticks([miny, ((maxy - miny)/2), maxy])

    plt.title(tit, y=1.04)
def _violinplot(self, data, names, title=None):
    """
    Build a violin-plot figure with one violin per player.

    Returns ``None`` when ``self.matplotlib_installed`` is false, otherwise
    the new figure.  Violins are spaced 4 units apart, show medians but no
    extrema, and are labelled with ``names`` rotated by 90 degrees.
    """
    if not self.matplotlib_installed:
        return None

    spacing = 4
    nplayers = self.result_set.nplayers

    # Figure grows with the number of players but is never narrower than 12.
    width = max(nplayers / 3, 12)
    figure = plt.figure(figsize=(width, width / 2))

    positions = spacing * arange(1, nplayers + 1, 1)
    plt.violinplot(
        data,
        positions=positions,
        widths=spacing / 2,
        showmedians=True,
        showextrema=False,
    )
    plt.xticks(positions, names, rotation=90)
    plt.xlim(0, spacing * (nplayers + 1))
    plt.tick_params(axis='both', which='both', labelsize=8)

    if title:
        plt.title(title)
    return figure
def violinPloter2(data,x,title ,xlabel,ylabel ):
    """
    Show a violin plot of ``data`` on a new white-background figure.

    :param data: dataset handed to ``plt.violinplot`` (one violin per entry)
    :param x: tick labels, one per violin, placed at positions 1..len(x)
    :param title: figure title
    :param xlabel: x-axis label
    :param ylabel: y-axis label
    """
    plt.rc("figure", facecolor="#ffffff")  # make the background white
    plt.figure()  # new figure, so the white background setting applies

    # Plot the ``data`` argument; the original read an undefined ``ynew`` and
    # ignored the parameter.
    plt.violinplot(data, showmeans=False, showmedians=True)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Pin tick positions together with the labels so label i sits under
    # violin i (violins are drawn at 1, 2, 3, ... by default).
    plt.xticks(list(range(1, len(x) + 1)), x)

    plt.title(title)
    plt.autoscale(True)
    plt.show()
def main(*args):
    """
    Plot one violin per input file and save the figure as ``all-runtimes``.

    Each positional argument is a file name; its values come from
    ``grouping.every_point_in`` and its tick label from ``get_label``.
    Violins are 1.5 units apart, filled royal blue with black lines, and
    the average of each is marked with a white star.
    """
    data = [grouping.every_point_in(fname) for fname in args]

    fig, ax1 = plt.subplots()  # add figsize?
    posns = [1.5 * x for x in range(1, 1 + len(data))]
    vp = plt.violinplot(data, positions=posns,
                        showmeans=True, showextrema=True, showmedians=True)

    # Bodies: opaque royal blue with a black outline
    for body in vp['bodies']:
        body.set_edgecolors('k')
        body.set_facecolors('royalblue')
        body.set_alpha(1)

    # Median, min, max and bar lines in black
    for part in ('cmaxes', 'cmins', 'cbars', 'cmedians'):
        vp[part].set_color('k')

    # Mean line takes the body colour; a star marks the average instead
    vp['cmeans'].set_color('royalblue')
    vp['cmeans'].set_alpha(1)
    for position, values in zip(posns, data):
        plt.plot([position], [np.average(values)],
                 color='w', marker='*', markeredgecolor='k')

    # Light-coloured horizontal grid, drawn below the data
    ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)

    new_name = "all-runtimes"
    ax1.set_axisbelow(True)
    ax1.set_title("All Runtimes")
    ax1.set_xlabel("Program")
    ax1.set_ylabel("Runtime (ms)")

    # Extend the lower y limit by 20
    ymin, ymax = plt.ylim()
    plt.ylim(ymin - 20, ymax)
    tick_labels = [get_label(fname) for fname in args]
    plt.xticks(posns, tick_labels, rotation=0, size='x-small')

    # Legend entry for the star marker
    plt.figtext(0.80, 0.01, '*', color='white', backgroundcolor='royalblue',weight='roman', size='medium')
    plt.figtext(0.82, 0.01, ' Average Value', color='black', weight='roman', size='x-small')

    # Save & clear
    plt.savefig(new_name)
    plt.clf()
    print("Saved figure to %s" % new_name)
    return
def draw_violin(data, alpha=1, color='royalblue', meanmarker="*", positions=None):
    """
    Draw violins for ``data`` on the current plot and mark each average.

    :param data: sequence of samples, one violin per entry
    :param alpha: transparency of the violin bodies and mean lines
    :param color: fill colour of the bodies (also used for the mean lines)
    :param meanmarker: marker drawn at each average
    :param positions: x positions of the violins; ``None`` or an empty
        sequence means 1, 2, ..., ``len(data)``
    :return: None
    """
    # Explicit None/empty test: ``positions or ...`` raises for a numpy
    # array with more than one element (ambiguous truth value).
    if positions is None or len(positions) == 0:
        posns = range(1, 1 + len(data))
    else:
        posns = positions

    vp = plt.violinplot(data, positions=posns, showmeans=True, showextrema=True, showmedians=True)

    # Re-color bodies
    for body in vp['bodies']:
        body.set_edgecolors('k')
        body.set_facecolors(color)
        body.set_alpha(alpha)

    # Re-color median, min, max lines to be black
    for field in ('cmaxes', 'cmins', 'cbars', 'cmedians'):
        vp[field].set_color('k')

    # Mean line takes the body colour; a marker is drawn for the average
    vp['cmeans'].set_color(color)
    vp['cmeans'].set_alpha(alpha)
    for i in range(len(data)):
        plt.plot(posns[i], [np.average(data[i])], color='w', marker=meanmarker, markeredgecolor='k')
    return
def violin():
    """
    Save a three-violin figure of precipitation split by climate phase.

    The violins are ``prcp[phaseind['lanina']]``, all of ``prcp`` and
    ``prcp[phaseind['elnino']]``, labelled 'La Nina', 'All Years' and
    'El Nino'.  The figure is written to
    ``EV['HOME'] + '/Desktop/Feb20Response/images/violin'``.
    """
    from data_load import get_lcrb_prcp, get_climate_data

    prcp = get_lcrb_prcp()
    _, _, _, phaseind = get_climate_data()

    # ``plt.get_cmap`` instead of ``mpl.cm.get_cmap``: the latter was
    # deprecated and then removed from recent matplotlib releases.
    cmap = plt.get_cmap('viridis')

    data = [prcp[phaseind['lanina']], prcp, prcp[phaseind['elnino']]]
    violin_parts = plt.violinplot(data, showmeans = True)
    plt.ylim(0,650)
    plt.xticks(range(1,4),['La Nina', 'All Years', 'El Nino'], fontsize = 18)
    plt.title('MAMJ Precipitation in LCRB by Phase', fontsize = 22, fontweight = 'bold')

    # Bars, extrema and mean lines in black
    for feature in ['cbars', 'cmins', 'cmeans', 'cmaxes']:
        violin_parts[feature].set_color('black')

    # One viridis shade per violin body
    body_colors = [cmap(0), cmap(0.75), cmap(0.4)]
    for pc, color in zip(violin_parts['bodies'], body_colors):
        pc.set_facecolor(color)

    plt.ylabel('Precipitation, mm', fontweight='bold')
    plt.savefig(EV['HOME'] + '/Desktop/Feb20Response/images/violin')
    return
def violinplot(xs, ys, rotation='horizontal', points=100, x_tick_offset=0,
               facecolour='lightgreen', edgecolour='green'):
    """Plots one violin per entry of ``ys``, labelled with ``xs``.

    xs: List of x labels.
    ys: List of lists of points, one list per violin.
    rotation: x label rotation. Default 'horizontal'.
    points: Number of points to use in the density estimate.
    x_tick_offset: How far to offset the x tick labels. Default 0.
    facecolour: Colour of the violin bodies. Default light green.
    edgecolour: Colour of the violin edges and mean lines. Default green.
    """
    violins = plt.violinplot(ys, showmeans=True, showextrema=False,
                             points=points)

    # plt.violinplot takes no colour arguments, so recolour the returned
    # artists by hand. http://stackoverflow.com/a/26291582/1105803
    for body in violins['bodies']:
        body.set_facecolor(facecolour)
        body.set_edgecolor(edgecolour)
    violins['cmeans'].set_color(edgecolour)

    n_labels = len(xs)
    tick_positions = [1 + i + x_tick_offset for i in range(n_labels)]
    plt.xticks(tick_positions, xs, rotation=rotation)

    # Half a unit of padding on either side of the outermost violins.
    plt.xlim((0.5, n_labels + 0.5))
def violinplot(data, labels, colors, positions, points = True, maincolor= 'k'):
    """custom violin plots.

    Draws violins with a notched box plot on top and, optionally, the
    jittered raw points.

    Args:
        data (array, (m x n)): n categories, m data points.
        labels array (n): x-tick label for each violin.
        colors array (n): face and edge colour of each violin body.
        positions array(n): location of violins.
        points (bool): scatter the raw data over the violins when true.
        maincolor: colour of the median lines and of the scattered points.
    Returns:
        the dictionary returned by ``plt.violinplot``.
    """
    w = 0.8  # np.diff(positions)[0]*0.8

    # ``hold=None`` dropped: the keyword was removed in matplotlib 3.0 and
    # passing it raises TypeError there; None was its default before that.
    bp = plt.violinplot(data, positions=positions, vert=True, widths=w,
                        showmeans=False, showextrema=False, showmedians=True,
                        points=100, bw_method=0.25)
    plt.boxplot(data, notch=1, positions=positions, vert=1)

    for ind, box in enumerate(bp['bodies']):
        box.set(color=colors[ind], edgecolor=colors[ind], linewidth=1.5, alpha=1)
    bp['cmedians'].set(color=maincolor, linewidth=1.5)

    plt.xticks(positions, labels, rotation=20)

    #### hackish!
    if points:
        for index, d in enumerate(data):
            # Uniform jitter in [-w/4, w/4].  It is added to all positions
            # when the row length matches the number of positions (or the
            # number of rows does not), otherwise to this row's position only.
            offsets = (1-2*np.random.rand(len(d)))*w/4.
            if len(d) == len(positions) or len(data) != len(positions):
                jitter = positions + offsets
            else:
                jitter = positions[index] + offsets
            plt.scatter(jitter, d, color=maincolor, s=8)
    return bp
def seqnumber(seqname):
    """Return the integer that follows the last underscore in *seqname*."""
    suffix = seqname.split('_')[-1]
    return int(suffix)


def _header_ids(path):
    """Return the first space-delimited token of every line containing '>'.

    Lines are stripped and every '>' character is removed before splitting.
    """
    ids = []
    with open(path, 'r') as handle:
        for line in handle:
            if '>' not in line:
                continue
            cleaned = line.strip().replace('>', '')
            ids.append(cleaned.split(' ')[0])
    return ids


all_clean_contigs = _header_ids('all_crassphage_clean.fna')
all_orfs = _header_ids('all_crassphage_clean.pred.fna')

# Group every ORF under the contig it belongs to
contig_orfs = {contig: [] for contig in all_clean_contigs}
for orf in all_orfs:
    contig_orfs[contigname(orf)].append(orf)

contig_lens = [len(orfs) for orfs in contig_orfs.values()]

with open('good_contigs_ids', 'r') as handle:
    good_contigs = [line.strip() for line in handle]

good_contig_lens = [len(contig_orfs[contig]) for contig in good_contigs]

# Horizontal violins: ORF counts of the listed contigs vs. all contigs
plt.violinplot([good_contig_lens, contig_lens], vert=False, showmedians=True)
plt.yticks((1, 2), ("viable contigs", "all contigs"))
plt.xlabel("n° of ORFs")
def savefig(filename):
    """Save the current figure as ``<filename>.pgf`` and ``<filename>.pdf``."""
    print('saving {}'.format(filename))
    for pattern in ('{}.pgf', '{}.pdf'):
        plt.savefig(pattern.format(filename))


# Simple plot
fig, ax = newfig(0.45)

pos = list(range(1, 11))
data = [[] for _ in pos]  # one bucket per position
# NOTE(review): presumably parseNanoDict fills ``data`` from the file named
# on the command line -- confirm against its definition.
parseNanoDict(argv[1])

violin_parts = plt.violinplot(
    dataset=data,
    positions=pos,
    points=100,
    widths=0.9,
    showmeans=True,
    showextrema=False,
    showmedians=False,
)

for body in violin_parts['bodies']:
    body.set_linewidths(defaultLineWidth)
    body.set_alpha(0.8)

plt.grid(True, zorder=5, axis='y')
plt.xlabel("number of unwinds")
plt.ylabel("runtime [us]")
def plot_shift(x, y, paired=False, n_boot=1000,
               percentiles=np.arange(10, 100, 10),
               ci=.95, seed=None, show_median=True, violin=True):
    """Shift plot.

    Parameters
    ----------
    x, y : array_like
        First and second set of observations.
    paired : bool
        Specify whether ``x`` and ``y`` are related (i.e. repeated
        measures) or independent.

        .. versionadded:: 0.3.0
    n_boot : int
        Number of bootstrap iterations. The higher, the better, the slower.
    percentiles : array_like
        Sequence of percentiles to compute, which must be between 0 and 100
        inclusive. Default set to [10, 20, 30, 40, 50, 60, 70, 80, 90].
    ci : float
        Confidence level (0.95 = 95%). Must be strictly between 0 and 1.
    seed : int or None
        Random seed for generating bootstrap samples, can be integer or
        None for no seed (default).
    show_median : boolean
        If True (default), show the median with black lines.
    violin : boolean
        If True (default), plot the density of X and Y distributions.

    Returns
    -------
    fig : matplotlib Figure instance
        Matplotlib Figure. To get the individual axes, use fig.axes.

    Raises
    ------
    AssertionError
        If ``x`` or ``y`` is not 1D, contains missing values or has fewer
        than 10 samples, if ``ci`` is not in (0, 1), or if ``paired=True``
        and the sizes differ.

    See also
    --------
    harrelldavis

    Notes
    -----
    The shift plot is described in [1]_. It computes a shift function [2]_
    for two (in)dependent groups using the robust Harrell-Davis quantile
    estimator in conjunction with bias-corrected bootstrap confidence
    intervals.

    References
    ----------
    .. [1] Rousselet, G. A., Pernet, C. R. and Wilcox, R. R. (2017). Beyond
           differences in means: robust graphical methods to compare two
           groups in neuroscience. Eur J Neurosci, 46: 1738-1748.
           doi:10.1111/ejn.13610

    .. [2] https://garstats.wordpress.com/2016/07/12/shift-function/

    Examples
    --------
    Default shift plot

    .. plot::

        >>> import numpy as np
        >>> import pingouin as pg
        >>> np.random.seed(42)
        >>> x = np.random.normal(5.5, 2, 50)
        >>> y = np.random.normal(6, 1.5, 50)
        >>> fig = pg.plot_shift(x, y)

    With different options

    .. plot::

        >>> import numpy as np
        >>> import pingouin as pg
        >>> np.random.seed(42)
        >>> x = np.random.normal(5.5, 2, 30)
        >>> y = np.random.normal(6, 1.5, 30)
        >>> fig = pg.plot_shift(x, y, paired=True, n_boot=2000,
        ...                     percentiles=[25, 50, 75],
        ...                     show_median=False, seed=456, violin=False)
    """
    from pingouin.regression import _bca
    from pingouin.nonparametric import harrelldavis as hd

    # Safety check
    x = np.asarray(x)
    y = np.asarray(y)
    percentiles = np.asarray(percentiles) / 100  # Convert to 0 - 1 range
    assert x.ndim == 1, 'x must be 1D.'
    assert y.ndim == 1, 'y must be 1D.'
    nx, ny = x.size, y.size
    assert not np.isnan(x).any(), 'Missing values are not allowed.'
    assert not np.isnan(y).any(), 'Missing values are not allowed.'
    assert nx >= 10, 'x must have at least 10 samples.'
    assert ny >= 10, 'y must have at least 10 samples.'
    assert 0 < ci < 1, 'ci must be between 0 and 1.'
    if paired:
        assert nx == ny, 'x and y must have the same size when paired=True.'

    # Robust percentile
    x_per = hd(x, percentiles)
    y_per = hd(y, percentiles)
    delta = y_per - x_per

    # Compute bootstrap distribution of differences
    rng = np.random.RandomState(seed)
    if paired:
        # Resample indices so that x and y stay matched pair-wise
        bootsam = rng.choice(np.arange(nx), size=(nx, n_boot), replace=True)
        bootstat = (hd(y[bootsam], percentiles, axis=0) -
                    hd(x[bootsam], percentiles, axis=0))
    else:
        x_list = rng.choice(x, size=(nx, n_boot), replace=True)
        y_list = rng.choice(y, size=(ny, n_boot), replace=True)
        bootstat = (hd(y_list, percentiles, axis=0) -
                    hd(x_list, percentiles, axis=0))

    # Find upper and lower confidence interval for each quantiles
    # Bias-corrected confidence interval
    lower, median_per, upper = [], [], []
    for i, d in enumerate(delta):
        # Forward the requested confidence level.  The original rebound
        # ``ci`` to the result here and never passed it on, so the interval
        # always used _bca's default alpha whatever the caller asked for.
        bounds = _bca(bootstat[i, :], d, n_boot, alpha=1 - ci)
        # alpha=1 is used to obtain the central (median) estimate.
        median_per.append(_bca(bootstat[i, :], d, n_boot, alpha=1)[0])
        lower.append(bounds[0])
        upper.append(bounds[1])

    lower = np.asarray(lower)
    median_per = np.asarray(median_per)
    upper = np.asarray(upper)

    # Create long-format dataFrame for use with Seaborn
    data = pd.DataFrame({'value': np.concatenate([x, y]),
                         'variable': ['X'] * nx + ['Y'] * ny})

    #############################
    # Plots X and Y distributions
    #############################
    fig = plt.figure(figsize=(8, 5))
    ax1 = plt.subplot2grid((3, 3), (0, 0), rowspan=2, colspan=3)

    # Boxplot X & Y
    def adjacent_values(vals, q1, q3):
        # Whisker ends: 1.5 IQR beyond the quartiles, clipped to the data
        # range (``vals`` is sorted).
        upper_adjacent_value = q3 + (q3 - q1) * 1.5
        upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1])
        lower_adjacent_value = q1 - (q3 - q1) * 1.5
        lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1)
        return lower_adjacent_value, upper_adjacent_value

    for dis, pos in zip([x, y], [1.2, -0.2]):
        qrt1, medians, qrt3 = np.percentile(dis, [25, 50, 75])
        whiskers = adjacent_values(np.sort(dis), qrt1, qrt3)
        ax1.plot(medians, pos, marker='o', color='white', zorder=10)
        ax1.hlines(pos, qrt1, qrt3, color='k',
                   linestyle='-', lw=7, zorder=9)
        ax1.hlines(pos, whiskers[0], whiskers[1],
                   color='k', linestyle='-', lw=2, zorder=9)

    ax1 = sns.stripplot(data=data, x='value', y='variable',
                        orient='h', order=['Y', 'X'],
                        palette=['#88bedc', '#cfcfcf'])

    if violin:
        vl = plt.violinplot([y, x], showextrema=False, vert=False, widths=1)

        # Upper plot: keep only the half below y=1, then shift it
        paths = vl['bodies'][0].get_paths()[0]
        paths.vertices[:, 1][paths.vertices[:, 1] >= 1] = 1
        paths.vertices[:, 1] = paths.vertices[:, 1] - 1.2
        vl['bodies'][0].set_edgecolor('k')
        vl['bodies'][0].set_facecolor('#88bedc')
        vl['bodies'][0].set_alpha(0.8)

        # Lower plot: keep only the half above y=2, then shift it
        paths = vl['bodies'][1].get_paths()[0]
        paths.vertices[:, 1][paths.vertices[:, 1] <= 2] = 2
        paths.vertices[:, 1] = paths.vertices[:, 1] - 0.8
        vl['bodies'][1].set_edgecolor('k')
        vl['bodies'][1].set_facecolor('#cfcfcf')
        vl['bodies'][1].set_alpha(0.8)

        # Rescale ylim
        ax1.set_ylim(2, -1)

    for i in range(len(percentiles)):
        # Connection between quantiles, coloured by the sign of the interval
        if upper[i] < 0:
            col = '#4c72b0'
        elif lower[i] > 0:
            col = '#c34e52'
        else:
            col = 'darkgray'
        plt.plot([y_per[i], x_per[i]], [0.2, 0.8],
                 marker='o', color=col, zorder=10)
        # X quantiles
        plt.plot([x_per[i], x_per[i]], [0.8, 1.2], 'k--', zorder=9)
        # Y quantiles
        plt.plot([y_per[i], y_per[i]], [-0.2, 0.2], 'k--', zorder=9)

    if show_median:
        x_med, y_med = np.median(x), np.median(y)
        plt.plot([x_med, x_med], [0.8, 1.2], 'k-')
        plt.plot([y_med, y_med], [-0.2, 0.2], 'k-')

    plt.xlabel('Scores (a.u.)', size=15)
    ax1.set_yticklabels(['Y', 'X'], size=15)
    ax1.set_ylabel('')

    #######################
    # Plots quantiles shift
    #######################
    ax2 = plt.subplot2grid((3, 3), (2, 0), rowspan=1, colspan=3)
    for i, per in enumerate(x_per):
        if upper[i] < 0:
            col = '#4c72b0'
        elif lower[i] > 0:
            col = '#c34e52'
        else:
            col = 'darkgray'
        plt.plot([per, per], [upper[i], lower[i]], lw=3, color=col, zorder=10)
        plt.plot(per, median_per[i], marker='o', ms=10, color=col, zorder=10)

    plt.axhline(y=0, ls='--', lw=2, color='gray')

    ax2.set_xlabel('X quantiles', size=15)
    ax2.set_ylabel('Y - X quantiles \n differences (a.u.)', size=10)
    sns.despine()
    plt.tight_layout()

    return fig
def plotly_to_matplotlib(pygsti_fig, save_to=None, fontsize=12, prec='compacthp',
                         boxLabels_fontsize=6):
    """
    Convert a pygsti (plotly) figure to a matplotlib figure.

    The plotly layout (title, axis labels, ticks, scales, limits) is copied
    onto a fresh matplotlib axes, then every trace in the plotly figure is
    rendered according to its ``type`` ("heatmap", "scatter", "scattergl",
    "bar", "histogram"; "box" traces are collected and drawn together as
    violins).  Traces of any other type are silently ignored.

    Parameters
    ----------
    pygsti_fig : ReportFigure
        A pyGSTi figure.

    save_to : str
        Output filename.  Extension determines type.  If None, then the
        matplotlib figure is returned instead of saved.

    fontsize : int, optional
        Base fontsize to use for converted figure.

    prec : int or {"compact","compacth"}
        Digits of precision to include in labels.

    boxLabels_fontsize : int, optional
        The size for labels on the boxes. If 0 then no labels are
        put on the boxes

    Returns
    -------
    matplotlib.Figure
        Matplotlib figure, unless save_to is not None, in which case
        the figure is closed and None is returned.

    Raises
    ------
    ValueError
        If the figure metadata carries an unknown ``special`` label, a scatter
        trace has an unknown ``mode``, or (when saving) more matplotlib
        figures are open afterwards than when the call started.
    """
    numMPLFigs = len(_plt.get_fignums())
    fig = pygsti_fig.plotlyfig
    data_trace_list = fig['data']

    if 'special' in pygsti_fig.metadata:
        if pygsti_fig.metadata['special'] == "keyplot":
            return special_keyplot(pygsti_fig, save_to, fontsize)
        else:
            raise ValueError("Invalid `special` label: %s" % pygsti_fig.metadata['special'])

    #if axes is None:
    mpl_fig, axes = _plt.subplots()  # create a new figure if no axes are given

    layout = fig['layout']
    h, w = layout['height'], layout['width']
    # todo: get margins and subtract from h,w

    if mpl_fig is not None and w is not None and h is not None:
        mpl_size = w / 100.0, h / 100.0  # heuristic
        mpl_fig.set_size_inches(*mpl_size)  # was 12,8 for "super" color plot
        pygsti_fig.metadata['mpl_fig_size'] = mpl_size  # record for later use by rendering commands

    def get(obj, x, default):
        """ Needed b/c in plotly v3 layout no longer is a dict """
        # A missing key *or* a stored None both yield `default`; any
        # non-KeyError exception propagates to the caller unchanged.
        try:
            ret = obj[x]
            return ret if (ret is not None) else default
        except KeyError:
            return default

    xaxis, yaxis = layout['xaxis'], layout['yaxis']
    #annotations = get(layout,'annotations',[])
    title = get(layout, 'title', None)
    shapes = get(layout, 'shapes', [])  # assume only shapes are grid lines
    bargap = get(layout, 'bargap', 0)

    xlabel = get(xaxis, 'title', None)
    ylabel = get(yaxis, 'title', None)
    xlabels = get(xaxis, 'ticktext', None)
    ylabels = get(yaxis, 'ticktext', None)
    xtickvals = get(xaxis, 'tickvals', None)
    ytickvals = get(yaxis, 'tickvals', None)
    xaxistype = get(xaxis, 'type', None)
    yaxistype = get(yaxis, 'type', None)
    xaxisside = get(xaxis, 'side', 'bottom')
    yaxisside = get(yaxis, 'side', 'left')
    xtickangle = get(xaxis, 'tickangle', 0)
    xlim = get(xaxis, 'range', None)
    ylim = get(yaxis, 'range', None)

    if xaxisside == "top":
        axes.xaxis.set_label_position('top')
        axes.xaxis.tick_top()
        #axes.yaxis.set_ticks_position('both')

    if yaxisside == "right":
        axes.yaxis.set_label_position('right')
        axes.yaxis.tick_right()
        #axes.yaxis.set_ticks_position('both')

    if title is not None:
        if xaxisside == "top":
            # push title up higher so it does not collide with the top axis.
            axes.set_title(mpl_process_lbl(title), fontsize=fontsize, y=2.5)
        else:
            # Previously this call ran unconditionally and, on matplotlib
            # versions where set_title resets the position, undid the line above.
            axes.set_title(mpl_process_lbl(title), fontsize=fontsize)

    if xlabel is not None:
        axes.set_xlabel(mpl_process_lbl(xlabel), fontsize=fontsize)

    if ylabel is not None:
        axes.set_ylabel(mpl_process_lbl(ylabel), fontsize=fontsize)

    if xtickvals is not None:
        axes.set_xticks(xtickvals, minor=False)

    if ytickvals is not None:
        axes.set_yticks(ytickvals, minor=False)

    if xlabels is not None:
        axes.set_xticklabels(mpl_process_lbls(xlabels), rotation=0, fontsize=(fontsize - 2))

    if ylabels is not None:
        axes.set_yticklabels(mpl_process_lbls(ylabels), fontsize=(fontsize - 2))

    if xtickangle != 0:
        _plt.xticks(rotation=-xtickangle)  # minus b/c ploty & matplotlib have different sign conventions

    if xaxistype == 'log':
        axes.set_xscale("log")
    if yaxistype == 'log':
        axes.set_yscale("log")

    if xlim is not None:
        if xaxistype == 'log':  # plotly's limits are already log10'd in this case
            xlim = 10.0**xlim[0], 10.0**xlim[1]  # but matplotlib's aren't
        axes.set_xlim(xlim)

    if ylim is not None:
        if yaxistype == 'log':  # plotly's limits are already log10'd in this case
            ylim = 10.0**ylim[0], 10.0**ylim[1]  # but matplotlib's aren't
        axes.set_ylim(ylim)

    #figure out barwidth and offsets for bar plots
    num_bars = sum([get(d, 'type', '') == 'bar' for d in data_trace_list])
    currentBarOffset = 0
    barWidth = (1.0 - bargap) / num_bars if num_bars > 0 else 1.0

    #process traces
    handles = []
    labels = []  # for the legend
    boxes = []  # for violins
    for traceDict in data_trace_list:
        typ = get(traceDict, 'type', 'unknown')

        name = get(traceDict, 'name', None)
        showlegend = get(traceDict, 'showlegend', True)

        if typ == "heatmap":
            #colorscale = get(traceDict,'colorscale','unknown')
            # traceDict['z'] is *normalized* already - maybe would work here but not for box value labels
            plt_data = pygsti_fig.metadata['plt_data']
            show_colorscale = get(traceDict, 'showscale', True)

            mpl_size = (plt_data.shape[1] * 0.5, plt_data.shape[0] * 0.5)
            mpl_fig.set_size_inches(*mpl_size)
            #pygsti_fig.metadata['mpl_fig_size'] = mpl_size #record for later use by rendering commands

            colormap = pygsti_fig.colormap
            assert (colormap is not None), 'Must separately specify a colormap...'
            norm, cmap = colormap.get_matplotlib_norm_and_cmap()

            masked_data = _np.ma.array(plt_data, mask=_np.isnan(plt_data))
            heatmap = axes.pcolormesh(masked_data, cmap=cmap, norm=norm)

            axes.set_xlim(0, plt_data.shape[1])
            axes.set_ylim(0, plt_data.shape[0])

            # Major ticks sit at cell centres; stay None when plotly gave no
            # tick values so the grid code below never reads an unbound name.
            xtics = ytics = None
            if xtickvals is not None:
                xtics = _np.array(xtickvals) + 0.5  # _np.arange(plt_data.shape[1])+0.5
                axes.set_xticks(xtics, minor=False)

            if ytickvals is not None:
                ytics = _np.array(ytickvals) + 0.5  # _np.arange(plt_data.shape[0])+0.5
                axes.set_yticks(ytics, minor=False)

            grid = bool(len(shapes) > 1)
            if grid:
                def _get_minor_tics(t):
                    return [(t[i] + t[i + 1]) / 2.0 for i in range(len(t) - 1)]
                if xtics is not None:
                    axes.set_xticks(_get_minor_tics(xtics), minor=True)
                if ytics is not None:
                    axes.set_yticks(_get_minor_tics(ytics), minor=True)
                axes.grid(which='minor', axis='both', linestyle='-', linewidth=2)

            off = False  # Matplotlib used to allow 'off', but now False should be used
            if xlabels is None and ylabels is None:
                axes.tick_params(labelcolor='w', top=off, bottom=off, left=off, right=off)  # white tics
            else:
                axes.tick_params(top=off, bottom=off, left=off, right=off)

            #print("DB ann = ", len(annotations))
            #boxLabels = bool( len(annotations) >= 1 ) #TODO: why not plt_data.size instead of 1?
            #boxLabels = True  # maybe should always be true?
            if boxLabels_fontsize > 0:
                # Write values on colored squares
                for y in range(plt_data.shape[0]):
                    for x in range(plt_data.shape[1]):
                        if _np.isnan(plt_data[y, x]):
                            continue
                        assert (_np.isfinite(plt_data[y, x])), "%s is not finite!" % str(plt_data[y, x])
                        axes.text(x + 0.5, y + 0.5,
                                  mpl_process_lbl(_eformat(plt_data[y, x], prec), math=True),
                                  horizontalalignment='center',
                                  verticalalignment='center',
                                  color=mpl_besttxtcolor(plt_data[y, x], cmap, norm),
                                  fontsize=boxLabels_fontsize)

            if show_colorscale:
                cbar = _plt.colorbar(heatmap)
                cbar.ax.tick_params(labelsize=(fontsize - 2))

        elif typ == "scatter":
            mode = get(traceDict, 'mode', 'lines')
            marker = get(traceDict, 'marker', None)
            line = get(traceDict, 'line', None)
            if marker and (line is None):
                line = marker['line']  # 2nd attempt to get line props

            # Start from None so a trace without a marker neither hits an
            # unbound name nor inherits the color of a previous trace.
            color = None
            if marker:
                color = get(marker, 'color', None)
            if line and (color is None):
                color = get(line, 'color', None)
            if color is None:
                color = 'rgb(0,0,0)'

            color = mpl_color(color)
            linewidth = float(line['width']) if (
                line and get(line, 'width', None) is not None) else 1.0

            x = y = None
            if 'x' in traceDict and 'y' in traceDict:
                x = traceDict['x']
                y = traceDict['y']
            elif 'r' in traceDict and 't' in traceDict:
                x = traceDict['r']
                y = traceDict['t']

            assert (x is not None and y is not None), "x and y both None in trace: %s" % traceDict
            lines = _plt.plot(x, y)
            if mode == 'lines':
                ls = '-'
                ms = 'None'
            elif mode == 'markers':
                ls = 'None'
                ms = "."
            elif mode == 'lines+markers':
                ls = '-'
                ms = "."
            else:
                raise ValueError("Unknown mode: %s" % mode)
            _plt.setp(lines, linestyle=ls, marker=ms, color=color, linewidth=linewidth)

            if showlegend and name:
                handles.append(lines[0])
                labels.append(name)

        elif typ == "scattergl":  # currently used only for colored points...
            x = traceDict['x']
            y = traceDict['y']
            assert (x is not None and y is not None), "x and y both None in trace: %s" % traceDict

            colormap = pygsti_fig.colormap
            if colormap:
                norm, cmap = colormap.get_matplotlib_norm_and_cmap()
                s = _plt.scatter(x, y, c=y, s=50, cmap=cmap, norm=norm)
            else:
                s = _plt.scatter(x, y, c=y, s=50, cmap='gray')

            if showlegend and name:
                handles.append(s)
                labels.append(name)

        elif typ == "bar":
            xlabels = [str(xl) for xl in traceDict['x']]  # x "values" are actually bar labels in plotly

            #always grey=pos, red=neg type of bar plot for now (since that's all pygsti uses)
            y = _np.asarray(traceDict['y'])
            if 'plt_yerr' in pygsti_fig.metadata:
                yerr = pygsti_fig.metadata['plt_yerr']
            else:
                yerr = None

            # actual x values are just the integers + offset
            x = _np.arange(y.size) + currentBarOffset
            currentBarOffset += barWidth  # so next bar trace will be offset correctly

            marker = get(traceDict, 'marker', None)
            # Default to gray whenever no usable color is given, so `color`
            # is always bound for the axes.bar call below.
            color = "gray"
            if marker and ('color' in marker):
                if isinstance(marker['color'], str):
                    color = mpl_color(marker['color'])
                elif isinstance(marker['color'], list):
                    color = [mpl_color(c) for c in marker['color']]  # b/c axes.bar can take a list of colors

            if yerr is None:
                axes.bar(x, y, barWidth, color=color)
            else:
                axes.bar(x, y, barWidth, color=color, yerr=yerr.flatten().real)

            if xtickvals is not None:
                xtics = _np.array(xtickvals) + 0.5  # _np.arange(plt_data.shape[1])+0.5
            else:
                xtics = x
            axes.set_xticks(xtics, minor=False)
            axes.set_xticklabels(mpl_process_lbls(xlabels), rotation=0, fontsize=(fontsize - 4))

        elif typ == "histogram":
            #histnorm = get(traceDict,'histnorm',None)
            marker = get(traceDict, 'marker', None)
            # Look the color up once, tolerating a marker without one.
            marker_color = get(marker, 'color', None) if marker else None
            color = mpl_color(marker_color if isinstance(marker_color, str) else "gray")
            xbins = traceDict['xbins']
            histdata = traceDict['x']

            if abs(xbins['size']) < 1e-6:
                histBins = 1
            else:
                histBins = int(round((xbins['end'] - xbins['start']) / xbins['size']))

            histdata_finite = _np.take(histdata, _np.where(_np.isfinite(histdata)))[
                0]  # take gives back (1,N) shaped array (why?)
            if yaxistype == 'log':
                if len(histdata_finite) == 0:
                    axes.set_yscale("linear")  # no data, and will get an error with log-scale, so switch to linear

            #histMin = min( histdata_finite ) if cmapFactory.vmin is None else cmapFactory.vmin
            #histMax = max( histdata_finite ) if cmapFactory.vmax is None else cmapFactory.vmax
            #_plt.hist(_np.clip(histdata_finite,histMin,histMax), histBins,
            #          range=[histMin, histMax], facecolor='gray', align='mid')
            _, _, patches = _plt.hist(histdata_finite, histBins, facecolor=color, align='mid')

            #If we've been given an array of colors
            if isinstance(marker_color, list):
                for p, c in zip(patches, marker_color):
                    _plt.setp(p, 'facecolor', mpl_color(c))

        elif typ == "box":
            boxes.append(traceDict)

    if len(boxes) > 0:
        _plt.violinplot([box['y'] for box in boxes], [box['x0'] for box in boxes],
                        points=10, widths=1., showmeans=False,
                        showextrema=False, showmedians=False)
        # above kwargs taken from Tim's original RB plot - we could set some of
        # these from boxes[0]'s properties like 'boxmean' (a boolean) FUTURE?

    extraartists = [axes]
    if len(handles) > 0:
        lgd = _plt.legend(handles, labels, bbox_to_anchor=(1.01, 1.0),
                          borderaxespad=0., loc="upper left")
        extraartists.append(lgd)

    if save_to:
        _gc.collect()  # too many open files (b/c matplotlib doesn't close everything) can cause the below to fail
        _plt.savefig(save_to, bbox_extra_artists=extraartists,
                     bbox_inches='tight')  # need extra artists otherwise
        #axis labels get clipped
        _plt.cla()
        _plt.close(mpl_fig)
        del mpl_fig
        _gc.collect()  # again, to be safe...
        if len(_plt.get_fignums()) != numMPLFigs:
            raise ValueError(
                "WARNING: MORE FIGURES OPEN NOW (%d) THAN WHEN WE STARTED %d)!!"
                % (len(_plt.get_fignums()), numMPLFigs))
        return None  # figure is closed!
    else:
        return mpl_fig
print "Success rate: ", sum(success)/len(success) # store values success_values[el][a] = sum(success)/len(success) temp_len = len(t_min[t_min<num_timesteps-1]) temp = t_min[t_min<num_timesteps-1] t_avg_values[el][a] = np.mean(t_sorted) t_med_values[el][a] = np.median(t_sorted) t_std_values[el][a] = np.std(t_sorted) data.append(temp) plt.figure(1) x = range(len(range_alpha)) plt.violinplot(data, x, showmedians=True) for a in range(len(range_alpha)): alpha = range_alpha[a] beta = range_beta[a] lab.append('%.2f / %.2f' % (alpha, beta)) plt.title('lap %.2f' % lap) fig1.savefig('violin_lap = %.2f.eps' % lap) ax = plt.gca() ax.set_xticks(range(len(range_alpha))) ax.set_xticklabels(lab)
A.append(GetTransform(msg_[1]).get_pos().get_array()) for msg_ in bag_.read_messages(topics='B1'): B1.append(GetTransform(msg_[1]).get_pos().get_array()) for msg_ in bag_.read_messages(topics='B2'): B2.append(GetTransform(msg_[1]).get_pos().get_array()) bag_.close() else: print("The provided path is not a .bag file") if __name__ == "__main__": if len(sys.argv) == 2: ReadBag(sys.argv[1]) error1 = np.linalg.norm(B1, axis=1) - np.linalg.norm(A, axis=1) error2 = np.linalg.norm(B2, axis=1) - np.linalg.norm(A, axis=1) # print(np.mean(error1) ) # print(np.std(error1) ) # print(np.mean(error2) ) # print(np.std(error2) ) plt.violinplot(np.abs(error1)) plt.grid() plt.show() # plt.violinplot(np.abs(error2) ) # plt.grid() # plt.show() else: print("Path to .bag file was not provided")
from nltk import tokenize
import dataset
import matplotlib.pyplot as plt

# Load every stored review and prepare the sentence-level sentiment scorer.
db = dataset.connect('sqlite:///reviews.db')
reviews = db['reviews'].all()
analyzer = SentimentIntensityAnalyzer()

# One bucket of per-review average sentiment for each star rating 1..5.
sentiment_by_stars = [[] for _ in range(1, 6)]

for review in reviews:
    # Score the title and body together, one sentence at a time.
    full_review = review['title'] + '. ' + review['review']
    sentence_list = tokenize.sent_tokenize(full_review)
    cumulative_sentiment = sum(
        (analyzer.polarity_scores(sentence)["compound"] for sentence in sentence_list),
        0.0,
    )
    average_score = cumulative_sentiment / len(sentence_list)

    # Ratings are 1-based; bucket indices are 0-based.
    star_index = int(review['rating']) - 1
    sentiment_by_stars[star_index].append(average_score)

# Horizontal violins, one per star rating, with a reference line at neutral (0).
plt.violinplot(
    sentiment_by_stars,
    range(1, 6),
    vert=False,
    widths=0.9,
    showmeans=False,
    showextrema=True,
    showmedians=True,
    bw_method='silverman',
)
plt.axvline(x=0, linewidth=1, color='black')
plt.show()
def SelectionViolinPlot(plotfile, ylabel, models, yvalues, symmetrizey, hlines=None, points=None, pointmarkercolor='or', usetex=True, legends=False, fixymin=None, fixymax=None, modelgroups=None):
    """Creates violin plot showing distribution of selection and significant sites.

    Calling arguments:

    *plotfile* : name of PDF plot to create.

    *ylabel* : ylabel for the plot.

    *models* : list of models for which we create violin plots.

    *yvalues* : list of the same length as *models*, each entry is a list
    of the Y-values (such as P-values).

    *symmetrizey* : make y-axis symmetric around zero?

    *hlines* : if not *None*, list of the same length as *models* with each entry
    a list giving y-value for where we draw horizontal lines for that *model*.
    Alternatively, can be a single number -- in that case, we draw a
    horizontal line across the whole plot at that number.

    *points* : if not *None*, list of the same length as *models* with each entry
    a list giving the y-value for points to be placed for that *model*.

    *pointmarkercolor* : specifies marker and color of points in *points*.
    Should be either a length-two string giving marker and color for all points
    (such as *or* for circles, red) or a list of lists of the same length
    as *points* with each entry specifying the marker and color for that point.

    *usetex* : use LaTex formatting of strings?

    *legends* : Create legend(s) with names of points specified by *pointmarkercolor*?
    If it is not *False* or *None*, then *legends* should be a list. Each entry
    should be a 3-tuple *(markercolors, names, title)*. In this tuple,
    *markercolors* and *names* are lists of the same length, with *markercolors*
    being a list of marker / color (e.g. *or* for circles, red) and *names*
    being a list of the string corresponding to each marker / color; *title*
    is the title for the legend.

    *fixymin* : if not *None*, the y-minimum is fixed to this value
    (a value of zero is honored).

    *fixymax* : if not *None*, the y-maximum is fixed to this value
    (a value of zero is honored).

    *modelgroups* : do we "group" models on the x-axis? If so, set this
    to a list of the same length as *models* with each entry being the
    group to which that model is assigned. For instance, if *models* is
    *['ExpCM', 'YNGKP', 'ExpCM', 'YNGKP']*, then *modelgroups* might be
    *['HA', 'HA', 'NP', 'NP']*. In this case, the two groups are indicated
    with a line and a label on the x-axis. Models in the same group must
    be consecutive. If any entry is *None*, the corresponding model is
    not assigned a group.

    Argument validation is done with *assert*, so invalid arguments raise
    *AssertionError*. The figure is saved to *plotfile* and then closed;
    nothing is returned.
    """
    alpha = 0.55 # transparency for points
    markersize = 25 # size of points
    markerlw = 0.6 # line width for makers
    assert os.path.splitext(plotfile)[1].lower() == '.pdf', "plotfile %s does not end with extension '.pdf'" % plotfile
    assert len(models) == len(yvalues) >= 1
    if modelgroups:
        assert len(modelgroups) == len(models), "modelgroups is not the same length as models"
        # make sure models in the same group are consecutive
        ngroups = 1
        previousgroup = modelgroups[0]
        for group in modelgroups[1 : ]:
            if group != previousgroup:
                ngroups += 1
                previousgroup = group
        assert ngroups == len(set(modelgroups)), "models in the same group must be consecutive in modelgroups. This is not the case:\n%s" % str(modelgroups)
    plt.rc('font', size=12)
    plt.rc('text', usetex=usetex)

    # figure geometry, all in inches
    lmargin = 0.7
    tmargin = 0.1
    if modelgroups:
        bmargin = 0.6 # extra room below the axis for the group brackets / labels
    else:
        bmargin = 0.4
    if legends:
        perlegendwidth = 0.9
        rmargin = perlegendwidth * len(legends) + 0.03
    else:
        rmargin = 0.1
    (height, widthper) = (2.5, 1.5)
    violinwidth = 0.7
    totwidth = lmargin + rmargin

    # x-position of each violin; models sharing a (non-None) group sit closer together
    if modelgroups:
        firstmodel = True
        lastgroup = None
        withingroupspacing = violinwidth + 0.3 * (1.0 - violinwidth)
        xs = []
        for igroup in modelgroups:
            if not firstmodel and igroup == lastgroup and lastgroup is not None:
                xs.append(xs[-1] + withingroupspacing)
                totwidth += withingroupspacing * widthper
            elif firstmodel:
                firstmodel = False
                xs.append(0)
                totwidth += widthper
            else:
                xs.append(xs[-1] + 1)
                totwidth += widthper
            lastgroup = igroup
    else:
        xs = list(range(len(models)))
        totwidth += widthper * len(models)
    totheight = height + tmargin + bmargin
    plt.figure(figsize=(totwidth, totheight))
    plt.axes([lmargin / totwidth, bmargin / totheight, 1.0 - (lmargin + rmargin) / totwidth, 1.0 - (tmargin + bmargin) / totheight])
    plt.ylabel(ylabel, fontsize=15)
    plt.violinplot(yvalues, xs, widths=violinwidth, showextrema=False)
    xmargin = 0.2 * violinwidth / 2.0
    xmin = xs[0] - violinwidth / 2.0 - xmargin
    xmax = xs[-1] + violinwidth / 2.0 + xmargin
    plt.xlim(xmin, xmax)

    # horizontal reference lines: one number spans the plot, a list gives per-model lines
    if isinstance(hlines, (int, float)):
        plt.hlines(hlines, xmin, xmax, colors='b', linewidths=1, linestyles='dotted')
    elif hlines:
        assert len(hlines) == len(models)
        line_ys = []
        line_xmins = []
        line_xmaxs = []
        for (i, ix) in enumerate(xs):
            line_ys += hlines[i]
            line_xmins += [ix - violinwidth / 2.0] * len(hlines[i])
            line_xmaxs += [ix + violinwidth / 2.0] * len(hlines[i])
        plt.hlines(line_ys, line_xmins, line_xmaxs, colors='b', linewidths=1, linestyles='dotted')

    (ymin, ymax) = plt.ylim()
    if symmetrizey:
        ymax = 1.05 * max(abs(ymin), abs(ymax))
        ymin = -ymax

    if points:
        assert len(points) == len(models)
        if isinstance(pointmarkercolor, str):
            assert len(pointmarkercolor) == 2
            color = pointmarkercolor[1]
            marker = pointmarkercolor[0]
        else:
            color = []
            marker = []
            assert len(pointmarkercolor) == len(points), "len(pointmarkercolor) = %d; len(points) = %d" % (len(pointmarkercolor), len(points))
        point_xs = []
        point_ys = []
        for (i, ix) in enumerate(xs):
            (model_xs, model_ys) = SmartJitter(points[i], yspace=(ymax - ymin) / 25., xspace=0.08, xcenter=ix)
            point_xs += model_xs
            point_ys += model_ys
            if not isinstance(pointmarkercolor, str):
                imarkercolor = pointmarkercolor[i]
                assert len(imarkercolor) == len(points[i]), "pointmarkercolor and points have length mismatch for %d" % i
                color += [mc[1] for mc in imarkercolor]
                marker += [mc[0] for mc in imarkercolor]
        if isinstance(pointmarkercolor, str):
            plt.scatter(point_xs, point_ys, s=markersize, c=color, marker=marker, alpha=alpha, lw=markerlw)
        else:
            # scatter takes a single marker per call, so plot point by point
            assert len(color) == len(marker) == len(point_xs)
            for (x, y, c, m) in zip(point_xs, point_ys, color, marker):
                plt.scatter(x, y, s=markersize, c=c, marker=m, alpha=alpha, lw=markerlw)

    # Identity checks rather than `not in [None, False]`: since 0 == False,
    # the membership test silently ignored a requested limit of zero.
    if fixymin is not None and fixymin is not False:
        ymin = fixymin
    if fixymax is not None and fixymax is not False:
        ymax = fixymax
    assert ymin < ymax
    plt.ylim(ymin, ymax)
    plt.xticks(xs, models, fontsize=15)

    if legends:
        # legends are stacked left-to-right in the right margin (figure coordinates)
        legendx = 1.0 - rmargin / float(totwidth)
        legendfracwidth = perlegendwidth / float(totwidth)
        legendtop = 1.0 - tmargin / float(totheight)
        for (markercolors, legendnames, legendtitle) in legends:
            assert len(markercolors) == len(legendnames)
            handles = [matplotlib.lines.Line2D([0], [0], marker=lmarker, color=lcolor, markersize=markersize, alpha=alpha, lw=markerlw, linestyle='None') for (lmarker, lcolor) in markercolors]
            # put in natural sort order
            assert len(set(legendnames)) == len(legendnames), "Duplicate legendnames entry"
            sortedlegendnames = list(legendnames)
            dms_tools.utils.NaturalSort(sortedlegendnames)
            sortedhandles = [None] * len(sortedlegendnames)
            for (handle, name) in zip(handles, legendnames):
                sortedhandles[sortedlegendnames.index(name)] = handle
            assert None not in sortedhandles
            if legendtitle:
                legendtitle = ('\\bf{%s\nsites}' % SplitText(legendtitle, maxchars=6)).replace('\n', '}\n\\bf{')
            else:
                legendtitle = '\\bf{sites}'
            legend = plt.legend(sortedhandles, sortedlegendnames, bbox_to_anchor=(legendx, 0, legendfracwidth, legendtop), bbox_transform=plt.gcf().transFigure, fontsize=13, numpoints=1, title=legendtitle, markerscale=0.25, handlelength=0.7, handletextpad=0.25, borderaxespad=0, labelspacing=0.2)
            # add_artist keeps this legend when the next plt.legend call replaces the axes legend
            plt.gca().add_artist(legend)
            legendx += legendfracwidth
            plt.setp(legend.get_title(), fontsize=13)

    if modelgroups:
        for group in set(modelgroups):
            if not group:
                continue
            members = [i for (i, g) in enumerate(modelgroups) if g == group]
            start_i = min(members)
            end_i = max(members)
            start_x = (xmargin + xs[start_i]) / (xmax - xmin) # axes coordinates
            end_x = (xmargin + xs[end_i] + violinwidth) / (xmax - xmin) # axes coordinates
            line_y = -0.12 # in axes coordinates
            cap_height = 0.03
            line = plt.Line2D([start_x, end_x], [line_y, line_y], transform=plt.gca().transAxes, color='black', linewidth=1.5, solid_capstyle='butt')
            line.set_clip_on(False)
            plt.gca().add_line(line)
            for x in [start_x, end_x]:
                # caps on end of lines
                line = plt.Line2D([x, x], [line_y + cap_height, line_y - cap_height], transform=plt.gca().transAxes, color='black', linewidth=1.5, solid_capstyle='butt')
                line.set_clip_on(False)
                plt.gca().add_line(line)
            plt.text((start_x + end_x) / 2.0, line_y - 0.04, group, transform=plt.gca().transAxes, horizontalalignment='center', verticalalignment='top', fontsize=15)

    plt.savefig(plotfile)
    plt.clf()
    plt.close()
"WDDFF(MODWT-SVR)\n(9-month ahead)", ] x = list(range(25)) ylabels=[ r"$NSE$",r"$NRMSE$",r"$PPTS(5)(\%)$", ] x_s=[-1.1,-1.1,-1.1] y_s=[0.93,1.8,78] plt.figure(figsize=(7.48, 5.54)) for i in range(len(all_datas)): ax1 = plt.subplot(3, 1, i+1) ax1.yaxis.grid(True) ax1.text(x_s[i],y_s[i],fig_index[i],fontsize=7) vplot1 = plt.violinplot( dataset=all_datas[i], positions=x, showmeans=True, ) ax1.plot(list(range(0,5)),lines[i][0],'--',lw=0.5,color='blue') ax1.plot(list(range(5,10)),lines[i][1],'--',lw=0.5,color='blue') ax1.plot(list(range(10,15)),lines[i][2],'--',lw=0.5,color='blue') ax1.plot(list(range(15,20)),lines[i][3],'--',lw=0.5,color='blue') ax1.plot(list(range(20,25)),lines[i][4],'--',lw=0.5,color='blue') print(type(vplot1["cmeans"])) plt.ylabel(ylabels[i]) if i==len(all_datas)-1: plt.xticks(x, labels, rotation=45) else: plt.xticks([]) for pc in vplot1['bodies']: pc.set_facecolor('#D43F3A')
for i in range(len(genes)): _num = ind[i] _name = genes[i] expr_li = [ X2[fg == 0, _num], X2[fg == 1, _num], X2[fg == 2, _num], X2[fg == 3, _num], X2[fg == 4, _num], X2[fg == 5, _num], X2[fg == 6, _num], X2[fg == 7, _num], X2[fg == 8, _num], X2[fg == 9, _num] ] plt.figure() plt.title(_name, fontsize=18) plt.xticks(range(1, 11)) plt.tick_params(labelsize=18) parts = plt.violinplot(expr_li, showmeans=False, showmedians=False, showextrema=False) cnt = 0 for pc in parts['bodies']: data = expr_li[cnt] pc.set_facecolor(c[cnt]) pc.set_edgecolor('black') pc.set_alpha(0.8) q1, med, q3 = np.percentile(data, [25, 50, 75]) mean = np.mean(data) plt.vlines(cnt + 1, q1, q3, color='k', linestyle='-', lw=3) plt.scatter(cnt + 1, mean, marker='o',
vmin = min(min(min(v) for v in values), 0) vmax = max(max(max(v) for v in values), 0) if dim == 1: plot.ylabel('time') if len(values[0]) == 1: plot.bar(range(len(values)), [vs[0] for vs in values], align = 'center', orientation = 'vertical', tick_label = labels) labels = None elif 'VIOLIN_STYLE' in os.environ: plot.violinplot(values, vert = True, showmeans = 'SHOW_MEAN' in os.environ, showmedians = 'SHOW_MEAN' not in os.environ, showextrema = True) else: plot.boxplot(values, vert = True, notch = 'NOTCH_STYLE' in os.environ, patch_artist = 'PATCH_ARTIST' in os.environ) if 'SHOW_MEAN' in os.environ: for i in range(len(values)): mean = sum(values[i]) / len(values[i]) plot.plot([i + 0.75, i + 1.25], [mean, mean]); if labels is not None: plot.setp(fig.axes, xticks = [x + 1 for x in range(len(values))], xticklabels = labels)
def main():
    """Plot foreground-SDR differences per perturbation type as violin plots.

    Every sub-folder of ``../pickles`` is visited in sorted name order.  The
    folder name's last ``_``-separated field is parsed as the perturbation
    coefficient, and ``<folder>__sdrs.pick`` inside it is unpickled.  A folder
    whose coefficient equals 0.0 supplies the reference ("perfect") SDR; for
    every other folder the difference to the most recently seen reference is
    stored under the perturbation type found in the folder name
    (``noise_time``, ``speed``, otherwise ``stft_swap``).

    One PNG per perturbation type is written to ``../analysis_output/``.

    NOTE: the reference folder must sort before the perturbed folders of the
    same name; otherwise ``perfect_sdr`` / the per-name list is not yet
    available and the loop fails.
    """
    out = '../analysis_output/'
    pickle_folder = '../pickles'
    pickle_folders_to_load = sorted(
        f for f in os.listdir(pickle_folder)
        if os.path.isdir(join(pickle_folder, f)))

    noise_time = 'noise_time'
    speed = 'speed'
    stft_swap = 'stft_swap'
    sdr_by_noise_type = {noise_time: {}, speed: {}, stft_swap: {}}
    coefs_by_noise_type = {noise_time: set(), speed: set(), stft_swap: set()}

    sdr_type = 'foreground'
    perfect_coef = 0.0

    for folder in pickle_folders_to_load:
        sdrs_name = join(pickle_folder, folder, folder + '__sdrs.pick')

        coef = float(folder.split('_')[-1])
        # Context manager so the pickle file is closed promptly instead of
        # leaking one handle per folder.
        with open(sdrs_name, 'rb') as sdr_file:
            sdr_vals = pickle.load(sdr_file)
        name_parts = folder.split('_')
        cur_name = name_parts[0] + '__' + name_parts[2]

        if coef == perfect_coef:
            # Reference run: make sure each noise type has a list for this
            # name and remember the unperturbed SDR for the runs that follow.
            for diffs_by_name in sdr_by_noise_type.values():
                diffs_by_name.setdefault(cur_name, [])
            perfect_sdr = sdr_vals[sdr_type][0]
            continue

        if noise_time in folder:
            noise_type = noise_time
        elif speed in folder:
            noise_type = speed
        else:
            noise_type = stft_swap
        sdr_by_noise_type[noise_type][cur_name].append(sdr_vals[sdr_type][0] - perfect_sdr)
        coefs_by_noise_type[noise_type].add(coef)

    # .items() and list(zip(...)) behave the same on Python 2 and also work on
    # Python 3, where iteritems() is gone and zip() has no len().
    for noise_type, diffs_by_name in sdr_by_noise_type.items():
        # Transpose: one tuple per coefficient position, holding that
        # position's value from every name.
        sdr_values = list(zip(*diffs_by_name.values()))
        sdr_coefs = sorted(coefs_by_noise_type[noise_type])
        sdr_coefs.insert(0, '')  # blank label for tick 0; violins start at x=1

        plt.close('all')
        # plt.yscale('log')
        plt.violinplot(sdr_values, showmeans=False, showmedians=True)
        plt.xticks(range(len(sdr_values)+1), sdr_coefs, rotation='vertical')
        plt.title('repetition perturbation coef vs. foreground SDR {}'.format(noise_type))
        plt.xlabel('perturbation coefficient')
        plt.ylabel('SDR (dB)')
        plt.subplots_adjust(bottom=0.15)
        plt.savefig(out + 'bigger_dataset_sdr_foreground_diff_{}.png'.format(noise_type))
def set_violin_color_label(vplot, color, label):
    """Color every body of a violin plot and return a matching legend patch.

    *vplot* is the mapping returned by ``plt.violinplot``; each artist under
    its ``'bodies'`` key is recolored.  The returned ``Patch`` carries the same
    color and *label* so it can stand in for the violins in a legend.
    """
    for body in vplot['bodies']:
        body.set_color(color)
    legend_proxy = mpatch.Patch(color=color, label=label)
    return legend_proxy


# Function application analysis on casts
fn_app_casts_data = np.genfromtxt(
    'partially-typed-function-app-results-coercions=15.txt',
    dtype=(int, int, int, float, float, float, float),
    names="casts, types, coercions, trun, titer, crun, citer",
    usecols=("casts, titer, citer"),
)

# Twosomes function application analysis by casts
tdata_pos, tdata_times = sort_for_violin_plot(fn_app_casts_data, key="casts", value="titer")
tdata_vplt = plt.violinplot(
    tdata_times,
    positions=tdata_pos,
    widths=0.5,
    showextrema=False,
    showmedians=False,
    showmeans=False,
)
tdata_vplt_fake = set_violin_color_label(tdata_vplt, "blue", "Type-Based Casts")

# Coercions cast introduction analysis
cdata_pos, cdata_times = sort_for_violin_plot(fn_app_casts_data, key='casts', value='citer')
cdata_vplt = plt.violinplot(
    cdata_times,
    positions=cdata_pos,
    widths=0.6,
    showextrema=False,
    showmedians=False,
    showmeans=False,
)
cdata_vplt_fake = set_violin_color_label(cdata_vplt, "green", "Coercions")

# Fixed y-range; x-range padded by one unit beyond the largest cast count.
plt.axis([-1, max(cdata_pos) + 1, -10, 1600])
ACTUALDATA = [[] for _ in range(NumberOfValues)] for ii in range(len(Y_test)): predic = Y_predict[ii] test = Y_test[ii] DATA[MICDIC[test]].append(predic) ACTUALDATA[MICDIC[test]].append(test) ACCclass = [] NumberDataClass = [] for ii in range(NumberOfValues): ACCclass.append(accuracy_dilute(DATA[ii], ACTUALDATA[ii])) NumberDataClass.append(len(DATA[ii])) plt.violinplot(DATA, showmeans=True, showextrema=True, positions=ActualValues) plt.plot(x, x, color='g') Z1 = np.polyfit(Y_test, Y_predict, 1) P1 = np.poly1d(Z1) R2 = metrics.r2_score(Y_predict, P1(Y_test)) plt.plot(Y_test, P1(Y_test), color='y') plt.plot(x, x - 1, color='r') plt.plot(x, x + 1, color='r') plt.xlabel('Actual value (log2 scale)') plt.ylabel('Predicted value (log2 scale)') plt.title('. Predicted vs actual values. Accuracy:'+\ str(np.round(accuracy_dilute(Y_predict,Y_test),4))) plt.legend([ 'Perfect prediction',
plt.xticks(fontsize=18) plt.yticks(fontsize=18) plt.legend(fontsize=18, loc='upper right') plt.savefig("../../plots/uubAllBinCenterCalibCh"+pmtId+".png", dpi=100) #plt.ylim(-15,55) #plt.savefig("../../plots/uubAllBinCenterCalibZoom"+pmtId+".png", dpi=100) plt.clf() print("uubAllBinCenterCalib OK") ''' # ===================== # *** For Area/Peak *** # ============================= # *** For Chis distribution *** ''' # *** HBase *** xbl = [] ybl = [] distChis = [] for st in range(0, 19): tmp = 0 ybl = [] for evt in range(0, histos.GetEntries()): histos.GetEntry(evt) tmp = histos.chisHbasePk.GetBinContent(st) if tmp > 0: ybl.append( tmp ) if len(ybl) > 0: distChis.append( ybl )
def figure_2_1():
    """Save a violin plot of ten randomly shifted normal reward samples.

    Draws a 200x10 standard-normal sample, adds one standard-normal offset
    per column, plots one violin per column and writes the figure to
    ``../images/figure_2_1.png`` before closing it.
    """
    # Draw order matters for reproducibility under a fixed seed:
    # the (200, 10) sample first, then the ten per-column offsets.
    samples = np.random.randn(200, 10)
    column_offsets = np.random.randn(10)
    rewards = samples + column_offsets

    plt.violinplot(dataset=rewards)
    plt.xlabel("Action")
    plt.ylabel("Reward distribution")
    plt.savefig('../images/figure_2_1.png')
    plt.close()
# Violin plot of regression r-values for expression, comparing fits on the
# full data against fits on residuals across three feature folders.
cdict = {'full': '#003366', 'res':'#900000'}
for ttype in ['full', 'res']:
    v_means = []
    for fol in range(1,4):
        vals = []
        for cell in cells:
            # Close each pickle file as soon as it is read instead of leaking
            # one open handle per (folder, cell) pair.
            with open(folder[fol]+'rgrExpr_'+ttype+'_'+cell+'_1.0.pkl', 'rb') as pkl_file:
                data = pickle.load(pkl_file)
            vals.append(data['r_value'])
        v_means.append(np.array(vals))
    # The two series are drawn side by side: 'full' left, 'res' right of each tick.
    if ttype=='full':
        pos = np.array([0.5, 2.0, 3.5])
    else:
        pos = np.array([1.0, 2.5, 4.0])
    violin = plt.violinplot(v_means, pos, widths=0.3, showmeans=True, showextrema=False)
    plt.setp(violin['bodies'], facecolor=cdict[ttype], edgecolor=cdict[ttype])
    for key in ['cmeans']:
        # Numeric line width: a string only works where matplotlib happens
        # to coerce it to float.
        plt.setp(violin[key], color=cdict[ttype], linewidth=3, alpha=0.5)
blue_patch = mpatches.Patch(color='#003366', label='Full data')
red_patch = mpatches.Patch(color='#900000', label='Residuals')
plt.legend(handles=[blue_patch, red_patch])
plt.xticks([0.75, 2.25, 3.75], ['Binary', 'ChromStates', 'Signals'], fontsize=16, color='k')
plt.yticks(np.arange(3, 11)/10.0, fontsize=16, color='k')
plt.ylabel('Pearson R', fontsize=18, color='k')
plt.title('Expression', fontsize=18)
plt.savefig('figures/r2Expr.png', bbox_inches='tight')
plt.figure()
def _print_reco_summary(reco_table, angle_unit, dist_unit):
    """Print the 68th-percentile xi / DeltaR and the median h_max of the table."""
    print("xi res (68-percentile) = {:4.3f} {}"
          .format(np.percentile(reco_table.cols.xi, 68), angle_unit))
    print("core res (68-percentile) = {:4.3f} {}"
          .format(np.percentile(reco_table.cols.DeltaR, 68), dist_unit))
    print("h_max (median) = {:4.3f} {}"
          .format(np.percentile(reco_table.cols.h_max, 50), dist_unit))


def main():
    """Reconstruct showers from the selected input files and report the results.

    Parses the command line, collects the input files, runs the event
    preparation / Hillas reconstruction loop, stores one row per reconstructed
    event in a PyTables table, prints running and final resolution figures and
    the cut flows and -- when ``args.plot`` is set -- shows histograms and
    violin plots of the angular error (xi) and core distance (DeltaR) binned by
    number of telescopes and by energy.

    Raises:
        ValueError: if none of the input selectors (file list, proton,
            electron, gamma) is set.
        SystemExit: with code -1 if no input file matches.
    """

    # your favourite units here
    energy_unit = u.TeV
    angle_unit = u.deg
    dist_unit = u.m

    parser = make_argparser()
    parser.add_argument('-o', '--outfile', type=str,
                        help="if given, write output file with reconstruction results")
    parser.add_argument('--plot_c', action='store_true',
                        help="plot camera-wise displays")

    group = parser.add_mutually_exclusive_group()
    group.add_argument('--proton', action='store_true',
                       help="do protons instead of gammas")
    group.add_argument('--electron', action='store_true',
                       help="do electrons instead of gammas")

    args = parser.parse_args()

    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
        # `channel` is used as plot title further down; without a value here
        # the plotting section raised NameError for explicit file lists.
        channel = "custom file list"
    elif args.proton:
        filenamelist = glob("{}/proton/*gz".format(args.indir))
        channel = "proton"
    elif args.electron:
        filenamelist = glob("{}/electron/*gz".format(args.indir))
        channel = "electron"
    elif args.gamma:
        filenamelist = glob("{}/gamma/*gz".format(args.indir))
        channel = "gamma"
    else:
        raise ValueError("don't know which input to use...")
    filenamelist.sort()

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        # same effect as exit(-1) without relying on the `site` builtin
        raise SystemExit(-1)
    print("found {} files".format(len(filenamelist)))

    # keeping track of events and where they were rejected
    Eventcutflow = CutFlow("EventCutFlow")
    Imagecutflow = CutFlow("ImageCutFlow")

    # takes care of image cleaning
    cleaner = ImageCleaner(mode=args.mode, cutflow=Imagecutflow,
                           wavelet_options=args.raw,
                           skip_edge_events=args.skip_edge_events,
                           island_cleaning=True)

    # the class that does the shower reconstruction
    shower_reco = HillasReconstructor()

    # NOTE(review): never used below; kept in case construction has side effects
    shower_max_estimator = ShowerMaxEstimator("paranal")

    preper = EventPreparer(cleaner=cleaner,
                           hillas_parameters=hillas_parameters,
                           shower_reco=shower_reco,
                           event_cutflow=Eventcutflow,
                           image_cutflow=Imagecutflow,
                           # event/image cuts:
                           allowed_cam_ids=[],  # means: all
                           min_ntel=3,
                           min_charge=args.min_charge,
                           min_pixel=3)

    # a signal handler to abort the event loop but still do the post-processing
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    try:
        # this class defines the reconstruction parameters to keep track of
        class RecoEvent(tb.IsDescription):
            NTels_trigg = tb.Int16Col(dflt=1, pos=0)
            NTels_clean = tb.Int16Col(dflt=1, pos=1)
            EnMC = tb.Float32Col(dflt=1, pos=2)
            xi = tb.Float32Col(dflt=1, pos=3)
            DeltaR = tb.Float32Col(dflt=1, pos=4)
            ErrEstPos = tb.Float32Col(dflt=1, pos=5)
            ErrEstDir = tb.Float32Col(dflt=1, pos=6)
            h_max = tb.Float32Col(dflt=1, pos=7)

        reco_outfile = tb.open_file(
            args.outfile, mode="w",
            # if we don't want to write the event list to disk, need to add more arguments
            **({} if args.store else {"driver": "H5FD_CORE",
                                      "driver_core_backing_store": False}))
        reco_table = reco_outfile.create_table("/", "reco_event", RecoEvent)
        reco_event = reco_table.row
    except Exception:
        # NOTE(review): `reco_table` stays undefined on this path although the
        # summary and plotting code below use it unconditionally -- confirm the
        # intended fallback.
        reco_event = RecoEvent()
        print("no pytables installed?")

    # ------------------------------------------------------------------
    # event loop
    # ------------------------------------------------------------------

    # define here which telescopes to loop over
    allowed_tels = None
    # allowed_tels = prod3b_tel_ids("L+F+D")
    for i, filename in enumerate(filenamelist[:args.last]):
        print("file: {i} filename = {filename}".format(i=i, filename=filename))

        source = hessio_event_source(filename,
                                     allowed_tels=allowed_tels,
                                     max_events=args.max_events)

        # loop that cleans and parametrises the images and performs the reconstruction
        for (event, hillas_dict, n_tels,
             tot_signal, max_signal, pos_fit, dir_fit, h_max,
             err_est_pos, err_est_dir) in preper.prepare_event(source):

            shower = event.mc

            # wrap the simulated azimuth / phi into the (-180, 180] deg range
            org_alt = u.Quantity(shower.alt).to(u.deg)
            org_az = u.Quantity(shower.az).to(u.deg)
            if org_az > 180 * u.deg:
                org_az -= 360 * u.deg

            org_the = alt_to_theta(org_alt)
            org_phi = az_to_phi(org_az)
            if org_phi > 180 * u.deg:
                org_phi -= 360 * u.deg
            if org_phi < -180 * u.deg:
                org_phi += 360 * u.deg

            shower_org = linalg.set_phi_theta(org_phi, org_the)
            shower_core = convert_astropy_array([shower.core_x, shower.core_y])

            # angle between fitted and simulated direction, and distance
            # between fitted and simulated core position
            xi = linalg.angle(dir_fit, shower_org).to(angle_unit)
            diff = linalg.length(pos_fit[:2] - shower_core)

            # print some performance
            print()
            print("xi = {:4.3f}".format(xi))
            print("pos = {:4.3f}".format(diff))
            print("h_max reco: {:4.3f}".format(h_max.to(u.km)))
            print("err_est_dir: {:4.3f}".format(err_est_dir.to(angle_unit)))
            print("err_est_pos: {:4.3f}".format(err_est_pos))

            try:
                # store the reconstruction data in the PyTable
                reco_event["NTels_trigg"] = n_tels["tot"]
                reco_event["NTels_clean"] = len(shower_reco.circles)
                reco_event["EnMC"] = event.mc.energy / energy_unit
                reco_event["xi"] = xi / angle_unit
                reco_event["DeltaR"] = diff / dist_unit
                reco_event["ErrEstPos"] = err_est_pos / dist_unit
                reco_event["ErrEstDir"] = err_est_dir / angle_unit
                reco_event["h_max"] = h_max / dist_unit
                reco_event.append()
                reco_table.flush()

                print()
                _print_reco_summary(reco_table, angle_unit, dist_unit)

            except NoPyTables:
                pass

            if args.plot_c:
                # imported for its side effect of registering the 3d projection
                from mpl_toolkits.mplot3d import Axes3D
                fig = plt.figure()
                # Figure.gca(projection=...) was removed from Matplotlib;
                # add_subplot creates the same 3D axes on old and new versions.
                ax = fig.add_subplot(111, projection='3d')
                for c in shower_reco.circles.values():
                    points = [c.pos + t * c.a * u.km
                              for t in np.linspace(0, 15, 3)]
                    ax.plot(*np.array(points).T,
                            linewidth=np.sqrt(c.weight) / 10)
                    ax.scatter(*c.pos[:, None].value, s=np.sqrt(c.weight))
                plt.xlabel("x")
                plt.ylabel("y")
                plt.pause(.1)

                # this plots
                # • the MC shower core
                # • the reconstructed shower core
                # • the used telescopes
                # • and the trace of the Hillas plane on the ground
                plt.figure()
                for tel_id, c in shower_reco.circles.items():
                    plt.scatter(c.pos[0], c.pos[1], s=np.sqrt(c.weight))
                    plt.gca().annotate(tel_id, (c.pos[0].value, c.pos[1].value))
                    plt.plot([c.pos[0].value-500*c.norm[1],
                              c.pos[0].value+500*c.norm[1]],
                             [c.pos[1].value+500*c.norm[0],
                              c.pos[1].value-500*c.norm[0]],
                             linewidth=np.sqrt(c.weight)/10)
                plt.scatter(*pos_fit[:2], c="black", marker="*", label="fitted")
                plt.scatter(*shower_core[:2], c="black", marker="P", label="MC")
                plt.legend()
                plt.xlabel("x")
                plt.ylabel("y")
                plt.xlim(-1400, 1400)
                plt.ylim(-1400, 1400)
                plt.show()

            if signal_handler.stop:
                break
        if signal_handler.stop:
            break

    print("\n" + "="*35 + "\n")
    _print_reco_summary(reco_table, angle_unit, dist_unit)

    # print the cutflows for telescopes and camera images
    print("\n\n")
    Eventcutflow("min2Tels trig")
    print()
    Imagecutflow(sort_column=1)

    # if we don't want to plot anything, we can exit now
    if not args.plot:
        return

    # ------------------------------------------------------------------
    # plots
    # ------------------------------------------------------------------

    plt.figure()
    plt.hist(reco_table.cols.h_max, bins=np.linspace(0, 15000, 51, True))
    plt.title(channel)
    plt.xlabel("h_max reco")
    plt.pause(.1)

    plt.figure()
    xi_edges = np.linspace(0, 5, 20)
    plt.hist(reco_table.cols.xi, bins=xi_edges, log=True)
    plt.xlabel(r"$\xi$ / deg")
    if args.write:
        save_fig('{}/reco_xi_{}'.format(args.plots_dir, args.mode))
    plt.pause(.1)

    plt.figure()
    plt.hist(reco_table.cols.ErrEstDir[:], bins=np.linspace(0, 20, 50))
    plt.title(channel)
    plt.xlabel("beta")
    plt.pause(.1)

    plt.figure()
    plt.hist(np.log10(reco_table.cols.xi[:] / reco_table.cols.ErrEstDir[:]),
             bins=50)
    plt.title(channel)
    plt.xlabel("log_10(xi / beta)")
    plt.pause(.1)

    # convert the xi-list into a dict with the number of used telescopes as keys
    xi_vs_tel = {}
    for xi, ntel in zip(reco_table.cols.xi, reco_table.cols.NTels_clean):
        xi_vs_tel.setdefault(ntel, []).append(xi)

    print(args.mode)
    for ntel, xis in sorted(xi_vs_tel.items()):
        print("NTel: {} -- median xi: {}".format(ntel, np.median(xis)))
        # print("histogram:", np.histogram(xis, bins=xi_edges))

    # create a list of energy bin-edges and -centres for violin plots
    Energy_edges = np.linspace(2, 8, 13)
    Energy_centres = (Energy_edges[1:]+Energy_edges[:-1])/2.

    # convert the xi-list in to an energy-binned dict with the bin centre as keys
    # NOTE(review): np.digitize returns 0 / len(edges) for values outside the
    # edges, so `sbin` can be -1 (silently mapped to the last centre) or
    # len(Energy_centres) (IndexError) -- confirm the energy range and unit.
    xi_vs_energy = {}
    for en, xi in zip(reco_table.cols.EnMC, reco_table.cols.xi):
        # get the bin number this event belongs into
        sbin = np.digitize(np.log10(en), Energy_edges)-1
        # the central value of the bin is the key for the dictionary
        xi_vs_energy.setdefault(Energy_centres[sbin], []).append(xi)

    # plotting the angular error as violin plots with binning in
    # number of telescopes and shower energy
    plt.figure()
    plt.subplot(211)
    plt.violinplot([np.log10(a) for a in xi_vs_tel.values()],
                   list(xi_vs_tel),
                   points=60, widths=.75, showextrema=False, showmedians=True)
    plt.xlabel("Number of Telescopes")
    plt.ylabel(r"log($\xi$ / deg)")
    plt.ylim(-3, 2)
    plt.grid()

    plt.subplot(212)
    plt.violinplot([np.log10(a) for a in xi_vs_energy.values()],
                   list(xi_vs_energy),
                   points=60, widths=(Energy_edges[1] - Energy_edges[0]) / 1.5,
                   showextrema=False, showmedians=True)
    plt.xlabel(r"log(Energy / GeV)")
    plt.ylabel(r"log($\xi$ / deg)")
    plt.ylim(-3, 2)
    plt.grid()

    plt.tight_layout()
    if args.write:
        save_fig('{}/reco_xi_vs_E_NTel_{}'.format(args.plots_dir, args.mode))

    plt.pause(.1)

    # convert the diffs-list into a dict with the number of used telescopes as keys
    diff_vs_tel = {}
    for diff, ntel in zip(reco_table.cols.DeltaR, reco_table.cols.NTels_clean):
        diff_vs_tel.setdefault(ntel, []).append(diff)

    # convert the diffs-list in to an energy-binned dict with the bin centre as keys
    diff_vs_energy = {}
    for en, diff in zip(reco_table.cols.EnMC, reco_table.cols.DeltaR):
        # get the bin number this event belongs into
        sbin = np.digitize(np.log10(en), Energy_edges) - 1
        # the central value of the bin is the key for the dictionary
        diff_vs_energy.setdefault(Energy_centres[sbin], []).append(diff)

    # plotting the core position error as violin plots with binning in
    # number of telescopes and shower energy
    plt.figure()
    plt.subplot(211)
    plt.violinplot([np.log10(a) for a in diff_vs_tel.values()],
                   list(diff_vs_tel),
                   points=60, widths=.75, showextrema=False, showmedians=True)
    plt.xlabel("Number of Telescopes")
    plt.ylabel(r"log($\Delta R$ / m)")
    plt.grid()

    plt.subplot(212)
    plt.violinplot([np.log10(a) for a in diff_vs_energy.values()],
                   list(diff_vs_energy),
                   points=60, widths=(Energy_edges[1] - Energy_edges[0]) / 1.5,
                   showextrema=False, showmedians=True)
    plt.xlabel(r"log(Energy / GeV)")
    plt.ylabel(r"log($\Delta R$ / m)")
    plt.grid()

    plt.tight_layout()
    if args.write:
        save_fig('{}/reco_dist_vs_E_NTel_{}'.format(args.plots_dir, args.mode))
    plt.show()
# Both figures show log10(MSE) per dataset; the y ticks sit on the integer
# exponents -7..-1 and are labelled with the corresponding powers of ten.
log_mse = [np.log10(x) for x in MSE]
decade_exponents = np.arange(-7, 0)

# Box plot of the per-dataset MSE.
plt.figure()
plt.boxplot(log_mse)
ax = plt.gca()
ax.set_yticks(decade_exponents)
ax.set_yticklabels(10.0**decade_exponents)
ax.set_xticklabels(datasets)
plt.grid()
plt.xlabel("Dataset", fontsize=11)
plt.xticks(rotation=45, fontsize=11)
plt.ylabel("MSE", fontsize=11)
plt.yticks(fontsize=11)
plt.savefig('./../imgs/boxplot.pdf', format='pdf', bbox_inches='tight')

# Violin plot of the same data; violins are drawn at positions 1..len(MSE),
# so the dataset names are attached to exactly those tick positions.
plt.figure()
plt.violinplot(log_mse, showmeans=False, showmedians=True)
ax = plt.gca()
ax.set_yticks(decade_exponents)
ax.set_yticklabels(10.0**decade_exponents)
plt.setp(ax, xticks=list(range(1, len(MSE) + 1)), xticklabels=datasets)
plt.grid()
plt.xlabel("Dataset", fontsize=11)
plt.xticks(rotation=45, fontsize=11)
plt.ylabel("MSE", fontsize=11)
plt.yticks(fontsize=11)
plt.savefig('./../imgs/violinplot.pdf', format='pdf', bbox_inches='tight')

# 5) plot the PSD
### Add text #axHistx.text(5.05,0.05,r'*LOWESS Smoothing',fontsize=8) plt.savefig(directoryfigure + 'scattertest2.png',dpi=800) ########################################################################### ########################################################################### ########################################################################### ########################################################################### fig = plt.figure() axb = fig.add_subplot(312) datai = [varx[mask],vary[mask]] vp = plt.violinplot(datai,showmeans=True,showmedians=False,vert=False,widths=0.6) axb.spines['top'].set_color('none') axb.spines['right'].set_color('none') axb.spines['left'].set_color('none') axb.spines['bottom'].set_color('none') axb.xaxis.set_ticks_position('bottom') axb.tick_params(left='off',right='off',bottom='off') plt.setp(axb,xticks=[]) axb.set_aspect(1.9) plt.setp(axb,yticks=[y+1 for y in range(len(datai))], yticklabels=['PIOMAS','ICESat-J']) for i in vp['bodies']:
# Append columns 2..11 of `x`, each wrapped in its own one-element list, in
# column order.
Xv.extend([x[x.columns[col_idx]]] for col_idx in range(2, 12))

# One violin position per entry expected in Xv (1..12).
pos = list(range(1, 13))
plt.violinplot(Xv, pos,
               points=40, widths=0.5,
               showmeans=True, showextrema=True, showmedians=True,
               bw_method='silverman')
plt.title('Violin plot of Timbre Average', fontsize=10)

# Results are only visible in an interactive session; nothing is stored.
y.describe()
y.mode()

# fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6, 6))
#
# axes[0, 1].violinplot(x[1:], pos, points=40, widths=0.5,
#                       showmeans=True, showextrema=True, showmedians=True,
#                       bw_method='silverman')
# axes[0, 1].set_title('Violin plot of Timbre average', fontsize=10)
#
def conf_int(data, confidence=0.95):
    """Return the half-width of the Student-t confidence interval of the mean.

    Args:
        data: sequence of numeric samples.
        confidence: two-sided confidence level, 0.95 by default.

    Returns:
        ``sem(data) * t.ppf((1 + confidence) / 2, n - 1)``, or ``nan`` when
        there are fewer than two samples or the standard error is zero.
    """
    a = 1.0 * np.array(data)
    n = len(a)
    # Check the sample size first: the standard error of fewer than two
    # samples is undefined, so there is no point in asking scipy for it.
    if n < 2:
        return np.nan
    se = scipy.stats.sem(a)
    if se == 0:
        return np.nan
    return se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1)


################################################################
## Line Plot RT-Prio (n, n) scaling
################################################################
d = (pd.read_csv('../perf-data/dur.csv', ))

fig, axes = plt.subplots(1, 1)

print(d['time'])
# order the rows by start, then pipe, and renumber them afterwards
d.sort_values(["start", "pipe"], axis=0, ascending=True, inplace=True)
print(d)
d = d.reset_index()
print(d)

plt.violinplot(d['time'])

plt.show()
# fig.savefig('dur-violin.pdf')
elif 'right' in file_names[i][j].lower() or 'rh' in file_names[i][j].lower(): df = pd.read_csv(file_names[i][j]) means = df.mean(axis = 0) #print (xticklabels[k]) #print (violin_labels[2 * i - 1], means[k]) # On a separate row and with a separate column iterator, fill in data single_plot[2 * i - 1].append(means[k]) #Violin plots plt.figure(xticklabels[k]) plt.violinplot(single_plot, showmeans=True) # Plot logistics plt.title(xticklabels[k]) plt.xlabel('Group') plt.ylabel('Observed Values') # Label the x axis more specifically plt.xticks([i+1 for i in range(2 * max_group)], violin_labels) # Print out all the plots plt.show() # Code to read in one csv file and isolate the different types of values
# Reorder the axes of covSuperList and reshape to (2, 4, -1, 2, 2); unpacking
# along the first axis yields the two stacks covsTeo and covsNum.
covsTeo, covsNum = np.array(covSuperList).transpose((1, 2, 0, 3, 4, 5)).reshape((2, 4, -1, 2, 2))

# Invert every 2x2 matrix of covsNum.
scaleNum = np.linalg.inv(covsNum)

# take the Frobenius norm of each matrix (norm over the last two axes)
covSuperFrob = np.linalg.norm(covSuperList, axis=(4, 5))

# %%
p = 2
matt = np.eye(p)

# The next two expressions are evaluated but their results are discarded;
# presumably they were compared by eye in an interactive cell -- TODO confirm.
np.exp(-p/2) / spe.gamma(N/2) / 2**(N*p/2) / np.linalg.det(matt)**(p/2+0.5)

sts.wishart.pdf(matt, df=N-1, scale=matt)

# %%
# NOTE(review): wishart() and pdf() are called without arguments here; this
# looks like unfinished scratch code -- confirm before running as a script.
rv = sts.wishart()
rv.pdf()

# Ratio of the index-0 to the index-1 Frobenius norms along axis 1, flattened
# to 4 rows.
frobQuotList = (covSuperFrob[:, 0] / covSuperFrob[:,1]).transpose((1, 0, 2)).reshape((4, -1))

plt.plot(frobQuotList)
plt.hist(frobQuotList[3])
plt.violinplot(frobQuotList.T, showmeans=True, showextrema=False)
def _load_eval_column(tag):
    """Load ``results/eval_res/r-mcos-<tag>.npy`` and reshape it in place to (N, 1)."""
    values = np.load('results/eval_res/r-mcos-' + tag + '.npy')
    values.shape = (values.shape[0], 1)
    return values


def _annotate_medians(med_vals, xy_offset, text_offset):
    """Write each median next to its violin (violins sit at x = 1, 2, ...)."""
    for position, med in enumerate(med_vals, start=1):
        plt.annotate(str(med), xy=(position, med + xy_offset),
                     xytext=(position - 0.05, med + text_offset))


def boxplot_embedding_models():
    """Show violin plots of the SI-SDR and ADT results of the three embedders.

    Loads the stored evaluation arrays of the ``disc``, ``embd`` and
    ``embd-loud`` models from ``results/eval_res``, draws one figure with the
    nine SI-SDR distributions and one with the six ADT (L1 error)
    distributions, annotates every violin with its rounded median and finally
    calls ``plt.show()``.
    """
    sns.set_context("paper", font_scale=2.6)

    # Each array is reshaped from its OWN length. The original reshaped the
    # 'embd-loud' masking SDR with the length of the 'embd' masking SDR, which
    # only worked while both files held the same number of entries.
    results = {}
    for model in ('disc', 'embd', 'embd-loud'):
        for metric in ('sdr', 'mask_sdr', 'lat_sdr', 'adt', 'lat_adt'):
            results[model, metric] = _load_eval_column(model + '_' + metric)

    # SDR plot
    si_sdr_res = np.hstack([results[model, metric]
                            for model in ('disc', 'embd', 'embd-loud')
                            for metric in ('sdr', 'mask_sdr', 'lat_sdr')])
    plt.violinplot(si_sdr_res, showmeans=False, showextrema=False,
                   showmedians=True)
    plt.xticks([1, 2, 3, 4, 5, 6, 7, 8, 9],
               ['nnMCS-Disc.', 'nnMCS-Disc:Mask', 'nnMCS-Disc:Lat',
                'nnMCS-Embd.', 'nnMCS-Embd:Mask', 'nnMCS-Embd:Lat',
                'nnMCS-Loud.', 'nnMCS-Loud:Mask', 'nnMCS-Loud:Lat'],
               fontsize=9)
    plt.ylabel('SI-SDR (dB)')
    plt.ylim(4, 37.3)
    med_vals = np.round(np.median(si_sdr_res, axis=0), 1)
    _annotate_medians(med_vals, xy_offset=1, text_offset=0.3)

    # ADT plot
    plt.figure()
    adt_res = np.hstack([results[model, metric]
                         for model in ('disc', 'embd', 'embd-loud')
                         for metric in ('adt', 'lat_adt')])
    plt.violinplot(adt_res, showmeans=False, showextrema=False,
                   showmedians=True)
    plt.xticks([1, 2, 3, 4, 5, 6],
               ['nnMCS-Disc.', 'nnMCS-Disc:Lat',
                'nnMCS-Embd.', 'nnMCS-Embd:Lat',
                'nnMCS-Loud.', 'nnMCS-Loud:Lat'],
               fontsize=9)
    plt.ylabel(r'$L_1$ Error')
    plt.ylim(0, 0.5)
    med_vals = np.round(np.median(adt_res, axis=0), 2)
    _annotate_medians(med_vals, xy_offset=0.001, text_offset=0.005)

    plt.show()
def createPlots(c1, c2, data, label, colour, x_axis, y_axis, point_size, name,
                axis, grid_linestyle, mode, grid_colour=None):
    """Draw one data series on the current matplotlib axes.

    Args:
        c1, c2: keys used to pick the x and y values out of ``data``.
        data: container indexed by ``c1`` / ``c2``.
        label: legend label of the series.
        colour: series colour. In ``LN_MEDIAN`` mode it is a pair
            ``(line_colour, per_point_colours)``.
        x_axis, y_axis: ``(start, stop, step)`` triples handed to
            ``np.arange`` to build the tick positions and axis limits.
        point_size: indexable; entry 0 is used for the point mode, entry 1 for
            the line modes (the marker size is its square root).
        name: plot title.
        axis: pair ``(x_label, y_label)``.
        grid_linestyle: line style of the grid.
        mode: one of ``PTN``, ``LN_DISC``, ``LN_MEDIAN``, ``VLN``.
        grid_colour: grid colour; when None the series colour is used with
            alpha 0.3.

    Returns:
        -1 if ``mode`` is not a known plot mode, otherwise None.
    """
    # Load the data
    x = data[c1]
    y = data[c2]

    # Arrange the data
    x_axis = np.arange(x_axis[0], x_axis[1], x_axis[2])
    y_axis = np.arange(y_axis[0], y_axis[1], y_axis[2])

    # The mode is compared by value: identity (`is`) only matches when the
    # caller passes the very same constant object.
    if mode == PTN:
        # Graph with points
        plt.plot(x, y, 'o', label=label, markersize=np.sqrt(point_size[0]),
                 color=colour)
    elif mode == LN_DISC:
        # Line graph with discontinuous lines
        plt.plot(x, y, label=label, marker='.',
                 markersize=np.sqrt(point_size[1]), color=colour,
                 linestyle=':')
        plt.fill_between(x, y, alpha=0.4, color=colour)
    elif mode == LN_MEDIAN:
        grid_col = colour[0]
        graph_col = colour[1]
        plt.plot(x, y, label=label, marker='.',
                 markersize=np.sqrt(point_size[1]), color=grid_col,
                 linestyle=':')
        plt.fill_between(x, y, alpha=0.2, color=grid_col)
        for idx, x_i in enumerate(x):
            # Vertical line
            plt.plot((x_i, x_i), (0, y[idx]), alpha=0.7,
                     color=graph_col[idx], linestyle="dashed")
            # Horizontal line
            plt.plot((0, x_i), (y[idx], y[idx]), alpha=0.7,
                     color=graph_col[idx], linestyle="dashed")
            # Point
            plt.plot(x_i, y[idx], 'o', markersize=3, alpha=0.7,
                     color=graph_col[idx])
        # Set colour variable to set the grid
        colour = grid_col
    elif mode == VLN:
        # A single violin of all y values, placed at x[1].
        violin_parts = plt.violinplot(y, [x[1]], points=100, widths=4,
                                      showmeans=True, showextrema=True,
                                      showmedians=True, bw_method=0.5)
        for part in ('cbars', 'cmins', 'cmaxes', 'cmeans', 'cmedians'):
            vp = violin_parts[part]
            vp.set_color(colour)
            vp.set_linewidth(1)
        for body in violin_parts['bodies']:
            body.set_color(colour)
            body.set_alpha(0.3)
    else:
        # Incorrect plot mode
        print("ERROR: Wrong plot mode")
        return -1

    # Set the ranges and ticks for both axes
    plt.xlim([x_axis[0], x_axis[-1]])
    plt.ylim([y_axis[0], y_axis[-1]])
    plt.xticks(x_axis)
    plt.yticks(y_axis)

    # Set the name to the graph and the names for the axis
    plt.title(name)
    plt.xlabel(axis[0])
    plt.ylabel(axis[1])

    # Set the colour to the grid
    if grid_colour is None:
        plt.grid(True, color=colour, alpha=0.3, linestyle=grid_linestyle)
    else:
        plt.grid(True, color=grid_colour, linestyle=grid_linestyle)
def _chi2_setting_label(setting):
    """Return the tick-label prefix of a density setting, or None if unknown."""
    if "sgde" in setting:
        if "zero" in setting:
            return "SGDE \n set-to-zero"
        return "SGDE \n interp. bound."
    if "nataf" in setting:
        return "Nataf"
    if "gaussian" in setting:
        return "KDE \n Gaussian"
    if "epanechnikov" in setting:
        return "KDE \n Epan."
    return None


def _chi2_inner_pvalue(samples, c, bins):
    """Return the chi-squared independence p-value of the samples inside (c, 1-c)^2."""
    samples = np.asarray(samples, dtype=float)
    if samples.size == 0:
        inner_samples = np.empty((0, 2))
    else:
        # Keep the rows whose first two coordinates lie strictly inside the
        # margin; one boolean mask replaces the per-sample np.append loop.
        first, second = samples[:, 0], samples[:, 1]
        keep = (c < first) & (first < 1 - c) & (c < second) & (second < 1 - c)
        inner_samples = samples[keep]
        inner_samples = inner_samples.reshape((inner_samples.size // 2), 2)

    # 2D histogram of the kept samples; the two outermost bins on every side
    # are dropped before the contingency test.
    h0 = np.histogram2d(inner_samples[:, 0], inner_samples[:, 1],
                        bins=bins)[0][2:-2, 2:-2]
    return chi2_contingency(h0)[1]


def plotpvalueofChi2IndependenceTest(densities, functionName, c=0.0, out=False):
    """Plot the chi-squared independence-test p-values of sampled densities.

    For each of the settings ``kde_gaussian``, ``kde_epanechnikov``,
    ``sgde_zero`` and ``sgde_boundaries`` and for every entry stored under that
    setting, the shuffled and the not-shuffled ``uniform_validation`` samples
    are histogrammed and tested with ``chi2_contingency``. The p-values are
    shown as violins (two per setting) together with a dashed line at 0.05.

    Args:
        densities: mapping ``setting -> {key -> values}``, where ``values``
            holds ``["samples"][<"shuffled"|"not_shuffled">]["uniform_validation"]``.
            The four settings listed above must be present.
        functionName: used in the output file name.
        c: margin; only samples with both coordinates in ``(c, 1 - c)`` are
            tested.
        out: if True the figure is saved via ``savefig`` and closed, otherwise
            it is shown.

    Raises:
        KeyError: if one of the four settings is missing from ``densities``.
    """
    numDensities = len(densities)
    numIterations = max([0] + [len(stats) for stats in densities.values()])

    data = np.zeros((numIterations, 2 * numDensities))
    names = [None] * data.shape[1]
    bins = np.linspace(0, 1, 10)

    for i, setting in enumerate(
            ["kde_gaussian", "kde_epanechnikov", "sgde_zero", "sgde_boundaries"]):
        stats = densities[setting]

        # Columns 2*i and 2*i + 1 hold the shuffled / not shuffled results.
        prefix = _chi2_setting_label(setting)
        if prefix is not None:
            names[2 * i] = prefix + " \n shuffled"
            names[2 * i + 1] = prefix + " \n not shuffled"

        for j, values in enumerate(stats.values()):
            samples = values["samples"]
            data[j, 2 * i] = _chi2_inner_pvalue(
                samples["shuffled"]["uniform_validation"], c, bins)
            data[j, 2 * i + 1] = _chi2_inner_pvalue(
                samples["not_shuffled"]["uniform_validation"], c, bins)

    pos = np.arange(0, len(names))
    xlim = (np.min(pos) - 0.5, np.max(pos) + 0.5)
    fig = plt.figure(figsize=(17, 5))
    plt.violinplot(data, pos, points=60, widths=0.7, showmeans=True,
                   showextrema=True, showmedians=True, bw_method=0.5)
    plt.xticks(pos, names)
    plt.ylabel("$p$-value")
    plt.hlines(0.05, xlim[0], xlim[1], linestyle="--")
    plt.xlim(xlim)
    # Raw string: "\c" is not a valid escape sequence in a normal literal.
    plt.title(r"$\chi^2$ test", fontproperties=load_font_properties())

    if out:
        savefig(fig,
                os.path.join("plots",
                             "chi_squared_%s_c%i" % (functionName,
                                                     np.round(c * 100))),
                tikz=True)
        plt.close(fig)
    else:
        plt.show()
""" y5 = [] x5 = [] start_index = 0 for i in range(len(x_d)): y_add, start_index = (norm_KDE(x_d[i], ordered_spectra, res,start_index)) if y_add > 0: y5.append(y_add) x5.append(x_d[i]) max2 = max(y5)""" #y5 = [y5[x] * hist_max / max2 for x in range(len(y5))] #plt.plot(x5, y5, "black") #plt.vlines(np.mean(mzrange),(hist_max/2)-hist_max*0.05,(hist_max/2)+hist_max*0.05) #plt.vlines([np.mean(mzrange)+theo_width,np.mean(mzrange)-theo_width], (hist_max/2)-hist_max*0.025,(hist_max/2)+hist_max*0.025,color="r",alpha=0.25) #plt.hlines((hist_max/2),np.mean(mzrange)-theo_width,np.mean(mzrange)+theo_width,color="r",alpha=0.25) except: "" #plt.scatter(raw_data,[-0.2]*len(raw_data),marker="+",color="blue",label="raw data") #customlegend = [Line2D([0], [0], color='black', lw=4),Line2D([0], [0], color='blue', lw=4),Line2D([0], [0], color='gray', lw=4),Line2D([0], [0], color='blue', marker="+", lw=0,label='Scatter')] #plt.legend(customlegend, ["Lorentzian KDE bandwidth={0}".format(bandwidth_L_kde),"Sinc KDE bandwidth={0}".format(SinC_bandwidth), "norm KDE bandwidth={0}".format(bandwidth_norm_kde),"Raw Data"]) #customlegend = [Line2D([0], [0], color='black', lw=4),Line2D([0], [0], color='blue', marker="+", lw=0,label='Scatter')] #plt.legend(customlegend, ["norm KDE bandwidth={0}".format(bandwidth_norm_kde),"Raw Data"]) plt.violinplot(data) plt.show()
ratios_nosvase = {}
svase_datasets = {}
nosvase_datasets = {}

# One figure per feature type: violins of the SNP counts with box plots of the
# rates drawn on top, for the svASE and the no-svASE data set.
for featuretype in ['reg', 'tfbs', 'dnase', 'zld', 'kr', 'med', 'D', 'da', 'twi']:
    svASE_snpcounts, nosvASE_snpcounts = get_datafiles(featuretype)

    # Keep the loaded tables and their rate columns for later use.
    svase_datasets[featuretype] = svASE_snpcounts
    ratios_svase[featuretype] = svASE_snpcounts.rate
    nosvase_datasets[featuretype] = nosvASE_snpcounts
    ratios_nosvase[featuretype] = nosvASE_snpcounts.rate

    figure()
    violinplot(
        [svASE_snpcounts.num_snps.dropna(),
         nosvASE_snpcounts.num_snps.dropna()],
        showmedians=True,
        showextrema=False,
    )
    boxplot([svASE_snpcounts.rate, nosvASE_snpcounts.rate])
    '''
    if featuretype != 'dnase':
        scatter(1 + .05 * randn(len(svASE_snpcounts)), svASE_snpcounts.rate)
        scatter(2 + .05 * randn(len(nosvASE_snpcounts)), nosvASE_snpcounts.rate)
    '''
    xticks([1, 2], ['svASE', 'no svASE'])

    # Extend the lower limit to 5 % of the upper limit below zero.
    ymin, ymax = ylim()
    ylim(-0.05 * ymax, ymax)

    title(featuretype)
    savefig('analysis/results/snp_rate_{}.png'.format(featuretype))
def summary_plot(shap_values, features=None, feature_names=None, max_display=None, plot_type="dot", color=None, axis_color="#333333", title=None, alpha=1, show=True, sort=True, color_bar=True, auto_size_plot=True, layered_violin_max_num_bins=20, class_names=None, export_path=None, width=8):
    """Create a SHAP summary plot, colored by feature values when they are provided.

    Parameters
    ----------
    shap_values : numpy.array
        Matrix of SHAP values (# samples x # features). A list of such
        matrices is treated as multi-class output and always drawn as a
        stacked bar plot. A 3-dimensional array is treated as SHAP
        interaction values and drawn as a row of sub-plots produced by
        recursive calls to this function.

    features : numpy.array or pandas.DataFrame or list
        Matrix of feature values (# samples x # features) or a feature_names list as shorthand

    feature_names : list
        Names of the features (length # features)

    max_display : int
        How many top features to include in the plot (default is 20, or 7 for interaction plots)

    plot_type : "dot" (default), "violin", "layered_violin" or "bar"
        What type of summary plot to produce

    color : color, colormap name or callable
        Defaults to "coolwarm" for layered violins, to a per-class callable
        for multi-class input and to ``colors.blue_rgb`` otherwise.

    axis_color : color
        Color applied to the axis ticks and tick labels.

    title : unused
        Accepted but not referenced anywhere in this function body.

    alpha : float
        Transparency passed to the scatter points / violin bodies.

    show : bool
        Call ``pl.show()`` once the plot is complete.

    sort : bool
        Order features by the sum of their absolute SHAP values; otherwise
        the first ``max_display`` features are used in reversed index order.

    color_bar : bool
        Draw a color bar (only when ``features`` is given and the plot is
        not a bar plot).

    auto_size_plot : bool
        Resize the current figure to ``width`` inches by a height derived
        from the number of displayed features.

    layered_violin_max_num_bins : int
        Upper limit on the number of layers per feature in a layered violin.

    class_names : list
        Legend labels for the multi-class bar plot (default "Class <i>").

    export_path : str
        If truthy, the figure is written there with ``pl.savefig`` before it
        is (optionally) shown.

    width : float
        Figure width in inches used when ``auto_size_plot`` is set.
    """

    multi_class = False
    if isinstance(shap_values, list):
        multi_class = True
        plot_type = "bar"  # only type supported for now
    else:
        assert len(shap_values.shape) != 1, "Summary plots need a matrix of shap_values, not a vector."

    # default color:
    if color is None:
        if plot_type == 'layered_violin':
            color = "coolwarm"
        elif multi_class:
            color = lambda i: colors.red_blue_circle(i / len(shap_values))
        else:
            color = colors.blue_rgb

    # convert from a DataFrame or other types
    # (the DataFrame check is done on the type name, so pandas is not imported here)
    if str(type(features)) == "<class 'pandas.core.frame.DataFrame'>":
        if feature_names is None:
            feature_names = features.columns
        features = features.values
    elif isinstance(features, list):
        # a plain list is the "feature names only" shorthand
        if feature_names is None:
            feature_names = features
        features = None
    elif (features is not None) and len(features.shape) == 1 and feature_names is None:
        # a 1-d array is likewise taken to be the list of feature names
        feature_names = features
        features = None

    num_features = (shap_values[0].shape[1] if multi_class else shap_values.shape[1])

    if feature_names is None:
        feature_names = np.array([labels['FEATURE'] % str(i) for i in range(num_features)])

    # plotting SHAP interaction values
    if not multi_class and len(shap_values.shape) == 3:
        if max_display is None:
            max_display = 7
        else:
            max_display = min(len(feature_names), max_display)

        # rank features by total absolute interaction strength, strongest first
        sort_inds = np.argsort(-np.abs(shap_values.sum(1)).sum(0))

        # get plotting limits (symmetric around zero so all sub-plots share an x range)
        delta = 1.0 / (shap_values.shape[1]**2)
        slow = np.nanpercentile(shap_values, delta)
        shigh = np.nanpercentile(shap_values, 100 - delta)
        v = max(abs(slow), abs(shigh))
        slow = -v
        shigh = v

        pl.figure(figsize=(1.5 * max_display + 1, 0.8 * max_display + 1))
        pl.subplot(1, max_display, 1)
        # NOTE(review): `feature_names[sort_inds]` needs array-style indexing;
        # a plain Python list of names would presumably fail here -- confirm.
        proj_shap_values = shap_values[:, sort_inds[0], sort_inds]
        proj_shap_values[:, 1:] *= 2  # because off diag effects are split in half
        summary_plot(proj_shap_values, features[:, sort_inds] if features is not None else None, feature_names=feature_names[sort_inds], sort=False, show=False, color_bar=False, auto_size_plot=False, max_display=max_display)
        pl.xlim((slow, shigh))
        pl.xlabel("")
        title_length_limit = 11
        pl.title(shorten_text(feature_names[sort_inds[0]], title_length_limit))
        for i in range(1, min(len(sort_inds), max_display)):
            ind = sort_inds[i]
            pl.subplot(1, max_display, i + 1)
            proj_shap_values = shap_values[:, ind, sort_inds]
            proj_shap_values *= 2
            proj_shap_values[:, i] /= 2  # because only off diag effects are split in half
            summary_plot(proj_shap_values, features[:, sort_inds] if features is not None else None, sort=False, feature_names=["" for i in range(len(feature_names))], show=False, color_bar=False, auto_size_plot=False, max_display=max_display)
            pl.xlim((slow, shigh))
            pl.xlabel("")
            # only the middle sub-plot carries the shared x label
            if i == min(len(sort_inds), max_display) // 2:
                pl.xlabel(labels['INTERACTION_VALUE'])
            pl.title(shorten_text(feature_names[ind], title_length_limit))
        pl.tight_layout(pad=0, w_pad=0, h_pad=0.0)
        pl.subplots_adjust(hspace=0, wspace=0.1)
        if export_path:
            pl.savefig(export_path)
        if show:
            pl.show()
        return

    if max_display is None:
        max_display = 20

    if sort:
        # order features by the sum of their effect magnitudes
        if multi_class:
            feature_order = np.argsort(np.sum(np.mean(np.abs(shap_values), axis=0), axis=0))
        else:
            feature_order = np.argsort(np.sum(np.abs(shap_values), axis=0))
        # argsort is ascending, so the strongest features are at the end
        feature_order = feature_order[-min(max_display, len(feature_order)):]
    else:
        feature_order = np.flip(np.arange(min(max_display, num_features)), 0)

    row_height = 0.4
    if auto_size_plot:
        pl.gcf().set_size_inches(width, len(feature_order) * row_height + 1.5)
    pl.axvline(x=0, color="#999999", zorder=-1)

    if plot_type == "dot":
        for pos, i in enumerate(feature_order):
            pl.axhline(y=pos, color="#cccccc", lw=0.5, dashes=(1, 5), zorder=-1)
            shaps = shap_values[:, i]
            values = None if features is None else features[:, i]
            # shuffle the samples so the drawing order is random
            inds = np.arange(len(shaps))
            np.random.shuffle(inds)
            if values is not None:
                values = values[inds]
            shaps = shaps[inds]
            colored_feature = True
            try:
                values = np.array(values, dtype=np.float64)  # make sure this can be numeric
            # NOTE(review): bare except also swallows KeyboardInterrupt etc.;
            # presumably only conversion errors are meant -- confirm.
            except:
                colored_feature = False
            N = len(shaps)
            # hspacing = (np.max(shaps) - np.min(shaps)) / 200
            # curr_bin = []
            # quantise the SHAP values into bins; points that share a bin are
            # stacked alternately above and below the row centre
            nbins = 100
            quant = np.round(nbins * (shaps - np.min(shaps)) / (np.max(shaps) - np.min(shaps) + 1e-8))
            inds = np.argsort(quant + np.random.randn(N) * 1e-6)
            layer = 0
            last_bin = -1
            ys = np.zeros(N)
            for ind in inds:
                if quant[ind] != last_bin:
                    layer = 0
                ys[ind] = np.ceil(layer / 2) * ((layer % 2) * 2 - 1)
                layer += 1
                last_bin = quant[ind]
            # rescale the vertical offsets relative to the row height
            ys *= 0.9 * (row_height / np.max(ys + 1))

            if features is not None and colored_feature:
                # trim the color range, but prevent the color range from collapsing
                vmin = np.nanpercentile(values, 5)
                vmax = np.nanpercentile(values, 95)
                if vmin == vmax:
                    vmin = np.nanpercentile(values, 1)
                    vmax = np.nanpercentile(values, 99)
                    if vmin == vmax:
                        vmin = np.min(values)
                        vmax = np.max(values)

                assert features.shape[0] == len(shaps), "Feature and SHAP matrices must have the same number of rows!"

                # plot the nan values in the interaction feature as grey
                nan_mask = np.isnan(values)
                pl.scatter(shaps[nan_mask], pos + ys[nan_mask], color="#777777", vmin=vmin, vmax=vmax, s=16, alpha=alpha, linewidth=0, zorder=3, rasterized=len(shaps) > 500)

                # plot the non-nan values colored by the trimmed feature value
                cvals = values[np.invert(nan_mask)].astype(np.float64)
                cvals_imp = cvals.copy()
                cvals_imp[np.isnan(cvals)] = (vmin + vmax) / 2.0
                cvals[cvals_imp > vmax] = vmax
                cvals[cvals_imp < vmin] = vmin
                pl.scatter(shaps[np.invert(nan_mask)], pos + ys[np.invert(nan_mask)], cmap=colors.red_blue, vmin=vmin, vmax=vmax, s=16, c=cvals, alpha=alpha, linewidth=0, zorder=3, rasterized=len(shaps) > 500)
            else:
                pl.scatter(shaps, pos + ys, s=16, alpha=alpha, linewidth=0, zorder=3, color=color if colored_feature else "#777777", rasterized=len(shaps) > 500)

    elif plot_type == "violin":
        for pos, i in enumerate(feature_order):
            pl.axhline(y=pos, color="#cccccc", lw=0.5, dashes=(1, 5), zorder=-1)

        if features is not None:
            global_low = np.nanpercentile(shap_values[:, :len(feature_names)].flatten(), 1)
            global_high = np.nanpercentile(shap_values[:, :len(feature_names)].flatten(), 99)
            for pos, i in enumerate(feature_order):
                shaps = shap_values[:, i]
                shap_min, shap_max = np.min(shaps), np.max(shaps)
                rng = shap_max - shap_min
                xs = np.linspace(np.min(shaps) - rng * 0.2, np.max(shaps) + rng * 0.2, 100)
                # add noise to nearly constant columns before the density estimate
                if np.std(shaps) < (global_high - global_low) / 100:
                    ds = gaussian_kde(shaps + np.random.randn(len(shaps)) * (global_high - global_low) / 100)(xs)
                else:
                    ds = gaussian_kde(shaps)(xs)
                ds /= np.max(ds) * 3

                values = features[:, i]
                # NOTE(review): window_size is computed but never used; the
                # sliding window below is capped at a hard-coded 20 -- confirm.
                window_size = max(10, len(values) // 20)
                smooth_values = np.zeros(len(xs) - 1)
                sort_inds = np.argsort(shaps)
                trailing_pos = 0
                leading_pos = 0
                running_sum = 0
                back_fill = 0
                # running mean of the feature value along the SHAP axis
                for j in range(len(xs) - 1):

                    while leading_pos < len(shaps) and xs[j] >= shaps[sort_inds[leading_pos]]:
                        running_sum += values[sort_inds[leading_pos]]
                        leading_pos += 1
                        if leading_pos - trailing_pos > 20:
                            running_sum -= values[sort_inds[trailing_pos]]
                            trailing_pos += 1
                    if leading_pos - trailing_pos > 0:
                        smooth_values[j] = running_sum / (leading_pos - trailing_pos)
                        # fill the leading slots that had no samples yet
                        for k in range(back_fill):
                            smooth_values[j - k - 1] = smooth_values[j]
                    else:
                        back_fill += 1

                vmin = np.nanpercentile(values, 5)
                vmax = np.nanpercentile(values, 95)
                if vmin == vmax:
                    vmin = np.nanpercentile(values, 1)
                    vmax = np.nanpercentile(values, 99)
                    if vmin == vmax:
                        vmin = np.min(values)
                        vmax = np.max(values)
                pl.scatter(shaps, np.ones(shap_values.shape[0]) * pos, s=9, cmap=colors.red_blue, vmin=vmin, vmax=vmax, c=values, alpha=alpha, linewidth=0, zorder=1)
                # smooth_values -= nxp.nanpercentile(smooth_values, 5)
                # smooth_values /= np.nanpercentile(smooth_values, 95)
                smooth_values -= vmin
                if vmax - vmin > 0:
                    smooth_values /= vmax - vmin
                # NOTE(review): this loop reuses `i`, overwriting the feature
                # index; `i` is not read again before the outer loop rebinds it.
                for i in range(len(xs) - 1):
                    if ds[i] > 0.05 or ds[i + 1] > 0.05:
                        pl.fill_between([xs[i], xs[i + 1]], [pos + ds[i], pos + ds[i + 1]], [pos - ds[i], pos - ds[i + 1]], color=colors.red_blue(smooth_values[i]), zorder=2)

        else:
            # without feature values fall back to matplotlib's own violins
            parts = pl.violinplot(shap_values[:, feature_order], range(len(feature_order)), points=200, vert=False, widths=0.7, showmeans=False, showextrema=False, showmedians=False)

            for pc in parts['bodies']:
                pc.set_facecolor(color)
                pc.set_edgecolor('none')
                pc.set_alpha(alpha)

    elif plot_type == "layered_violin":  # courtesy of @kodonnell
        # NOTE(review): `features` is dereferenced unconditionally in this
        # branch, so features=None would raise an AttributeError -- confirm.
        num_x_points = 200
        bins = np.linspace(0, features.shape[0], layered_violin_max_num_bins + 1).round(0).astype('int')  # the indices of the feature data corresponding to each bin
        shap_min, shap_max = np.min(shap_values), np.max(shap_values)
        x_points = np.linspace(shap_min, shap_max, num_x_points)

        # loop through each feature and plot:
        for pos, ind in enumerate(feature_order):
            # decide how to handle: if #unique < layered_violin_max_num_bins then split by unique value, otherwise use bins/percentiles.
            # to keep simpler code, in the case of uniques, we just adjust the bins to align with the unique counts.
            feature = features[:, ind]
            unique, counts = np.unique(feature, return_counts=True)
            if unique.shape[0] <= layered_violin_max_num_bins:
                order = np.argsort(unique)
                thesebins = np.cumsum(counts[order])
                thesebins = np.insert(thesebins, 0, 0)
            else:
                thesebins = bins
            nbins = thesebins.shape[0] - 1
            # order the feature data so we can apply percentiling
            order = np.argsort(feature)
            # x axis is located at y0 = pos, with pos being there for offset
            # (y0 is not read again in this function)
            y0 = np.ones(num_x_points) * pos
            # calculate kdes:
            ys = np.zeros((nbins, num_x_points))
            for i in range(nbins):
                # get shap values in this bin:
                shaps = shap_values[order[thesebins[i]:thesebins[i + 1]], ind]
                # if there's only one element, then we can't compute a KDE for it
                if shaps.shape[0] == 1:
                    warnings.warn("not enough data in bin #%d for feature %s, so it'll be ignored. Try increasing the number of records to plot." % (i, feature_names[ind]))
                    # to ignore it, just set it to the previous y-values (so the area between them will be zero). Note ys is already 0, so there's
                    # nothing to do if i == 0
                    if i > 0:
                        ys[i, :] = ys[i - 1, :]
                    continue
                # save kde of them: note that we add a tiny bit of gaussian noise to avoid singular matrix errors
                ys[i, :] = gaussian_kde(shaps + np.random.normal(loc=0, scale=0.001, size=shaps.shape[0]))(x_points)
                # scale it up so that the 'size' of each y represents the size of the bin. For continuous data this will
                # do nothing, but when we've gone with the unique option, this will matter - e.g. if 99% are male and 1%
                # female, we want the 1% to appear a lot smaller.
                size = thesebins[i + 1] - thesebins[i]
                bin_size_if_even = features.shape[0] / nbins
                relative_bin_size = size / bin_size_if_even
                ys[i, :] *= relative_bin_size
            # now plot 'em. We don't plot the individual strips, as this can leave whitespace between them.
            # instead, we plot the full kde, then remove outer strip and plot over it, etc., to ensure no
            # whitespace
            ys = np.cumsum(ys, axis=0)
            # NOTE(review): this rebinds the `width` parameter; the figure has
            # already been sized above, so only this branch sees the new value.
            width = 0.8
            scale = ys.max() * 2 / width  # 2 is here as we plot both sides of x axis
            for i in range(nbins - 1, -1, -1):
                y = ys[i, :] / scale
                # NOTE(review): `nbins - 1` is zero when a feature has a single
                # bin -- looks like a possible division by zero; confirm.
                c = pl.get_cmap(color)(i / (nbins - 1)) if color in pl.cm.datad else color  # if color is a cmap, use it, otherwise use a color
                pl.fill_between(x_points, pos - y, pos + y, facecolor=c)
        pl.xlim(shap_min, shap_max)

    elif not multi_class and plot_type == "bar":
        feature_inds = feature_order[:max_display]
        y_pos = np.arange(len(feature_inds))
        global_shap_values = np.abs(shap_values).mean(0)
        pl.barh(y_pos, global_shap_values[feature_inds], 0.7, align='center', color=color)
        pl.yticks(y_pos, fontsize=13)
        pl.gca().set_yticklabels([feature_names[i] for i in feature_inds])

    elif multi_class and plot_type == "bar":
        if class_names is None:
            class_names = ["Class " + str(i) for i in range(len(shap_values))]
        feature_inds = feature_order[:max_display]
        y_pos = np.arange(len(feature_inds))
        left_pos = np.zeros(len(feature_inds))

        # stack the classes, the one with the largest mean |SHAP| first
        class_inds = np.argsort([-np.abs(shap_values[i]).mean() for i in range(len(shap_values))])
        for i, ind in enumerate(class_inds):
            global_shap_values = np.abs(shap_values[ind]).mean(0)
            pl.barh(y_pos, global_shap_values[feature_inds], 0.7, left=left_pos, align='center', color=color(i), label=class_names[ind])
            left_pos += global_shap_values[feature_inds]
        pl.yticks(y_pos, fontsize=13)
        pl.gca().set_yticklabels([feature_names[i] for i in feature_inds])
        pl.legend(frameon=False, fontsize=12)

    # draw the color bar
    if color_bar and features is not None and plot_type != "bar" and \
            (plot_type != "layered_violin" or color in pl.cm.datad):
        import matplotlib.cm as cm
        m = cm.ScalarMappable(cmap=colors.red_blue if plot_type != "layered_violin" else pl.get_cmap(color))
        m.set_array([0, 1])
        cb = pl.colorbar(m, ticks=[0, 1], aspect=1000)
        cb.set_ticklabels([labels['FEATURE_VALUE_LOW'], labels['FEATURE_VALUE_HIGH']])
        cb.set_label(labels['FEATURE_VALUE'], size=12, labelpad=0)
        cb.ax.tick_params(labelsize=11, length=0)
        cb.set_alpha(1)
        cb.outline.set_visible(False)
        # derive the bar's aspect ratio from its rendered height in inches
        bbox = cb.ax.get_window_extent().transformed(pl.gcf().dpi_scale_trans.inverted())
        cb.ax.set_aspect((bbox.height - 0.9) * 20)
        # cb.draw_all()

    # common axis cosmetics for every non-interaction plot type
    pl.gca().xaxis.set_ticks_position('bottom')
    pl.gca().yaxis.set_ticks_position('none')
    pl.gca().spines['right'].set_visible(False)
    pl.gca().spines['top'].set_visible(False)
    pl.gca().spines['left'].set_visible(False)
    pl.gca().tick_params(color=axis_color, labelcolor=axis_color)
    pl.yticks(range(len(feature_order)), [feature_names[i] for i in feature_order], fontsize=13)
    if plot_type != "bar":
        pl.gca().tick_params('y', length=20, width=0.5, which='major')
    pl.gca().tick_params('x', labelsize=11)
    pl.ylim(-1, len(feature_order))
    if plot_type == "bar":
        pl.xlabel(labels['GLOBAL_VALUE'], fontsize=13)
    else:
        pl.xlabel(labels['VALUE'], fontsize=13)
    if export_path:
        pl.savefig(export_path)
    if show:
        pl.show()
def PlotDataFromFile(insurance_id): import matplotlib.pyplot as plt insurance_label = dbm.GetAccountLabel(insurance_id) # Reads the dataframe df = pd.read_csv('%s/%s.csv' % (insurance_label, insurance_label)) # Drops all NaNs columns df = df.dropna(axis='columns', how='all') # Calculates the occurence frequencies of the term in the documents terms = [] terms_freq = [] for row in df.index: occurences = 0 for column in df.columns[1:]: occurences += df[column][row] terms.append(df['Unnamed: 0'][row]) terms_freq.append(occurences) # Pairs term and it's frequencies zipped = zip(terms, terms_freq) # Sorts decrescently terms with it's frequencies zipped_sorted = sorted(zipped, key=lambda t: t[1], reverse=True) # Data array data = np.array([_[1] for _ in zipped_sorted]) data_labels = [_[0] for _ in zipped_sorted] # Finds the quartiles and median q1, median, q3 = np.percentile(data, [25, 50, 75]) print '1st Quartile', q1 print 'Median ', median print '3rd Quartile', q3 # Draws the bars chart. Term per occurence plt.figure(1) plt.bar(np.arange(len(data_labels)), data, align='center', alpha=0.5) plt.xlabel('Terms') plt.ylabel('Occurences') plt.title('Occurrence of terms in documents of %s' % insurance_label) plt.savefig('%s/%s_ocorrenceTerms.png'\ % (insurance_label, insurance_label)) plt.close() # Desenha o boxplot # Draws the boxplot plt.figure(2) plt.title('Boxplot of %s' % insurance_label) bp = plt.boxplot(data) plt.savefig('%s/%s_boxplot.png'\ % (insurance_label, insurance_label)) plt.close() # Draws the violin plt.figure(3) plt.title('Density and occurrence of terms in docs from %s' % insurance_label) plt.xlabel('Density') plt.ylabel('Occurrences') plt.violinplot(data, showmeans=False, showmedians=True) plt.savefig('%s/%s_violinplot.png' % (insurance_label, insurance_label)) CutFile(q3, zipped, df, insurance_label) plt.close() del df
def plotpvalueofKolmogorovSmirnovTest(densities, functionName, out=False):
    """Draw violin plots of the mean Kolmogorov-Smirnov p-values per estimator.

    For each of the four settings ``kde_gaussian``, ``kde_epanechnikov``,
    ``sgde_zero`` and ``sgde_boundaries`` (always in this order) two violins
    are drawn: one for the shuffled and one for the not shuffled samples.
    Every data point is the p-value of one run, averaged over its dimensions.

    Parameters
    ----------
    densities : dict
        Maps a setting name to a dict of runs. Each run must provide
        ``run["config"]["numDims"]`` and, for ``kind`` in ``"shuffled"`` and
        ``"not_shuffled"``, ``run["samples"][kind]["kstests"][idim][1]``
        (the p-value of dimension ``idim``).
    functionName : str
        Used in the output file name when ``out`` is set.
    out : bool
        If true, save the figure as ``plots/kolmogorov_smirnov_<functionName>``
        and close it; otherwise show it interactively.

    Raises
    ------
    KeyError
        If one of the four settings is missing from ``densities``.

    Notes
    -----
    The data matrix has ``2 * len(densities)`` columns and as many rows as
    the longest run list; settings with fewer runs keep zeros in the
    remaining rows, and columns of unknown settings stay unlabeled.
    """
    settings = ["kde_gaussian", "kde_epanechnikov", "sgde_zero", "sgde_boundaries"]

    numDensities = len(densities)
    # the longest run list determines the number of rows (0 for empty input)
    numIterations = max([0] + [len(stats) for stats in densities.values()])

    data = np.zeros((numIterations, 2 * numDensities))
    names = [None] * data.shape[1]

    for i, setting in enumerate(settings):
        stats = densities[setting]
        shuffled_col = 2 * i
        not_shuffled_col = shuffled_col + 1

        # tick label shared by the two columns of this setting
        if "sgde" in setting:
            if "zero" in setting:
                label = "SGDE \n set-to-zero"
            else:
                label = "SGDE \n interp. bound."
        elif "nataf" in setting:
            label = "Nataf"
        elif "gaussian" in setting:
            label = "KDE \n Gaussian"
        elif "epanechnikov" in setting:
            label = "KDE \n Epan."
        else:
            label = None

        if label is not None:
            names[shuffled_col] = label + " \n shuffled"
            names[not_shuffled_col] = label + " \n not shuffled"

        for j, values in enumerate(stats.values()):
            numDims = values["config"]["numDims"]
            pvalues_shuffled = np.array(
                [values["samples"]["shuffled"]["kstests"][idim][1]
                 for idim in range(numDims)], dtype=float)
            pvalues_not_shuffled = np.array(
                [values["samples"]["not_shuffled"]["kstests"][idim][1]
                 for idim in range(numDims)], dtype=float)

            data[j, shuffled_col] = pvalues_shuffled.mean()
            data[j, not_shuffled_col] = pvalues_not_shuffled.mean()

    pos = np.arange(0, len(names))
    xlim = (np.min(pos) - 0.5, np.max(pos) + 0.5)

    fig = plt.figure(figsize=(17, 5))
    plt.violinplot(data, pos, points=60, widths=0.7, showmeans=True,
                   showextrema=True, showmedians=True, bw_method=0.5)
    plt.xticks(pos, names)
    plt.ylabel("$p$-value")
    # dashed reference line at the 0.05 level
    plt.hlines(0.05, xlim[0], xlim[1], linestyle="--")
    plt.xlim(xlim)

    # the title does not depend on the function that is analysed
    plt.title("Kolmogorov-Smirnov test",
              fontproperties=load_font_properties())

    if out:
        savefig(fig,
                os.path.join("plots", "kolmogorov_smirnov_%s" % functionName),
                tikz=True)
        plt.close(fig)
    else:
        plt.show()
np.loadtxt(path_entity + 'susc.csv', delimiter=',')) c, r = correlation_function(simulated_matrix, J) index_ct = find_nearest(ts, critical_temperature) dimensionality = dim(c, r, index_ct) if not np.isinf(r[-1]): dimensionality_sim.append(dimensionality) dimensionality_exp.append(dimensionality_sim) fig, ax = plt.subplots(figsize=(10, 7)) colors = ['blue', 'green', 'red', 'black', 'cyan'] parts = plt.violinplot(dimensionality_exp, positions=np.array(sizes_), showmeans=True, showmedians=False) cont = 0 for pc in parts['bodies']: pc.set_facecolor(colors[cont]) cont += 1 blue_patch = mpatches.Patch(color='blue', label='Graph dimensionality = 1') green_patch = mpatches.Patch(color='green', label='Graph dimensionality = 2') #red_patch = mpatches.Patch(color='red', label='Graph dimensionality = 3') #red_patch = mpatches.Patch(color='black', label='Graph dimensionality = 4') #red_patch = mpatches.Patch(color='cyan', label='Graph dimensionality = 5') # black_patch = mpatches.Patch(color='black', label='Weighted 80%') plt.legend(handles=[blue_patch, green_patch])
# Scatter of the mean r^2 against the difference between the mean of the
# fake taus and the real tau.
plt.scatter(mean_r2, tau_diff)
plt.ylabel('mean(fake taus) - real tau')
plt.xlabel('mean r$^2$')
plt.title('Amygdala \n 1000 iterations')
plt.show()

#%% Individual violin plots

# One figure per row of `filtered_real`: the distribution of the 'tau' values
# of all rows in `filtered_fake` whose 'unit' equals that row's 'unit_id'.
for unit in range(len(filtered_real)):
    this_unit = filtered_real.iloc[unit]
    this_id = this_unit['unit_id']
    fake_units = filtered_fake[filtered_fake['unit'] == this_id]
    taus = fake_units['tau']
    try:
        plt.violinplot(taus, showmeans=True)
        plt.title('%i' % this_id)
        plt.show()
    except ValueError:
        # NOTE(review): presumably raised when a unit has no fake taus to
        # plot; such units are skipped without any message -- confirm.
        pass
def main():
    """Reconstruct showers from event files and summarise the performance.

    Parses the command line, builds the image cleaning / event preparation /
    reconstruction chain and loops over all events yielded by
    ``preper.prepare_event``. For every event the angle ``xi`` between the
    fitted direction and the MC shower direction and the distance between
    the fitted and the MC core position are printed and stored in a PyTables
    table. After the loop the 68-percentile resolutions and the cut flows are
    printed and, if ``args.plot`` is set, histograms and violin plots of the
    stored quantities are drawn.
    """

    # your favourite units here
    energy_unit = u.TeV
    angle_unit = u.deg
    dist_unit = u.m

    parser = make_argparser()
    parser.add_argument('-o', '--outfile', type=str, help="if given, write output file with reconstruction results")
    parser.add_argument('--plot_c', action='store_true', help="plot camera-wise displays")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--proton', action='store_true', help="do protons instead of gammas")
    group.add_argument('--electron', action='store_true', help="do electrons instead of gammas")

    args = parser.parse_args()

    # An explicit file list takes precedence over the particle-type switches.
    # NOTE(review): `channel` is only assigned in the proton/electron/gamma
    # branches but is used as a plot title further down; with an explicit
    # infile list that looks like a NameError -- confirm.
    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
    elif args.proton:
        filenamelist = glob("{}/proton/*gz".format(args.indir))
        channel = "proton"
    elif args.electron:
        filenamelist = glob("{}/electron/*gz".format(args.indir))
        channel = "electron"
    elif args.gamma:
        filenamelist = glob("{}/gamma/*gz".format(args.indir))
        channel = "gamma"
    else:
        raise ValueError("don't know which input to use...")
    filenamelist.sort()

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)
    else:
        print("found {} files".format(len(filenamelist)))

    # (neither dictionary is used again in this function)
    tel_phi = {}
    tel_theta = {}

    # keeping track of events and where they were rejected
    Eventcutflow = CutFlow("EventCutFlow")
    Imagecutflow = CutFlow("ImageCutFlow")

    # takes care of image cleaning
    cleaner = ImageCleaner(mode=args.mode, cutflow=Imagecutflow, wavelet_options=args.raw, skip_edge_events=args.skip_edge_events, island_cleaning=True)

    # the class that does the shower reconstruction
    shower_reco = HillasReconstructor()

    # (created here but not used again in this function)
    shower_max_estimator = ShowerMaxEstimator("paranal")

    preper = EventPreparer(
        cleaner=cleaner, hillas_parameters=hillas_parameters, shower_reco=shower_reco,
        event_cutflow=Eventcutflow, image_cutflow=Imagecutflow,
        # event/image cuts:
        allowed_cam_ids=[],  # means: all
        min_ntel=3, min_charge=args.min_charge, min_pixel=3)

    # a signal handler to abort the event loop but still do the post-processing
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    try:
        # this class defines the reconstruction parameters to keep track of
        class RecoEvent(tb.IsDescription):
            NTels_trigg = tb.Int16Col(dflt=1, pos=0)
            NTels_clean = tb.Int16Col(dflt=1, pos=1)
            EnMC = tb.Float32Col(dflt=1, pos=2)
            xi = tb.Float32Col(dflt=1, pos=3)
            DeltaR = tb.Float32Col(dflt=1, pos=4)
            ErrEstPos = tb.Float32Col(dflt=1, pos=5)
            ErrEstDir = tb.Float32Col(dflt=1, pos=6)
            h_max = tb.Float32Col(dflt=1, pos=7)

        reco_outfile = tb.open_file(
            args.outfile, mode="w",
            # if we don't want to write the event list to disk, need to add more arguments
            **({} if args.store else {"driver": "H5FD_CORE", "driver_core_backing_store": False}))
        reco_table = reco_outfile.create_table("/", "reco_event", RecoEvent)
        reco_event = reco_table.row
    # NOTE(review): bare except; if the class definition above is what
    # failed, `RecoEvent` is unbound here, and `reco_table` (used after the
    # loop) is never created on this path -- confirm the intended fallback.
    except:
        reco_event = RecoEvent()
        print("no pytables installed?")

    # ##        #######   #######  ########
    # ##       ##     ## ##     ## ##     ##
    # ##       ##     ## ##     ## ##     ##
    # ##       ##     ## ##     ## ########
    # ##       ##     ## ##     ## ##
    # ##       ##     ## ##     ## ##
    # ########  #######   #######  ##

    # (not used again in this function)
    cam_id_map = {}

    # define here which telescopes to loop over
    allowed_tels = None
    # allowed_tels = prod3b_tel_ids("L+F+D")
    for i, filename in enumerate(filenamelist[:args.last]):
        print("file: {i} filename = {filename}".format(i=i, filename=filename))

        source = hessio_event_source(filename, allowed_tels=allowed_tels, max_events=args.max_events)

        # loop that cleans and parametrises the images and performs the reconstruction
        for (event, hillas_dict, n_tels,
             tot_signal, max_signal, pos_fit, dir_fit, h_max,
             err_est_pos, err_est_dir) in preper.prepare_event(source):

            shower = event.mc

            # MC shower direction; azimuth and phi are wrapped into (-180, 180] deg
            org_alt = u.Quantity(shower.alt).to(u.deg)
            org_az = u.Quantity(shower.az).to(u.deg)
            if org_az > 180 * u.deg:
                org_az -= 360 * u.deg

            org_the = alt_to_theta(org_alt)
            org_phi = az_to_phi(org_az)
            if org_phi > 180 * u.deg:
                org_phi -= 360 * u.deg
            if org_phi < -180 * u.deg:
                org_phi += 360 * u.deg

            shower_org = linalg.set_phi_theta(org_phi, org_the)
            shower_core = convert_astropy_array([shower.core_x, shower.core_y])

            # angular offset of the fitted direction and distance of the
            # fitted core position from their MC counterparts
            xi = linalg.angle(dir_fit, shower_org).to(angle_unit)
            diff = linalg.length(pos_fit[:2] - shower_core)

            # print some performance
            print()
            print("xi = {:4.3f}".format(xi))
            print("pos = {:4.3f}".format(diff))
            print("h_max reco: {:4.3f}".format(h_max.to(u.km)))
            print("err_est_dir: {:4.3f}".format(err_est_dir.to(angle_unit)))
            print("err_est_pos: {:4.3f}".format(err_est_pos))

            try:
                # store the reconstruction data in the PyTable
                # (quantities are divided by their unit before being stored)
                reco_event["NTels_trigg"] = n_tels["tot"]
                reco_event["NTels_clean"] = len(shower_reco.circles)
                reco_event["EnMC"] = event.mc.energy / energy_unit
                reco_event["xi"] = xi / angle_unit
                reco_event["DeltaR"] = diff / dist_unit
                reco_event["ErrEstPos"] = err_est_pos / dist_unit
                reco_event["ErrEstDir"] = err_est_dir / angle_unit
                reco_event["h_max"] = h_max / dist_unit
                reco_event.append()
                reco_table.flush()

                # running resolution over everything stored so far
                print()
                print("xi res (68-percentile) = {:4.3f} {}".format(np.percentile(reco_table.cols.xi, 68), angle_unit))
                print("core res (68-percentile) = {:4.3f} {}".format(np.percentile(reco_table.cols.DeltaR, 68), dist_unit))
                print("h_max (median) = {:4.3f} {}".format(np.percentile(reco_table.cols.h_max, 50), dist_unit))

            except NoPyTables:
                pass

            if args.plot_c:
                from mpl_toolkits.mplot3d import Axes3D
                fig = plt.figure()
                ax = fig.gca(projection='3d')
                for c in shower_reco.circles.values():
                    points = [c.pos + t * c.a * u.km for t in np.linspace(0, 15, 3)]
                    ax.plot(*np.array(points).T, linewidth=np.sqrt(c.weight) / 10)
                    ax.scatter(*c.pos[:, None].value, s=np.sqrt(c.weight))
                plt.xlabel("x")
                plt.ylabel("y")
                plt.pause(.1)

                # this plots
                # • the MC shower core
                # • the reconstructed shower core
                # • the used telescopes
                # • and the trace of the Hillas plane on the ground
                plt.figure()
                for tel_id, c in shower_reco.circles.items():
                    plt.scatter(c.pos[0], c.pos[1], s=np.sqrt(c.weight))
                    plt.gca().annotate(tel_id, (c.pos[0].value, c.pos[1].value))
                    plt.plot([c.pos[0].value - 500 * c.norm[1], c.pos[0].value + 500 * c.norm[1]],
                             [c.pos[1].value + 500 * c.norm[0], c.pos[1].value - 500 * c.norm[0]],
                             linewidth=np.sqrt(c.weight) / 10)
                plt.scatter(*pos_fit[:2], c="black", marker="*", label="fitted")
                plt.scatter(*shower_core[:2], c="black", marker="P", label="MC")
                plt.legend()
                plt.xlabel("x")
                plt.ylabel("y")
                plt.xlim(-1400, 1400)
                plt.ylim(-1400, 1400)
                plt.show()

            # leave both loops once the signal handler has been triggered
            if signal_handler.stop:
                break
        if signal_handler.stop:
            break

    print("\n" + "=" * 35 + "\n")
    print("xi res (68-percentile) = {:4.3f} {}".format(np.percentile(reco_table.cols.xi, 68), angle_unit))
    print("core res (68-percentile) = {:4.3f} {}".format(np.percentile(reco_table.cols.DeltaR, 68), dist_unit))
    print("h_max (median) = {:4.3f} {}".format(np.percentile(reco_table.cols.h_max, 50), dist_unit))

    # print the cutflows for telescopes and camera images
    print("\n\n")
    Eventcutflow("min2Tels trig")
    print()
    Imagecutflow(sort_column=1)

    # if we don't want to plot anything, we can exit now
    if not args.plot:
        return

    # ########  ##        #######  ########  ######
    # ##     ## ##       ##     ##    ##    ##    ##
    # ##     ## ##       ##     ##    ##    ##
    # ########  ##       ##     ##    ##     ######
    # ##        ##       ##     ##    ##          ##
    # ##        ##       ##     ##    ##    ##    ##
    # ##        ########  #######     ##     ######

    plt.figure()
    plt.hist(reco_table.cols.h_max, bins=np.linspace(000, 15000, 51, True))
    plt.title(channel)
    plt.xlabel("h_max reco")
    plt.pause(.1)

    figure = plt.figure()
    xi_edges = np.linspace(0, 5, 20)
    plt.hist(reco_table.cols.xi, bins=xi_edges, log=True)
    plt.xlabel(r"$\xi$ / deg")
    if args.write:
        save_fig('{}/reco_xi_{}'.format(args.plots_dir, args.mode))
    plt.pause(.1)

    plt.figure()
    plt.hist(reco_table.cols.ErrEstDir[:], bins=np.linspace(0, 20, 50))
    plt.title(channel)
    plt.xlabel("beta")
    plt.pause(.1)

    plt.figure()
    plt.hist(np.log10(reco_table.cols.xi[:] / reco_table.cols.ErrEstDir[:]), bins=50)
    plt.title(channel)
    plt.xlabel("log_10(xi / beta)")
    plt.pause(.1)

    # convert the xi-list into a dict with the number of used telescopes as keys
    xi_vs_tel = {}
    for xi, ntel in zip(reco_table.cols.xi, reco_table.cols.NTels_clean):
        if ntel not in xi_vs_tel:
            xi_vs_tel[ntel] = [xi]
        else:
            xi_vs_tel[ntel].append(xi)

    print(args.mode)
    for ntel, xis in sorted(xi_vs_tel.items()):
        print("NTel: {} -- median xi: {}".format(ntel, np.median(xis)))
        # print("histogram:", np.histogram(xis, bins=xi_edges))

    # create a list of energy bin-edges and -centres for violin plots
    Energy_edges = np.linspace(2, 8, 13)
    Energy_centres = (Energy_edges[1:] + Energy_edges[:-1]) / 2.

    # convert the xi-list in to an energy-binned dict with the bin centre as keys
    # NOTE(review): EnMC is stored divided by TeV above, while the bin edges
    # span 2..8 in log10 and the axis label below says GeV -- confirm units.
    xi_vs_energy = {}
    for en, xi in zip(reco_table.cols.EnMC, reco_table.cols.xi):

        # get the bin number this event belongs into
        # NOTE(review): for energies outside the edges np.digitize returns 0
        # or len(edges), so sbin becomes -1 (wraps to the last centre) or an
        # out-of-range index -- confirm that this cannot happen.
        sbin = np.digitize(np.log10(en), Energy_edges) - 1

        # the central value of the bin is the key for the dictionary
        if Energy_centres[sbin] not in xi_vs_energy:
            xi_vs_energy[Energy_centres[sbin]] = [xi]
        else:
            xi_vs_energy[Energy_centres[sbin]] += [xi]

    # plotting the angular error as violin plots with binning in
    # number of telescopes and shower energy
    figure = plt.figure()
    plt.subplot(211)
    plt.violinplot([np.log10(a) for a in xi_vs_tel.values()],
                   [a for a in xi_vs_tel.keys()],
                   points=60, widths=.75, showextrema=False, showmedians=True)
    plt.xlabel("Number of Telescopes")
    plt.ylabel(r"log($\xi$ / deg)")
    plt.ylim(-3, 2)
    plt.grid()

    plt.subplot(212)
    plt.violinplot([np.log10(a) for a in xi_vs_energy.values()],
                   [a for a in xi_vs_energy.keys()],
                   points=60, widths=(Energy_edges[1] - Energy_edges[0]) / 1.5,
                   showextrema=False, showmedians=True)
    plt.xlabel(r"log(Energy / GeV)")
    plt.ylabel(r"log($\xi$ / deg)")
    plt.ylim(-3, 2)
    plt.grid()

    plt.tight_layout()
    if args.write:
        save_fig('{}/reco_xi_vs_E_NTel_{}'.format(args.plots_dir, args.mode))

    plt.pause(.1)

    # convert the diffs-list into a dict with the number of used telescopes as keys
    diff_vs_tel = {}
    for diff, ntel in zip(reco_table.cols.DeltaR, reco_table.cols.NTels_clean):
        if ntel not in diff_vs_tel:
            diff_vs_tel[ntel] = [diff]
        else:
            diff_vs_tel[ntel].append(diff)

    # convert the diffs-list in to an energy-binned dict with the bin centre as keys
    diff_vs_energy = {}
    for en, diff in zip(reco_table.cols.EnMC, reco_table.cols.DeltaR):

        # get the bin number this event belongs into
        # (same out-of-range caveat as for the xi binning above)
        sbin = np.digitize(np.log10(en), Energy_edges) - 1

        # the central value of the bin is the key for the dictionary
        if Energy_centres[sbin] not in diff_vs_energy:
            diff_vs_energy[Energy_centres[sbin]] = [diff]
        else:
            diff_vs_energy[Energy_centres[sbin]] += [diff]

    # plotting the core position error as violin plots with binning in
    # number of telescopes and shower energy
    plt.figure()
    plt.subplot(211)
    plt.violinplot([np.log10(a) for a in diff_vs_tel.values()],
                   [a for a in diff_vs_tel.keys()],
                   points=60, widths=.75, showextrema=False, showmedians=True)
    plt.xlabel("Number of Telescopes")
    plt.ylabel(r"log($\Delta R$ / m)")
    plt.grid()

    plt.subplot(212)
    plt.violinplot([np.log10(a) for a in diff_vs_energy.values()],
                   [a for a in diff_vs_energy.keys()],
                   points=60, widths=(Energy_edges[1] - Energy_edges[0]) / 1.5,
                   showextrema=False, showmedians=True)
    plt.xlabel(r"log(Energy / GeV)")
    plt.ylabel(r"log($\Delta R$ / m)")
    plt.grid()

    plt.tight_layout()
    if args.write:
        save_fig('{}/reco_dist_vs_E_NTel_{}'.format(args.plots_dir, args.mode))
    plt.show()
def plot_hex_and_violin(abscissa, ordinate, bin_edges, extent=None,
                        xlabel="", ylabel="", zlabel="", do_hex=True, do_violin=True,
                        cm=plt.cm.inferno, axis=None, v_padding=.015, **kwargs):
    """
    takes two arrays of coordinates and creates a 2D hexbin plot and a violin plot
    (or just one of them)

    Parameters
    ----------
    abscissa, ordinate : arrays
        the coordinates of the data to plot
    bin_edges : array
        bin edges along the abscissa; used to group the ordinate values into
        one violin per populated bin
    extent : 4-sequence of floats (default: None)
        extension of the abscissa, ordinate; given as is to plt.hexbin and
        also used to set the axis limits of both plots
    xlabel, ylabel : strings (defaults: "")
        labels for the two axes of either plot
    zlabel : string (default: "")
        label for the colorbar of the hexbin plot
    do_hex, do_violin : bools (defaults: True)
        whether or not to do the respective plots
    cm : colour map (default: plt.cm.inferno)
        colour map to be used for the hexbin plot
    axis : matplotlib axes (default: None)
        if given and only one plot type is requested, it is made the current
        axes and drawn into. If both plot types are requested, `axis` is
        switched off and its area is split vertically into two new axes:
        hexbin in the upper part, violins in the lower part. Without `axis`,
        subplots 211 / 212 of the current figure are used when both plots
        are drawn.
    v_padding : float (default: 0.015)
        only used when `axis` is split; each of the two new axes covers a
        fraction ``0.5 - v_padding`` of the height of `axis`
    kwargs : args dictionary
        more arguments to be passed to plt.hexbin
    """

    # explicit checks: truth-testing a NumPy array `extent` would raise
    has_axis = axis is not None
    has_extent = extent is not None and len(extent) > 0

    if has_axis:
        if do_hex and do_violin:
            from matplotlib.axes import Axes
            from matplotlib.transforms import Bbox
            axis_bbox = axis.get_position()
            axis.axis("off")
        else:
            plt.sca(axis)

    # make a normal 2D hexplot from the given data
    if do_hex:
        # if we do both plot types, the hexbin goes into the upper half
        if do_violin:
            if has_axis:
                ax_hex_pos = axis_bbox.get_points().copy()  # [[x0, y0], [x1, y1]]
                # raise the lower edge so only the upper part remains
                ax_hex_pos[0, 1] += np.diff(ax_hex_pos, axis=0)[0, 1] * (.5 + v_padding)
                ax_hex = Axes(plt.gcf(), Bbox.from_extents(ax_hex_pos))
                plt.gcf().add_axes(ax_hex)
                plt.sca(ax_hex)
                ax_hex.set_xticklabels([])
            else:
                plt.subplot(211)

        plt.hexbin(abscissa, ordinate, gridsize=40, extent=extent, cmap=cm, **kwargs)
        cb = plt.colorbar()
        cb.set_label(zlabel)

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        if has_extent:
            plt.xlim(extent[:2])
            plt.ylim(extent[2:])

    # prepare and draw the data for the violin plot
    if do_violin:
        # if we do both plot types, the violins go into the lower half
        if do_hex:
            if has_axis:
                ax_vio_pos = axis_bbox.get_points().copy()  # [[x0, y0], [x1, y1]]
                # lower the upper edge so only the lower part remains
                ax_vio_pos[1, 1] -= np.diff(ax_vio_pos, axis=0)[0, 1] * (.5 + v_padding)
                ax_vio = Axes(plt.gcf(), Bbox.from_extents(ax_vio_pos))
                plt.gcf().add_axes(ax_vio)
                plt.sca(ax_vio)
            else:
                plt.subplot(212)

        # accept plain sequences as well as arrays for the bin edges
        bin_edges = np.asarray(bin_edges)
        bin_centres = (bin_edges[1:] + bin_edges[:-1]) / 2.
        bin_widths = bin_edges[1:] - bin_edges[:-1]
        last_bin = len(bin_centres) - 1

        # sort the ordinate values by the index of the bin their abscissa falls into
        # outliers are put into the first and last bin accordingly
        vals_by_bin = {}
        for dep, val in zip(abscissa, ordinate):
            ibin = int(np.clip(np.digitize(dep, bin_edges) - 1, 0, last_bin))
            vals_by_bin.setdefault(ibin, []).append(val)

        # one violin per populated bin, positioned at the bin centre and
        # ordered by that centre
        filled = sorted(vals_by_bin, key=lambda i: bin_centres[i])
        keys = [bin_centres[i] for i in filled]
        vals = [vals_by_bin[i] for i in filled]
        # the widths of the violins are 90 % of the corresponding bin width
        widths = [bin_widths[i] * .9 for i in filled]

        # nothing to draw without data; still label and scale the axes below
        if vals:
            plt.violinplot(vals, keys, points=60, widths=widths,
                           showextrema=False, showmedians=True)

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)

        if has_extent:
            # adding a colour bar to the hexbin plot reduces its width by 1/5
            # adjusting the extent of the violin plot to sync up with the hexbin plot
            x_max = (5. * extent[1] - extent[0]) / 4. if do_hex else extent[1]
            plt.xlim([extent[0], x_max])
            # for good measure also sync the vertical extent
            plt.ylim(extent[2:])

        plt.grid()
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot the distribution of glucose mass yield for three variants.

    The variants found under `inputDir` are sorted into N_VARIANTS slots
    according to the constraints they disable (as reported by
    `get_disabled_constraints`). For every cell of every selected variant the
    per-time-step growth yield on glucose is computed, pairwise Welch t-tests
    between the variants are printed, and one violin per variant is drawn
    together with a dashed line at VALIDATION_YIELD.

    Args:
        inputDir: directory containing the variant simulation output and the
            pickled sim data under ``kb/``.
        plotOutDir: directory the figure is written to (created if needed).
        plotOutFileName: file name passed on to `exportFigure`.
        simDataFile: not used by this method.
        validationDataFile: not used by this method.
        metadata: passed on to `exportFigure`.

    Raises:
        Exception: if `inputDir` is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        # call form of raise: valid on both Python 2 and Python 3
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    all_variants = ap.get_variants()

    # -1 marks a slot for which no matching variant was found
    variants = -np.ones(N_VARIANTS)
    for variant in all_variants:
        _, additional_disabled = get_disabled_constraints(variant)
        if additional_disabled is None:
            variants[0] = variant
        elif len(additional_disabled) == 0:
            variants[1] = variant
        elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
            variants[2] = variant

    if np.any(variants < 0):
        print('Not enough variants to analyze')
        return

    # NOTE(review): unpickling executes arbitrary code; only use with trusted
    # simulation output.
    sim_data_path = os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
    with open(sim_data_path, 'rb') as f:
        sim_data = cPickle.load(f)

    all_yields = []
    for variant in variants:
        yields = []

        for sim_dir in ap.get_cells(variant=[variant]):
            sim_out_dir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            fba_reader = TableReader(os.path.join(sim_out_dir, 'FBAResults'))
            main_reader = TableReader(os.path.join(sim_out_dir, 'Main'))
            mass_reader = TableReader(os.path.join(sim_out_dir, 'Mass'))

            # Load data
            time_step_sec = main_reader.readColumn('timeStepSec')
            external_fluxes = fba_reader.readColumn('externalExchangeFluxes')
            external_molecules = fba_reader.readAttribute('externalMoleculeIDs')
            dry_mass = MASS_UNITS * mass_reader.readColumn('dryMass')
            growth = GROWTH_UNITS * mass_reader.readColumn('growth') / time_step_sec

            # Calculate growth yield on glucose
            glc_idx = external_molecules.index(GLUCOSE_ID)
            glc_flux = FLUX_UNITS * external_fluxes[:, glc_idx]
            glc_mw = sim_data.getter.getMass([GLUCOSE_ID])[0]
            glc_mass_flux = glc_flux * glc_mw * dry_mass
            # flux sign is flipped so the resulting yield is growth per glucose flux
            glc_mass_yield = growth / -glc_mass_flux

            # the first time step is left out of the distribution
            yields.extend(glc_mass_yield[1:].asNumber())

        all_yields.append(yields)

    # Pairwise Welch t-tests (unequal variances) between all variants
    for i, v1 in enumerate(variants):
        for j, v2 in enumerate(variants[i + 1:], start=i + 1):
            _, p = stats.ttest_ind(all_yields[i], all_yields[j], equal_var=False)
            print('p={:.2e} for variant {} vs variant {}'.format(p, v1, v2))

    plt.figure(figsize=(4, 4))
    xticks = range(N_VARIANTS)

    # Plot data
    plt.violinplot(all_yields, xticks, showmeans=False, showextrema=False)
    plt.axhline(VALIDATION_YIELD, linestyle='--', color='#eb7037')

    # Format axes
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.xticks(xticks, VARIANT_LABELS)
    plt.ylabel('Glucose Yield\n(g cell / g glucose)')

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def _cross_validate(model, X, y, n_splits, scoring):
    """Return the cross-validation scores of `model` on consecutive K folds."""
    # The folds are not shuffled, so no random_state is passed: a seed has no
    # effect here and current scikit-learn rejects it with a ValueError.
    kfold = model_selection.KFold(n_splits=n_splits)
    return model_selection.cross_val_score(model, X, y, cv=kfold, scoring=scoring)


def _fit_and_report(clf, X, y, cvs, score_label, plot_2d=None):
    """Fit `clf` on a fixed 75/25 split and print its hold-out scores.

    `plot_2d` is either None or a ``(features, classifier_name)`` pair; when
    given, the fitted classifier is passed to `plot_results_2D` before the
    scores are printed.
    """
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=0.25, random_state=0)
    clf.fit(X_train, y_train)
    if plot_2d is not None:
        features, classifier_name = plot_2d
        plot_results_2D(X_test, y_test, features, classifier_name, clf, np.mean(cvs))
    y_pred = clf.predict(X_test)
    print(score_label, metrics.f1_score(y_test, y_pred, average="macro"))
    print('cross_val_score mean: ', np.mean(cvs))
    print(metrics.confusion_matrix(y_test, y_pred))


def _plot_paired_violins(results1, results2, names):
    """Draw two violins per algorithm, side by side, with one tick per algorithm."""
    plt.violinplot(results1, positions=np.arange(len(results1)) * 2.0 - 0.4,
                   showmeans=True, widths=0.6)
    plt.violinplot(results2, positions=np.arange(len(results2)) * 2.0 + 0.4,
                   showmeans=True, widths=0.6)
    plt.xticks(range(0, len(names) * 2, 2), names)
    plt.xlim(-2, len(names) * 2)


def do_analyse(feature1, feature2, feature3):
    """
    1) Analyze GaussianNB, SVC and KNN without adjusting their parameters
        - on all the features of the dataset
        - on the chosen features of the dataset
    2) Plot a comparison violin plot of the cross_val_scores grouped by
       algorithm and save it to ``results_<dataset_name>/``
    3) Analyze GaussianNB, SVC and KNN with hand-picked parameters
        - on all the features of the dataset
        - on the chosen features of the dataset
    4) Plot visualization of the predicted areas in 2-D space
       (only when `feature3` is falsy)
    5) Plot a comparison violin plot of the cross_val_scores of the results
       grouped by the algorithm

    Parameters
    ----------
    feature1, feature2, feature3 :
        passed unchanged to `set_data_analyse` to build the reduced data set
    """
    folder = "results_{0}".format(dataset_name)
    if not os.path.exists(folder):
        os.makedirs(folder)

    scoring = 'accuracy'
    separator = '/////////////////////////////////////////////'
    # the 2-D decision plot is only drawn when no third feature is selected
    want_2d_plot = not feature3

    # ------------------------------------------------------------------
    # Untuned models on all features and on the chosen features
    # ------------------------------------------------------------------
    models = [
        ('NB', GaussianNB()),
        ('SVM', SVC(gamma='auto')),
        ('KNN', KNeighborsClassifier()),
    ]

    # all features
    X = normalize(dataset.data, axis=0)
    y = dataset.target
    # chosen features
    X2, y2, features = set_data_analyse(feature1, feature2, feature3)

    results1 = []
    results2 = []
    names = []
    for name, model in models:
        results1.append(_cross_validate(model, X, y, 5, scoring))
        results2.append(_cross_validate(model, X2, y2, 5, scoring))
        names.append(name)

    # Comparison plot of NOT tuned algorithms
    plt.figure(figsize=(7, 6))
    _plot_paired_violins(results1, results2, names)
    plt.ylim(0.3, 1)
    plt.tight_layout()
    # NOTE(review): these legend proxies use fixed colours while the violins
    # keep matplotlib's defaults - confirm the intended colour mapping.
    plt.plot([], c='#2C7BB6', label='30 features')
    plt.plot([], c='#D7191C', label='2 features')
    plt.legend()
    plt.title('Comparison of untuned algorithms on 30 an 2 features')
    plt.savefig(("./{0}/Comparison_NOT_optimized.png".format(folder)), bbox_inches='tight')
    plt.close('all')

    # ------------------------------------------------------------------
    # Models with hand-picked parameters
    # ------------------------------------------------------------------
    results1 = []
    results2 = []
    names = []

    # Performing GaussianNB on all the features
    print(separator)
    print('Performing GaussianNB on all the features\n')
    clf = GaussianNB()
    X = normalize(dataset.data, axis=0)
    y = dataset.target
    cvs = _cross_validate(clf, X, y, 10, scoring)
    results1.append(cvs)
    names.append('NB')
    _fit_and_report(clf, X, y, cvs, 'GaussianNB score: ')

    # Performing Gaussian on the chosen features
    print(separator)
    print('Performing Gaussian on features:\n', feature1, '\n', feature2, '\n', feature3)
    X, y, features = set_data_analyse(feature1, feature2, feature3)
    clf = GaussianNB()
    cvs = _cross_validate(clf, X, y, 10, scoring)
    results2.append(cvs)
    _fit_and_report(clf, X, y, cvs, 'GaussianNB on 2 features score: ',
                    plot_2d=(features, 'GaussianNB') if want_2d_plot else None)

    # Performing SVC on all the features
    print(separator)
    print('Performing SVC on all the features\n')
    clf = SVC(C=100, kernel='rbf', gamma='scale')
    X = normalize(dataset.data, axis=0)
    y = dataset.target
    cvs = _cross_validate(clf, X, y, 10, scoring)
    results1.append(cvs)
    names.append('SVC')
    _fit_and_report(clf, X, y, cvs, 'SVC score: ')

    # Performing SVC on the chosen features
    print(separator)
    print('Performing SVC on features:\n', feature1, '\n', feature2, '\n', feature3)
    X, y, features = set_data_analyse(feature1, feature2, feature3)
    clf = SVC(C=100, kernel='rbf', gamma='scale', random_state=None)
    cvs = _cross_validate(clf, X, y, 10, scoring)
    results2.append(cvs)
    _fit_and_report(clf, X, y, cvs, 'SVC on 2 features score: ',
                    plot_2d=(features, 'SVC') if want_2d_plot else None)

    # Performing KNeighborsClassifier on all the features
    print(separator)
    print('Performing KNeighborsClassifier on all the features\n')
    clf = KNeighborsClassifier(n_neighbors=1, weights='uniform')
    X = normalize(dataset.data, axis=0)
    y = dataset.target
    cvs = _cross_validate(clf, X, y, 10, scoring)
    print('mean cvs: ', np.mean(cvs))
    results1.append(cvs)
    names.append('KNN')
    _fit_and_report(clf, X, y, cvs, 'KNeighborsClassifier score: ')

    # Performing KNeighborsClassifier on the chosen features
    print(separator)
    print('Performing KNN on features:\n', feature1, '\n', feature2, '\n', feature3)
    X, y, features = set_data_analyse(feature1, feature2, feature3)
    clf = KNeighborsClassifier(n_neighbors=5, weights='uniform')
    cvs = _cross_validate(clf, X, y, 10, scoring)
    results2.append(cvs)
    _fit_and_report(clf, X, y, cvs, 'KNeighborsClassifier score: ',
                    plot_2d=(features, 'KN') if want_2d_plot else None)

    # Comparison plot of tuned algorithms
    # NOTE(review): this figure is neither saved nor closed here - confirm
    # whether a savefig/close step is missing.
    plt.figure(figsize=(7, 6))
    _plot_paired_violins(results1, results2, names)
grid.get_obs(), target_policy, behaviour_policy, double=False, step_size=0.1) dq_agent = GeneralQ(grid._layout.size, 4, grid.get_obs(), target_policy, behaviour_policy, double=True, step_size=0.1) mean_reward_q_learning.append(run_experiment(grid, q_agent, int(2e5))) mean_reward_double_q_learning.append( run_experiment(grid, dq_agent, int(2e5))) plt.violinplot([mean_reward_q_learning, mean_reward_double_q_learning]) plt.xticks([1, 2], ["Q-learning", "Double Q-learning"], rotation=60, size=12) plt.ylabel("average reward during learning", size=12) ax = plt.gca() ax.set_axis_bgcolor('white') ax.grid(0) q = q_agent.q_values.reshape(grid._layout.shape + (4, )) plot_action_values(q, vmin=-5) q = dq_agent.q_values.reshape(grid._layout.shape + (4, )) plot_action_values(q, vmin=-5) """The plots above show 1) the distributions of average rewards (over all learning steps) over the 20 experiments per algorithm, 2) the action values for Q-learning, and 3) the action values for Double Q-learning. **[10 pts]** Explain why Double Q-learning has a higher average reward. Use at most four sentences, and discuss at least a) the dynamics of the algorithm, b) how this affects behaviour, and c) why the resulting behaviour yields higher rewards for Double Q-learning than for Q-learning.
n_lines = len(self.lines) self.param_names = self.lines[0].strip().split(' ') self.params = [] for idx in range(1,n_lines): self.params.append([float(num) for num in self.lines[idx].split()]) param_reader = ParameterFileReader(fname_param_in=fname_out) #------------------------------------------------------------------------------ #%% make density plots #norm_pareto_set = pareto_set #norm_pareto_set[:,0] = norm_pareto_set[:,0]/0.02 #norm_pareto_set[:,1] = norm_pareto_set[:,1]/27.2 #norm_pareto_set[:,2] = norm_pareto_set[:,2]/39.5 #norm_pareto_set[:,3] = norm_pareto_set[:,3]/7.2 #norm_pareto_set[:,4] = norm_pareto_set[:,4] #norm_pareto_set[:,5] = norm_pareto_set[:,5] # plot violin plot plt.violinplot(sim_results.pareto_set[:,0], showmeans=False, showmedians=True) plt.violinplot(sim_results.pareto_set[:,1], showmeans=False, showmedians=True) plt.violinplot(sim_results.pareto_set[:,2], showmeans=False, showmedians=True) plt.violinplot(sim_results.pareto_set[:,3], showmeans=False, showmedians=True) plt.violinplot(sim_results.pareto_set[:,4], showmeans=False, showmedians=True) plt.violinplot(sim_results.pareto_set[:,5], showmeans=False, showmedians=True) min_error = np.zeros((1,6)) min_error[:,1] = 1 min_error[:,2] = 1 min_error[:,3] = 1
# Load the second result set; `data1` is expected to be loaded already.
with open("max_lifetime_" + suffix + ".dat") as fp:
    data2 = json.load(fp)

lifetime, lifetime2 = data1["lifetime"], data2["lifetime"]
consumption1, consumption2 = data1["consumption"], data2["consumption"]

# Summary figures for both runs; the last lifetime entry is left out.
for _lifetimes in (lifetime, lifetime2):
    print("Minimum lifetime: ", min(_lifetimes[:-1]))
for _consumption in (consumption1, consumption2):
    print("Global consumption: ", 1000 * sum(_consumption))

# Side-by-side lifetime distributions of the two runs.
plt.violinplot(
    [lifetime[:-1], lifetime2[:-1]],
    [1, 3],
    widths=0.7,
    showmeans=True,
    showextrema=True,
    showmedians=True,
)
plt.show()

# Positions from the first run: every point but the last as small dots,
# the last point as a square, and a few selected points with larger markers.
xs, ys = data1["xs"], data1["ys"]
plt.scatter(xs[:-1], ys[:-1], s=20)
plt.scatter(xs[-1], ys[-1], s=50, marker="s")
for _index, _marker in ((11, "p"), (28, "h"), (39, "*"), (50, "^"), (61, "<"), (95, ">")):
    plt.scatter(xs[_index], ys[_index], s=100, marker=_marker)
def plot_correction_effect(dataframes: dict):
    """Draw two violins comparing uncorrected and corrected ET.

    For every entry of `dataframes` the mean of its ``E`` and of its
    ``E_cor`` attribute is taken and multiplied by 365. The first violin
    shows the distribution of the ``E`` values, the second that of the
    ``E_cor`` values.
    """
    frames = list(dataframes.values())
    uncorrected = [frame.E.mean() * 365 for frame in frames]
    corrected = [frame.E_cor.mean() * 365 for frame in frames]
    plt.violinplot([uncorrected, corrected])
# One colour per cell type.
cdict = dict(zip(cells, colors))
# Abbreviate each label class to its upper-case letters for the tick labels.
yclabels = [''.join(ch for ch in yc if ch.isupper()) for yc in yclass]

plt.figure(figsize=(len(yclass), 4))
patches = []
for cell in cells:
    C = []
    for yc in yclass:
        fname = 'train/mSelect'+yc+cell+'.pkl'
        # Close the file deterministically instead of leaking the handle.
        # NOTE(review): unpickling executes arbitrary code; only load
        # trusted files.
        with open(fname, 'rb') as fh:
            metrics = pickle.load(fh)
        this_C = metrics['C']
        # If all values are identical, nudge the first one by 1 %.
        # NOTE(review): presumably to avoid a degenerate violin for constant
        # data; this has no effect when the constant value is 0 - confirm.
        if np.array_equal(this_C, np.array([this_C[0]]*len(this_C))):
            this_C[0] -= this_C[0]*0.01
        C.append(this_C)

    # One set of violins per cell type, all drawn on the same axes.
    violin = plt.violinplot(C, widths=0.7, showmeans=True, showextrema=True)
    color = cdict[cell]
    patches.append(mpatches.Patch(color=color, alpha=0.5, label=cell))
    plt.setp(violin['bodies'], facecolor=color, edgecolor=color)
    for key in ['cbars', 'cmins', 'cmeans', 'cmaxes']:
        plt.setp(violin[key], color=color, linewidth='1', alpha=0.5)

plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2,
           borderaxespad=0., prop={'size':8})
plt.xticks(np.arange(len(yclass))+1, yclabels, fontsize=10, color='k')
plt.yscale('log')
plt.yticks(fontsize=10, color='k')
plt.xlabel('Label Class', fontsize=12, color='k')
plt.ylabel('Cost', fontsize=12, color='k')
plt.savefig('figures/CSelect'+ttype+'.svg', bbox_inches='tight')