def plot_results(self):
    """
    Plot the portfolio balance ("equity curve"), the period returns and
    the drawdowns as three stacked charts.

    The data is read from "output.csv" inside settings.OUTPUT_DIR, so that
    setting must be configured in the project settings.
    """
    sns.set_palette("deep", desat=0.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})

    csv_path = os.path.join(settings.OUTPUT_DIR, "output.csv")
    curve = pd.io.parsers.read_csv(
        csv_path, parse_dates=True, header=0, index_col=0
    )

    figure = plt.figure()
    figure.patch.set_facecolor("white")  # white outer background

    # (subplot position, y-axis label, CSV column), drawn top to bottom.
    panels = (
        (311, "Portfolio value", "Equity"),
        (312, "Period returns", "Returns"),
        (313, "Drawdowns", "Drawdown"),
    )
    for colour_idx, (position, label, column) in enumerate(panels):
        axis = figure.add_subplot(position, ylabel=label)
        curve[column].plot(ax=axis, color=sns.color_palette()[colour_idx])

    plt.show()
def _pred_corr(self, human, value='accuracy', method='corr'):
    """
    Score how consistent the model's results are with human results.

    The model values come from column ``value`` of ``self.acc_single()``,
    restricted to the rows flagged by that frame's ``sel`` column; ``human``
    is restricted with the same mask.

    :param human: human values, indexed with ``sel.values``
        (presumably a numpy array aligned row-for-row with
        ``acc_single()`` -- TODO confirm)
    :param value: column of ``acc_single()`` to compare
    :param method: ``'corr'`` -> ``(1 + corr) / 2``; ``'diff'`` ->
        ``1 - mean(|machine - human|)``; ``'euclidean'`` ->
        ``1 - sqeuclidean / len(machine)``
    :returns: DataFrame with columns ``consistency``, ``iter``, ``bootstrap``;
        one row per bootstrap sample when ``self.bootstrap`` is set,
        otherwise a single row whose ``bootstrap`` value is NaN
    :raises ValueError: if ``method`` is not one of the three names above
    """
    # NOTE(review): nname is not used below; the lookup is kept so that an
    # unknown model name still fails here exactly as before.
    nname = models.NICE_NAMES[self.model_name].lower()

    acc = self.acc_single()
    # np.float was only an alias of the builtin and has been removed from
    # NumPy, so cast with the builtin directly.
    acc[value] = acc[value].astype(float)
    sel = acc.sel.copy()
    acc = acc[sel][value]
    sns.set_palette(sns.color_palette('Set2')[1:])

    human = human[sel.values]

    if method == 'corr':
        f = lambda machine, human: (1 + stats.corr(machine, human)) / 2.
    elif method == 'diff':
        f = lambda machine, human: 1 - np.mean(np.abs(machine-human))
    elif method == 'euclidean':
        f = lambda machine, human: 1 - scipy.spatial.distance.sqeuclidean(machine, human) / len(machine)
    else:
        # ValueError is a subclass of Exception, so existing handlers
        # that catch Exception keep working.
        raise ValueError('Method {} not recognized'.format(method))

    corr = f(acc, human)

    rows = []
    if self.bootstrap:
        print('bootstrapping stats...')
        bf = stats.bootstrap_resample(acc, human, func=f, ci=None, seed=0)
        # One row per bootstrap sample: (point estimate, sample index, sample).
        c = np.vstack([np.repeat(corr, len(bf)), np.arange(len(bf)), bf])
        rows.extend(c.T.tolist())
    else:
        rows.append([corr, 0, np.nan])

    return pandas.DataFrame(rows, columns=['consistency', 'iter', 'bootstrap'])
def compare_month_daily(theme):
    """
    Compare monthly energy totals with daily-aggregated interval totals.

    Reads ``energy_info_monthly.csv`` from ``master_dir``, joins it with the
    frame returned by ``read_interval(theme)`` on building/year/month, writes
    the joined table to ``EUAS_interval_<theme>.csv`` and saves one
    comparison plot per building under ``input/FY/interval/plot``.

    :param theme: energy type used as the column prefix, i.e. the columns
        read are ``theme + ' (kBtu)'``
    """
    monthly_col = theme + ' (kBtu)'
    daily_col = theme + ' (kBtu)_daily_aggregated'
    keys = ['Building Number', 'year', 'month']

    df_month = pd.read_csv(master_dir + 'energy_info_monthly.csv')
    df_month.rename(columns={'Electricity (kBtu)': 'Electric (kBtu)'},
                    inplace=True)
    df_month = df_month[keys + [monthly_col]]

    df_e = read_interval(theme)
    df_e.rename(columns={monthly_col: daily_col}, inplace=True)

    df_e_all = pd.merge(df_month, df_e, on=keys, how='inner')
    df_e_all.reset_index(inplace=True)
    # First day of each (year, month) serves as the x coordinate.
    df_e_all['Date'] = df_e_all.apply(
        lambda r: datetime.strptime(
            '{0}-{1}-{2}'.format(int(r['year']), int(r['month']), 1),
            "%Y-%m-%d"),
        axis=1)
    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    df_e_all.sort_values(by=keys, inplace=True)
    df_e_all.to_csv(master_dir + 'EUAS_interval_{0}.csv'.format(theme),
                    index=False)

    gr = df_e_all.groupby('Building Number')
    sns.set_style("whitegrid")
    sns.set_palette("Set2", 8)
    sns.set_context("talk", font_scale=1)
    for name, group in list(gr):
        print(name)
        group.reset_index(inplace=True)
        group.set_index(pd.to_datetime(group['Date']), inplace=True)
        # Sum only the two compared columns.  The previous lookup used
        # theme + '(kBtu)_daily_aggregated' (missing space), a key that
        # never exists in the merged frame.
        ratio = round(group[monthly_col].sum() / group[daily_col].sum(), 3)
        line1, = plt.plot(group.index, group[monthly_col]/1e3, marker="o")
        line2, = plt.plot(group.index, group[daily_col]/1e3, marker="o")
        plt.title('Building: {0}, (total EUAS/total Skysparke) = {1}'.format(name, ratio))
        plt.legend([line1, line2], ['monthly', 'daily aggregated'],
                   loc='center left', bbox_to_anchor=(1, 0.5),
                   prop={'size':13})
        plt.ylabel('Monthly Total {0} (Million Btu)'.format(theme))
        plt.gca().set_ylim(bottom=0)
        path = os.getcwd() + '/input/FY/interval/plot/{0}_{1}.png'.format(name, theme)
        P.savefig(path, dpi = my_dpi, figsize = (2000/my_dpi, 500/my_dpi), bbox_inches='tight')
        plt.close()
    print('end')
def UseSeaborn(palette='deep'):
    """Switch plotting over to seaborn with this module's preferred look.

    palette -- name passed to ``sns.set_palette``.  The special value
    ``'xkcd'`` instead builds the palette from the module-level
    ``xkcdcolors`` via ``sns.xkcd_palette``.
    """
    import seaborn as sns

    # White grid base, larger fonts, framed legend.
    sns.set(style='whitegrid', font_scale=1.5, rc={'legend.frameon': True})
    # 'ticks' is applied after (and therefore on top of) 'whitegrid'.
    sns.set_style('ticks')
    # Tick marks point into the axes.
    sns.set_style({"xtick.direction": "in", "ytick.direction": "in"})

    chosen = sns.xkcd_palette(xkcdcolors) if palette == 'xkcd' else palette
    sns.set_palette(chosen)

    # Work-around for the invisible-marker bug: give markers a thin edge.
    sns.set_context(rc={'lines.markeredgewidth': 0.1})
def main():
    """Plot estimated GFLOPs per matrix for every ``run_Spmv*`` result file.

    Each regular file in the current directory whose name starts with
    ``run_Spmv`` is parsed by ``RunResult``.  Results are grouped by project,
    laid out as one column per project indexed by matrix, and written as a
    bar chart to ``est_gflops.pdf``.
    """
    run_results = [
        RunResult(entry)
        for entry in os.listdir(".")
        if os.path.isfile(entry) and entry.startswith("run_Spmv")
    ]

    # Columns: 0 = project, 1 = matrix, 2 = estimated GFLOPs.
    records = pd.DataFrame(
        [[r.prj, r.matrix, r.gflops_est] for r in run_results]
    )

    project_names = []
    per_project_series = []
    for project, rows in records.groupby(0):
        per_project_series.append(rows.set_index(1)[2])
        project_names.append(project)

    table = pd.concat(per_project_series, axis=1)
    table.columns = project_names

    sns.set_style("white")
    sns.set_palette(sns.color_palette("cubehelix", 13))

    bars = table.plot(kind="bar")
    sns.despine()

    figure = bars.get_figure()
    figure.set_size_inches(15, 15)
    figure.tight_layout()
    figure.savefig("est_gflops.pdf")
def PlotBBNResponseCurve(self, bbnResponseProb, measure, unit=[], filePath=[], attn=False):
    """
    Plots one response measure from the BBN analysis as a line plot.

    :param bbnResponseProb: DataFrame results of Bayesian response analysis;
        the column labelled ``measure`` is plotted against the frame's index
    :type bbnResponseProb: pandas DataFrame
    :param measure: Bayesian response analysis measure ['resProb', 'vocalResMag', 'vocalResMag_MLE', 'effectSize', 'effectSize_MLE', 'spontRate', 'spontRateSTD', 'responseLatency', 'responseLatencySTD', 'responseDuration']
    :type measure: integer [0-9]
    :param unit: Unique identifier for cell, used as the plot title and in
        the saved file name
    :type unit: str
    :param filePath: Path (appended to self.dirPath) where the figure is
        saved; when empty the figure is shown instead
    :type filePath: str
    :param attn: when true the rows are plotted in reversed order
    :type attn: bool
    :returns: Handle to plot
    """
    measureName = ['resProb', 'vocalResMag', 'vocalResMag_MLE', 'effectSize', 'effectSize_MLE', 'spontRate', 'spontRateSTD', 'responseLatency', 'responseLatencySTD', 'responseDuration']
    label = measureName[measure]

    sns.set_palette(sns.color_palette("bright", 8))
    sns.set_context(rc={"figure.figsize": (5, 3)})
    sns.set_style("white")
    sns.set_style("ticks")

    # attn selects the same rows walked back to front.
    row_sel = slice(None, None, -1) if attn else slice(None)
    ax = bbnResponseProb.loc[row_sel, measure].fillna(0).plot(figsize=(6,4))

    sns.despine()
    plt.grid(False)
    plt.title(unit, fontsize=14)
    plt.xlabel('SPL (dB)', fontsize=12)
    plt.ylabel(label, fontsize=12)
    plt.ylim(0.5,1.0)

    if len(filePath) > 0:
        plt.savefig(self.dirPath + filePath + 'bbn_'+label+'_'+unit+'.pdf')
        plt.close()
    else:
        plt.show()
    return ax
def clust_stability(log2_expdf_gene, iterations=16):
    """Measure how stable the clustered column order is as more genes are used.

    For every gene count in ``range(100, 1000, step)`` the top genes are
    selected with ``plot_PCA`` and clustered with ``clust_heatmap``.  The
    resulting column order is compared, via ``difflib.SequenceMatcher``, with
    the previous order and with the order obtained for 100 genes.  Both ratio
    series are drawn as bar plots and saved as ``clustering_stability.pdf``
    under the module-level ``filename``.

    :param log2_expdf_gene: expression DataFrame whose columns are selected
        with the names returned by ``plot_PCA`` (presumably cells x genes --
        TODO confirm)
    :param iterations: divisor used to derive the step between gene counts
    :returns: list of ``(running_ratio, ratio_to_first)`` tuples, one per
        gene count after the first
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    sns.set(context='poster', font_scale = 1)
    sns.set_palette("RdBu_r")
    stability_ratio = []
    total_genes = len(log2_expdf_gene.columns.tolist())  # not used below
    end_num = 1000
    # Gene counts to test: 100, 100 + step, ... below end_num.
    iter_list = range(100,int(round(end_num)),int(round(end_num/iterations)))
    for gene_number in iter_list:
        title= str(gene_number)+' genes plot.'
        top_pca = plot_PCA(log2_expdf_gene, num_genes=gene_number, title=title)
        top_pca_by_gene = log2_expdf_gene[top_pca]
        top_pca_by_cell = top_pca_by_gene.transpose()  # not used below
        cell_linkage, plotted_df_by_gene, col_order = clust_heatmap(top_pca, top_pca_by_gene, num_to_plot=gene_number, title=title)
        if gene_number == 100:
            # First pass: this order is both the running (s1) and the
            # fixed (s0) reference.
            s1 = col_order
            s0 = col_order
        else:
            s2= col_order
            sm_running = difflib.SequenceMatcher(None,s1,s2)
            sm_first = difflib.SequenceMatcher(None,s0,s2)
            stability_ratio.append((sm_running.ratio(), sm_first.ratio()))
            # The current order becomes the reference for the next pass.
            s1=col_order
        plt.close()
    # The first gene count has no comparison, so it is dropped from the x axis.
    x= iter_list[1:]
    f, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
    y1= [m[0] for m in stability_ratio]
    y2= [m[1] for m in stability_ratio]
    sns.barplot(x, y1, palette="RdBu_r", ax=ax1)
    ax1.set_ylabel('Running ratio (new/last)')
    sns.barplot(x, y2, palette="RdBu_r", ax=ax2)
    ax2.set_ylabel('Ratio to 100')
    plt.savefig(os.path.join(filename,'clustering_stability.pdf'), bbox_inches='tight')
    plt.show()
    plt.close()
    return stability_ratio
def plotfitstats(p1a, p2a, p1d, p2d, rmsda, biasa, title, fname):
    """Save an image plot of ``rmsda`` with contour lines of ``biasa``.

    :param p1a: values of parameter 1 (x axis)
    :param p2a: values of parameter 2 (y axis)
    :param p1d: default value of parameter 1, shown in the title
    :param p2d: default value of parameter 2, shown in the title
    :param rmsda: 2-D array of RMSE values; plotted transposed, so it is
        presumably indexed ``[p1, p2]`` -- TODO confirm
    :param biasa: 2-D array of bias values with the same layout as ``rmsda``
    :param title: text prefix for the axes title
    :param fname: path the figure is saved to
    """
    sns.set_style("white", {"image.cmap": "Greens"})
    # The earlier extra call to sns.set_color_palette was dropped: that name
    # is not part of the current seaborn API and set_palette already selects
    # the same palette.
    sns.set_palette("pastel")

    x, y = np.meshgrid(p1a, p2a)
    fig, ax = plt.subplots()

    p = ax.pcolor(x, y, rmsda.T)
    fig.colorbar(p, ax=ax)

    cnt = ax.contour(
        x,
        y,
        biasa.T,
        colors="black",
        origin="lower",
        # contour's keyword is the plural "linewidths".
        linewidths=3,
    )
    plt.clabel(cnt, inline=1, fontsize=10, fmt="%.2f")

    ax.set_title(
        "%s, p1_default = %.3g, p2_default = %.3g\nBias (line) and RMSE (image) -- min(RMSE) = %.1f"
        % (title, p1d, p2d, np.min(rmsda))
    )
    ax.set_xlabel("p1")
    ax.set_ylabel("p2")
    fig.savefig(fname)
def _print_results(scores, score_errs, metric, out_folder):
    """Save one error-bar plot of classifier scores per alpha value.

    :param scores: mapping ``alpha -> {classifier name: score}``
    :param score_errs: mapping ``alpha -> {classifier name: score error}``
        with the same keys as ``scores``
    :param metric: metric name, used for labels and in the file name
    :param out_folder: directory receiving ``alpha_<a>_<metric>.png``
    :raises KeyError: if ``score_errs`` lacks an entry present in ``scores``
    """
    sns.set_palette('deep', desat=.6)
    sns.set_context(rc={'figure.figsize': (8, 4)})
    ymin, ymax = 0., 1.
    # Iterating the dict directly works on both Python 2 and 3
    # (iterkeys is Python 2 only).
    for a in scores:
        plot_path = os.path.join(out_folder,
                                 'alpha_%.1f_%s.png' % (a, metric))
        # Fix one classifier order and look up both dicts by name so the
        # scores and their errors cannot be paired up in different orders.
        names = list(scores[a])
        idx = np.arange(len(names))
        fig = pl.figure()
        plt.ylim(ymin, ymax)
        plt.xlabel('Classifier')
        plt.ylabel(metric)
        plt.title('%s for alpha=%f' % (metric, a))
        plt.xticks(idx, names)
        plt.grid(True)
        # plt.hold(True) was dropped: holding is matplotlib's standard
        # behaviour and the function no longer exists in current releases.
        plt.errorbar(x=idx,
                     y=[scores[a][name] for name in names],
                     yerr=[score_errs[a][name] for name in names])
        fig.savefig(plot_path)
        plt.close(fig)
def clust_heatmap(gene_list, df_by_gene, num_to_plot=None, title='', plot=False, label_map=False):
    """Cluster the selected genes, save the clustermap and return the linkage.

    The clustering ``metric`` and ``method`` and the output directory
    ``filename`` are read from module-level names.

    :param gene_list: ordered gene names; the first ``num_to_plot`` are used
    :param df_by_gene: DataFrame with one column per gene (the index is used
        as the list of cells)
    :param num_to_plot: number of genes to include; ``None`` (default) means
        all of ``gene_list``
    :param title: heatmap title; its first two words also form the file name
    :param plot: when true, call ``plt.show()`` before saving
    :param label_map: optional mapping ``cell -> sequence`` whose first item
        is the tick-label colour for that cell
    :returns: ``(cell_linkage, plotted_frame, col_order)``
    """
    # The default used to be len(gene_list) in the signature, which Python
    # evaluates once at definition time against a *global* gene_list rather
    # than the argument; resolve it per call instead.
    if num_to_plot is None:
        num_to_plot = len(gene_list)

    if num_to_plot >175:
        sns.set(context= 'poster', font_scale = 0.65/(num_to_plot/100))
    else:
        sns.set(context= 'poster', font_scale = .80, font ='Verdana')
    sns.set_palette('RdBu',4,0.1)

    cell_list = df_by_gene.index.tolist()
    plotted = df_by_gene[gene_list[0:num_to_plot]]
    cg = sns.clustermap(plotted.transpose(), metric=metric, method=method, z_score=0, figsize=(30, 25))
    col_order = cg.dendrogram_col.reordered_ind
    cg.ax_heatmap.set_title(title)

    if label_map:
        # Colour each x tick label by the cell it names, in clustered order.
        Xlabs = [cell_list[i] for i in col_order]
        colors = [label_map[cell][0] for cell in Xlabs]
        for xtick, color in zip(cg.ax_heatmap.get_xticklabels(), colors):
            xtick.set_color(color)
            xtick.set_rotation(270)

    if plot:
        plt.show()

    cell_linkage = cg.dendrogram_col.linkage

    if title != '':
        save_name = '_'.join(title.split(' ')[0:2])
        cg.savefig(os.path.join(filename, save_name+'_heatmap.pdf'), bbox_inches='tight')
    else:
        cg.savefig(os.path.join(filename,'Non_group_heatmap_z1_deleted.pdf'), bbox_inches='tight')
    plt.close()
    return cell_linkage, plotted, col_order
def run_plot(num_pts=100, maximize=False, interval_secs=5, xaxis_fmt='%I:%M'):
    """Runs the interactive plot of potato load.

    Polls ``get_loads()`` every ``interval_secs`` seconds, keeps the last
    ``num_pts`` samples per load series and redraws all series against
    wall-clock time.  The loop never terminates on its own.
    """
    matplotlib.rcParams['toolbar'] = 'None'
    if maximize:
        manager = plt.get_current_fig_manager()
        manager.resize(*manager.window.maxsize())
    plt.gcf().canvas.set_window_title(' ')
    plt.xkcd()
    plt.ion()
    plt.show()

    # One bounded history per load series, seeded with the first reading.
    histories = [collections.deque([first], num_pts) for first in get_loads()]
    stamps = collections.deque([datetime.datetime.now()], num_pts)
    seaborn.set_palette('Set2', len(histories))

    while True:
        for history, reading in zip(histories, get_loads()):
            history.append(reading)
        stamps.append(datetime.datetime.now())

        plt.clf()
        for history in histories:
            plt.plot(stamps, history)
        plt.title('AML Lab Cluster Loads', fontsize=60)
        axis = plt.gca().xaxis
        axis.set_major_formatter(dates.DateFormatter(xaxis_fmt))
        plt.draw()
        time.sleep(interval_secs)
def show_poisson_views():
    """Show the PMF, CDF and PPF of a Poisson distribution with mean 10."""
    sns.set_palette(sns.color_palette('muted'))

    fig, (pmf_ax, cdf_ax, ppf_ax) = plt.subplots(3, 1)

    counts = np.arange(25)
    poisson = stats.poisson(10)
    setFonts(12)

    # Probability mass function.
    pmf_ax.plot(counts, poisson.pmf(counts), 'x-')
    pmf_ax.set_title('Poisson distribution', fontsize=24)
    pmf_ax.set_xticklabels([])
    pmf_ax.set_ylabel('PMF (X)')

    # Cumulative distribution function.
    cdf_ax.plot(counts, poisson.cdf(counts))
    cdf_ax.set_xlabel('X')
    cdf_ax.set_ylabel('CDF (X)')

    # Percent point function (inverse CDF) over probabilities in [0, 1].
    probabilities = np.linspace(0, 1, 100)
    ppf_ax.plot(probabilities, poisson.ppf(probabilities))
    ppf_ax.set_xlabel('X')
    ppf_ax.set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def shifted_normal():
    '''PDF, scatter plot, and histogram.

    Draws four normal probability density functions, saves that figure as
    "Normal_Distribution_PDF.png" in the module-level ``outDir``, then shows
    a scatter plot of 500 normally distributed random numbers.
    '''

    # Plot a normal distribution: "Probability density functions"
    myMean = [0,0,0,-2]
    mySD2 = [0.2,1,5,0.5]  # variances; the square root is taken below
    t = frange(-5,5,0.02)
    sns.set_palette('husl', 4)
    # Raw string: in the former non-raw literal "\t" was a real TAB
    # character and "\m", "\;", "\s" were invalid escape sequences.
    label_fmt = r'$\mu={0}, \; \sigma={1:3.1f}$'
    for mu,sigma in zip(myMean, np.sqrt(mySD2)):
        y = stats.norm.pdf(t, mu, sigma)
        plt.plot(t,y, label=label_fmt.format(mu,sigma))

    plt.legend()
    plt.xlim([-5,5])
    plt.title('Normal Distributions')

    outFile = 'Normal_Distribution_PDF.png'
    saveTo = os.path.join(outDir, outFile)
    plt.savefig(saveTo, dpi=200)
    print('OutDir: {0}'.format(outDir))
    print('Figure saved to {0}'.format(outFile))
    plt.show()

    # Generate random numbers with a normal distribution
    myMean = 0
    mySD = 3
    numData = 500
    data = stats.norm.rvs(myMean, mySD, size = numData)
    plt.scatter(np.arange(len(data)), data)
    plt.title('Normally distributed data')
    plt.xlim([0,500])
    plt.ylim([-10,10])
    plt.show()
    plt.close()
def plot_total_eui(theme):
    """Plot the yearly trend of three building groups and save it.

    The three frames returned by ``read_eui_cnt`` (all buildings, with ECM,
    without ECM) are inner-joined on 'Fiscal Year', limited to years before
    2016, and every remaining column is drawn as one line.  The figure is
    written to ``plot_FY_annual/ave_eui.png`` under the working directory.

    theme -- forwarded to ``read_eui_cnt`` and used to pick the y label
    """
    sns.set_style("whitegrid")
    sns.set_palette("Set2")
    sns.set_context("talk", font_scale=1.5)

    frames = [
        read_eui_cnt('', theme, 'All Building'),
        read_eui_cnt('_wecm', theme, 'Building with ECM'),
        read_eui_cnt('_woutecm', theme, 'Building without ECM'),
    ]
    merged = reduce(
        lambda left, right: pd.merge(left, right, on='Fiscal Year',
                                     how='inner'),
        frames)
    merged = merged[merged['Fiscal Year'] < 2016]

    series_names = list(merged)
    series_names.remove('Fiscal Year')

    plt.axes()
    handles = []
    for column in series_names:
        handle, = plt.plot(merged['Fiscal Year'], merged[column],
                           ls='-', lw=2, marker='o')
        handles.append(handle)
    plt.legend(handles, series_names, loc='center left',
               bbox_to_anchor=(1, 0.5), prop={'size':13})

    upper = 90
    plt.ylim((0, upper))
    # Grey band across the full height between 2004.5 and 2006.5.
    plt.fill_between([2004.5, 2006.5], 0, upper, facecolor='gray', alpha=0.2)
    plt.title('GSA Portfolio (A + I) Average EUI Trend')
    plt.xlabel('Fiscal Year')
    plt.ylabel(lb.ylabel_dict[theme])
    P.savefig(os.getcwd() + '/plot_FY_annual/ave_eui.png', dpi = 300, bbox_inches='tight')
    plt.close()
def plot_train_vs_validation(scores, amt):
    """Show training and validation accuracy curves on one axes.

    scores -- mapping with keys 'training' and 'validation'; each value is a
              sequence of (index, correct, total) triples
    amt    -- row count quoted in the plot title
    """
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})

    def as_frame(triples):
        # index <- item 0, columns <- items 1 and 2, plus their ratio.
        frame = pd.DataFrame(
            [(row[1], row[2]) for row in triples],
            index=[row[0] for row in triples],
            columns=['correct', 'total']
        )
        frame['percent'] = frame['correct'] / frame['total']
        return frame

    training = as_frame(scores['training'])
    validation = as_frame(scores['validation'])

    figure = plt.figure(figsize=(12, 6), dpi=120)
    ax = figure.add_subplot(111)
    training.plot(kind='line', label='training', ax=ax, y='percent')
    validation.plot(kind='line', label='validation', ax=ax, y='percent')

    drawn = ax.get_lines()
    ax.legend(drawn, [entry.get_label() for entry in drawn], loc='lower right')
    ax.set_title('train vs validation for 80/20 split on {} rows'.format(amt))
    plt.show()
def create_plot(self):
    """Write one ``<param>_calib.png`` per AHRS parameter to ``self.plots_path``.

    For every key of ``self.data`` a figure is created in which each floor's
    values (``self.data[param][floor]``) are plotted as dots against
    ``self.times[floor]``.  Reading the per-floor data happens while holding
    ``self.lock``.
    """
    print(self.__class__.__name__ + ": updating plot.")
    # xfmt = md.DateFormatter('%Y-%m-%d %H:%M')
    xfmt = md.DateFormatter('%H:%M')
    # The palette has to be active before the axes are created: an axes
    # takes its colour cycle at creation time, so setting it afterwards
    # (as was done inside the loop) left the first figure without it.
    sns.set_palette("husl", 18)
    for ahrs_param in self.data.keys():
        fig, ax = plt.subplots(figsize=(16, 6))
        ax.set_title("AHRS {} Calibration on DU{}\n{}".format(
            ahrs_param, self.du, datetime.utcnow()))
        ax.set_xlabel("UTC time")
        ax.xaxis.set_major_formatter(xfmt)
        ax.set_ylabel(ahrs_param)
        with self.lock:
            for floor in sorted(self.data[ahrs_param].keys()):
                ax.plot(
                    self.times[floor],
                    self.data[ahrs_param][floor],
                    marker='.',
                    linestyle='none',
                    label="Floor {}".format(floor))
        # Legend sits outside the axes, so it is passed to savefig as an
        # extra artist to be included in the tight bounding box.
        lgd = plt.legend(
            bbox_to_anchor=(1.005, 1), loc=2, borderaxespad=0.)
        fig.tight_layout()
        plt.savefig(
            os.path.join(self.plots_path, ahrs_param + '_calib.png'),
            bbox_extra_artists=(lgd, ),
            bbox_inches='tight')
        plt.close('all')
def box_and_whisker(ax, med,sem,std,yloc,boxw=0.2,legend_lab_ea_color=False):
    """Draw a horizontal box-and-whisker glyph at height ``yloc`` on ``ax``.

    med  -- median (black vertical bar)
    sem  -- (low, high) offsets of the error on the median (box)
    std  -- (low, high) offsets of the standard deviation (whiskers)
    boxw -- half height of the box and of the whisker caps
    legend_lab_ea_color -- when true, attach one legend label per colour
    """
    sem_low, sem_hi = sem
    std_low, std_hi = std

    # colours
    sns.set_palette('colorblind')
    palette = sns.color_palette()
    c_med = 'k'          # median
    c_sem = palette[2]   # error on median box
    c_std = palette[0]   # std dev of pdf

    # Only one line per colour carries a label, and only when requested.
    if legend_lab_ea_color:
        print('-------------------------- adding legend -------------')
        med_kw = {'label': 'Median'}
        sem_kw = {'label': 'Std. Error of Median'}
        std_kw = {'label': 'Std. Dev. of PDF'}
    else:
        med_kw, sem_kw, std_kw = {}, {}, {}

    y_lo, y_hi = yloc-boxw, yloc+boxw
    sem_left, sem_right = med-sem_low, med+sem_hi
    std_left, std_right = med-std_low, med+std_hi

    # median bar
    ax.plot([med]*2, [y_lo, y_hi], c=c_med, **med_kw)
    # std error box: left, right, bottom, top
    ax.plot([sem_left]*2, [y_lo, y_hi], c=c_sem)
    ax.plot([sem_right]*2, [y_lo, y_hi], c=c_sem)
    ax.plot([sem_left, sem_right], [y_lo]*2, c=c_sem)
    ax.plot([sem_left, sem_right], [y_hi]*2, c=c_sem, **sem_kw)
    # std dev whiskers and caps
    ax.plot([std_left, sem_left], [yloc]*2, c=c_std)
    ax.plot([sem_right, std_right], [yloc]*2, c=c_std)
    ax.plot([std_left]*2, [y_lo, y_hi], c=c_std)
    ax.plot([std_right]*2, [y_lo, y_hi], c=c_std, **std_kw)
def multipleLineGraph(cls, xdata, ydataList, ydataLabels, fileLocation, xlabel='', ylabel=''):
    """Draw several lines over a shared x axis and save the figure.

    Lines are labelled, and a legend in reversed order is added, only when
    the number of labels equals the number of lines.  The figure is handed
    to ``cls.saveFigure`` together with ``fileLocation``.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    labelled = len(ydataList) == len(ydataLabels)

    sns.set_style("darkgrid")
    sns.set_palette(sns.color_palette("Blues_d", len(ydataLabels)))

    figure, axes = plt.subplots()
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)

    for position, ydata in enumerate(ydataList):
        extra = {'label': ydataLabels[position]} if labelled else {}
        plt.plot(xdata, ydata, **extra)

    if labelled:
        handles, labels = axes.get_legend_handles_labels()
        axes.legend(handles[::-1], labels[::-1], prop={'size': 11}, loc=1)

    cls.saveFigure(figure, fileLocation)
def plot_energy_temp_byyear_2015(theme):
    """Plot energy use against temperature for fiscal year 2015, one line per building.

    Reads every ``*_<title>.csv`` under ``csv_FY/energy_temperature_select``
    (``title`` being ``title_dict[theme]``), keeps fiscal year 2015, orders
    the buildings by their summed ``theme`` value (largest first) and saves
    the figure under ``plot_FY_weather``.

    :param theme: column plotted on the y axis; also the key into
        ``title_dict`` and ``ylabel_dict``
    """
    sns.set_palette(sns.color_palette('Set2', 27))
    sns.mpl.rc("figure", figsize=(10,5))
    cat_df = pd.read_csv(os.getcwd() + '/csv_FY/join_cal/join_2015.csv')
    cat_dict = dict(zip(cat_df['Building Number'].tolist(),
                        cat_df['Cat'].tolist()))
    filelist = glob.glob(os.getcwd() + '/csv_FY/energy_temperature_select/*_{0}.csv'.format(title_dict[theme]))

    def getname(dirname):
        # The building id is the 8 characters following "select" and the
        # path separator after it.
        id1 = dirname.find('select') + len('select') + 1
        return dirname[id1: id1 + 8]

    buildings = [getname(f) for f in filelist]
    dfs = [pd.read_csv(csv) for csv in filelist]
    dfs = [df[df['Fiscal Year'] == 2015] for df in dfs]
    euis = [round(df[theme].sum(), 2) for df in dfs]

    # Largest total first, so the legend is ordered by consumption.
    sorted_bedf = sorted(zip(buildings, euis, dfs), key=lambda x: x[1],
                         reverse=True)
    buildings = [x[0] for x in sorted_bedf]
    euis = [x[1] for x in sorted_bedf]
    dfs = [x[2] for x in sorted_bedf]

    lines = []
    for df in dfs:
        # DataFrame.sort was removed from pandas; sort_values returns a new
        # sorted frame, which also avoids sorting a filtered slice in place.
        df = df.sort_values(['temperature', theme])
        line, = plt.plot(df['temperature'], df[theme])
        lines.append(line)

    labels = ['{0}: {1} kBtu/sq.ft*year_{2}'.format(b, e, cat_dict[b]) for (b, e) in zip(buildings, euis)]
    plt.title('Temperature-{0} plot: 27 Building, Fiscal Year 2015'.format(title_dict[theme]))
    plt.xlabel('Temperature / F', fontsize=12)
    plt.ylabel(ylabel_dict[theme], fontsize=12)
    plt.legend(lines, labels, bbox_to_anchor=(0.2, 1), prop={'size':6})
    plt.ylim((0, 9))
    P.savefig(os.getcwd() + '/plot_FY_weather/27building_{0}_2015_trunc.png'.format(theme), dpi = 150)
    plt.close()
    return
def plot_vcf_stats(vcf_stats_df, ax=None, palette_name='husl', legend_title=None, figsize=(15, 5)):
    """Plot a DataFrame of VCF stats (as produced by the function vcf_stats)
    and draw vertical lines that separate the chromosomes.

    The first index level is taken as the chromosome and the first level of
    the column multi-index as the sample name.  Returns a matplotlib axes.
    """
    sns.set_palette(palette_name)
    sns.set_style('white')

    if not ax:
        _, ax = plt.subplots(1)

    rows_per_chrom = (vcf_stats_df.index.get_level_values(0)
                      .value_counts()
                      .sort_index())
    chrom_ends = rows_per_chrom.cumsum()
    # Centre each chromosome label within its own section of the plot.
    label_positions = chrom_ends - (rows_per_chrom / 2)

    vcf_stats_df.plot(ax=ax, figsize=figsize, linewidth=2)

    samples = [column[0] for column in vcf_stats_df.columns]
    ax.legend(samples, ncol=2, loc='best', title=legend_title)

    ax.set_xticks(label_positions)
    ax.set_xticklabels(label_positions.index)
    ax.set_xlabel('Chromosome')

    for boundary in chrom_ends:
        ax.axvline(boundary, color='#d9d9d9')

    sns.despine(left=True)
    return ax
def graphMetricDistn(self,metric,normType,plotType,resiType,save):
    # Distribution plot of a density metric per atom, one curve per dataset
    # (and per residue type when a list of types is given).
    #   plotType is 'hist' or 'kde'
    #   resiType is 'all' or a list of residue types
    #   save is a Boolean: write a png to self.outputDir instead of showing
    # Returns a message string for an unknown plotType, otherwise None.
    if plotType not in ('hist','kde'):
        return 'Unknown plotting type selected.. cannot plot..'

    # check metric valid
    if self.checkMetricPresent(self.atomList[0],metric,normType) is False:
        return

    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (10, 6)})
    fig = plt.figure()

    for dataset in range(self.getNumDatasets()):
        # Collect (legend label, atoms) pairs to draw for this dataset.
        if resiType == 'all':
            selections = [('Dataset {}'.format(dataset), self.atomList)]
        else:
            selections = [
                ('Dataset {},{}'.format(dataset, res),
                 [atm for atm in self.atomList if atm.basetype == res])
                for res in resiType
            ]
        for legend_label, atoms in selections:
            values = [atm.densMetric[metric][normType]['values'][dataset]
                      for atm in atoms]
            self.plotHist(plotType, values, legend_label)

    plt.legend()
    plt.xlabel('{} D{} per atom'.format(normType,metric))
    plt.ylabel('Frequency')
    plt.title('{} D{} per atom, residues: {}'.format(normType,metric,resiType))

    if save:
        fig.savefig('{}{}_D{}_{}.png'.format(self.outputDir,normType,metric,resiType))
    else:
        plt.show()
def program_eui():
    """Plot the EUI trend of every ECM program (except 'Energy Star').

    Buildings are assigned to programs via ``ecm_program_tidy.csv`` in
    ``master_dir``; for each program the matching rows of the 'good_energy'
    table are passed to ``plot_eui_trend``.  The figure is saved as
    ``plot_FY_annual/program_trend.png`` under the working directory.

    :raises ValueError: if 'Energy Star' is not among the programs
    """
    df_eng = read_energy('good_energy')
    df_pro = pd.read_csv(master_dir + 'ecm_program_tidy.csv')
    programs = list(set(df_pro['ECM program'].tolist()))
    sns.set_style("whitegrid")
    sns.set_palette("Set2", 8)
    sns.set_context("talk", font_scale=1.5)
    bx = plt.axes()
    lines = []
    labels = []
    programs.remove('Energy Star')
    for p in programs:
        buildings = df_pro[df_pro['ECM program'] == p]['Building Number'].unique()
        df_temp = df_eng.copy()
        df_temp = df_temp[df_temp['Building Number'].isin(buildings)]
        df_temp = df_temp[['Building Number', 'Fiscal Year', 'Gross Sq.Ft', 'Total Electric + Gas']]
        line = plot_eui_trend(df_temp, bx)
        lines.append(line)
        labels.append('{0} (n={1})'.format(p, len(df_temp['Building Number'].unique())))
    plt.title('Energy Program EUI Trend')
    plt.ylabel(lb.ylabel_dict['eui'])
    plt.xlabel('Fiscal Year')
    plt.gca().set_ylim(bottom=0)
    # get_ylim() returns (bottom, top).  Passing that whole tuple as the
    # upper curve made the shaded region a wedge rising from 0 to the top
    # instead of a full-height band, so only the top value is used.
    ylimit = bx.get_ylim()[1]
    plt.fill_between([2004.5, 2006.5], 0, ylimit, facecolor='gray', alpha=0.2)
    plt.legend(lines, labels, loc='center left', bbox_to_anchor=(1, 0.5), prop={'size':13})
    P.savefig(os.getcwd() + '/plot_FY_annual/program_trend.png', dpi = 300, bbox_inches='tight')
    plt.close()
def iv(db, sample, mXYs=(('mesa', 1, 1),), inst='suss_test', xlim=(-1.0, 1.0), normalize=False, console=False, legend=True, window_length=9, polyoder=4, deriv=0):
    """Plot one V-I curve per (mesa, X, Y) entry of ``mXYs`` and show the figure.

    For every entry the matching ``VI_param_id`` values are looked up through
    ``db`` and all (V, I) rows for those ids are fetched.  With ``normalize``
    the current column is divided by its largest absolute value.  The rows
    are then passed through ``al.savgol_vis`` with ``window_length``,
    ``polyoder`` and ``deriv``; ``deriv`` also selects the y-axis label.
    With ``console`` the entry and its data are printed as well.
    """
    sns.set_palette('coolwarm', len(mXYs))

    param_query = 'SELECT VI_param_id FROM v05_VI_param ' \
                  'WHERE sample=%s AND mesa=%s AND X=%s AND Y=%s AND inst=%s'

    for mesa, X, Y in mXYs:
        param_ids = db.q_col_abs(param_query, (sample, mesa, X, Y, inst,))
        id_list = ','.join(map(str, param_ids))
        curve = db.q_all_abs(
            'SELECT V, I FROM VI WHERE VI_param_id in ({})'.format(id_list),
            None)

        if normalize:
            curve = np.array(curve)
            curve[:, 1] /= abs(curve[:, 1]).max()
        curve = al.savgol_vis(curve, window_length, polyoder, deriv)

        if console:
            print(mesa, X, Y)
            print(curve)

        # Transpose the rows into a V sequence and an I sequence.
        plt.plot(*zip(*curve), '.-')

    plt.xlim(xlim)
    plt.xlabel('Voltage (V)')
    ylabel = ('Current (A)' if deriv == 0
              else 'dI/dV (A/V)' if deriv == 1
              else 'd^I/d^V (A/V^2)')
    plt.ylabel(ylabel)

    if legend:
        pass  # TODO

    plt.show()
def plot_progress(self):
    """Save the hyperopt progress figure to ``self.hyperopt_progress_plot``.

    The upper panel shows the outer loss of every iteration
    (``self.trials.losses()``), the lower panel its running minimum.  The
    upper y axis is logarithmic when ``self.outer_loss_function`` is
    'logloss'.
    """
    try:
        sns.set_palette('Set2')
        sns.set_style("darkgrid", {"axes.facecolor": ".95"})
    except Exception as err:
        # Styling is best-effort, so plotting continues regardless.  Unlike
        # the former bare "except", this no longer swallows
        # KeyboardInterrupt / SystemExit.
        self.logger.debug('seaborn styling skipped: %s', err)

    self.logger.debug('plotting...')
    losses = self.trials.losses()

    def integer_xticks():
        # At most 11 integer ticks spread over the current x range.
        low, high = plt.xlim()
        ticks = [int(i) for i in np.linspace(low, high, min(len(losses), 11))]
        plt.xticks(ticks, ticks)

    plt.figure(figsize=(15,10))

    plt.subplot(211)
    plt.plot(losses, '.', markersize=12)
    plt.title('Per-Iteration Outer Loss', fontsize=16)
    plt.ylabel('Outer loss function value')
    if self.outer_loss_function in ['logloss']:
        plt.yscale('log')
    integer_xticks()

    plt.subplot(212)
    plt.plot(np.minimum.accumulate(losses), '.', markersize=12)
    plt.title('Cumulative Minimum Outer Loss', fontsize=16)
    plt.xlabel('Iteration number')
    plt.ylabel('Outer loss function value')
    integer_xticks()

    plt.tight_layout()
    plt.savefig(self.hyperopt_progress_plot)
    self.logger.info('The diagnostic hyperopt progress plot is saved: %s' % self.hyperopt_progress_plot)
def scatter_plot_matrix():
    """Show a pairwise scatter-plot grid of the saved classifier's features.

    Loads the pickled classifier from "../saved_clf", selects 22 columns of
    ``classifier.iss_features`` (indices 0-22 without 11) and draws a
    seaborn ``PairGrid`` coloured by ``classifier.labels``.
    """
    import seaborn as sns
    import pandas as pd

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # load classifier files from a trusted source.
    # The context manager closes the file handle, which was previously
    # left open.
    with open("../saved_clf", "rb") as handle:
        classifier = pickle.load(handle)

    sns.palplot(sns.color_palette("hls", 8))
    mc2 = [
        (0.14901960784313725, 0.13725490196078433, 0.13725490196078433),
        (0.8235294117647058, 0.34509803921568627, 0.34509803921568627),
        (0.30196078431372547, 0.4588235294117647, 0.7019607843137254),
        (0.7725490196078432, 0.7764705882352941, 0.7803921568627451),
    ]
    sns.palplot(mc2)

    X = classifier.iss_features[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]]
    fig = plt.figure(figsize=(8, 11))
    sns.set_palette(mc2)

    feature_names = ["Feature" + str(i + 1) for i in range(X.shape[1])]
    df = pd.DataFrame(X, columns=feature_names)
    print(classifier.labels.shape)
    df["Network States"] = classifier.labels

    pg = sns.PairGrid(df, vars=feature_names, hue="Network States", size=2)
    pg.map(plt.scatter)
    plt.show()
def main():
    """Plot tab-separated columns read from standard input.

    The first column of the input provides the x values, every further
    column one line.  The first row supplies the x label and the legend
    labels.  ``--style`` selects optional styling ('mmu' or 'mut').
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm which statements belong to the
    # "if args.style in (...)" branch.
    parser = argparse.ArgumentParser()
    parser.add_argument('--style', choices=('mmu', 'mut'), help='Plot style')
    args = parser.parse_args()
    rows = [line.split('\t') for line in sys.stdin]
    # Transpose rows into columns: [header, float, float, ...] per column.
    table = [[col[0]] + list(map(float, col[1:])) for col in zip(*rows)]
    if args.style == 'mut':
        # Continuous palette (has to happen before plt.subplots)
        sns.set_palette("Blues_r")
    fig, ax = plt.subplots(1, 1)
    if args.style in ('mmu', 'mut'):
        ax.set_xscale('log')
        ax.set_ylim(bottom=0, top=1)
        ax.xaxis.set_major_formatter(tickerSec)
    ax.set_xlabel(table[0][0])
    for col in table[1:]:
        ax.plot(table[0][1:], col[1:], label=col[0])
    ax.legend()
    if args.style == 'mut':
        # Reverse legend order
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles[::-1], labels[::-1])
    plt.show()
def plot_results(self):
    """
    Plot the equity curve, the equity returns and the drawdowns of the
    portfolio as three stacked charts over ``self.timeseries``.
    """
    sns.set_palette("deep", desat=.6)
    sns.set_context(rc={"figure.figsize": (8, 4)})

    fig = plt.figure()
    fig.patch.set_facecolor('white')

    # Collect the three series in one frame indexed by time.
    df = pd.DataFrame()
    for column, values in (
        ("equity", self.equity),
        ("equity_returns", self.equity_returns),
        ("drawdowns", self.drawdowns),
    ):
        df[column] = pd.Series(values, index=self.timeseries)

    # (subplot position, y label, column) from top to bottom; each chart
    # uses the palette colour matching its position.
    charts = (
        (311, 'Equity Value', "equity"),
        (312, 'Equity Returns', 'equity_returns'),
        (313, 'Drawdowns', 'drawdowns'),
    )
    for colour_idx, (position, label, column) in enumerate(charts):
        axis = fig.add_subplot(position, ylabel=label)
        df[column].plot(ax=axis, color=sns.color_palette()[colour_idx])

    # Rotate dates
    fig.autofmt_xdate()

    plt.show()
def plot_results(self, filename=None):
    """
    Plot the Tearsheet.

    Lays out the equity curve, an optional rolling-Sharpe panel (when
    ``self.rolling_sharpe`` is set), drawdown, monthly and yearly returns
    and three text panels on one grid, shows the figure and, when
    ``filename`` is given, also saves it there.
    """
    rc = {
        'lines.linewidth': 1.0,
        'axes.facecolor': '0.995',
        'figure.facecolor': '0.97',
        'font.family': 'serif',
        'font.serif': 'Ubuntu',
        'font.monospace': 'Ubuntu Mono',
        'font.size': 10,
        'axes.labelsize': 10,
        'axes.labelweight': 'bold',
        'axes.titlesize': 10,
        'xtick.labelsize': 8,
        'ytick.labelsize': 8,
        'legend.fontsize': 10,
        'figure.titlesize': 12
    }
    sns.set_context(rc)
    sns.set_style("whitegrid")
    sns.set_palette("deep", desat=.6)

    # The rolling-Sharpe panel, when present, pushes later rows down by one.
    shift = 1 if self.rolling_sharpe else 0
    n_rows = 5 + shift

    fig = plt.figure(figsize=(10, n_rows * 3.5))
    fig.suptitle(self.title, y=0.94, weight='bold')
    grid = gridspec.GridSpec(n_rows, 3, wspace=0.25, hspace=0.5)

    stats = self.get_results()

    # Create all axes first, in grid order.
    ax_equity = plt.subplot(grid[:2, :])
    if self.rolling_sharpe:
        ax_sharpe = plt.subplot(grid[2, :])
    ax_drawdown = plt.subplot(grid[2 + shift, :])
    ax_monthly_returns = plt.subplot(grid[3 + shift, :2])
    ax_yearly_returns = plt.subplot(grid[3 + shift, 2])
    ax_txt_curve = plt.subplot(grid[4 + shift, 0])
    ax_txt_trade = plt.subplot(grid[4 + shift, 1])
    ax_txt_time = plt.subplot(grid[4 + shift, 2])

    # Then fill them.
    self._plot_equity(stats, ax=ax_equity)
    if self.rolling_sharpe:
        self._plot_rolling_sharpe(stats, ax=ax_sharpe)
    self._plot_drawdown(stats, ax=ax_drawdown)
    self._plot_monthly_returns(stats, ax=ax_monthly_returns)
    self._plot_yearly_returns(stats, ax=ax_yearly_returns)
    self._plot_txt_curve(stats, ax=ax_txt_curve)
    self._plot_txt_trade(stats, ax=ax_txt_trade)
    self._plot_txt_time(stats, ax=ax_txt_time)

    plt.show()

    if filename is not None:
        fig.savefig(filename, dpi=150, bbox_inches='tight')
def plotAnArray(array_obj):
    """Plot every row of ``array_obj[1:, 1:]`` as a marked line.

    The first row and first column of the array are skipped; the remaining
    rows are labelled 'q = 0', 'q = 1', ... in order.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    sns.set_style('whitegrid')
    sns.set_context("paper")
    shades = sns.cubehelix_palette(5, start=2, rot=0.45, dark=0.2,
                                   light=.8, reverse=True)
    sns.set_palette(shades)

    positions = np.arange(array_obj.shape[1]-1)
    for q, values in enumerate(array_obj[1:,1:]):
        plt.plot(positions, values, marker='o', label='q = '+str(q))
def test_fig_legend():
    """Smoke test: a figure-level patch legend survives the greyscale conversion."""
    sns.set_palette(COLOR_LIST)

    figure = plt.figure()
    figure.add_subplot(111)

    make_legend(['a','b','c'], figure, legend_type=PATCH)
    set_fig_to_bw(figure, style=GREYSCALE)

    plt.draw()
-0.05, '"The Data So Far" from xkcd by Randall Monroe', ha='center') plt.show() # %% [markdown] {"slideshow": {"slide_type": "slide"}, "toc-hr-collapsed": false} # # Seaborn # Wrapper around Matplotlib that makes plotting attractive figures easier. # %% [markdown] {"slideshow": {"slide_type": "subslide"}} # ## Changing color palette # %% pal = sns.color_palette("husl", 8) # optionally set number of colors sns.set_palette(pal) sns.palplot(sns.color_palette()) # %% [markdown] {"slideshow": {"slide_type": "subslide"}} # ## Default Seaborn color palette # %% sns.set_palette("tab10") sns.palplot(sns.color_palette()) # %% [markdown] {"slideshow": {"slide_type": "subslide"}} # ## Defining custom color palette # %% flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"] sns.set_palette(flatui)
import matplotlib.pyplot as plt import matplotlib.ticker as ticker import seaborn as sns pd.set_option('display.max_columns', 500) pd.set_option('display.width', 1000) pd.set_option('display.max_colwidth', 1000) pd.set_option('display.float_format', '{:,.3f}'.format) flatui = [ "#2e86de", "#ff4757", "#feca57", "#2ed573", "#ff7f50", "#00cec9", "#fd79a8", "#a4b0be" ] flatui_palette = sns.color_palette(flatui) sns.palplot(flatui_palette) sns.set_palette(flatui_palette) sns.palplot(sns.light_palette(flatui_palette[0])) sns.palplot(sns.light_palette(flatui_palette[1])) sns.palplot(sns.light_palette(flatui_palette[3])) sns.set_style( "darkgrid", { 'axes.edgecolor': '#2b2b2b', 'axes.facecolor': '#2b2b2b', 'axes.labelcolor': '#919191', 'figure.facecolor': '#2b2b2b', 'grid.color': '#545454', 'patch.edgecolor': '#2b2b2b', 'text.color': '#bababa', 'xtick.color': '#bababa', 'ytick.color': '#bababa',
# -*- coding: utf-8 -*- from __future__ import division from IPython.display import display import matplotlib.pyplot as plt import numpy as np import pandas as pd from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor import seaborn as sns from statsmodels.nonparametric.smoothers_lowess import lowess from treeinterpreter import treeinterpreter as ti sns.set_palette('colorblind') color_l = sns.color_palette('colorblind') blue = color_l[0] green = color_l[0] red = color_l[0] purple = color_l[0] yellow = color_l[0] cyan = color_l[0] def plot_obs_feature_contrib(clf, contributions, features_df, labels, index, class_index=0,
from torch.autograd import Variable from torch.utils.data import DataLoader import cv2 import seaborn as sns from easydict import EasyDict as edict from mot_evaluation.bbox import bbox_overlap from mot_evaluation.io import (extract_valid_gt_data, print_metrics, read_seqmaps, read_txt_to_struct) from mot_evaluation.measurements import clear_mot_hungarian, idmeasures from sacred import Experiment from sklearn.utils.linear_assignment_ import linear_assignment from tracktor.config import cfg, get_output_dir from tracktor.datasets.factory import Datasets sns.set_palette('deep') sns.set(font_scale=1.5, rc={'text.usetex': True}) colors = [ 'aliceblue', 'black', 'green', 'red', 'aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'blanchedalmond', 'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue', 'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey', 'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen', 'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue', 'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite', 'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory', 'khaki', 'lavender', 'lavenderblush', 'lawngreen',
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.patches as patches
import matplotlib.gridspec as gridspec
from matplotlib import lines, markers
import seaborn as sns
import numpy as np
import os
import glob
import re
from collections import defaultdict
from scipy import stats
import subprocess
from itertools import cycle
from math import floor, ceil, sqrt
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Global matplotlib defaults for every figure created after this point.
# Font type 42 makes the PDF and PS backends embed TrueType fonts.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
# Thin hatch strokes and a fixed x tick label size.
matplotlib.rcParams['hatch.linewidth'] = 0.2
matplotlib.rcParams['xtick.labelsize'] = 10

# Default colour cycle: 14 entries drawn from "Set2", slightly desaturated.
# NOTE(review): "Set2" is an 8-colour palette, so entries beyond the eighth
# presumably repeat earlier colours - confirm that is intended.
sns.set_palette(sns.color_palette('Set2', n_colors=14, desat=0.9))
sns.set_style("ticks")
import seaborn as sns from typing import Set, List, Dict import functools from collections import Counter, OrderedDict import csv import pathlib import textgrid import sox from pathlib import Path import pickle import multiprocessing import word_extraction sns.set() sns.set_palette("bright") # sns.set(font_scale=1.6) # %% NUM_WAVS = 2200 per_lang = {} frequent_words = Path("/home/mark/tinyspeech_harvard/frequent_words/") for lang in os.listdir(frequent_words): per_lang[lang] = [] clips = frequent_words / lang / "clips" words = os.listdir(clips) raise ValueError( "do we need to make words a set? what caused the bug with duplicate words in commands??"
for j in range(len(inter_in_B)): line2 = [ center_B[0], center_B[1], inter_in_B[j][0], inter_in_B[j][1] ] error_cor_set.append(cross_point(line1, line2)) error_x = [error_cor_set[i][0] for i in range(len(error_cor_set))] error_y = [error_cor_set[i][1] for i in range(len(error_cor_set))] print("协标准差矩阵为:\n", np.sqrt(np.cov(error_x, error_y, bias=True))) print("相关系数为:\n", np.corrcoef(error_x, error_y)) cor_set = {"X": error_x, "Y": error_y} data_st = DataFrame(cor_set) # print(data_st) print(np.mean(error_x), np.std(error_x), np.ptp(error_x)) print(np.mean(error_y), np.std(error_y), np.ptp(error_y)) # plt.scatter(error_x, error_y, marker = 'x',color = 'red', s = 40 ,label = 'First') sns.jointplot(x=data_st['X'], y=data_st['Y'], data=data_st, kind='hex', height=5) plt.show() sns.set_palette("hls") sns.distplot(error_y, color="r", bins=30, kde=True) plt.show() else: print("因为重叠区域不包括 interested point (原点),所以计算无效") else: print("他们没有相交,计算无效")
import os import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import corner from . import global_sampler as gs sns.set_palette('pastel') sns.set_style('whitegrid') def plot_sim(freqs, fg=None, signal=None, noise=None, outname=None): """Plot a simulated sky signal, consisting of a required frequency array and optionally foreground, signal, and noise.""" xlabel = r'$\nu\ [\rm MHz]$' ylabel = r'$T_B\ [\rm K]$' fig, (ax1, ax2, ax3) = plt.subplots(figsize=(12, 4), ncols=3) total = np.sum([arr for arr in [fg, signal, noise] if arr is not None], axis=0) if fg is not None: ax1.set_title('Foreground and total') ax1.plot(freqs, fg, label='Foreground') ax1.plot(freqs, total * 1.1, label='Total x 1.1')
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Training logs, one CSV per learning rate (0.1, 0.05, 0.01).
bn01 = pd.read_csv("training_lr0.1_bn.csv")
bn005 = pd.read_csv("training_lr0.05_bn.csv")
bn001 = pd.read_csv("training_lr0.01_bn.csv")

sns.set_style("whitegrid")
sns.set_palette(sns.color_palette("Paired"))

# Accuracy figure: train then validation curve for each run, in the same
# order as the legend entries below. The metric column names carry a
# leading space, matching the CSV headers.
fig = plt.figure()
for run in (bn01, bn005, bn001):
    for metric in (" train_acc", " val_acc"):
        sns.lineplot(x='epochs', y=metric, ci=None, data=run)
plt.legend([
    "Train 0.1", "Val 0.1", "Train 0.05", "Val 0.05", "Train 0.01",
    "Val 0.01"
])
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.show()

# Loss figure: only the 0.1 and 0.05 runs are drawn at this point.
fig = plt.figure()
for run in (bn01, bn005):
    for metric in (" train_loss", " val_loss"):
        sns.lineplot(x='epochs', y=metric, ci=None, data=run)
@author: hi """ import urllib from IPython.display import Image, display, clear_output from collections import Counter import matplotlib.pyplot as plt import seaborn as sns %matplotlib inline import json from sklearn.metrics import classification_report, confusion_matrix sns.set_palette("cubehelix") sns.set_style('whitegrid') import os import h5py import numpy as np import pandas as pd import keras keras.__version__ np.__version__ from keras.applications.vgg16 import VGG16 from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img #from keras.regularizers import l2, activity_l2, l1, activity_l1 from keras.models import Sequential, load_model m_damage=load_model("C:/Users/hi/POC/Model_Fine/Damage/epoch_2.h5") from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns

sns.set_palette("Paired")

# Load the datasets
all_data = pd.read_csv("all_data.csv")

# Inspect the data
#print(all_data.head())
#print(all_data.Country.unique())
#print(all_data.Year.unique())

# Rename "Life expectancy at birth (years)" to the shorter "LEABY"
all_data.rename(columns={"Life expectancy at birth (years)": "LEABY"},
                inplace=True)
#print(all_data.head())

# Examine to compare life expectancy and GDP for each country
countries = all_data.Country.unique()
# NOTE(review): assumes the fifth unique country is the one to abbreviate -
# confirm against the dataset.
countries[4] = "USA"

# Plot GDP for each country
# Create only the figure here. plt.subplots() would return a (figure, axes)
# tuple and add a full-size Axes that the subplot below would overlap.
fig = plt.figure(figsize=(15, 10))
ax1 = plt.subplot(1, 2, 1)
sns.barplot(data=all_data, x="Country", y="GDP", ci=False)
ax1.set_xticks(range(len(countries)))
ax1.set_xticklabels(countries, rotation=30)
plt.ylabel("GDP in Trillions of U.S. Dollars")
plt.title("GDP for each Country")
bosch_gray_palette = ['#525F6B', '#BFC0C2'] colors_gry = sns.color_palette(bosch_gray_palette) # standard colors as defined by style guide bosch_standard_palette = [toHEX((168,1,99)), toHEX((208,103,173)), toHEX((63,19,108)), toHEX((150,124,177)), toHEX((8,66,126)), toHEX((109,154,188)), toHEX((14,120,197)), toHEX((111,185,226)), toHEX((19,153,160)), toHEX((111,201,204)), toHEX((103,180,25)), toHEX((174,219,125)), toHEX((10,81,57)), toHEX((110,162,147))] colors_std = sns.color_palette('#E20015 #B90276 #50237F #005691 #008ECF #00A8B0 #78BE20 #006249'.split()) colors_stdr = sns.color_palette(bosch_standard_palette) # optimized standard palette: remove red, start with light blue (i.e. plots with two colors: ligth blue + turquoise) colors_opt = colors_stdr[:-1] colors_opt = colors_opt[2:] + colors_opt[:2] sns.set_palette(colors_stdr) sns.set_style("whitegrid") params_savefig = {'figure.figsize': (8, 4), 'figure.dpi': 125, 'font.sans-serif': ['Arial', 'Bitstream Vera Sans', 'Arial'], 'legend.fontsize': 'x-large', 'legend.columnspacing': 1, 'legend.labelspacing': 0.25, 'legend.fancybox': True, 'xtick.labelsize': 'x-large', 'ytick.labelsize': 'x-large',
def _attendance_hist(values, ax, bins):
    '''Draw one normalised attendance histogram (no KDE) on ``ax``.'''
    sns.distplot(values, ax=ax, kde=False, norm_hist=True, bins=bins)


def _label_attendance_hist(ax, title, legend_labels=None, legend_fontsize=15):
    '''Apply the shared title, axis labels and optional legend to a histogram panel.'''
    ax.set_title(label=title, fontsize=25)
    ax.set_xlabel('Attendance (# of people)', fontsize=20)
    ax.set_ylabel('Percent per person', fontsize=20)
    ax.tick_params(labelsize=15)
    if legend_labels is not None:
        ax.legend(legend_labels, loc="upper right", fontsize=legend_fontsize)


def _plot_mean_attendance_bars(train, column, order, ax, n_colors, x_max,
                               ylabel, title):
    '''Draw horizontal bars of mean attendance grouped by ``column`` on ``ax``.'''
    grouped = train[[column, 'Attendance']].groupby(column).mean()
    sns.barplot(x='Attendance',
                y=grouped.index,
                data=grouped,
                order=order,
                ci=None,
                orient='h',
                saturation=1,
                ax=ax,
                palette=sns.color_palette("cubehelix", n_colors))
    ax.set_xlim(16500, x_max)
    ax.set_xticks(range(16500, x_max + 1, 500))
    ax.set_xlabel('Average Attendance (# of people)', fontsize=20)
    ax.set_ylabel(ylabel, fontsize=20)
    ax.tick_params(labelsize=15)
    ax.set_title(label=title, fontsize=25)


def create_plots(name, save=False):
    '''
    Draw the attendance overview figure for dataset ``name``.

    The figure is a 4x4 grid: attendance histograms (overall, per day of
    week, per month, per recent year, regular season vs. playoffs, per
    win-percentage band, per last-five record), mean-attendance bar charts
    for the same groupings, and a correlation heatmap of the leading
    numerical columns.

    Parameters
    ----------
    name : dataset identifier passed to ``ds.load_dataset`` and
        ``ds.get_number_numerical``; also used in the output file name.
    save : when true, the figure is written to
        ``visualizations/all_plots_<name>.png`` under the current working
        directory after it has been shown.

    Notes
    -----
    The loaded frame is modified in place: a ``Year`` column is added,
    ``Playoffs?`` is relabelled and ``Curr Win %`` is rescaled to 0-100.
    The statement order below matters, because the earlier panels read the
    columns before they are rewritten.
    NOTE(review): if ``ds.load_dataset`` hands out a shared object, these
    changes leak to other callers - confirm.
    '''
    train = ds.load_dataset(name)

    sns.set_style("whitegrid")
    sns.set_palette(sns.color_palette('bright', 12))

    fig = plt.figure(figsize=(40, 27))
    gs = gridspec.GridSpec(4, 4)
    # Three rows of four single-cell panels, then two double-width panels.
    (ax00, ax01, ax02, ax03,
     ax10, ax11, ax12, ax13,
     ax20, ax21, ax22, ax23) = [plt.subplot(gs[row, col])
                                for row in range(3) for col in range(4)]
    ax30 = plt.subplot(gs[3, 0:2])
    ax31 = plt.subplot(gs[3, 2:4])

    bins = np.arange(4000, 26001, 500)

    # Overall attendance
    _attendance_hist(train['Attendance'], ax00, bins)
    _label_attendance_hist(ax00, "Overall Attendance")

    # Attendance per day of week
    days = train['Day of Week'].unique()
    for day in days:
        _attendance_hist(train.loc[train['Day of Week'] == day]['Attendance'],
                         ax01, bins)
    _label_attendance_hist(ax01, "Attendance per Day", days)

    _plot_mean_attendance_bars(
        train, 'Day of Week',
        ['Monday', "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
         "Sunday"],
        ax20, 7, 19000, 'Day of the Week', "Average Attendance per Day")

    # Attendance per month
    months = train['Month'].unique()
    for month in months:
        _attendance_hist(train.loc[train['Month'] == month]['Attendance'],
                         ax02, bins)
    _label_attendance_hist(ax02, "Attendance per Month", months)

    _plot_mean_attendance_bars(
        train, 'Month',
        ['October', 'November', 'December', 'January', 'February', 'March',
         'April', 'May', 'June'],
        ax21, 9, 20000, 'Month', "Average Attendance per Month")

    # Attendance per year (the Year column is derived from the index)
    train['Year'] = train.index.year
    _plot_mean_attendance_bars(
        train, 'Year', np.sort(train.index.year.unique()),
        ax22, 13, 18500, 'Year', "Average Attendance per Year")

    # Histograms for the five most recent years only.
    latest_year = max(train['Year'].values)
    years = np.arange(latest_year - 4, latest_year + 1)
    for year in years:
        _attendance_hist(train.loc[train.index.year == year]['Attendance'],
                         ax03, bins)
    _label_attendance_hist(ax03, "Attendance per Year", years,
                           legend_fontsize=20)

    # Regular season vs. playoffs (read before 'Playoffs?' is relabelled)
    for j in [0, 1]:
        _attendance_hist(train.loc[train['Playoffs?'] == j]['Attendance'],
                         ax10, bins)
    _label_attendance_hist(ax10, "Attendance for Regular and Playoff Games",
                           ['Regular Season', 'Playoffs'])

    # Win-percentage bands of width 0.1 (read before 'Curr Win %' is
    # rescaled to 0-100 further down).
    win_percent = np.arange(0, 1, 0.10)
    for i in win_percent:
        _attendance_hist(
            train.loc[(train['Curr Win %'] >= i)
                      & (train['Curr Win %'] < i + 0.1)]['Attendance'],
            ax11, bins)
    # NOTE(review): eleven labels for ten bands; '100 %' has no histogram.
    _label_attendance_hist(ax11, "Attendance for Different Win Percentages", [
        '0 %', '10 %', '20 %', '30 %', '40 %', '50 %', '60 %', '70 %', '80 %',
        '90 %', '100 %'
    ], legend_fontsize=17)

    # Record over the last five games
    last_five = np.arange(0, 6)
    for i in last_five:
        _attendance_hist(train.loc[train['Last Five'] == i]['Attendance'],
                         ax12, bins)
    _label_attendance_hist(
        ax12, "Attendance by Last Five Record",
        ['0 Wins', '1 Win', '2 Wins', "3 Wins", "4 Wins", "5 Wins"],
        legend_fontsize=20)

    _plot_mean_attendance_bars(
        train, 'Last Five', np.arange(0, 6),
        ax31, 5, 18500, 'Record Over Last Five Games',
        "Average Attendance per Last Five Record")

    # Correlation heatmap over the leading numerical columns. The matrix is
    # computed once and reused below instead of on every loop iteration.
    num_numerical = ds.get_number_numerical()[name]
    train_num = train.iloc[:, 0:num_numerical]
    corr = train_num.corr()
    heat = sns.heatmap(corr,
                       annot=True,
                       ax=ax13,
                       fmt='.2f',
                       cbar=True,
                       square=True,
                       xticklabels=True,
                       yticklabels=True,
                       annot_kws={'size': 16},
                       cmap='coolwarm',
                       center=0,
                       vmin=-1,
                       vmax=1,
                       cbar_kws={"shrink": 1})
    ax13.set_title('Heatmap of Numerical Variable Correlation', size=25)
    ax13.set_xticklabels(ax13.xaxis.get_majorticklabels(),
                         rotation=60,
                         size=15)
    ax13.set_yticklabels(ax13.yaxis.get_majorticklabels(),
                         rotation=0,
                         size=15)
    ax13.collections[0].colorbar.ax.tick_params(labelsize=15)

    # Make annotations larger if abs(correlation) above 0.2.
    # Walk the sorted absolute values from the second largest downwards
    # (the largest one is skipped) and keep those that round above 0.2.
    sorted_abs = np.sort(np.abs(np.unique(corr.values.flatten())))
    bigs = []
    for raw in sorted_abs[-2::-1]:
        val = round(raw, 2)
        if val > 0.2:
            bigs = np.append(bigs, val)
    for text in heat.texts:
        num = pd.to_numeric(text.get_text())
        i = np.where(bigs == abs(num))[0]
        if i.size > 0:
            # Stronger correlations (earlier in ``bigs``) get larger text.
            text.set_color('white')
            text.set_size(27 - (i[0] * 3))

    # Mean attendance per game type (relabels 'Playoffs?' in place)
    train.loc[train['Playoffs?'] == 0, "Playoffs?"] = 'Regular Season'
    train.loc[train['Playoffs?'] == 1, "Playoffs?"] = 'Playoffs'
    _plot_mean_attendance_bars(
        train, 'Playoffs?', ['Regular Season', 'Playoffs'],
        ax23, 5, 19500, 'Game Type', "Average Attendance per Game Type")

    # Mean attendance per win percentage, rounded to the nearest 10 %
    train[['Curr Win %']] = np.round(train[['Curr Win %']], 1) * 100
    _plot_mean_attendance_bars(
        train, 'Curr Win %', np.arange(0, 101, 10),
        ax30, 5, 19500, 'Current Win %',
        "Average Attendance per Current Win %")

    gs.tight_layout(fig)
    plt.show()

    if save:
        to_save = Path().resolve().joinpath('visualizations',
                                            'all_plots_{}.png'.format(name))
        # Create the output directory on first use so savefig cannot fail
        # on a missing folder.
        to_save.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(to_save, dpi=300)
# Adverserial Validation from scipy import sparse from sklearn.metrics import roc_auc_score from sklearn.ensemble import RandomForestClassifier from sklearn.linear_model import SGDClassifier from sklearn.model_selection import StratifiedKFold import matplotlib.pyplot as plt from sklearn.metrics import precision_recall_curve, f1_score, accuracy_score, roc_auc_score, confusion_matrix import seaborn as sns import numpy as np sns.set_palette("muted") def adversarial_validation(X, Y, n_splits=10): # Combine both datasets sparse_merge = sparse.vstack((X, Y)) # Label the datasets y = np.array([0 for _ in range(X.shape[0])] + [1 for _ in range(Y.shape[0])]) # Do 10 Fold CV kfold = StratifiedKFold(n_splits=n_splits, shuffle=True) lr_auc = np.array([]) rf_auc = np.array([]) for train_idx, test_idx in kfold.split(sparse_merge, y): #Run Log Reg
child_study_data.head(n = 16)


# In[3]:

child_study_data.dtypes


# The main variable of interest is a measure of auditory processing, located in the column `M100LatCorr`. There are three groups (`Case`) for the comparison we are interested in. Let's first examine the distribution of values across groups.

# In[4]:

get_ipython().magic(u'matplotlib inline')
import matplotlib.pyplot as plt
from IPython.core.pylabtools import figsize
sns.set_palette("deep", desat = 0.6) # taken from examples on Seaborn documentation page


# In[5]:

# Collect the M100LatCorr values of each group, in row order.
# Boolean-mask selection with .loc replaces the row-by-row DataFrame.ix
# lookups; .ix was removed in pandas 1.0. Rows whose Case is none of the
# three labels are ignored, as before.
control_data = child_study_data.loc[
    child_study_data.Case == 'control', 'M100LatCorr'].tolist()
deletion_data = child_study_data.loc[
    child_study_data.Case == 'deletion', 'M100LatCorr'].tolist()
duplication_data = child_study_data.loc[
    child_study_data.Case == 'duplication', 'M100LatCorr'].tolist()
# # HISTOGRAMS
import numpy
import pandas
from matplotlib import pyplot
import seaborn as sns  # recommended standard
import pylab

sns.set_palette("deep", desat=.6)
# Default figure size for every plot below. It is set through matplotlib's
# rcParams because seaborn's set_context() only applies its own "context"
# keys, so a figure.figsize entry passed to it has no effect in current
# seaborn releases.
pyplot.rcParams["figure.figsize"] = (8, 4)

titanic = pandas.read_csv("titanic.csv")
# print titanic.head()
server = pandas.read_csv("serverdata.csv")
# print server.head()
mlb = pandas.read_csv("mlbsalaries.csv")
# print mlb.head()

# # 1. histogram
# # Titanic_Age_pyplot-hist_noNAs
# pyplot.hist(titanic.Age.dropna())  # drop blank values in age; error if blank
# pylab.show()
# # Titanic_Age_pyplot-hist_noNAs-bins20
# pyplot.hist(titanic.Age.dropna(),bins=20); pylab.show()
# # Titanic_Age_distribplot-Hist_KDE
# sns.distplot(titanic.Age.dropna()); pylab.show()
# # Server_Time_hist-bins25_color
mean_duration.append(nanmean(t50_at_v0)) df = pd.DataFrame({ '-75': t50_per_v0[0], '-70': t50_per_v0[1], '-65': t50_per_v0[2], '-60': t50_per_v0[3], '-55': t50_per_v0[4], '-50': t50_per_v0[5], '-45': t50_per_v0[6], '-40': t50_per_v0[7] }) sns.boxplot(data=df, color='gray', ax=ax5) sns.set_palette(sns.color_palette(cols)) sns.swarmplot(data=df, ax=ax5) ax5.set_ylabel('$t_{50}$ (ms)') ax5.set_xlabel('$V_0$ (mV)') ax5.set_ylim(0, 2) ax5.legend(frameon=False, fontsize=8) ax5.annotate("E", xy=(0, 1.1), xycoords="axes fraction", xytext=(5, -5), textcoords="offset points", ha="left", va="top", fontsize=12, weight='bold')
__author__ = 'Thurston' import numpy as np import seaborn as sns import pandas as pd import pickle from scipy.spatial.distance import pdist, cdist, squareform from pyDOE import lhs from scipy.misc import logsumexp pal = sns.color_palette('Dark2', n_colors=2, desat=.6) sns.set_palette(pal) sns.set_context(context='paper', font_scale=1.5) sns.set_style('ticks') from sklearn.preprocessing import MinMaxScaler def cuml_like(arr1, arr2): arr1 = MinMaxScaler().fit_transform(arr1.astype(float).reshape(-1, 1)) arr2 = MinMaxScaler().fit_transform(arr2.astype(float).reshape(-1, 1)) if arr1.size != arr2.size: raise Exception('must be equal-sized arrays arr1 and arr2') new = np.zeros_like(arr1) for n in range(new.size): new[n] = arr1[:n + 1].sum() + arr2[n + 1:].sum() return new def get_Ls(lam, iters): inits = np.arange(1, iters)
def stackedbars(dflist, repreCols, groupCol, graphTitle, save=True, **kwargs):
    r"""Draw stacked bars for several datasets, optionally normalized to 100%.

    Every dataframe is aggregated (summed) by ``groupCol``; for each column
    in ``repreCols`` one stacked bar per dataframe is drawn, with one
    segment per group. Bars of different dataframes sit side by side.

    Parameters
    ----------
    dflist : list of pandas DataFrames, or a single DataFrame
        dataframes containing the data, must all have the same columns.
        The list passed in is not modified.
    repreCols : list of strings
        list of string with the dataframe columns to represent.
    groupCol : string
        column of the dataframe used to group the rows.
    graphTitle : string
        title of the graph and default filename for saving.
    save : boolean, optional
        If True, the figure is saved as a PNG next to the running script
        (or in the current working directory when there is no script file).

    Returns
    -------
    fig : matplotlib figure object.
        figure that stores the generated graph.

    Raises
    ------
    functionError
        (a local ``Exception`` subclass) when a parameter fails validation.

    Other parameters
    ----------------
    normalize : boolean, optional, default True
        If True, every column is rescaled so its groups add up to 100.
    yLabel : string, optional.
        label for the y axis.
    yTicksNum : integer, optional
        Number of ticks in the y axis.
    width : float, optional.
        width between 0 and 1 of the bars.
    colorDict : dict, optional.
        dictionary with label:color being labels, groupCol column elements.
    legendLoc : string, optional, default "best"
        Legend location, accepts standard legend locations for matplotlib,
        plus "out top right", "out center right", "out lower right" for
        placing the legend out of the plotting area and "none" for hiding
        the legend.
    inverse_order : boolean, optional.
        inverse the order of the bars.
    xSize : float, optional.
        x size of the figure in inches.
    ySize : float, optional.
        y size of the figure in inches.
    palette : seaborn compatible palette,
        Can be the name of a seaborn palette, a seaborn palette or a list
        interpretable as a palette by seaborn.
    fileName : string, optional
        File name to save, if not provided graphTitle will be used.
        Non alphanumeric values will be deleted.

    Examples
    --------
    >>> df_test1 = pd.DataFrame.from_dict(
    ...     {"type":["S1", "S2", "S1", "S2", "S1", "S1", "S1", "S1"],
    ...      "var0":[1.2, 1.1, 1.3, 1, 1.2, 1.1, 1.2, 1.3],
    ...      "var1":[4, 5, 8, 3, 5, 2, 7, 3],
    ...      "var2":[0.13, 0.12, 0.13,0.12, 0.12, 0.13, 0.13, 0.12],
    ...      "var3":[0.16, 0.15, 0.16, 0.15, 0.15, 0.14, 0.16, 0.16]})
    >>> df_test2 = pd.DataFrame.from_dict(
    ...     {"type":["S1new", "S2new", "S1new", "S2new",
    ...              "S1new", "S1new", "S1new", "S1new"],
    ...      "var0":[2.2, 1.1, 1.3, 2, 1.2, 2.1, 1.2, 2.3],
    ...      "var1":[4, 5, 6, 3, 5, 6, 7, 6],
    ...      "var2":[0.13, 0.16, 0.63,0.12, 0.12, 0.6, 0.13, 0.6],
    ...      "var3":[1.16, 0.16, 0.16, 0.15, 0.15, 1.14, 0.16, 0.16]})
    >>> dflist = [df_test1, df_test2]
    >>> stackedbars(dflist,
    ...             repreCols=['var0', 'var1', 'var2', 'var3'],
    ...             groupCol="type",
    ...             graphTitle="metric by type",
    ...             save = True,
    ...             normalize=True,
    ...             yLabel= "Test label y",
    ...             colorDict = {"S1":"darkgreen", "S1new": "green",
    ...                          "S2":"darkblue", "S2new": "blue"},
    ...             inverse_order=False,
    ...             legendLoc="out top right",
    ...             fileName="stacked Bars Example")
    """
    # Version 1.0

    class functionError(Exception):
        """Custom error class defined to differentiate our errors from the
        standard errors"""

        def __init__(self, *args):
            if args:
                self.message = args[0]
            else:
                self.message = None

        def __str__(self):
            if self.message:
                return 'functionError, {0} '.format(self.message)
            else:
                return 'functionError, undefined function error'

    ############kwargs#######################
    normalize = kwargs.get("normalize", True)
    yLabel = kwargs.get("yLabel", None)
    yTicksNum = kwargs.get("yTicksNum", 5)
    width = kwargs.get("width", None)
    colorDict = kwargs.get("colorDict", None)
    legendLoc = kwargs.get("legendLoc", "best")
    inverse_order = kwargs.get("inverse_order", True)
    xSize = kwargs.get("xSize", 6)
    ySize = kwargs.get("ySize", 4)
    palette = kwargs.get("palette", "deep")
    fileName = kwargs.get("fileName", graphTitle)

    #########################Params Validation#########################
    # A single DataFrame is shorthand for a one-element list. This has to be
    # resolved before validation: iterating a DataFrame yields its column
    # labels, which would fail the type check below.
    if isinstance(dflist, pd.DataFrame):
        dflist = [dflist]

    #Check for type match and particularities in every param
    #not needed in python but helps communicating errors to the user
    for df in dflist:
        if not isinstance(df, pd.DataFrame):
            raise functionError("an element of dflist is"
                                " not a pandas dataframe")
    for dfCount, df in enumerate(dflist):
        headers = df.columns.values.tolist()
        for col in repreCols:
            if col not in headers:
                raise functionError(
                    "repreCols item "
                    "{} is not a dflist[{}]"
                    " column header name.".format(col, dfCount))
        if groupCol not in headers:
            raise functionError("groupCol is not a dflist[{}]"
                                " column header name".format(dfCount))
    if not isinstance(save, bool):
        raise functionError("save is not a boolean")
    if yLabel is not None and not isinstance(yLabel, str):
        raise functionError("yLabel is not a string")
    if not isinstance(yTicksNum, int):
        raise functionError("yTicksNum is not an integer")
    if width is not None and not isinstance(width, (float, int)):
        raise functionError("width is not a float")

    #list with the standard valid matplotlib legend values
    stdLeyeLocList = ['best', 0, 'upper right', 1, 'upper left', 2,
                      'lower left', 3, 'lower right', 4, 'right', 5,
                      'center left', 6, 'center right', 7,
                      'lower center', 8, 'upper center', 9, 'center', 10]
    #possible legendLoc values: the standard ones plus the custom ones.
    posLeyeLocValues = stdLeyeLocList + ["none", "out top right",
                                         "out center right",
                                         "out lower right"]
    if legendLoc:
        if legendLoc not in posLeyeLocValues:
            raise functionError("legendLoc is not a valid value.")
    if not isinstance(inverse_order, bool):
        raise functionError("inverse_order is not a boolean")
    if not isinstance(xSize, (int, float)):
        raise functionError("xSize is not a float or a integer.")
    if not isinstance(ySize, (int, float)):
        raise functionError("ySize is not a float or a integer.")
    if not isinstance(fileName, str):
        try:
            fileName = str(fileName)
        except Exception:
            raise functionError("fileName is not a string.")

    ################initial settings###############
    sns.set_palette(palette)
    cols = list(repreCols) + [groupCol]
    if width is None:
        width = 1/(len(dflist) + 1)

    #group by groupCol values; build a new list so the caller's list and
    #dataframes are left untouched.
    grouped = []
    for df in dflist:
        df1 = df[cols].groupby(groupCol).sum().reset_index().set_index(groupCol)
        grouped.append(df1.sort_index(ascending=inverse_order))

    #normalize to percentage of column sum (whole column at once, so the
    #result is a float column regardless of the input dtype)
    if normalize is True:
        for df in grouped:
            for col in repreCols:
                suma = df[col].sum()
                if suma == 0:
                    df[col] = 0
                else:
                    df[col] = (df[col] / suma)*100

    x = np.arange(len(repreCols))  # the label locations

    #offsets of each dataframe's bars, centred on the label location
    #(a single dataframe gets offset 0).
    decal0 = - width*len(grouped)/2 + width/2
    decalist = [decal0 + width*dfCount for dfCount in range(len(grouped))]

    #figure creation
    fig, ax = plt.subplots(figsize=(xSize, ySize))
    for dfCount, df in enumerate(grouped):
        # running top of the stack for this dataframe's bars
        stack_top = np.zeros(len(repreCols))
        for idx in df.index:
            color = colorDict[idx] if colorDict is not None else None
            heights = df.loc[idx]
            ax.bar(x=(x + decalist[dfCount]),
                   height=heights,
                   width=width,
                   label=idx,
                   bottom=stack_top,
                   color=color)
            stack_top = stack_top + np.asarray(heights, dtype=float)

    if normalize is True:
        ax.yaxis.set_major_formatter(mtick.PercentFormatter())
        ax.set_ylim(0, 100)
    ax.yaxis.set_major_locator(plt.MaxNLocator(yTicksNum))
    ax.set_ylabel(yLabel)
    ax.set_title(graphTitle)
    ax.set_xticks(x)
    ax.set_xticklabels(repreCols)

    #######Legend creation#############################################
    # put legend as usual if is a standard name,
    # nothing if "none", custom if others
    if legendLoc in stdLeyeLocList:
        ax.legend(loc=legendLoc)
    elif legendLoc == "out top right":
        ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
    elif legendLoc == "out center right":
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    elif legendLoc == "out lower right":
        ax.legend(loc='lower left', bbox_to_anchor=(1, 0))

    ############Plot saving############################################
    if save is True:
        #Path to save in: folder of the running script, or the current
        #working directory when __main__ has no file (interactive session).
        main_file = getattr(sys.modules['__main__'], '__file__', None)
        if main_file:
            path = os.path.dirname(os.path.abspath(main_file))
        else:
            path = os.getcwd()
        # To create the file name, remove non alphanumeric chars
        filename = re.sub(r'\W+', '', fileName.title())
        fig.savefig(pathlib.Path(path, filename).with_suffix(".png"),
                    bbox_inches='tight', dpi=600)

    plt.show()

    return fig
from collections import defaultdict
from textwrap import wrap
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import os

# Restrict the CUDA devices visible to this process.
# NOTE(review): the value contains spaces after the commas - confirm the
# CUDA runtime in use parses "1, 2, 3" as three device ids.
os.environ["CUDA_VISIBLE_DEVICES"] = "1, 2, 3"
BATCH_SIZE = 8

# plot setting
# sns.set() applies the base theme first; set_palette() then replaces the
# 'muted' palette with the custom colours below.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
HAPPY_COLORS_PALETTE = [
    "#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"
]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = 12, 8

# random seed
# The same seed is used for both NumPy and PyTorch.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# device
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load data
# df = pd.read_csv("data/test.csv")
print("Loading Data......")
df = pd.read_csv("../data/arxiv.cs.ai_2007-2017.csv")
class_names = ['reject', 'accept']
def plot_fit(data, model):
    """Plot the systematics-removed data with the model, and the fit residuals.

    The figure has two stacked panels sharing the hard-coded phase window
    0.35-0.65: the top panel shows the model evaluated for visit 0 together
    with the per-visit systematics-removed data and an annotation with the
    reduced chi-squared and the observed/expected rms; the bottom panel shows
    the normalised residuals scaled by 1e6 (labelled ppm). The figure is
    displayed with ``plt.show()``; nothing is returned.

    :param data: Object providing ``nvisit`` and ``vis_num`` (one visit index
        per data point).
    :param model: Object providing ``params``, ``phase``, ``myfuncs``,
        ``data_nosys``, ``norm_resid``, ``chi2red``, ``rms`` and
        ``rms_predicted``.
    """
    p = FormatParams(model.params, data)  # FIXME

    sns.set_palette("muted")
    palette = sns.color_palette("muted", data.nvisit)

    # The first two visits keep their historical colours. Further visits take
    # their colour from the seaborn palette, so indexing by visit number no
    # longer raises IndexError when there are more than two visits.
    base_colors = ['blue', 'red']
    colors = base_colors + list(palette[len(base_colors):])

    # Higher-resolution time grid for a smooth model curve. It is currently
    # unused because the high-resolution model plot below is disabled.
    phase_hr = np.linspace(model.phase.min() - 0.05,
                           model.phase.max() + 0.05, 1000)
    t_hr = phase_hr * p.per[0] + p.t0[0]

    # --- Top panel: model and systematics-removed data ---
    plt.subplot(211)

    # Model for the first visit, evaluated at the observed points.
    # plt.plot(phase_hr, calc_astro(t_hr, model.params, data, model.myfuncs, 0))
    idx = data.vis_num == 0
    plt.plot(model.phase[idx],
             calc_astro(idx, model.params, data, model.myfuncs, 0))

    for i in range(data.nvisit):
        ind = data.vis_num == i
        plt.plot(model.phase[ind], model.data_nosys[ind], color=colors[i],
                 marker='o', markersize=3, linestyle="none")

    # Fixed phase window shared by both panels.
    xlo, xhi = 0.35, 0.65
    plt.xlim(xlo, xhi)
    plt.ylabel("Relative Flux")

    # Annotate the panel with the fit diagnostics. The raw string keeps the
    # LaTeX backslashes without relying on invalid escape sequences.
    ax = plt.gca()
    diagnostics = (
        r'$\chi^2_\nu$: ' + '{0:0.2f}'.format(model.chi2red) + '\n'
        + 'obs. rms: ' + '{0:0d}'.format(int(model.rms)) + '\n'
        + 'exp. rms: ' + '{0:0d}'.format(int(model.rms_predicted))
    )
    ax.text(0.85, 0.29, diagnostics,
            verticalalignment='top', horizontalalignment='left',
            transform=ax.transAxes, fontsize=12)

    # --- Bottom panel: residuals ---
    plt.subplot(212)
    plt.axhline(0, zorder=1, color='0.2', linestyle='dashed')

    for i in range(data.nvisit):
        ind = data.vis_num == i
        plt.plot(model.phase[ind], 1.0e6 * model.norm_resid[ind],
                 color=colors[i], marker='o', markersize=3, linestyle="none")

    plt.xlim(xlo, xhi)
    plt.ylabel("Residuals (ppm)")
    plt.xlabel("Orbital phase")

    plt.show()
import matplotlib.pyplot as plt
import seaborn as sns

# Load seaborn's "tips" example dataset and draw a scatter plot of
# tip against total bill on the current axes.
df = sns.load_dataset("tips")
sns.scatterplot(x=df["total_bill"], y=df["tip"])

# New figure; the subplot is added while the "whitegrid" axes style is active.
fig = plt.figure(figsize=(36, 27))
with sns.axes_style("whitegrid"):
    fig.add_subplot()

# NOTE(review): "random_thing" does not look like a seaborn or matplotlib
# palette name; if it is not one, this call raises. Confirm whether that is
# intentional (for example, this file being a fixture).
sns.set_palette("random_thing")
# NOTE(review): the returned palette is discarded, so this call on its own
# does not change the active palette.
sns.color_palette("Accent")
# Sets context, style and palette in a single call.
sns.set_theme(context="talk", style="ticks", palette="Accent")

# Second figure: violin plot of total bill per day.
plt.figure(figsize=(10, 6))
# NOTE(review): the annotation names sns.categorical, which looks like a
# module rather than a type; the title is set through the returned object.
ax: sns.categorical = sns.violinplot(x="day", y="total_bill", data=df)
ax.set_title("Bill Amount by Day")
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

cwd = os.getcwd()

# Plot styling: seaborn defaults with the "white" style, Arial as the
# sans-serif font and the "GnBu_d" palette.
sns.set()
sns.set_style("white")
from matplotlib import rcParams
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = ['Arial']
sns.set_palette("GnBu_d")

# Output directory for the plots, relative to the current working directory.
# exist_ok=True avoids the race between checking for the directory and
# creating it.
SAVEDIR = "../plots/stoner"
os.makedirs(SAVEDIR, exist_ok=True)


def DOS(E):
    """Return the density of states of a 1D tight-binding model at energy E.

    Evaluates ``1 / (pi * sqrt(4 - E**2))``. The expression diverges at
    ``|E| = 2`` and is not real-valued for ``|E| > 2``, so callers should
    stay strictly inside that interval.

    :param E: Energy, as a scalar or a NumPy array.
    :return: The density of states, with the same shape as ``E``.
    """
    return 1 / (np.pi * np.sqrt(4 - E**2))


# Energy grid; Emax is kept below 2 so DOS stays finite on the whole grid.
dE = 0.001
Emax = 1.9
Es = np.arange(-Emax, Emax + dE, dE)

Efermi = 1
deltaE = 0.1

fig1, ax1 = plt.subplots()
def check_hourly(b, measure_type, status):
    """Compare monthly ION interval totals with EUAS monthly values.

    Reads the interval readings of building ``b`` from the 'interval_ion'
    database, sums them per month, joins them with the EUAS monthly values
    from the 'all' database on (year, month), saves a line plot of both
    series as a PNG under ``<cwd>/input/FY/interval/ion_0627/cmp_euas/`` and
    returns summary statistics of the ION / EUAS ratio.

    NOTE(review): ``b`` and ``measure_type`` are interpolated directly into
    the SQL text; only pass trusted values.

    :param b: Building number, compared against the Building_Number column.
    :param measure_type: 'electric' or 'gas'. Also used as the table name
        (or table-name prefix) in the interval database.
    :param status: 'raw' reads the ``<measure_type>`` table; any other value
        reads ``<measure_type>_outlier_tag`` restricted to outlier == '0'.
    :return: A one-row DataFrame (the transposed ``describe()`` of the
        monthly ratio plus an 'overall' column holding
        sum(ION) / sum(EUAS)), or None when either query returns no rows.
    """
    euas_dict = {'electric': 'Electricity_(KWH)', 'gas': 'Gas_(Cubic_Ft)'}
    ion_dict = {'electric': 'Electric_(KWH)', 'gas': 'Gas_(CubicFeet)'}

    # --- Interval (ION) readings ---
    conn = uo.connect('interval_ion')
    try:
        with conn:
            if status == 'raw':
                df1 = pd.read_sql(
                    'SELECT * FROM {1} WHERE Building_Number = \'{0}\''.format(
                        b, measure_type), conn)
            else:
                df1 = pd.read_sql(
                    'SELECT * FROM {1}_outlier_tag WHERE Building_Number = \'{0}\' AND outlier == \'0\''
                    .format(b, measure_type), conn)
    finally:
        # Close the connection even if the query raises.
        conn.close()

    if len(df1) == 0:
        # The original message printed the literal text "{b}"; format it, and
        # stop here instead of processing an empty frame (the function
        # returned None for this case anyway).
        print('building {0} not in db ...'.format(b))
        return None

    # Aggregate the interval readings to monthly sums.
    df1['Date'] = pd.DatetimeIndex(pd.to_datetime(df1['Timestamp']))
    df1.set_index(df1['Date'], inplace=True)
    # NOTE(review): this is the legacy ``resample(rule, how)`` call; newer
    # pandas spells it ``resample('M').sum()``. Kept as-is to match the
    # pandas version the file targets.
    df1_re = df1.resample('M', 'sum')
    df1_re['month'] = df1_re.index.month
    df1_re['year'] = df1_re.index.year
    df1_re.reset_index(inplace=True)

    # --- EUAS monthly values ---
    conn = uo.connect('all')
    try:
        with conn:
            df2 = pd.read_sql(
                'SELECT Building_Number, year, month, [{1}] FROM EUAS_monthly WHERE Building_Number = \'{0}\' AND year != 2016.0'
                .format(b, euas_dict[measure_type]), conn)
    finally:
        # The original never closed this connection.
        conn.close()

    if len(df2) == 0:
        return None

    # Keep only the months present in both sources.
    df_inn = pd.merge(df1_re, df2, on=['year', 'month'], how='inner')
    df_inn.set_index(pd.DatetimeIndex(pd.to_datetime(df_inn['Date'])),
                     inplace=True)
    df_inn.drop('Date', axis=1, inplace=True)
    df_inn.rename(columns={
        ion_dict[measure_type]: 'ION',
        euas_dict[measure_type]: 'EUAS'
    }, inplace=True)

    # Monthly ratio, stored in a column named after the building.
    df_inn[b] = df_inn['ION'] / df_inn['EUAS']

    dsc = df_inn[[b]].describe().transpose()
    dsc['overall'] = df_inn['ION'].sum() / df_inn['EUAS'].sum()

    # --- Plot both monthly series ---
    sns.set_context("talk", font_scale=1.0)
    sns.set_palette(sns.color_palette('Set2'))
    line1, = plt.plot(df_inn.index, df_inn['ION'], '-o')
    line2, = plt.plot(df_inn.index, df_inn['EUAS'], '-o')
    plt.legend([line1, line2], ['ION', 'EUAS'], loc='center left',
               bbox_to_anchor=(1, 0.5))
    plt.title('{0} {1} ION vs EUAS monthly'.format(b, measure_type),
              fontsize=30)
    if measure_type == 'electric':
        plt.ylabel('KWH')
    else:
        plt.ylabel('Cubic Feet')

    path = os.getcwd(
    ) + '/input/FY/interval/ion_0627/cmp_euas/{0}_{1}.png'.format(
        b, measure_type)
    P.savefig(path, dpi=my_dpi, figsize=(2000 / my_dpi, 500 / my_dpi),
              bbox_inches='tight')
    plt.close()
    return dsc
import seaborn as sns import pandas as pd import numpy as np from pathlib import Path from matplotlib import pyplot as plt import matplotlib as mpl parser = argparse.ArgumentParser(description='Plot statistics.') parser.add_argument('input', nargs='+', type=Path, help='Input files with statistics.') options = parser.parse_args() plt.figure(figsize=(25, 15)) sns.set_palette("deep") sns.set(style="ticks") concat = pd.DataFrame() for result_file in options.input: app = str(result_file).split('/')[-1] print(app) with open(result_file) as f: statistics = json.load(f) df = pd.DataFrame(statistics).T df['time'] = df.index.map(lambda x: int(x.split('/')[-1].split('_')[-2])) df['strategy'] = df.index.map( lambda x: '_'.join(x.split('/')[-1].split('_')[:-2])) df = df[df['strategy'].isin([ 'events_count', 'possible_events', 'reverse_possible_events',
from sklearn.metrics.pairwise import pairwise_distances
# NOTE(review): these are private helpers imported from a private module
# path; confirm they exist in the scikit-learn version in use.
from sklearn.manifold.t_sne import (_joint_probabilities,
                                    _kl_divergence)

# Random state.
RS = 20150101

# We'll use matplotlib for graphics.
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import matplotlib
# %matplotlib inline

# We import seaborn to make nice plots.
import seaborn as sns
import os

sns.set_style('darkgrid')
sns.set_palette('muted')
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

# We'll generate an animation with matplotlib and moviepy.
from moviepy.video.io.bindings import mplfig_to_npimage
import moviepy.editor as mpy


def load_data(file_name):
    """Load the single Python object stored in a NumPy file.

    The file is read with pickling enabled and the object wrapped by the
    loaded array is returned via ``.item()``.

    NOTE(review): ``allow_pickle=True`` can execute arbitrary code while
    unpickling; only load files from trusted sources.

    :param file_name: Path of the file to load.
    :return: The object stored in the file.
    :raises ValueError: If the loaded array holds more than one element.
    """
    # A boolean replaces the original string 'TRUE', which only worked
    # because any non-empty string is truthy ('FALSE' would have enabled
    # pickling too).
    features = np.load(file_name, allow_pickle=True).item()
    return features
__version__ = "1.0.0"
__email__ = "*****@*****.**"
__status__ = "Development"

import json
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook
import numpy as np
import os
import seaborn as sns
import sys

sns.set_palette('dark')
plt.style.use('seaborn-white')


def makedirs(summary_path, year):
    """Create the per-year output directories under ``summary_path``.

    Ensures that ``<summary_path>/dist/<year>`` and
    ``<summary_path>/stats/<year>`` exist; the intermediate ``dist`` and
    ``stats`` directories are created as needed.

    :param summary_path: Root directory of the summary output.
    :param year: Year used as the name of the innermost directory.
    :raises OSError: If a directory cannot be created, for example because
        a path component exists and is not a directory.
    """
    for section in ('dist', 'stats'):
        # os.makedirs creates the parent directories too, and exist_ok=True
        # removes the race between checking for a directory and creating it.
        os.makedirs('{}/{}/{}'.format(summary_path, section, year),
                    exist_ok=True)
import argparse
from scipy import stats
import numpy as np
import pandas as pd
from cometml_api import api
import matplotlib.pyplot as plt
import seaborn as sns
import os
import ipdb

# Seaborn defaults with the "colorblind" palette for every plot in this module.
sns.set()
sns.set_palette("colorblind")


# NOTE(review): the default experiment_id is the same string that the
# commented-out Experiment(...) call below passes as api_key; confirm whether
# it is a credential that should not live in source control.
def plot_from_comet(experiment_id='98Hhyb58cThYVpxaOvbL3Yu8S', metric_name='trH1Cte', metric_name_x=None, plot_name=''):
    """Set up the output directory and the Comet data needed for a plot.

    The visible part of the function creates ``plots/<plot_name>``, fetches
    the experiments for ``experiment_id``, reads the metric names of the
    first experiment and picks the x/y metric names. ``metric_name`` and
    ``metric_name_x`` are not read in this part.

    NOTE(review): nothing is plotted or returned here; the rest of the
    function may lie outside this excerpt.

    :param experiment_id: Identifier passed to ``api.get_experiments``.
    :param metric_name: Not read in the visible part of the function.
    :param metric_name_x: Not read in the visible part of the function.
    :param plot_name: Sub-directory of ``plots/`` to create.
    """
    # Create the output directory; an existing directory is not an error.
    os.makedirs("plots/" + plot_name, exist_ok=True)
    #experiment = Experiment(api_key='98Hhyb58cThYVpxaOvbL3Yu8S', project_name='trhic', workspace='valthom')
    experiments = api.get_experiments(experiment_id)
    # Metric names come from the first returned experiment only.
    metric_names = api.get_metrics(experiments[0]["experiment_key"]).keys()
    # Accumulators for the x and y series; nothing fills them in this part.
    metricsx = []
    metricsy = []
    # Hard-coded metric names for the x and y axes.
    #metric_namex = 'trace/H1Ctr'
    metric_namex = 'trace/H1Cte'
    metric_namey = 'loss/te'
    #metric_namey = 'loss/te'
#Advanced Time Series Plots in Python #----------------------------- #%http://www.blackarbs.com/blog/advanced-time-series-plots-in-python/1/6/2017 import pandas as pd import pandas_datareader.data as web import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt %matplotlib inline import seaborn as sns sns.set_style('white', {"xtick.major.size": 2, "ytick.major.size": 2}) flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71","#f4cae4"] sns.set_palette(sns.color_palette(flatui,7)) import missingno as msno p=print save_loc = 'e:/pYwork/output/' logo_loc = 'e:/pYwork/output/' #----------- #Get time series data from Yahoo finance and recession data from FRED. f1 = '8d53c7024b4413ae3180a39ab9437931' # recession data from FRED #https://research.stlouisfed.org/docs/api/api_key.html%5d(http:/api.stlouisfed.org/api_key.html start = pd.to_datetime('1999-01-01') end = pd.datetime.today() mkt = '^GSPC' # month start b/c FED data is month start
def plot_per_arbor_result(analysis_results,
                          morphology,
                          options,
                          figure_name=None,
                          figure_title=None,
                          figure_xlabel=None,
                          add_percentage=False):
    """Plot the analysis result per arbor as a horizontal bar chart.

    One bar is drawn per arbor (apical dendrites, then basal dendrites, then
    axons), coloured with the arbor's own colour and labelled with its value.
    The figure is written as a PNG and a PDF to
    ``<analysis_directory>/<morphology label>/<figure_name>.{png,pdf}`` and
    then closed. Nothing is returned.

    :param analysis_results:
        A data structure containing the result. Its
        ``apical_dendrites_result``, ``basal_dendrites_result`` and
        ``axons_result`` attributes are each either None or a sequence with
        one value per arbor.
    :param morphology:
        A given morphology file, providing the arbors, their colors and the
        morphology label.
    :param options:
        System options; ``options.io.analysis_directory`` is the output root.
    :param figure_name:
        The prefix of the figure image.
    :param figure_title:
        The title that will be written on the figure.
    :param figure_xlabel:
        The X-axis label of the figure.
    :param add_percentage:
        If this flag is True, the share of each bar in the total is appended
        to the value written next to the bar.
    """

    # Verify the presence of the plotting packages
    nmv.utilities.verify_plotting_packages()

    # Plotting imports
    import numpy
    import seaborn
    import matplotlib
    matplotlib.use('agg')  # To resolve the tkinter issue
    import matplotlib.pyplot as pyplot

    # Clean the figure
    pyplot.clf()

    # Arbor labels, their values and their colors, in plotting order
    x_data = list()
    y_data = list()
    palette = list()

    # Apical dendrites
    if analysis_results.apical_dendrites_result is not None:
        for i, result in enumerate(analysis_results.apical_dendrites_result):
            x_data.append(morphology.apical_dendrites[i].label)
            y_data.append(result)
            palette.append(morphology.apical_dendrites_colors[i])

    # Basal dendrites
    if analysis_results.basal_dendrites_result is not None:
        for i, result in enumerate(analysis_results.basal_dendrites_result):
            x_data.append(morphology.basal_dendrites[i].label)
            y_data.append(result)
            palette.append(morphology.basal_dendrites_colors[i])

    # Axons
    if analysis_results.axons_result is not None:
        for i, result in enumerate(analysis_results.axons_result):
            x_data.append(morphology.axons[i].label)
            y_data.append(result)
            palette.append(morphology.axons_colors[i])

    # Total number of bars, similar to arbors
    total_number_of_bars = len(x_data)

    # The width of each bar
    bar_width = 0.65

    # Adjust seaborn configuration
    seaborn.set_style("white")

    # One palette entry per bar
    seaborn.set_palette(palette=palette)

    # Adjusting the matplotlib parameters
    pyplot.rcParams['axes.grid'] = False
    pyplot.rcParams['font.family'] = 'NimbusSanL'
    pyplot.rcParams['axes.linewidth'] = 0.0
    pyplot.rcParams['axes.labelsize'] = bar_width * 10
    pyplot.rcParams['axes.labelweight'] = 'regular'
    pyplot.rcParams['xtick.labelsize'] = bar_width * 10
    pyplot.rcParams['ytick.labelsize'] = bar_width * 10
    pyplot.rcParams['legend.fontsize'] = 10
    pyplot.rcParams['axes.titlesize'] = bar_width * 1.25 * 10
    pyplot.rcParams['axes.axisbelow'] = True
    pyplot.rcParams['axes.edgecolor'] = '0.1'

    # numpy arrays from the lists
    labels = numpy.asarray(x_data)
    values = numpy.asarray(y_data)

    # The figure height scales with the number of bars
    pyplot.figure(figsize=(bar_width * 4,
                           total_number_of_bars * 0.5 * bar_width))

    # Horizontal bar plot: values along X, arbor labels along Y
    ax = seaborn.barplot(x=values, y=labels, edgecolor='none')

    # Title
    ax.set(xlabel=figure_xlabel, title=figure_title)
    ax.spines['left'].set_linewidth(0.5)
    ax.spines['left'].set_color('black')

    # Give every bar the same thickness, keeping it centered where it was
    for bar in ax.patches:
        centre = bar.get_y() + bar.get_height() / 2.0
        bar.set_y(centre - bar_width / 2.0)
        bar.set_height(bar_width)

    # Sum of all bar lengths, the denominator of the percentages
    total = sum(patch.get_width() for patch in ax.patches)

    # Write the value (and optionally the percentage) at the end of each bar
    for i, patch in enumerate(ax.patches):

        # The text starts where the bar ends, slightly below its center line
        text_x = patch.get_width()
        text_y = patch.get_y() + (bar_width / 2.0) + (bar_width / 8.0)

        # With a non-positive total there is no meaningful share. The
        # original left the percentage unbound in that case, which raised
        # when add_percentage was True.
        if total > 0:
            percentage = round((patch.get_width() / total) * 100, 2)
        else:
            percentage = 0.0

        # Floats (Python or numpy) get one decimal, everything else is
        # printed as an integer
        is_float = isinstance(y_data[i], (float, numpy.floating))

        if add_percentage:
            if is_float:
                value = ' %2.1f (%2.1f%%)' % (y_data[i], percentage)
            else:
                value = ' %d (%2.1f%%)' % (y_data[i], percentage)
        else:
            if is_float:
                value = ' %2.1f' % y_data[i]
            else:
                value = ' %d' % y_data[i]

        ax.text(text_x, text_y, value,
                fontsize=bar_width * 10, color='dimgrey')

    # Save a PNG figure
    pyplot.savefig(
        '%s/%s/%s.png' % (options.io.analysis_directory, morphology.label,
                          figure_name),
        bbox_inches='tight', transparent=True, dpi=600)

    # Save a PDF figure
    pyplot.savefig(
        '%s/%s/%s.pdf' % (options.io.analysis_directory, morphology.label,
                          figure_name),
        bbox_inches='tight', transparent=True, dpi=600)

    # Close the figures
    pyplot.close()