def plot_convergence_iterations(stoch, batch, models, nb_documents, batch_size):
    """Plot held-out log-likelihood of stochastic vs. batch training per pass.

    Each entry of ``stoch`` is treated as one epoch: all values but the last
    are drawn as mini-batch measurements, the last one as the end-of-epoch
    measurement.  ``batch`` is drawn as one value per pass and ``models``
    provides the legend labels.  ``nb_documents`` and ``batch_size`` are passed
    to ``gen_batches`` to count the mini-batches in one pass.
    """
    nb_mini_batches = len(gen_batches(nb_documents, batch_size))

    # Fractional position inside a pass of every fifth mini-batch.
    pass_fractions = np.arange(5, nb_mini_batches, 5) / nb_mini_batches

    mini_batch_x = []
    mini_batch_ll = []
    epoch_ll = []
    for epoch_no, epoch in enumerate(stoch, start=1):
        mini_batch_x.extend(epoch_no + pass_fractions)
        mini_batch_ll.extend(epoch[:-1])
        epoch_ll.append(epoch[-1:][0])

    # x positions shared by the end-of-epoch markers and the batch curve.
    pass_x = np.arange(1, len(batch) + 1) + 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    plt.plot(mini_batch_x, mini_batch_ll, lw=1, marker='+')
    plt.plot(pass_x, epoch_ll, marker='o', linestyle='None', markersize=4)
    plt.plot(pass_x, batch, lw=1, marker='*', markerfacecolor='None')

    plt.legend(models)
    plt.xlabel("number of passes")
    plt.ylabel("heldout log-likelihood per word")
    plt.show()
def main():
    """Print and chart the ten most frequent words of a text file.

    Prompts for a words file and an exclude file, counts the words with
    ``buildHashmap``, removes every excluded word (lower-cased) with
    ``delete``, prints a table of the top ten words with their count and
    percentage, and shows a bar chart of their frequencies.

    Raises:
        OSError: if either file cannot be opened.
    """
    # Context managers guarantee that every file that was successfully opened
    # is closed again, even when a later step (e.g. the second open) raises.
    fileName = input("Enter filename with words: ")
    with open(fileName, "r", encoding='ascii') as inputFile:
        words = inputFile.read().strip().split()

    excludeFileName = input("Enter exclude filename with words: ")
    with open(excludeFileName, "r") as excludeFile:
        excludeWords = excludeFile.read().strip().split()

    # build hashmap with input word list
    wordsMap = buildHashmap(words)

    # delete excluded words from hashmap
    for key in excludeWords:
        delete(wordsMap, key.lower())

    # get top 10 list
    top10 = sorted(wordsMap.items(), key=operator.itemgetter(1),
                   reverse=True)[:10]

    # prepare output table with Word and Count column
    df = pd.DataFrame(top10, columns=['Word', 'Count'])

    # total count over all remaining words, required to calculate %
    total = sum(wordsMap[key] for key in wordsMap)

    # add % column to existing table
    df['Count %'] = get_column(top10, 1, total)

    # print final result as a table
    print(df)

    # draw bar chart
    ax = plt.figure().gca()
    # to show whole numbers on the y-axis
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    positions = range(len(top10))
    plt.bar(positions, [val[1] for val in top10], align='center',
            color='gray')
    plt.xticks(positions, [val[0] for val in top10])
    plt.xticks(rotation=90)
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.title('Top 10 Words')
    plt.show()
def plot_corr(draw, vars=(0, 1)):
    """Scatter two variables of *draw* with marginal histograms and KDE curves.

    Parameters
    ----------
    draw
        Object exposing ``labels`` (indexable by variable number) and
        ``points`` (indexed as ``points[:, v]``).
    vars
        Pair of variable indices; the first goes on the x axis, the second on
        the y axis.  Anything that does not unpack into exactly two items
        raises ``ValueError``.
    """
    from pylab import axes, setp, MaxNLocator

    _, _ = vars  # Make sure vars is length 2
    labels = [draw.labels[v] for v in vars]
    values = [draw.points[:, v] for v in vars]

    # Form kernel density estimates of the parameters
    xmin, xmax = min(values[0]), max(values[0])
    density_x = KDE1D(values[0])
    x = linspace(xmin, xmax, 100)
    px = density_x(x)

    density_y = KDE1D(values[1])
    ymin, ymax = min(values[1]), max(values[1])
    y = linspace(ymin, ymax, 100)
    py = density_y(y)

    nbins = 50

    # Main panel: raw points.
    ax_data = axes([0.1, 0.1, 0.63, 0.63])  # x,y,w,h
    ax_data.plot(values[0], values[1], 'k.', markersize=1)
    ax_data.set_xlabel(labels[0])
    ax_data.set_ylabel(labels[1])

    # Top panel: marginal of the first variable.  ``density=True`` is the
    # supported spelling of the removed ``normed`` keyword, so the histogram
    # is on the same scale as the KDE curve drawn over it.
    ax_hist_x = axes([0.1, 0.75, 0.63, 0.2], sharex=ax_data)
    ax_hist_x.hist(values[0], nbins, orientation='vertical', density=True)
    ax_hist_x.plot(x, px, 'k-')
    ax_hist_x.yaxis.set_major_locator(MaxNLocator(4, prune="both"))
    setp(ax_hist_x.get_xticklabels(), visible=False)

    # Right panel: marginal of the second variable, drawn sideways.
    ax_hist_y = axes([0.75, 0.1, 0.2, 0.63], sharey=ax_data)
    ax_hist_y.hist(values[1], nbins, orientation='horizontal', density=True)
    ax_hist_y.plot(py, y, 'k-')
    ax_hist_y.xaxis.set_major_locator(MaxNLocator(4, prune="both"))
    setp(ax_hist_y.get_yticklabels(), visible=False)
def plot_convergence_epochs(nb_iterations, inspectors, models):
    """Plot one held-out log-likelihood curve per inspector against the pass number.

    Parameters
    ----------
    nb_iterations
        Number of passes; the x axis runs from 1 to ``nb_iterations``.
    inspectors
        Iterable of y-value sequences, one curve each.
    models
        Legend labels, passed to ``plt.legend``.
    """
    markers = ['o', 's']
    iterations = np.arange(nb_iterations) + 1

    fig = plt.figure()
    # Create the axes once and draw every curve on it.  Requesting
    # ``add_subplot(111)`` again for each curve would rely on the figure
    # handing back the already existing axes, which newer matplotlib
    # releases do not do (they stack a fresh axes on top).
    ax = fig.add_subplot(111)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    for idx, insp in enumerate(inspectors):
        # Cycle through the marker styles so inspectors beyond the second are
        # still plotted instead of being silently dropped.
        marker = markers[idx % len(markers)]
        ax.plot(iterations, insp, lw=1, marker=marker, markerfacecolor='None')

    plt.xlabel("number of passes")
    plt.ylabel("heldout log-likelihood per word")
    plt.legend(models)
    plt.show()
def create_end_graphs(acc, val_acc, loss, val_loss):
    """Show training/test accuracy and loss curves in two side-by-side panels.

    The left panel plots ``acc`` and ``val_acc`` with percent-formatted y
    ticks, the right panel plots ``loss`` and ``val_loss``.  Both x axes use
    integer ticks and are labelled "Epoch".
    """
    panels = (
        # (subplot index, title, training curve, test curve, legend location)
        (1, "Accuracy", acc, val_acc, 'lower right'),
        (2, "Loss", loss, val_loss, 'upper right'),
    )

    plt.figure(figsize=(10, 4))
    for position, title, training_curve, test_curve, legend_loc in panels:
        panel = plt.subplot(1, 2, position)
        if position == 1:
            # Only the accuracy panel shows its y ticks as percentages.
            # noinspection PyUnresolvedReferences
            panel.yaxis.set_major_formatter(
                mlp.ticker.StrMethodFormatter('{x}%'))
        panel.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.title(title)
        plt.xlabel("Epoch")
        plt.plot(training_curve, 'b-', label='training')
        plt.plot(test_curve, 'g-', label='test')
        plt.legend(loc=legend_loc)
    plt.show()
def create_random_bar_pot():
    """Build a small bar chart from randomly drawn data and return its figure.

    Bar positions are consecutive integer years starting at a random year,
    bar heights are Gaussian draws.  Colours come from the module-level
    ``bgcolor``, ``labelcolor``, ``linecolor`` and ``fill``.
    """
    # Parameters for the random plot data.  The draws are made in a fixed
    # order so that a seeded generator yields the same figure.
    bar_count = randint(2, 2)
    first_year = randint(1950, 1995)
    height_mean = gauss(6, 3)
    height_spread = expovariate(0.7)

    # Parameters for the plot style.
    fig_width = 4 * 0.5
    fig_height = 3 * 0.5
    # Not used for drawing; the gauss() call is kept so the sequence of
    # random draws stays unchanged.
    barwidth_sd = fig_width * 2 / bar_count + gauss(0, 0.05)

    # Random data.
    years = np.linspace(start=first_year, stop=first_year + bar_count - 1,
                        num=bar_count).astype(int)
    heights = [gauss(height_mean, height_spread) for _ in range(bar_count)]

    fig = plt.figure(figsize=(fig_width, fig_height), facecolor=bgcolor)
    ax = plt.subplot(1, 1, 1, facecolor=bgcolor)

    for side in ('top', 'right', 'left'):
        ax.spines[side].set_visible(False)
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_major_locator(MaxNLocator(integer=True))

    ax.tick_params(colors=labelcolor)
    y_axis = ax.yaxis
    y_axis.tick_right()
    y_axis.set_label_position("right")
    y_axis.set_ticks_position('none')
    ax.set_axisbelow(True)
    y_axis.grid(color=linecolor, linestyle=':', linewidth=1)

    ax.bar(years, heights, width=0.5, facecolor=fill, edgecolor=fill)
    return fig
def plot(self, plot_num, data, xlabel, ylabel, txt_position=None):
    """Create histogram for statistic.

    Draws a relative-frequency histogram of ``data`` in subplot ``plot_num``
    of ``self.fig`` and stores the axes in ``self.axis``.

    Parameters
    ----------
    plot_num
        Subplot index within the ``self.rows`` x ``self.cols`` grid.
    data
        Sequence of values to histogram.
    xlabel, ylabel
        Axis labels.
    txt_position
        ``'left'`` or ``'right'`` to print median/mean/std in that upper
        corner; any other value omits the text.
    """
    self.axis = self.fig.add_subplot(self.rows, self.cols, plot_num)

    # Equal weights summing to one turn the counts into fractions.
    weights = np_ones_like(data) / float(len(data))

    # At most 20 bins, fewer when there are few distinct values.  Clamp to one
    # bin so data with a single distinct value does not request zero bins,
    # which the histogram call rejects.
    num_bins = max(1, min(20, len(set(data)) - 1))

    self.axis.hist(data,
                   bins=num_bins,
                   rwidth=0.9,
                   weights=weights,
                   color='#fdae6b',
                   align='mid')

    self.axis.set_xlabel(xlabel)
    self.axis.set_ylabel(ylabel)
    self.axis.xaxis.set_major_locator(MaxNLocator(integer=True))
    self.axis.yaxis.set_major_formatter(
        FuncFormatter(lambda y, _: '{:.1%}'.format(y)))

    # report summary statistics
    stat_txt = f'median = {np_median(data):.1f}\n'
    stat_txt += f'mean = {np_mean(data):.1f}\n'
    stat_txt += f'std = {np_std(data):.1f}'

    if txt_position == 'left':
        self.axis.text(0.05, 0.95, stat_txt,
                       transform=self.axis.transAxes,
                       fontsize=self.options.tick_font_size,
                       verticalalignment='top')
    elif txt_position == 'right':
        self.axis.text(0.95, 0.95, stat_txt,
                       transform=self.axis.transAxes,
                       fontsize=self.options.tick_font_size,
                       verticalalignment='top',
                       horizontalalignment='right')

    self.prettify(self.axis)

    # Hide the right and top frame lines.
    for loc, spine in self.axis.spines.items():
        if loc in ['right', 'top']:
            spine.set_color('none')

    self.fig.tight_layout(pad=0.1, w_pad=1.0, h_pad=1.0)
    self.draw()
(len(all_nodes), t)) infection_times_per_k.append(t) net_increases_per_k.append(net_increase_per_round) if t > longest_process_len: longest_process_len = t k_avgInfecTime_map[k] = round( (sum(infection_times_per_k) / len(infection_times_per_k))) k_netIncrease_map[k] = net_increases_per_k k += 1 fig1 = plt.figure(fig_idx, figsize=(9, 7.2)) plt.plot(k_avgInfecTime_map.keys(), k_avgInfecTime_map.values(), 'b--', label='average infection time') #plt.plot(k_avgInfecTime_map.keys(), [math.log2(n)**2/ math.log2(i)**2 for i in k_avgInfecTime_map.keys()], 'r-', label='$\log^2\ n\ /\ \log^2\ k$') plt.yscale('log') plt.yticks(y_ticks, y_ticks_labels) xa = plt.gca().get_xaxis() xa.set_major_locator(MaxNLocator(integer=True)) plt.xlabel('Branching factor (k)') plt.ylabel('Average infection time (in rounds)\nlogarithmic scale') plt.title('k-BIPS on hypercube graph with degree %d and %d nodes' % (d, n)) plt.grid(True) #plt.legend(loc='best') fig1.savefig('bips_hypercube_fixed-d_n' + str(len(all_nodes)) + '.png', bbox_inches='tight') plt.close(fig1)
def main(pdf, db, grp, s_data):
    """
    Generate page of the group report pdf that contains:
    - bar plots of the number of acquired volumes for each subject
    - violin plots for outlier distributions
    - violin plots for absolute and relative motion
    - violin plots for CNR and SNR

    The first page (volumes, motion and, if ``db['par_flag']``, eddy
    parameters) is always written.  A second page with outlier and SNR/CNR
    plots is written only if ``db['ol_flag']`` or ``db['cnr_flag']`` is set.
    When ``s_data`` is given, the subject's own values are overlaid as white
    star markers on the group distributions.

    Arguments:
        - pdf: qc pdf file
        - db: dictionary database
        - grp: optional grouping variable (the literal ``False`` disables the
          group histogram)
        - s_data: single subject dictionary to update pdf (``None`` for a
          plain group report)
    """
    # NOTE(review): the source formatting of this function was collapsed; the
    # nesting below is reconstructed from variable usage (e.g. ec_span/vd_span
    # are only defined under db['par_flag']) -- confirm against the original.

    #================================================
    # Prepare figure
    #================================================
    plt.figure(figsize=(8.27, 11.69))  # Standard portrait A4 sizes
    plt.suptitle("SQUAD: Group report", fontsize=10, fontweight='bold')

    # Groups and acquired volumes
    if grp is not False:
        ax1_00 = plt.subplot2grid((3, 4), (0, 0), colspan=1)
        # Histogram of the first named field of grp; its first entry is skipped.
        g = seaborn.distplot(
            grp[grp.dtype.names[0]][1:],
            vertical=True,
            bins=np.arange(-1.5 + round(min(grp[grp.dtype.names[0]][1:])),
                           1.5 + round(max(grp[grp.dtype.names[0]][1:]))),
            norm_hist=False,
            kde=False,
            ax=ax1_00)
        ax1_00.set_ylabel(grp.dtype.names[0])
        ax1_00.set_xlabel("N")
        # ax1_00.set_xlim([-1+round(min(grp[grp.dtype.names[0]][1:])),1+round(max(grp[grp.dtype.names[0]][1:]))])
        # ax1_00.set_xticks(np.unique(np.round(grp[grp.dtype.names[0]])))
        ax1_00.xaxis.set_major_locator(MaxNLocator(integer=True))
        # Keep only the first (0) and the largest automatic tick.
        ax1_00.set_xticks([0, np.max(ax1_00.get_xticks())])
        ax1_01 = plt.subplot2grid((3, 4), (0, 1), colspan=3)
    else:
        # No group histogram: the volume bar plot takes the whole row.
        ax1_01 = plt.subplot2grid((3, 4), (0, 0), colspan=4)
    seaborn.barplot(x=np.arange(1, 1 + db['data_no_subjects']),
                    y=np.sum(db['data_protocol'], axis=1),
                    color='blue',
                    ax=ax1_01)
    # Label only the subjects whose row sum differs from the most common one.
    n_vols, counts = np.unique(np.sum(db['data_protocol'], axis=1),
                               return_counts=True)
    n_vols_mode = n_vols[np.argmax(counts)]
    n_vols_ol = 1 + np.where(
        np.sum(db['data_protocol'], axis=1) != n_vols_mode)[0]
    ax1_01.set_xticks(n_vols_ol)
    ax1_01.set_xticklabels(n_vols_ol)
    ax1_01.tick_params(labelsize=6)
    plt.setp(ax1_01.get_xticklabels(), rotation=90)
    ax1_01.set_ylim(bottom=0)
    ax1_01.set_xlabel("Subject")
    ax1_01.set_ylabel("No. acquired volumes")

    # MOTION
    # Absolute
    ax2_00 = plt.subplot2grid((3, 4), (1, 0), colspan=1)
    seaborn.violinplot(data=db['qc_motion'][:, 0],
                       scale='width',
                       width=0.5,
                       palette='Set3',
                       linewidth=1,
                       inner='point',
                       ax=ax2_00)
    seaborn.despine(left=True, bottom=True, ax=ax2_00)
    ax2_00.set_ylabel("mm (avg)")
    ax2_00.set_ylim(bottom=0)
    ax2_00.set_title("Abs. motion")
    ax2_00.set_xticklabels([""])
    # Relative
    ax2_01 = plt.subplot2grid((3, 4), (1, 1), colspan=1)
    seaborn.violinplot(data=db['qc_motion'][:, 1],
                       scale='width',
                       width=0.5,
                       palette='Set3',
                       linewidth=1,
                       inner='point',
                       ax=ax2_01)
    seaborn.despine(left=True, bottom=True, ax=ax2_01)
    ax2_01.set_ylabel("mm (avg)")
    ax2_01.set_ylim(bottom=0)
    ax2_01.set_title("Rel. motion")
    ax2_01.set_xticklabels([""])
    # Check if needs to update single subject reports
    if s_data is not None:
        ax2_00.scatter(0, s_data['qc_mot_abs'], s=100, marker='*', c='w',
                       edgecolors='k', linewidths=1)
        ax2_01.scatter(0, s_data['qc_mot_rel'], s=100, marker='*', c='w',
                       edgecolors='k', linewidths=1)

    # EDDY PARAMETERS
    if db['par_flag']:
        # Translations (columns 0-2 of qc_parameters)
        ax2_02 = plt.subplot2grid((3, 4), (1, 2), colspan=1)
        seaborn.violinplot(data=db['qc_parameters'][:, 0:3],
                           scale='width',
                           width=0.5,
                           palette='Set3',
                           linewidth=1,
                           inner='point',
                           ax=ax2_02)
        seaborn.despine(left=True, bottom=True, ax=ax2_02)
        ax2_02.set_ylabel("mm (avg)")
        ax2_02.set_title("Translations")
        ax2_02.set_xticklabels(["x", "y", "z"])
        # Rotations (columns 3-5, converted from radians to degrees)
        ax2_03 = plt.subplot2grid((3, 4), (1, 3), colspan=1)
        seaborn.violinplot(data=np.rad2deg(db['qc_parameters'][:, 3:6]),
                           scale='width',
                           width=0.5,
                           palette='Set3',
                           linewidth=1,
                           inner='point',
                           ax=ax2_03)
        seaborn.despine(left=True, bottom=True, ax=ax2_03)
        ax2_03.set_ylabel("deg (avg)")
        ax2_03.set_title("Rotations")
        ax2_03.set_xticklabels(["x", "y", "z"])
        # Eddy currents
        # The bottom row has 4 columns: the EC panel gives up one column for
        # the susceptibility panel and two for the S2V panels when present.
        ec_span = 4
        vd_span = 0
        if db['susc_flag']:
            ec_span = ec_span - 1
            vd_span = 1
        if db['s2v_par_flag']:
            ec_span = ec_span - 2
        ax3_00 = plt.subplot2grid((3, 4), (2, 0), colspan=ec_span)
        seaborn.violinplot(data=db['qc_parameters'][:, 6:9],
                           scale='width',
                           width=0.5,
                           palette='Set3',
                           linewidth=1,
                           inner='point',
                           ax=ax3_00)
        seaborn.despine(left=True, bottom=True, ax=ax3_00)
        ax3_00.set_title("EC linear terms")
        ax3_00.set_ylabel("Hz/mm (std)")
        ax3_00.set_xticklabels(["x", "y", "z"])
        ax3_00.set_ylim(bottom=0)
        # Check if needs to update single subject reports
        if s_data is not None:
            ax2_02.scatter([0, 1, 2], s_data['qc_params_avg'][0:3], s=100,
                           marker='*', c='w', edgecolors='k', linewidths=1)
            ax2_03.scatter([0, 1, 2],
                           np.rad2deg(s_data['qc_params_avg'][3:6]), s=100,
                           marker='*', c='w', edgecolors='k', linewidths=1)
            ax3_00.scatter([0, 1, 2], s_data['qc_params_avg'][6:9], s=100,
                           marker='*', c='w', edgecolors='k', linewidths=1)
        # Susceptibility
        # (ax3_00 is re-bound to each further bottom-row panel from here on.)
        if db['susc_flag']:
            ax3_00 = plt.subplot2grid((3, 4), (2, ec_span), colspan=vd_span)
            seaborn.violinplot(data=db['qc_susceptibility'],
                               scale='width',
                               width=0.5,
                               palette='Set3',
                               linewidth=1,
                               inner='point',
                               ax=ax3_00)
            seaborn.despine(left=True, bottom=True, ax=ax3_00)
            ax3_00.set_title("Susceptibility")
            ax3_00.set_ylabel("Vox (std)")
            ax3_00.set_xticklabels([""])
            ax3_00.set_ylim(bottom=0)
            if s_data is not None:
                ax3_00.scatter(0, s_data['qc_vox_displ_std'], s=100,
                               marker='*', c='w', edgecolors='k',
                               linewidths=1)
        # S2V motion
        if db['s2v_par_flag']:
            # Translations
            ax3_00 = plt.subplot2grid((3, 4), (2, ec_span + vd_span),
                                      colspan=1)
            seaborn.violinplot(data=db['qc_s2v_parameters'][:, 0:3],
                               scale='width',
                               width=0.5,
                               palette='Set3',
                               linewidth=1,
                               inner='point',
                               ax=ax3_00)
            seaborn.despine(left=True, bottom=True, ax=ax3_00)
            ax3_00.set_title("S2V translations")
            ax3_00.set_ylabel("mm (std)")
            ax3_00.set_xticklabels(["x", "y", "z"])
            ax3_00.set_ylim(bottom=0)
            if s_data is not None:
                ax3_00.scatter([0, 1, 2],
                               s_data['qc_s2v_params_avg_std'][0:3], s=100,
                               marker='*', c='w', edgecolors='k',
                               linewidths=1)
            # Rotations
            ax3_00 = plt.subplot2grid((3, 4), (2, ec_span + vd_span + 1),
                                      colspan=1)
            seaborn.violinplot(data=db['qc_s2v_parameters'][:, 3:6],
                               scale='width',
                               width=0.5,
                               palette='Set3',
                               linewidth=1,
                               inner='point',
                               ax=ax3_00)
            seaborn.despine(left=True, bottom=True, ax=ax3_00)
            ax3_00.set_title("S2V rotations")
            ax3_00.set_ylabel("deg (std)")
            ax3_00.set_xticklabels(["x", "y", "z"])
            ax3_00.set_ylim(bottom=0)
            if s_data is not None:
                ax3_00.scatter([0, 1, 2],
                               s_data['qc_s2v_params_avg_std'][3:6], s=100,
                               marker='*', c='w', edgecolors='k',
                               linewidths=1)

    #================================================
    # Format figure, save and close it
    #================================================
    plt.tight_layout(h_pad=1, pad=4)
    plt.savefig(pdf, format='pdf')
    plt.close()

    # OUTLIERS AND CNR
    if db['ol_flag'] or db['cnr_flag']:
        plt.figure(figsize=(8.27, 11.69))  # Standard portrait A4 sizes
        plt.suptitle("SQUAD: Group report", fontsize=10, fontweight='bold')

        # Look for shared b-values and PE directions if updating single subject reports
        if s_data is not None:
            b_db = (np.array(db['data_unique_bvals'])).reshape(-1, 1)
            b_sub = (np.array(s_data['data_unique_bvals'])).reshape(-1, 1)
            # Rows of (database index, subject index) for every pair of
            # b-values that differ by less than 100.
            common_b = np.array(
                np.all((np.abs(b_db[:, None, :] - b_sub[None, :, :]) < 100),
                       axis=-1).nonzero()).T
            # Entries are reshaped to rows of 4 values; only the first 3 of
            # each row are compared.
            pe_db = np.reshape(np.atleast_2d(db['data_unique_pes']),
                               (-1, 4))[:, 0:3]
            pe_sub = np.reshape(np.atleast_2d(s_data['data_eddy_para']),
                                (-1, 4))[:, 0:3]
            # Rows of (database index, subject index) for exactly equal rows.
            common_pe = np.array(
                np.all((pe_db[:, None, :] == pe_sub[None, :, :]),
                       axis=-1).nonzero()).T

        # OUTLIERS
        if db['ol_flag']:
            # Total (column 0 of qc_outliers)
            ax1_00 = plt.subplot2grid((2, 3), (0, 0), colspan=1)
            seaborn.violinplot(data=db['qc_outliers'][:, 0],
                               scale='width',
                               width=0.5,
                               palette='Set3',
                               linewidth=1,
                               inner='point',
                               ax=ax1_00)
            seaborn.despine(left=True, bottom=True, ax=ax1_00)
            ax1_00.set_title("Total outliers")
            ax1_00.set_ylabel("%")
            ax1_00.set_ylim(bottom=0)
            ax1_00.set_xticklabels([""])
            # b-shell (the next data_no_shells columns)
            ax1_01 = plt.subplot2grid((2, 3), (0, 1), colspan=1)
            seaborn.violinplot(data=db['qc_outliers'][:, 1:1 + db['data_no_shells']],
                               scale='width',
                               width=0.5,
                               palette='Set3',
                               linewidth=1,
                               inner='point',
                               ax=ax1_01)
            seaborn.despine(left=True, bottom=True, ax=ax1_01)
            ax1_01.set_ylabel("%")
            ax1_01.set_ylim(bottom=0)
            ax1_01.set_title("b-value outliers")
            ax1_01.set_xticklabels(db['data_unique_bvals'])
            ax1_01.set_xlabel("b-value")
            # PE direction (all remaining columns)
            ax1_02 = plt.subplot2grid((2, 3), (0, 2), colspan=1)
            seaborn.violinplot(data=db['qc_outliers'][:, 1 + db['data_no_shells']:],
                               scale='width',
                               width=0.5,
                               palette='Set3',
                               linewidth=1,
                               inner='point',
                               ax=ax1_02)
            seaborn.despine(left=True, bottom=True, ax=ax1_02)
            ax1_02.set_title("PE dir. outliers")
            ax1_02.set_ylabel("%")
            ax1_02.set_ylim(bottom=0)
            ax1_02.set_xlabel("PE direction")
            # Check if needs to update single subject reports
            if (s_data is not None and s_data['qc_ol_flag']):
                ax1_00.scatter(0, s_data['qc_outliers_tot'], s=100,
                               marker='*', c='w', edgecolors='k',
                               linewidths=1)
                ax1_01.scatter(common_b[:, 0],
                               np.array(s_data['qc_outliers_b'])[common_b[:, 1]],
                               s=100, marker='*', c='w', edgecolors='k',
                               linewidths=1)
                ax1_02.scatter(common_pe[:, 0],
                               np.array(
                                   s_data['qc_outliers_pe'])[common_pe[:, 1]],
                               s=100, marker='*', c='w', edgecolors='k',
                               linewidths=1)

        if db['cnr_flag']:
            # Only referenced by the commented-out normalised plots below.
            vox_volume = np.prod(np.array(db['data_vox_size']))
            # SNR (column 0 of qc_cnr)
            ax2_01 = plt.subplot2grid((2, 3), (1, 0), colspan=1)
            # seaborn.violinplot(data=np.sqrt(db['data_no_b0_vols'])*db['qc_cnr'][:,0]/np.sqrt(vox_volume), scale='width', width=0.5, palette='Set3', linewidth=1, inner='point', ax=ax2_01)
            seaborn.violinplot(data=db['qc_cnr'][:, 0],
                               scale='width',
                               width=0.5,
                               palette='Set3',
                               linewidth=1,
                               inner='point',
                               ax=ax2_01)
            seaborn.despine(left=True, bottom=True, ax=ax2_01)
            ax2_01.set_ylim(bottom=0)
            ax2_01.set_title("SNR (avg)")
            ax2_01.set_xticklabels("0")
            ax2_01.set_xlabel("b-value")
            # CNR (remaining columns of qc_cnr)
            ax2_02 = plt.subplot2grid((2, 3), (1, 1), colspan=2)
            # seaborn.violinplot(data=np.sqrt(db['data_no_dw_vols']/db['data_no_shells'])*db['qc_cnr'][:,1:]/np.sqrt(vox_volume), scale='width', width=0.5, palette='Set3', linewidth=1, inner='point', ax=ax2_02)
            seaborn.violinplot(data=db['qc_cnr'][:, 1:],
                               scale='width',
                               width=0.5,
                               palette='Set3',
                               linewidth=1,
                               inner='point',
                               ax=ax2_02)
            seaborn.despine(left=True, bottom=True, ax=ax2_02)
            ax2_02.set_ylim(bottom=0)
            ax2_02.set_title("CNR (avg)")
            ax2_02.set_xlabel("b-value")
            ax2_02.set_xticklabels(db['data_unique_bvals'])
            # Check if needs to update single subject reports
            if (s_data is not None and s_data['qc_cnr_flag']):
                ax2_01.scatter(0, s_data['qc_cnr_avg'][0], s=100, marker='*',
                               c='w', edgecolors='k', linewidths=1)
                ax2_02.scatter(common_b[:, 0],
                               np.array(s_data['qc_cnr_avg'][1:])[common_b[:, 1]],
                               s=100, marker='*', c='w', edgecolors='k',
                               linewidths=1)

        #================================================
        # Format figure, save and close it
        #================================================
        plt.tight_layout(h_pad=1, pad=4)
        plt.savefig(pdf, format='pdf')
        plt.close()
def format_yax(ax):
    """Give the y axis of *ax* a ``MaxNLocator(5)`` and '%i' tick labels."""
    y_axis = ax.yaxis
    y_axis.set_major_locator(MaxNLocator(5))
    y_axis.set_major_formatter(ticker.FormatStrFormatter('%i'))
else: ax4.plot(freq[freq_idx], QY_fit[ant][chan_sel], '-k') # --------------------------------------------------------------------------------- # #legend(loc=[0,0.85],prop=legendfont) ax1.legend(prop=legendfont) ax1.set_title("Jones matrix element P <- X") ax2.set_title("Jones matrix element P <- Y") ax3.set_title("Jones matrix element Q <- X") ax4.legend(prop=legendfont) ax4.set_title("Jones matrix element Q <- Y") for ax in [ax1, ax2, ax3, ax4]: ax.xaxis.set_major_locator(MaxNLocator(4)) ax.yaxis.set_major_locator(MaxNLocator(5)) if plot_chan: if not args.outname: # This makes the plot a bit busier but much easier to identify channels that need flagging ax.xaxis.set_major_locator(MultipleLocator(1)) ax.xaxis.set_major_formatter(FormatStrFormatter("%d")) ax.set_xlabel("dumb chan # (doesn't skip flagged channels)") else: ax.set_xlabel("MHz") if args.phases: ax.set_ylabel("degrees") else: ax.set_ylabel("gain relative to band average") ax.grid(True)
def statistic(trainFrames, testFrames, denoise=False, bagging=0, weakness=0.3, detail_show=False, roc_plot=False, global_plot=False):
    """Train and evaluate one classifier per train/test pair and print a summary.

    For every index ``i`` a ``GaussianNaiveBayesClassfier`` is fitted on
    ``trainFrames[i]`` and scored on ``testFrames[i]`` (all columns but the
    last as features, column ``'label'`` as truth).  The per-run confusion
    vectors are accumulated and overall metrics are derived from the sum.

    ``detail_show`` prints the metrics of every run, ``roc_plot`` calls
    ``roc_plot()`` on every run's ``Performance`` object and ``global_plot``
    shows the accuracy of all runs in one figure.

    Returns the tuple ``(denoise, bagging, accuracy, feature, matrix,
    prevalence, max_acc, max_fea, min_acc, min_fea, acc, var, ppv, npv, tpr,
    tnr, plr, nlr, dor, f1)`` where ``accuracy``/``feature`` are per-run lists
    and the remaining metrics refer to the accumulated confusion vector.
    """

    def _confusion_frame(cells):
        # Arrange the flat four-entry confusion vector as a labelled 2x2 table.
        return pd.DataFrame(
            {'男声': [int(cells[0]), int(cells[2])],
             '女声': [int(cells[1]), int(cells[3])]},
            index=['预测男声', '预测女声'])

    accuracy = []  # accuracy of every run
    feature = []  # feature (column) names used in every run
    confusionBase = np.array([0, 0, 0, 0])  # accumulated confusion vector

    for run in range(len(trainFrames)):
        clf = GaussianNaiveBayesClassfier(has_denoise=denoise,
                                          bagging_rate=bagging,
                                          bagging_weakness=weakness)
        clf.fit(trainFrames[run])
        voiceProb = clf.predict(testFrames[run].iloc[:, 0:-1], 'prob')
        label = testFrames[run].loc[:, 'label']

        p = Performance(list(label), list(voiceProb[:, 1]))
        confusion = p.get_confusion_matrix()
        confusionBase = confusionBase + np.array(confusion)

        run_acc = p.ACC()
        accuracy.append(run_acc)
        fea = trainFrames[run].columns.values.tolist()
        feature.append(fea)

        if detail_show:
            # Statistics of this single run
            run_prevalence = p.Prevalence()
            run_ppv = p.PPV()
            run_npv = p.NPV()
            run_tpr = p.TPR()
            run_tnr = p.TNR()
            run_plr = p.PLR()
            run_nlr = p.NLR()
            run_dor = p.DOR()
            run_f1 = p.F_score(1.0)
            run_matrix = _confusion_frame(confusion)

            print('\n------------第%d次测试------------' % (run + 1))
            print('特征:', fea)
            print('动态降噪:', denoise)
            print('集成:', bagging)
            print('混淆矩阵:')
            print(run_matrix)
            print('总男声比例:%.2f%%' % (run_prevalence * 100))
            print('总体准确度:%.2f%%' % (run_acc * 100))
            print('男声查准率:%.2f%%' % (run_ppv * 100))
            print('男声查全率: %.2f%%' % (run_tpr * 100))
            print('女声查准率:%.2f%%' % (run_npv * 100))
            print('女声查全率: %.2f%%' % (run_tnr * 100))
            print('男声似然比:%.4f' % run_plr)
            print('女声似然比:%.4f' % (1 / run_nlr))
            print('判别男声相关:')
            print('诊断比值比:%.4f' % run_dor)
            print('F1分数:%.4f' % run_f1)

        if roc_plot:
            # ROC curve with the male voice as the positive class
            p.roc_plot()

    # Overall statistics: the Performance object is created from placeholder
    # inputs and then handed the accumulated confusion vector via set().
    res = Performance([0, 1], [0.2, 0.8])
    res.set(list(confusionBase))
    prevalence = res.Prevalence()
    acc = res.ACC()
    ppv = res.PPV()
    npv = res.NPV()
    tpr = res.TPR()
    tnr = res.TNR()
    plr = res.PLR()
    nlr = res.NLR()
    dor = res.DOR()
    f1 = res.F_score(1.0)

    max_acc = max(accuracy)
    max_fea = feature[accuracy.index(max_acc)]
    min_acc = min(accuracy)
    min_fea = feature[accuracy.index(min_acc)]
    var = np.var(accuracy)

    matrix = _confusion_frame(confusionBase)

    print('\n\n------------测试总次数=%d------------' % len(trainFrames))
    print('动态降噪:', denoise)
    print('集成:', bagging)
    print('累积混淆矩阵:')
    print(matrix)
    print('总男声比例:%.2f%%' % (prevalence * 100))
    print('最大准确度:%.2f%%' % (max_acc * 100))
    print('最大准确度使用特征:', max_fea)
    print('最低准确度:%.2f%%' % (min_acc * 100))
    print('最低准确度使用特征:', min_fea)
    print('平均准确度:%.2f%%' % (acc * 100))
    print('准确度方差:%g' % var)
    print('平均男声查准率:%.2f%%' % (ppv * 100))
    print('平均男声查全率:%.2f%%' % (tpr * 100))
    print('平均女声查准率:%.2f%%' % (npv * 100))
    print('平均女声查全率:%.2f%%' % (tnr * 100))
    print('平均男声似然比:%.4f' % plr)
    print('平均女声似然比:%.4f' % (1 / nlr))
    print('判别男声相关:')
    print('平均诊断比值比:%.4f' % dor)
    print('平均F1分数:%.4f' % f1)

    if global_plot:
        # Show the accuracy of every run in one figure
        run_count = len(accuracy)
        ax = plt.figure().gca()
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.xticks(np.arange(1, run_count + 1, 1))
        plt.title("Accuracy for %d turns(denoise=%d,bagging=%d)" %
                  (run_count, denoise, bagging))
        plt.ylabel("Accuracy")
        plt.xlabel("turns")
        if run_count > 1:
            plt.plot(list(range(1, run_count + 1)), accuracy)
        else:
            plt.scatter(1, accuracy, c=2, cmap=plt.cm.spring, edgecolors='k')
        plt.show()

    return (denoise, bagging, accuracy, feature, matrix, prevalence, max_acc,
            max_fea, min_acc, min_fea, acc, var, ppv, npv, tpr, tnr, plr, nlr,
            dor, f1)
count_by_artist = count_by_artist.sort_values('song') count_by_artist = count_by_artist.tail(20) # top 20 df_list.append(count_by_artist) years = years + 1 from matplotlib.pyplot import figure from pylab import MaxNLocator yr = 1985 ax = plt.figure(num=None, figsize=(14, 10), dpi=80, facecolor='w', edgecolor='b').gca() ax.xaxis.set_major_locator(MaxNLocator(integer=True)) ax.grid(False) ax.set_xlabel("# Of Times on Billboard's Year End Top 100") #ax.set_ylabel("Artist") plt.gcf().subplots_adjust(left=0.20) plt.gcf().subplots_adjust(bottom=0.14) for i in df_list: #figure(num=None, figsize=(20, 20), dpi=80, facecolor='w', edgecolor='k') plt.tight_layout() plt.grid(False) plt.gcf().subplots_adjust(left=0.20) plt.gcf().subplots_adjust(bottom=0.14) yr_str = 'Year %s' % yr plt.figtext(0.76,
def Graph_for_ind(pd, type, ys):
    """Append one bar chart per KPI for a single value of column *type* to the PDF.

    Rows of the module-level ``series`` frame whose column ``type`` equals
    ``pd`` are grouped by period (year if ``ys`` is truthy, otherwise month),
    analysed with ``Analyze_data`` and, for every KPI, drawn as a bar chart
    with the listing count overlaid as a line on a second y axis.  Each figure
    is saved to the module-level ``pdf`` and closed.

    Parameters
    ----------
    pd
        Value to select in column ``type`` (note: shadows the usual pandas
        alias inside this function).
    type
        Name of the column of ``series`` to filter on.
    ys
        Truthy for a yearly analysis, falsy for a monthly one.
    """
    series['postal_code'] = series['postal_code'].apply(str)

    # Take an explicit copy of the selection so that rewriting its
    # 'online_for' column below is a plain assignment on an independent frame
    # rather than a chained assignment on a slice of ``series``.
    pd_data = series.loc[series[type] == pd].copy()

    # One truthiness test on ``ys`` drives both the period conversion and the
    # matching date format, so the two can never disagree.
    if ys:
        to_period = get_timing_year
        period_format = '%Y'
    else:
        to_period = get_timing_month
        period_format = '%b %Y'
    pd_data['online_for'] = pd_data['online_for'].apply(to_period)

    months = pd_data['online_for'].unique().tolist()
    months.sort(
        key=lambda date: datetime.datetime.strptime(date, period_format))

    final_data = {
        'Average Price': {},
        'Average Size': {},
        'Average €/qm': {},
        'Count Furnished': {},
        'Count Unfurnished': {},
        'Average Miscellaneous Cost': {},
        'Rent': {},
        'Average Utilities Cost': {},
        'Average Base Rent': {}
    }

    # Collect every KPI per period.
    for month in months:
        month_data = pd_data.loc[pd_data['online_for'] == month]
        kpis = Analyze_data(month_data)
        for kpi in kpis:
            final_data[kpi][month] = kpis[kpi]

    # Total listings per period = furnished + unfurnished.
    count = [
        x + y for x, y in zip(list(final_data['Count Furnished'].values()),
                              list(final_data['Count Unfurnished'].values()))
    ]
    count_map = Include_Empty_months(dict(zip(months, count)), ys)

    for k in final_data:
        final_data[k] = Include_Empty_months(final_data[k], ys)
        data = list(final_data[k].values())
        index = list(final_data[k].keys())

        # Figure width grows with the number of periods, minimum 15.
        width = len(index) * 0.3
        if width < 15:
            width = 15

        fig, axs = pyplot.subplots(figsize=(width, 5))
        dataFrame = pandas.DataFrame(data={k: data}, index=index)

        color = 'tab:blue'
        dataFrame.plot.bar(
            ax=axs,
            width=0.9,
            title=f"{'Yearly' if ys else 'Monthly'} analysis for {city}: {pd} ({k})",
            alpha=foreground_opacity)
        axs.tick_params(axis='y', colors=color)
        axs.set_ylim(ymin=0)
        yb = axs.get_yaxis()
        yb.set_major_locator(MaxNLocator(integer=True))
        axs.set_ylabel('Measure\'s value',
                       color=color,
                       fontsize='large',
                       fontweight='bold')
        axs.grid(axis='y')

        # Second y axis: number of listings per period.
        ax2 = axs.twinx()
        color = 'tab:red'
        ax2.set_xlabel('Month', fontsize='large', fontweight='bold')
        ax2.set_ylabel('Count',
                       color=color,
                       fontsize='large',
                       fontweight='bold')
        ya = ax2.get_yaxis()
        ya.set_major_locator(MaxNLocator(integer=True))
        ax2.plot(list(count_map.keys()),
                 list(count_map.values()),
                 color=color)
        ax2.set_ylim(ymin=0)
        ax2.format_xdata = mdates.DateFormatter('%b %Y')

        fig.autofmt_xdate()
        fig.tight_layout()
        pdf.savefig()
        pyplot.close()
def plot_pacf_stem(ds, rgi_df, xlim=None, path=True, nlags=200, slice_start=3000, plot_confint=True):
    """Save one partial-autocorrelation stem plot per glacier.

    For every row of ``rgi_df`` the glacier length series of the flowline
    (``'fl'``) and the volume/area scaling (``'vas'``) model are taken from
    ``ds`` for each temperature bias, their partial autocorrelation is
    computed with ``stattools.pacf`` and drawn as stems in one figure, which
    is written as ``<name>.pdf`` into a fixed output directory and closed.

    Parameters
    ----------
    ds
        Dataset supporting ``.sel(mb_model=..., normalized=..., rgi_id=...)``,
        ``.isel(time=...)`` and exposing ``temp_bias`` and ``length``.
    rgi_df
        Table iterated with ``iterrows()``; the index is used as RGI id and
        column ``'name'`` as glacier name / file name.
    xlim
        x-axis limits; defaults to ``[0, nlags]`` when not given.
    path
        Currently not used: the output directory is hard-coded below.
    nlags
        Number of lags passed to ``stattools.pacf``.
    slice_start
        First time index of the series that is used.
    plot_confint
        Whether to shade the confidence band returned by ``stattools.pacf``.
    """
    # Model key in ``ds``, colour cycle, and sign of the horizontal stem
    # offset.  The order matters: legend handles alternate fl, vas per bias.
    model_specs = (('fl', fl_cycle, 1), ('vas', vas_cycle, -1))

    # iterate over all above selected glaciers
    for rgi_id, glacier in rgi_df.iterrows():
        name = glacier['name']
        log.info('PACF plots for {} ({})'.format(name, rgi_id))

        # create figure and axes
        fig, ax = plt.subplots(1, 1)

        # lags 0 .. nlags; lag 0 is skipped when plotting
        lags = np.arange(0, nlags + 1)

        # select the complete dataset
        ds_sel = ds.sel(mb_model='random', normalized=False, rgi_id=rgi_id)
        # select time frame
        slice_end = None
        ds_sel = ds_sel.isel(time=slice(slice_start, slice_end))

        xoffset = 0
        # plot zero aux line
        ax.axhline(0, c='k', ls=':')

        for i, b in enumerate(np.sort(ds.temp_bias)):
            # get length data
            length = ds_sel.sel(temp_bias=b).length

            for model, colors, sign in model_specs:
                # compute partial autocorrelation and confidence intervals
                acf, confint = stattools.pacf(length.sel(model=model),
                                              nlags=nlags,
                                              alpha=0.01,
                                              method='ywmle')
                # plot partial autocorrelation function
                ml, sl, _ = ax.stem(lags[1:] + sign * xoffset,
                                    acf[1:],
                                    markerfmt='o',
                                    linefmt=':',
                                    basefmt='None',
                                    label='{:+.1f} °C'.format(b))
                plt.setp(sl, 'color', colors[i])
                plt.setp(ml, 'color', colors[i])
                if plot_confint:
                    # fill confidence interval, centred on zero
                    ax.fill_between(lags[1:],
                                    confint[1:, 0] - acf[1:],
                                    confint[1:, 1] - acf[1:],
                                    color=colors[i],
                                    alpha=0.1)

        # adjust axes
        if not xlim:
            xlim = [0, nlags]
        ax.set_xlim(xlim)
        ax.set_ylim([-1.1, 1.1])
        xa = ax.get_xaxis()
        xa.set_major_locator(MaxNLocator(integer=True))
        # add grid
        ax.grid()

        # get legend handles and labels
        handles, labels = ax.get_legend_handles_labels()
        # invisible artist used as a column heading inside the legend
        title_proxy, = plt.plot(0,
                                marker='None',
                                linestyle='None',
                                label='dummy')

        # create list of handles and labels in correct order:
        # heading + flowline entries, heading + VAS entries
        my_handles = [title_proxy]
        my_handles.extend(handles[::2])
        my_handles.append(title_proxy)
        my_handles.extend(handles[1::2])

        # Backslashes are escaped explicitly; the resulting text is the
        # mathtext ``$\bf{Flowline\ model}$`` / ``$\bf{VAS\ model}$``.
        my_labels = ["$\\bf{Flowline\\ model}$"]
        my_labels.extend(labels[::2])
        my_labels.append("$\\bf{VAS\\ model}$")
        my_labels.extend(labels[1::2])

        # add single two-column legend
        ax.legend(my_handles, my_labels, ncol=2)

        # labels, title, ...
        ax.set_xlabel('Lag [years]')
        ax.set_ylabel('Correlation coefficient')

        # store plot (kept in a local so the ``path`` argument is not clobbered)
        dir_path = '/Users/oberrauch/work/master/plots/final_plots/pacf/'
        f_name = '{}.pdf'.format(name.replace(' ', '_'))
        out_path = os.path.join(dir_path, f_name)
        plt.savefig(out_path, bbox_inches='tight')

        # Release the figure; otherwise one open figure per glacier
        # accumulates for the lifetime of the process.
        plt.close(fig)
def best_so_far(results_directory, num_iterations):
    """
    Produce two plots from the per-iteration result files:

    1) the best value obtained so far against the number of function
       evaluations, starting at the first iteration whose first constraint
       score equals 1;
    2) a scatter plot of all collected data points.

    Both plots, the iteration range and the best-value series are written to
    ``results_directory``.

    :param results_directory: directory holding the ``scores``, ``con_scores``
        and ``next_inputs`` files and receiving the output files.
    :param num_iterations: the number of iterations for which data collection
        is being carried out.
    """
    best_vals = []
    # coordinates of collected data points
    x1_vals = []
    x2_vals = []
    # number of iterations counted from the first feasible one (inclusive)
    counter = 0

    for iteration in range(num_iterations):

        def _load(stem):
            # Load "<results_directory>/<stem><iteration>.dat".
            return load_object(results_directory +
                               "/{}{}.dat".format(stem, iteration))

        # We monitor the best value obtained so far
        best_value = min(_load("scores"))
        first_is_feasible = _load("con_scores")[0] == 1

        if counter == 0:
            # Nothing is recorded until the first collected point of an
            # iteration is feasible; that iteration seeds the series.
            if first_is_feasible:
                counter = 1
                best_vals.append(best_value[0])
        else:
            counter += 1
            previous_best = min(best_vals)
            if best_value[0] < previous_best:
                best_vals.append(best_value[0])
            else:
                best_vals.append(previous_best)

        # We collect the data points for plotting
        for data_point in _load("next_inputs"):
            x1_vals.append(data_point[0])
            x2_vals.append(data_point[1])

    first_plotted = (num_iterations - counter) + 1
    iterations = range(first_plotted, num_iterations + 1)

    # We plot the best value obtained so far as a function of iterations
    axes = plt.figure(2).gca()
    # force axis ticks to be integers
    for axis in (axes.get_xaxis(), axes.get_yaxis()):
        axis.set_major_locator(MaxNLocator(integer=True))
    plt.xlim(first_plotted, num_iterations)
    plt.xlabel('Function Evaluations')
    plt.ylabel('Best Feasible Value')
    plt.plot(iterations, best_vals)
    pylab.savefig(results_directory + "/best_so_far.png")
    plt.close()

    save_object(iterations, results_directory + "/iterations.dat")
    save_object(best_vals, results_directory + "/best_vals.dat")

    # We plot the data points collected
    plt.figure(3)
    plt.title('Data Points Collected')
    plt.gca().set_aspect('equal')
    plt.xlim(-5, 10)
    plt.ylim(0, 15)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.scatter(x1_vals, x2_vals)
    pylab.savefig(results_directory + "/data_collected.png")
    plt.close()
def main(pdf, data, eddy):
    """
    Generate the page of the single subject report pdf that contains:
    - Per-shell average CNR bar plots (the b=0 bar is labelled tSNR); error bars
      are taken from eddy['std_cnr'].
    - When eddy['rssFlag'] is set, per-volume mean squared residuals (MSR) plots,
      one for b=0 and one for each unique b-value. Volumes whose MSR exceeds
      mean + 2 * std of their shell are marked with red stars.

    When eddy['rssFlag'] is set, the indices of the volumes that are not outliers
    are written to data['qc_path'] + '/vols_no_outliers.txt'; if data['bvecs'] is
    not empty, the matching bvecs and bvals are written alongside.

    Arguments:
        - pdf: qc pdf file
        - data: data dictionary containing information about the dataset
        - eddy: EDDY dictionary containing useful qc information
    """
    #================================================
    # Prepare figure
    plt.figure(figsize=(8.27, 11.69))  # Standard portrait A4 sizes
    plt.suptitle('Subject ' + data['subj_id'], fontsize=10, fontweight='bold')

    # Two stacked sections: a short top strip for the bar plots and a taller
    # bottom area for the MSR plots.
    page_grid = gridspec.GridSpec(2, 1, height_ratios=[0.16, 0.8], hspace=0.2)
    cnr_grid = gridspec.GridSpecFromSubplotSpec(1, 3, subplot_spec=page_grid[0], wspace=1)

    avg_cnr = eddy['avg_cnr']
    std_cnr = eddy['std_cnr']
    n_shells = data['unique_bvals'].size

    # b=0 bar (first entry of the CNR arrays).
    b0_axis = plt.subplot(cnr_grid[0, 0])
    b0_bars = seaborn.barplot(y=avg_cnr[0], ax=b0_axis)
    b0_bars.errorbar(x=0, y=avg_cnr[0], yerr=std_cnr[0], ecolor='black', fmt="none")
    b0_axis.set_xlabel("b-value [s/mm$^2$]")
    b0_axis.set_ylabel("tSNR")
    b0_axis.set_ylim(0, avg_cnr[0] + 2 * std_cnr[0])
    b0_axis.set_xticklabels([0])

    # One bar per unique b-value (remaining entries of the CNR arrays).
    shells_axis = plt.subplot(cnr_grid[0, 1:])
    shells_bars = seaborn.barplot(x=np.arange(1, 1 + n_shells), y=avg_cnr[1:],
                                  ci=3.0, ax=shells_axis)
    shells_bars.errorbar(x=np.arange(0, n_shells), y=avg_cnr[1:], yerr=std_cnr[1:],
                         ecolor='black', fmt="none")
    shells_axis.set_xlabel("b-value [s/mm$^2$]")
    shells_axis.set_ylabel("CNR")
    shells_axis.set_xticklabels(data['unique_bvals'])

    if eddy['rssFlag']:
        msr_grid = gridspec.GridSpecFromSubplotSpec(1 + n_shells, 1, subplot_spec=page_grid[1])
        volume_ids = np.arange(data['bvals'].size)

        # --- b=0 volumes (|bval| within 100 of zero) ---
        axis = plt.subplot(msr_grid[0, 0])
        in_shell = np.abs(data['bvals']) <= 100
        shell_msr = eddy['avg_rss'][in_shell]
        shell_volume_ids = volume_ids[in_shell]
        threshold = np.mean(shell_msr) + 2 * np.std(shell_msr)
        outlier_idx = np.array(np.where(shell_msr > threshold))
        axis.plot(np.arange(1, 1 + data['no_b0_vols']), shell_msr, label="b=0")
        axis.scatter(outlier_idx + 1, np.ones(outlier_idx.size) * np.max(shell_msr) + 200,
                     s=50, c='r', marker='*', label='Outliers')
        outlier_volumes = shell_volume_ids[outlier_idx]
        axis.xaxis.set_major_locator(MaxNLocator(integer=True))
        axis.set_title("Mean squared residuals (MSR)")
        axis.set_xlim(0, 1 + data['no_b0_vols'])
        axis.set_ylabel("MSR")
        axis.legend(loc='best', frameon=True, framealpha=0.5)

        # --- one plot per unique b-value (volumes within 100 of that b-value) ---
        for shell in range(0, n_shells):
            bval = data['unique_bvals'][shell]
            in_shell = np.abs(data['bvals'] - bval) <= 100
            shell_msr = eddy['avg_rss'][in_shell]
            shell_volume_ids = volume_ids[in_shell]
            threshold = np.mean(shell_msr) + 2 * np.std(shell_msr)
            outlier_idx = np.array(np.where(shell_msr > threshold))

            axis = plt.subplot(msr_grid[shell + 1, 0])
            axis.set_ylabel("MSR")
            axis.plot(np.arange(1, 1 + data['bvals_dirs'][shell]), shell_msr,
                      label="b=%d" % bval)
            # Stars are drawn above the curve, at 1.5 times the shell maximum.
            peak = np.max(shell_msr)
            axis.scatter(outlier_idx + 1, np.ones(outlier_idx.size) * peak + 0.5 * peak,
                         s=50, c='r', marker='*', label='Outliers')
            outlier_volumes = np.append(outlier_volumes, shell_volume_ids[outlier_idx])
            axis.xaxis.set_major_locator(MaxNLocator(integer=True))
            axis.set_xlim(0, 1 + data['bvals_dirs'][shell])
            axis.legend(loc='best', frameon=True, framealpha=0.5)
        # Only the last (bottom) plot gets the x-axis label.
        axis.set_xlabel("Volume")

        # Save volumes without outliers to text files.
        # If bvecs have been specified, then also save reduced bvals and bvecs
        kept_volumes = np.delete(volume_ids, outlier_volumes)
        qc_dir = data['qc_path']
        np.savetxt(qc_dir + '/vols_no_outliers.txt', np.reshape(kept_volumes, (1, -1)),
                   fmt='%d', delimiter=' ')
        if data['bvecs'].size != 0:
            np.savetxt(qc_dir + '/bvecs_no_outliers.txt', data['bvecs'][:, kept_volumes],
                       fmt='%.5f', delimiter=' ')
            np.savetxt(qc_dir + '/bvals_no_outliers.txt',
                       np.reshape(data['bvals'][kept_volumes], (1, -1)),
                       fmt='%f', delimiter=' ')

    # Format figure, save and close it
    plt.savefig(pdf, format='pdf')
    plt.close()
def _learning_curve_yticks(y_max):
    """Return the y tick positions for a learning-curve axis whose upper limit is ``y_max``."""
    if y_max < 3.5:
        step = 0.1
    elif y_max < 5:
        step = 0.5
    elif y_max < 20:
        step = 1
    else:
        step = 5

    # The 0..1 range always uses 0.1 steps.
    ticks_until_1 = np.arange(0, 1.05, 0.1)
    if y_max > 5:
        return np.arange(0, y_max + (float(step) / 2), step)
    if y_max > 1:
        return np.concatenate(
            (ticks_until_1, np.arange(1, y_max + (float(step) / 2), step)))
    return ticks_until_1


def plot_from_dicts(plot_dicts, out, title='Training Curve', x_label="Iterations", legend_box_pos=(1, 1)):
    """
    Draw and save a learning curve from a collection of series descriptions.

    A figure with the y-axis capped at 2.0 is always produced. When the largest
    y value over all series exceeds 2.0, a second, wider figure covering the
    full range is produced as well; in that case the capped figure is saved
    with a ``_closeup`` suffix and the full-range figure under ``out``.

    :param plot_dicts: iterable of objects exposing ``x``, ``y``, ``plot``,
        ``color``, ``marker``, ``markersize``, ``markevery`` and ``legend``
        attributes. Only entries with a truthy ``plot`` are drawn, but every
        non-empty ``y`` contributes to the y-range.
    :param out: output path; a trailing ``.png`` is stripped before saving.
    :param title: figure title, passed through the module-level ``wrap_title``.
    :param x_label: label of the x-axis.
    :param legend_box_pos: ``bbox_to_anchor`` value for the legend.

    The module-level ``demo_mode`` flag decides whether each figure is shown
    (truthy) or closed (falsy) after saving.
    """
    # default=0.0 keeps the call from raising when there is no non-empty series;
    # the length check also works for sequences that are not plain lists.
    max_y = max((max(plot_dict.y) for plot_dict in plot_dicts if len(plot_dict.y) > 0),
                default=0.0)

    # If the loss is too high make two plots: one capped at 2.0 so that the
    # accuracy is readable, and one with the full range. 2.0 is always the
    # minimum cap so that there is room for the legend box.
    y_maxes = [2.0, max_y] if max_y > 2.0 else [2.0]

    # Strip only a trailing ".png"; a ".png" elsewhere in the path must survive.
    png_suffix = ".png"
    if out.endswith(png_suffix):
        out = out[:-len(png_suffix)]

    for y_max in y_maxes:
        fig, ax1 = plt.subplots()
        fig.set_size_inches(10, 10)
        ax1.xaxis.grid(True)
        plt.grid()
        ax1.get_yaxis().set_major_locator(MaxNLocator(integer=True))
        ax1.get_xaxis().set_major_locator(MaxNLocator(integer=True))
        ax1.set_ylim(0, y_max)
        ax1.set_xlabel(x_label, fontsize=15)
        ax1.tick_params(labelsize=10)

        legends_plots = []
        legends_strs = []
        x_ticks = []  # x values of the longest plotted series
        for plot_dict in plot_dicts:
            if not plot_dict.plot:
                continue
            if len(plot_dict.x) > len(x_ticks):
                x_ticks = plot_dict.x
            line, = ax1.plot(plot_dict.x, plot_dict.y, linewidth=2,
                             color=plot_dict.color, marker=plot_dict.marker,
                             markersize=plot_dict.markersize,
                             markevery=plot_dict.markevery)
            legends_plots.append(line)
            legends_strs.append(plot_dict.legend)

        ax1.yaxis.set_ticks(_learning_curve_yticks(y_max))
        ax1.xaxis.set_ticks(x_ticks)

        # Reference lines for the best achievable accuracy and loss.
        max_accuracy_line = plt.axhline(y=1, xmin=0, xmax=3, linewidth=1.5, zorder=0,
                                        color='green', linestyle='dashed')
        min_loss_line = plt.axhline(y=0, xmin=0, xmax=3, linewidth=1.2, zorder=0,
                                    color='m', linestyle='dashed')

        # Adding legend
        lgd = plt.legend(legends_plots + [min_loss_line, max_accuracy_line],
                         legends_strs + ["Min Loss/Accuracy = 0", "Max Accuracy = 1"],
                         bbox_to_anchor=legend_box_pos)
        plt.title(wrap_title(title), fontsize=18)

        # Saving learning curve
        two_figures = len(y_maxes) > 1
        if two_figures and y_max > 2.0:
            # Full-range figure: widen it and save under the plain name.
            fig.set_size_inches(15, 10)
            target = out
        elif two_figures:
            target = out + "_closeup"
        else:
            target = out
        plt.savefig(target, bbox_extra_artists=(lgd, ), bbox_inches='tight', pad_inches=0.5)

        if demo_mode:
            plt.show()
        else:
            plt.close()