def answerbox(responses):
    """Show a box plot of the normalized response matrix.

    ``responses`` is converted with ``metrics.responsematrix`` and then
    ``metrics.normalize_responses``; the result is plotted and the figure
    is displayed immediately with ``pl.show()``.
    """
    raw_matrix = metrics.responsematrix(responses)
    normalized = metrics.normalize_responses(raw_matrix)
    # Plain ndarray with length-1 axes dropped before handing it to boxplot.
    plot_values = np.squeeze(np.asarray(normalized))

    pl.boxplot(plot_values)
    pl.title("Responses")
    pl.ylabel("Numeric representation of question answers")
    pl.xlabel("Question number")
    pl.show()
def boxplot_poi(data, var_name):
    """
    Makes box plot with variable "var_name" split into POI and non-POI
    groups, with every individual value overlaid as a jittered red dot.

    Values equal to the string "NaN" are plotted as 0.

    :param data: data dict with enron data; every value is a record that is
        indexed with ``var_name`` and ``"poi"``
    :param var_name: name of variable to plot
    :return: None -- the plot is drawn on the current matplotlib axes
    """
    poi_v = []
    no_poi_v = []
    # .values() works on Python 2 and 3 alike (itervalues() is Python 2 only).
    for p in data.values():
        value = p[var_name]
        if value == "NaN":
            value = 0
        if p["poi"] == 1:
            poi_v.append(value)
        else:
            no_poi_v.append(value)

    groups = [poi_v, no_poi_v]
    plt.xlabel("POI")
    plt.ylabel(var_name)
    plt.boxplot(groups)
    plt.xticks([1, 2], ["POI", "Not a POI"])

    # Overlay the raw points with a little horizontal jitter around each box
    # position: http://stackoverflow.com/a/29780292/1952996
    for i, y in enumerate(groups):
        x = np.random.normal(i + 1, 0.04, size=len(y))
        plt.plot(x, y, "r.", alpha=0.2)
def plot_box_plot(col, title=None, verbose=True):
    """Draw a box plot of one feature on a new figure.

    Parameters
    ----------
    col : np.array
        Feature values; passed through ``utils.check_col`` before plotting.
    title : str or None
        Title of the plot; no title is set when falsy.
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing the plot
    """
    checked_col = utils.check_col(col)
    figure = plt.figure()
    boxplot(checked_col)
    if title:
        plt.title(title)
    if verbose:
        plt.show()
    return figure
def plot_feature(self, fid):
    """
    Box-plot the values returned by ``self.get_feature(fid)`` on a new
    figure; the title names the feature id and ``self.id``.
    """
    values = self.get_feature(fid)

    import matplotlib.pylab as mp

    heading = 'Distribution of %s within %s' % (fid, self.id)
    mp.figure()
    mp.boxplot(values)
    mp.title(heading)
def create_and_save_boxplot(data, title, x_titles, metric, folder_to_save_to, display=False):
    """Draw a box plot of `data` and save it as a PNG.

    The boxes are labelled with `x_titles`, the y axis with `metric`, and the
    figure with `title`. The output path is `folder_to_save_to` followed
    directly by the title with spaces replaced by underscores and ".png"
    (no path separator is inserted). The figure is shown first when
    `display` is true and is always closed afterwards.
    """
    figure = plt.figure()
    pylab.boxplot(data)

    tick_positions = range(1, len(x_titles) + 1)
    pylab.xticks(tick_positions, x_titles)
    plt.ylabel(metric)
    plt.title(title)

    target = '%s%s.png' % (folder_to_save_to, title.replace(" ", "_"))
    figure.savefig(target)

    if display:
        plt.show()
    plt.close(figure)
def ca_box_plot_features(index):
    '''
    Relationship between a training feature and extraversion.

    Box-plots column ``index + 1`` of the "+1" and "-1" split-class files
    side by side (means shown, fliers hidden) and displays the figure.

    :param index: training feature number (the list is kept in Evernote)
    :return:
    '''
    column = index + 1
    plus_one = pd.read_csv('data/split_class/large_IGNORE_404_NOR_+1.txt', sep=' ', header=None)
    minus_one = pd.read_csv('data/split_class/large_IGNORE_404_NOR_-1.txt', sep=' ', header=None)
    groups = [plus_one[column], minus_one[column]]
    plt.boxplot(groups, showmeans=True, showfliers=False)
    plt.show()
def load():
    """Box-plot the 'tri_score' and 'auto_score' results stored in ``results/``.

    Every file in ``results/`` whose name contains "S500" is read line by
    line. Each line is evaluated as a Python expression and must yield a
    mapping with the keys 'tri_score' and 'auto_score'. One box per file is
    drawn, labelled with the file name from its fifth character on.

    Two files are written to the current directory, ``tri<XXX>.pdf`` and
    ``auto<XXX>.pdf``, where ``<XXX>`` is characters 1-3 of the last
    (alphabetically sorted) matching file name. Nothing is plotted when no
    file matches.
    """
    pattern = re.compile("S500")
    files = sorted(name for name in os.listdir('results') if pattern.search(name))
    print("files:", files)
    if not files:
        # No file name to derive titles / output names from.
        return

    res_tri = []
    res_auto = []
    labels = []
    for f_name in files:
        temp_tri = []
        temp_auto = []
        with open('results/' + f_name, 'r') as f:
            for line in f:
                # SECURITY NOTE(review): eval() runs arbitrary code found in
                # the result files. Only use this on files you produced
                # yourself; consider ast.literal_eval if every line is a
                # plain dict literal.
                x = eval(line)
                temp_tri.append(x['tri_score'])
                temp_auto.append(x['auto_score'])
        res_tri.append(temp_tri)
        res_auto.append(temp_auto)
        labels.append(f_name[4:])

    # Titles and output names use the last file's name, as before.
    points = files[-1][1:4]
    for kind, scores in (('tri', res_tri), ('auto', res_auto)):
        pl.figure(figsize=(1366 / 96, 768 / 96), dpi=100)
        pl.title(kind + ' ' + points + ' points')
        pl.boxplot(scores, labels=labels)
        pl.savefig(kind + points + '.pdf')
        pl.close()
def plot_box(plot_data, top_key):
    """
    Draw the entries of the plot_data[top_key] dict as side-by-side boxes
    (one box per key, labelled with the key) and save the figure as
    '<top_key>/box_plot.png' after calling create_folder(top_key).
    """
    series = plot_data[top_key]
    boxes = list(series.values())

    plt.title('Spam emails per week by ' + top_key, fontsize=20)
    plt.boxplot(boxes)

    positions = list(range(1, len(series) + 1))
    names = ['%s' % key for key in series.keys()]
    plt.xticks(positions, names, rotation=80)
    plt.tight_layout()

    create_folder(top_key)
    figure = plt.gcf()
    figure.set_size_inches(20, 14)
    plt.savefig(top_key + '/box_plot.png', format='png', dpi=100)
def makejitter(y1list,y2list,pointannotation,ylabel,xlabel,title,imagefilename):
    """Box-plot two samples with their jittered points and save the figure.

    `y1list` is drawn as blue dots around x=1 and `y2list` as red dots
    around x=2, each spread uniformly over a 0.4-wide band. Every
    ``(label, x, y)`` triple in `pointannotation` becomes an arrow
    annotation. The view is fixed to x in [0.2, 3.0] and y in [0.0, 1.0],
    the x axis is hidden, and the result is written to `imagefilename`.
    """
    fig = plt.figure(figsize=(8, 8))

    def jitter(left_edge, count):
        # One uniform draw per point, shifted into [left_edge, left_edge + 0.4).
        draws = np.random.rand(count, 1).tolist()
        return [left_edge + 0.4 * row[0] for row in draws]

    x1list = jitter(0.8, len(y1list))
    x2list = jitter(1.8, len(y2list))

    plt.boxplot([y1list, y2list], widths=0.4)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.plot(x1list, y1list, 'b.')
    plt.plot(x2list, y2list, 'r.')

    for text, px, py in pointannotation:
        plt.annotate(
            text,
            xy=(px, py),
            xytext=(80, -40),
            textcoords='offset points',
            ha='right',
            va='bottom',
            bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5),
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'),
        )

    plt.axis([0.2, 3.0, 0.0, 1.0])
    plt.gca().axes.get_xaxis().set_visible(False)
    plt.savefig(imagefilename)
def plotStoreDailyTrends(trainSet,storeData,storeID,savepath = None):
    """Plot one store's sales per day of week as violins with notched boxes.

    Only rows of `trainSet` with ``Store == storeID`` and ``Open == 1`` are
    used. `savepath` is accepted but not used by the code visible here.
    """
    openDays = trainSet[trainSet['Store'] == storeID]
    openDays = openDays[openDays['Open'] == 1]

    def salesPerWeekday():
        # One Sales series per distinct DayOfWeek value.
        weekdays = set(openDays['DayOfWeek'])
        return [openDays[openDays['DayOfWeek'] == dow]['Sales'] for dow in weekdays]

    plt.figure()
    plt.violinplot(salesPerWeekday(), showmeans=True)
    plt.boxplot(salesPerWeekday(), notch=1)
    plt.xlabel('day of week')
    plt.ylabel('sales')

    competitionYear = storeData[storeData['Store'] == storeID]['CompetitionOpenSinceYear'].values
    storeCompetitionFlag = ~np.isnan(competitionYear)
    # NOTE(review): the flag is still truth-tested, but the branch has no
    # effect (the original re-bound the frame to itself) -- looks unfinished.
    if storeCompetitionFlag:
        pass
def violin_plot(ax, values_list, measure_name, group_names, fontsize, color='blue', ttest=False):
    '''
    This is a little wrapper around the statsmodels violinplot code
    so that it looks nice :)

    Draws one violin per entry of values_list on ax (labelled with
    group_names), overlays a black matplotlib box plot, switches the y axis
    to scientific notation outside 1e-3..1e3, labels it with measure_name
    and hides the major tick marks on the y axis.

    Note: this makes ax the current axes and changes the global font size
    through plt.rc. The ttest argument is accepted but not used.

    Returns ax.
    '''
    # IMPORTS
    import matplotlib.pylab as plt
    import statsmodels.api as sm
    import numpy as np

    plt.sca(ax)

    # Adjust the font size
    font = {'size': fontsize}
    plt.rc('font', **font)

    # The violins are cut off at the largest observed value
    max_value = np.max(np.concatenate(values_list))

    # Make your violin plot from the values_list
    # Don't show the box plot because it looks a mess to be honest
    # we're going to overlay a boxplot on top afterwards
    sm.graphics.violinplot(values_list,
                           ax=ax,
                           labels=group_names,
                           show_boxplot=False,
                           plot_opts={'violin_fc': color,
                                      'cutoff': True,
                                      'cutoff_val': max_value,
                                      'cutoff_type': 'abs'})

    # Now plot the boxplot on top
    bp = plt.boxplot(values_list, sym='x')
    for key in bp.keys():
        plt.setp(bp[key], color='black', lw=fontsize/10)

    # Adjust the power limits so that you use scientific notation on the y axis
    plt.ticklabel_format(style='sci', axis='y')
    ax.yaxis.major.formatter.set_powerlimits((-3, 3))
    plt.tick_params(axis='both', which='major', labelsize=fontsize)

    # Add the y label
    plt.ylabel(measure_name, fontsize=fontsize)

    # And now turn off the major ticks on the y-axis. tick_params is used
    # because the Tick.tick1On / tick2On attributes no longer exist in
    # current matplotlib, where assigning to them silently does nothing.
    ax.tick_params(axis='y', which='major', left=False, right=False)

    return ax
def plot_box_plot(col, col_name=None, verbose=True):
    """Draw a box plot of one feature on a new figure.

    Parameters
    ----------
    col : np.array
        Feature values to plot.
    col_name : str or None
        Used as the plot title; no title is set when falsy.
    verbose : boolean
        If True, display the graph.

    Returns
    -------
    matplotlib.figure.Figure
    """
    figure = plt.figure()
    boxplot(col)
    if col_name:
        plt.title(col_name)
    if verbose:
        plt.show()
    return figure
def boxplot_feature(self, pid, fids):
    """
    self.boxplot_feature(pid, fids)
    This function makes a boxplot of the feature distribution
    in a given parcel (presumably across subjects -- not verifiable from
    this method alone).

    The shape of the plotted data is printed before the figure is drawn.

    Parameters
    ----------
    pid = parcel identifier, an integer within the [0..self.k] range
          NOTE(review): pid == self.k passes the check below; confirm that
          the upper bound is meant to be inclusive.
    fids = list of feature identifiers; each must be present in self.fids

    Raises
    ------
    ValueError if pid is out of range or a feature id is unknown
    """
    # 1. test that pid is correct
    if pid < 0:
        raise ValueError("Negative parcel id")
    if pid > self.k:
        raise ValueError("Wrong parcel id")

    # 2. test that the feature(s) exist and collect their positions
    idx = []
    for fid in fids:
        i = np.array([fid == f for f in self.fids])
        i = np.nonzero(i)
        i = np.reshape(i, np.size(i))
        if np.size(i) == 0:
            raise ValueError("The feature does not exist yet")
        idx.append(i)

    # 3. get the data and make the figure
    dataplot = []
    for j in idx:
        dataplot.append(np.transpose(self.features[j][:, pid]))
    dataplot = np.transpose(np.concatenate(dataplot))
    # Single-argument print() behaves the same on Python 2 and 3.
    print(np.shape(dataplot))

    import matplotlib.pylab as mp
    mp.figure()
    mp.boxplot(dataplot)
def plot_error_bar(self,MT_results,average_case):
    '''
    Plot total annual releases: the average case as a line, with one box
    per year built from that year's value in every entry of MT_results.

    MT_results is indexed as MT_results[run][year]. Only entries from
    index 40 onwards (of self.years, average_case and the per-year data)
    are drawn. The figure is displayed with plt.show().
    '''
    n_runs = len(MT_results)
    n_years = len(MT_results[0])
    plt.figure()

    # Regroup run-major data into one list of per-run values for each year.
    per_year = [
        [MT_results[run][year] for run in range(n_runs)]
        for year in range(n_years)
    ]

    shown_years = self.years[40::]
    plt.plot(shown_years, average_case[40::], label='Average Release Case')
    plt.boxplot(per_year[40::], positions=shown_years, labels=shown_years, widths=0.2)
    plt.xlabel('Year')
    plt.ylabel('Total Releases From Coating Products (Ton)')
    plt.legend()
    plt.show()
lr_ae_supp = np.dot(mat_V1, mat_W0) lr_ae_supp_z = zscore(lr_ae_supp, axis=1) for i in np.arange(18): r1, p1 = pearsonr(lr_ae_supp_z[i, :], mean_supp_z[i, :]) print('r/lrae: %.4f' % r1) corr_means_lr_ae[ilamb, i] = r1 r2, p2 = pearsonr(lr_supp_z[i, :], mean_supp_z[i, :]) print('r/lr: %.4f' % r2) corr_means_lr[ilamb, i] = r2 # boxplot plt.figure() corrs = np.vstack((corr_means_lr[0, :], corr_means_lr_ae)) plt.boxplot(corrs.T) plt.ylabel('correlation r') plt.title('Support Recovery: normal versus low-rank logistic regression\n' '%i components' % n_comp) tick_strs = [u'normal'] + [u'low-rank lambda=%.2f' % val for val in lambs] plt.xticks(np.arange(6) + 1, tick_strs, rotation=320) plt.ylim(0, 1.0) plt.yticks(np.linspace(0, 1., 11), np.linspace(0, 1., 11)) plt.tight_layout() out_path = op.join(WRITE_DIR, 'supp_recov_comp=%i.png' % n_comp) plt.savefig(out_path) # barplot plt.figure() ind = np.arange(5)
0.15433815, 0.2244887 , 0.27699527 , 0.14800338], '$Vertebral3C$':[ 0.11300019, 0.06446632 , 0.08426391 , 0.06303025, 0.08434364, 0.09018135, 0.09996641 , 0.07146446, 0.09008181 , 0.08094153], '$Cancer$': [ 0.03437089, 0.06196577, 0.04087629 , 0.06653635 , 0.06513942 , 0.03501766, 0.03562236, 0.11285626 , 0.0667102 , 0.18861101], '$Diabetes$': [ 0.25870847 , 0.27619998 , 0.24180374, 0.26761697, 0.23557992, 0.22850167, 0.26946416, 0.24228854 , 0.2434516 , 0.25255786], '$Card$': [ 0.31023136 , 0.3207549, 0.28652662, 0.24727198, 0.26294096, 0.30268156, 0.29924656, 0.2828805, 0.2497709 , 0.27087237], '$Heart$': [ 0.26676818, 0.26147714 , 0.22306759, 0.3132863, 0.28896025 , 0.29202858, 0.2508732, 0.25777515 , 0.28629689, 0.2796618 ], }; fig, ax1 = plt.subplots(figsize=(15,8)) plt.boxplot(results.values(), labels = results.keys()); plt.title('$Big\ Benchmark$'); plt.ylabel('$Error(MSE)$'); plt.ylim(0.0, 0.37) plt.xticks(rotation=25) ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.45) plt.savefig('boxplot_bigbench.png',dpi =100); plt.show();
regmodel.xi *= forg_factor regmodel.nu *= forg_factor # update regmodel.update(yt, xt) regmodel.log() #%% Ebeta_log = np.array(regmodel.Ebeta_log) plt.figure(1, figsize=(15, 5)) plt.plot(e2) plt.plot(yt_pred, '+') plt.figure(2, figsize=(15, 5)) plt.subplot(3, 1, 1) plt.plot(Ebeta_log[:,0]) plt.subplot(3, 1, 2) plt.plot(Ebeta_log[:,1]) #plt.subplot(3, 1, 3) #plt.plot(Ebeta_log[:,2]) #%% errors = e2[3:] - yt_pred[3:] plt.figure(3) plt.hist(errors, bins=100) print('RMSE: ', np.sqrt(np.mean((e2[3:] - yt_pred[3:])**2))) #%% plt.figure(4) plt.boxplot(errors, showfliers=False)
csize=csize).mean()/ngroups sent.append(seni) sens.append(np.array(sent)) kap.append(np.array(kappa)) clt.append(np.array(cls)) pk.append(np.array(peaks)) ################################################################################ # Visualize the results import scipy.stats as st aux = st.norm.sf(thresholds) import matplotlib.pylab as mp a = mp.figure() mp.subplot(1, 3, 1) mp.boxplot(kap) mp.title('voxel-level reproducibility', fontsize=12) mp.xticks(range(1,1+len(thresholds)),thresholds) mp.xlabel('threshold') mp.subplot(1, 3, 2) mp.boxplot(clt) mp.title('cluster-level reproducibility', fontsize=12) mp.xticks(range(1,1+len(thresholds)),thresholds) mp.xlabel('threshold') mp.subplot(1,3,3) mp.boxplot(pk,notch=1) mp.title('peak-level reproducibility', fontsize=12) mp.xticks(range(1,1+len(thresholds)),thresholds) mp.xlabel('threshold') a.set_figwidth(10.) a.set_size_inches(12, 5)
def _sort_by_pos(frame):
    """Return *frame* sorted by its ``pos`` column on old and new pandas alike."""
    if hasattr(frame, "sort_values"):
        return frame.sort_values(by=["pos"])
    # Fallback for pandas releases that only provide DataFrame.sort(columns=...).
    return frame.sort(columns=["pos"])


def plot_exon_coverage(filename, exons=None, exons_per_gene = None, target_folder = None, whitelist=None):
    """Plot per-base and per-exon read depth for every gene of a sample.

    Two PNG files are written (dpi 300); their names are
    ``target_folder + sample + " raw exon coverage"`` and
    ``target_folder + sample + " summarized exon coverage"`` with every
    space replaced by "_" and ".png" appended, where ``sample`` is the file
    name up to its first ".".

    Parameters
    ----------
    filename : str
        Tab-separated file without header row; its columns are read as
        chr, start, stop, amplicon, na, strand, amplicon_pos, dp. The gene
        of a row is the amplicon name up to the first "_", upper-cased.
    exons : DataFrame
        Exon table; the columns ``gene_upper``, ``exon_start``,
        ``exon_stop`` and ``exon_no`` are used. Without it the first figure
        stays empty (a warning is logged per gene) and the second figure
        cannot be drawn.
    exons_per_gene : dict or None
        Maps an upper-case gene name to an iterable of
        ``(start, stop, exon_no)`` tuples; used to assign an exon number to
        every position (0 when nothing matches or on any lookup error).
    target_folder : str or None
        Prefix put in front of the output file names verbatim (no path
        separator is added). ``None`` means no prefix.
    whitelist : object or None
        When given, must provide ``get_variants_per_exon(gene, exon_no)``;
        the counts are logged and used in the x tick labels of the second
        figure.

    Returns
    -------
    DataFrame
        One row per distinct chromosome position with the columns chrompos,
        chrom, pos, gene, start, stop, minus_dp, plus_dp, dp and exon_no.
    """
    sample = filename.split("/")[-1][:-13]
    out_prefix = target_folder if target_folder is not None else ""

    header = ["chr", "start", "stop", "amplicon", "na", "strand", "amplicon_pos", "dp"]
    rawdf = pd.read_csv(filename, sep="\t", names=header)
    rawdf["pos"] = rawdf.start + rawdf.amplicon_pos
    rawdf["chrompos"] = rawdf.apply(lambda x: "_".join([str(x["chr"]), str(x["pos"])]), axis=1)
    rawdf["name"] = rawdf.amplicon
    rawdf["gene"] = rawdf.apply(lambda x: x["amplicon"].split("_")[0].upper(), axis=1)

    ###########################################################################
    # df: per base
    df = dict(chrom=rawdf.chr.groupby(rawdf.chrompos).min(),
              pos=rawdf.pos.groupby(rawdf.chrompos).min(),
              gene=rawdf.gene.groupby(rawdf.chrompos).min(),
              start=rawdf.start.groupby(rawdf.chrompos).min(),
              stop=rawdf.stop.groupby(rawdf.chrompos).min(),
              minus_dp=rawdf[rawdf.strand == "-"].dp.groupby(rawdf.chrompos).max(),
              plus_dp=rawdf[rawdf.strand == "+"].dp.groupby(rawdf.chrompos).max(),
              dp=rawdf.dp.groupby(rawdf.chrompos).sum(),
              )
    df = pd.DataFrame(df).reset_index()
    df["gene"] = df.gene.str.upper()

    ###########################################################################
    def find_matching_exon_from_dict(row):
        gene = row["gene"]
        pos = row["pos"]
        exon_no = 0
        try:
            for region in exons_per_gene[gene]:
                if region[0] < pos < region[1]:
                    exon_no = region[2]
                    break
        except Exception:
            # Best effort: unknown gene, missing dict or malformed region
            # all leave the position without an exon number (0).
            pass
        return exon_no

    df["exon_no"] = df.apply(find_matching_exon_from_dict, axis=1)

    ###########################################################################
    def plot_exon(row):
        # Draw one exon as a red bar below the zero line.
        start = int(row["exon_start"])
        stop = int(row["exon_stop"])
        size = stop - start
        rectangle = plt.Rectangle((start, -20), size, 10, fc='red')
        plt.gca().add_patch(rectangle)

    ###########################################################################
    # plot individual bases vs chromosome locations
    all_genes = df.gene.unique()
    all_genes.sort()
    figure_length = len(all_genes)

    upper = np.percentile(df[df.dp > 0].dp, 90)
    lower = -np.percentile(df[df.dp > 0].dp, 40)

    plt.figure(figsize=(15, figure_length))
    plt.ylim(lower, upper)

    plot_no = 0
    for gene in all_genes:
        plot_no += 1
        plt.subplot(len(all_genes), 1, plot_no)
        try:
            plt.ylabel(gene)
            gene = gene.upper()

            # Select this gene's exons from the full exon table. (The
            # original filtered on the not-yet-assigned gene_exons, so every
            # gene ended up in the except branch and nothing was plotted.)
            gene_exons = exons[(exons.gene_upper == gene)]
            gene_exons.apply(plot_exon, axis=1)

            x_start = int(gene_exons.head(1).exon_start) - 1000
            # NOTE(review): uses exon_start of the last exon; exon_stop may
            # have been intended -- confirm.
            x_stop = int(gene_exons.tail(1).exon_start) + 1000
            plt.xlim(x_start, x_stop)
            plt.ylim(-20, 210)

            # Copy so that adding a column does not write into a slice of df.
            gene_df = df[(df.gene.str.upper() == gene)].copy()
            gene_df["dp_capped"] = gene_df.apply(lambda x: 200 if x["dp"] > 200 else x["dp"], axis=1)

            pdf = _sort_by_pos(gene_df[gene_df.dp > 10])
            plt.scatter(pdf.pos, pdf.dp_capped, c="black", s=10)
            plt.axhline(0)

            # create x labels: one tick at the centre of every known exon
            locs = []
            labels = []
            for i in range(1, int(gene_exons.exon_no.max()) + 1):
                this_exon = gene_exons[gene_exons.exon_no == i]
                if len(this_exon) > 0:  # exons have matching amplicons
                    start = this_exon.exon_start.min()
                    stop = this_exon.exon_stop.max()
                    locs.append(np.mean([start, stop]))
                    labels.append(str(i))
            plt.xticks(locs, labels)
        except Exception:
            logging.warning("Gene %s in sample %s is not plotted due to an error. Is it in the exon list?" % (gene, sample))
            logging.debug("Plotting failure details", exc_info=True)

    plt.tight_layout()
    sample = filename.split("/")[-1].split(".")[0]
    title = out_prefix + sample + " raw exon coverage"
    plt.savefig(title.replace(" ", "_") + ".png", dpi=300)
    plt.close()

    ###########################################################################
    # plot boxplots vs exon counts
    # (all_genes, upper and lower are unchanged and reused from above)
    plt.figure(figsize=(15, figure_length))

    plot_no = 0
    for gene in all_genes:
        plot_no += 1
        plt.subplot(len(all_genes), 1, plot_no)
        plt.ylabel(gene)
        plt.axhline(0)
        plt.ylim(lower, upper)

        gene = gene.upper()
        gene_exons = exons[(exons.gene_upper == gene)]
        pdf = _sort_by_pos(df[(df.gene.str.upper() == gene)])

        xs = []
        ys = []

        logging.debug("-" * 150)
        logging.debug(gene)

        if len(gene_exons.exon_no.values) > 0:
            last_exon = int(gene_exons.exon_no.max())
            for i in range(1, last_exon + 1):
                if len(gene_exons[gene_exons.exon_no == i]) > 0:  # exons have matching amplicons
                    if whitelist:
                        logging.debug("Exon %s, %s variants" % (i, whitelist.get_variants_per_exon(gene, i)))

                    data = list(pdf[pdf.exon_no == i].dp)
                    mean_x = pdf[pdf.exon_no == i].pos.mean()
                    if data and mean_x:
                        ys.append(data)
                    else:
                        # placeholder box at zero for an exon without data
                        ys.append([0, 0, 0])
                    xs.append(i)
                else:  # empty exon that can't have coverage
                    midlevel = lower + (upper - lower) / 2
                    plt.text(i, midlevel, "X", fontsize=16)
                    ys.append([0, 0, 0])
                    xs.append(i)

            if len(xs) > 0:
                plt.boxplot(ys, positions=xs)

            if whitelist:
                # create x labels: running exon number and variant count
                locs = []
                labels = []
                covered_exon_count = 0
                for i in range(1, last_exon + 1):
                    covered_exon_count += 1
                    locs.append(i)
                    var_count = whitelist.get_variants_per_exon(gene, i)
                    labels.append("%s.\n(%s)" % (covered_exon_count, var_count))
                plt.xticks(locs, labels)

    plt.tight_layout()
    sample = filename.split("/")[-1].split(".")[0]
    title = out_prefix + sample + " summarized exon coverage"
    plt.savefig(title.replace(" ", "_") + ".png", dpi=300)
    plt.close()

    return df
method, swap, verbose, **kwargs) kappa.append(k) cld = cluster_reproducibility(func, var, xyz, ngroups, coord, sigma, method, swap, verbose, **kwargs) cls.append(cld) kap.append(np.array(kappa)) clt.append(np.array(cls)) ################################################################################ # Visualize the results import matplotlib.pylab as mp mp.figure() mp.subplot(1,2,1) mp.boxplot(kap) mp.title('voxel-level reproducibility') mp.xticks(range(1,1+len(thresholds)),thresholds) mp.xlabel('threshold') mp.subplot(1,2,2) mp.boxplot(clt) mp.title('cluster-level reproducibility') mp.xticks(range(1,1+len(thresholds)),thresholds) mp.xlabel('threshold') mp.figure() q = 1 for threshold in thresholds: mp.subplot(3, len(thresholds)/3, q) rmap = map_reproducibility(func, var, xyz, ngroups,
stds[nmm, nc]=tempStds CIsMeanUB=CI.variables['meanUB'][ny+70,nj,ni] CIsMeanLB=CI.variables['meanLB'][ny+70,nj,ni] CIsStdsUB=CI.variables['sdUB'][ny+70,nj,ni] CIsStdsLB=CI.variables['sdLB'][ny+70,nj,ni] numMean=minNum_nSIF_mean_85[ny, nj, ni] numStd=minNum_nSIF_std_85[ny, nj, ni] plt.figure() plt.hlines(CIsMeanUB, 0, 31) plt.hlines(CIsMeanLB, 0, 31) plt.hlines(means[-1][0], 0, 31) plt.boxplot(means.T) plt.title('Example mean distributions ('+str(int(numMean))+' members needed), YEAR=' + str(yr)+ ' NI='+str(ni)+ 'NJ='+str(nj)) plt.ylabel('Number of open water days') plt.xlabel('Number of subsampled ensemble members') plt.savefig('SI_FigXx_numNeeded_Mean.'+rcpName[ittR]+'.'+nsk+'.pdf', format='pdf') #plt.show() plt.figure() plt.hlines(CIsStdsUB, 0, 31) plt.hlines(CIsStdsLB, 0, 31) plt.hlines(stds[-1][0], 0, 31) plt.boxplot(stds.T) plt.title('Example standard deviation distributions('+str(int(numStd))+' members needed), YEAR=' + str(yr)+ ' NI='+str(ni)+ 'NJ='+str(nj)) plt.ylabel('Number of open water days') plt.xlabel('Number of subsampled ensemble members') plt.savefig('SI_FigXx_numNeeded_STD.'+rcpName[ittR]+'.'+nsk+'.pdf', format='pdf')
def plot_cluster_expression(out,data1,data2,donor,gene, image):
    """Box-plot per-probe expression inside vs. outside clusters and save it.

    For every row i of `data1` / `data2` (one row per probe) a blue box
    ("Inside", from data1) and a red box ("Outside", from data2) are drawn
    next to each other, and an independent two-sample t-test between the two
    rows is run; its p-value is written next to the plot.

    The figure is saved as ``<donor>_<gene>.png`` inside `out`; if that
    fails, saving as ``.svg`` is attempted instead.
    """
    def setBoxColors(bp):
        # Colour the first box of a pair blue and the second one red.
        for index, colour in enumerate(('blue', 'red')):
            setp(bp['boxes'][index], color=colour)
            setp(bp['medians'][index], color=colour)
            # every box owns two caps and two whiskers
            for part in ('caps', 'whiskers'):
                for line in bp[part][2 * index:2 * index + 2]:
                    setp(line, color=colour)
        # Depending on the matplotlib version there are one or two flier
        # lines per box; the first half belongs to the first box.
        fliers = bp['fliers']
        half = len(fliers) // 2
        for line in fliers[:half]:
            setp(line, color='blue')
        for line in fliers[half:]:
            setp(line, color='red')

    N_probes = data1.shape[0]
    figure()
    ax = axes()
    # No hold(True): holding is the default behaviour and the function no
    # longer exists in current matplotlib.

    s = 1
    f = 2
    p_value = []
    t_stat = []
    ticks = []
    for i in range(N_probes):
        t, p = stats.ttest_ind(data1[i, :], data2[i, :])
        bp = boxplot([data1[i, :], data2[i, :]], positions=[s, f], widths=0.6)
        setBoxColors(bp)
        ticks.append((s + f) / 2.)
        s += 3
        f += 3
        p_value.append(p)
        t_stat.append(t)

    # Temporary lines that only provide the legend entries
    hB, = plot([1, 1], 'b-')
    hR, = plot([1, 1], 'r-')
    xlim(0, f + 2)
    ylim(2, 20)
    legend((hB, hR), ('Inside', 'Outside'))

    for i in range(N_probes):
        text(f + 3, 10 - i, 'Probe #{}: p-value={}'.format(i + 1, np.round(p_value[i], 3)))

    # Positions first, then labels, so that both always have the same length.
    ax.set_xticks(ticks)
    ax.set_xticklabels(['probe #{}'.format(j) for j in range(1, N_probes + 1)])
    title('Donor {}, Allen Brain expression of gene {} inside/outside clusters formed in {} image'.format(donor, gene, image))
    hB.set_visible(False)
    hR.set_visible(False)

    try:
        savefig(os.path.join(out, donor + "_" + gene + ".png"))
    except Exception:
        # Fall back to SVG when the PNG cannot be written.
        savefig(os.path.join(out, donor + "_" + gene + ".svg"))
def boxplot_dti_movement(subs_df, figure_name):
    '''
    Create a boxplot of every column of subs_df whose name contains
    'mean_rms' (the different ways of calculating displacement for dti
    scans). Label the outliers with their subid.

    subs_df must have a 'subid' column. A 'color' column is added to (or
    reset in) subs_df: 0.0 for subjects without outliers, otherwise a
    per-subject counter so that a subject gets the same label colour in
    every box.

    The figure is saved to figure_name; the modified subs_df is returned.
    '''
    #===============================================================
    # IMPORTS
    #---------------------------------------------------------------
    import matplotlib.pylab as plt
    import matplotlib as mpl

    #===============================================================
    # Define some measures we need
    #---------------------------------------------------------------
    # First: the columns we're going to plot
    cols = [name for name in subs_df.columns if 'mean_rms' in name]

    # The total number of subjects
    n = subs_df.subid.count()

    # Define the colorbar that you want to use
    cmap = mpl.cm.gist_ncar
    norm = mpl.colors.Normalize(vmin=0, vmax=1)
    color_map = mpl.cm.ScalarMappable(norm, cmap)

    # Start the color counter
    color_counter = 1.0

    # Make sure everyone is originally set with a color of 0
    subs_df['color'] = 0.0

    #===============================================================
    # Make the figure
    #---------------------------------------------------------------
    fig, ax = plt.subplots()

    # Make a box plot of the different measures of movement
    box = plt.boxplot(subs_df[cols].values)

    # One of the pieces of information contained in the box variable
    # are the locations of the fliers (the outliers)
    for f in box['fliers']:

        # x_list: list of x positions, fliers_list: list of y positions
        x_list, fliers_list = f.get_data()

        # Sort the y values smallest to largest (on a copy, so the plotted
        # line is left untouched). The x list needs no sorting because all
        # its values are the same :)
        fliers_list = sorted(fliers_list)

        # Loop through all the x, y pairs with a counter (c)
        for c, (x, y) in enumerate(zip(x_list, fliers_list)):

            # You can find the subID for each of the outliers
            # by looking up the y value in the appropriate column
            # (indexed as x-1 because the plot doesn't start counting at 0)
            sub_id = subs_df.subid[subs_df[cols[int(x - 1)]] == y].values[0]
            is_subject = subs_df.subid == sub_id

            # Give the subject a color the first time they show up so that
            # it's the same for each individual across boxes.
            if subs_df.loc[is_subject, 'color'].values[0] == 0:
                subs_df.loc[is_subject, 'color'] = color_counter
                color_counter += 1

            # Look up the number that's been filled in for this participant
            # and define the color that will be used in the annotation
            sub_color_id = subs_df.loc[is_subject, 'color'].values[0]
            color = color_map.to_rgba(10.0 * sub_color_id / n)

            # Make the labels flip sides left and right from one outlier
            # to the next by using the counter modulo 2
            offset_x = -0.5 * float(c % 2) + 0.25 + x
            offset_y = 0.25 + y

            # Annotate all the outliers with a box that contains their subid
            # and has a personalized color
            ax.annotate(sub_id, xy=(x, y), xytext=(offset_x, offset_y),
                        textcoords='data', ha='center', va='center',
                        bbox=dict(boxstyle='round,pad=0.2', fc=color, alpha=0.5),
                        arrowprops=dict(arrowstyle='->', color='black'))

    # Make the plot look nicer:
    # Lets make sure the labels all fit onto the x axis
    plt.xticks(range(1, len(cols) + 1), cols, rotation=45)

    # And label the yaxis
    ax.set_ylabel('Displacement (mm)')

    # And set the y axis to being a little higher than the max so the labels fit!
    ylims = ax.get_ylim()
    ax.set_ylim(ylims[0], ylims[1] + 0.5)

    plt.tight_layout()

    # Name the figure and save it
    fig.savefig(figure_name, bbox_inches=0, dpi=100)

    return subs_df
#Plot a pie chart
plt.cla()
plt.pie(carDf.PRICE, labels=carDf.MODEL, shadow=True, autopct='%1.1f')

#Plot a histogram
plt.cla()
plt.hist(data3, color='g')
plt.title("Demo Histogram")
plt.xlabel("Sin weights")
plt.ylabel("Frequency")

#Plot a box plot
plt.cla()
#Pass a list with one 1-D array of weights per make. (Wrapping each Series
#in a further list yields 3-D input, which current matplotlib rejects.)
plt.boxplot([carDf.WEIGHT[carDf.MAKE == 'Toyota'].values,
             carDf.WEIGHT[carDf.MAKE == 'Ford'].values],
            labels=('Toyota', 'Ford'))

#----------------------------------------------------------------------------
# Data Acquisition
#----------------------------------------------------------------------------
import os
os.chdir("C:/Personal/V2Maestros/Modules/Python - Pandas")

#File
irisData = pd.read_csv("iris.csv")
irisData
irisData.describe()

irisData['dummy'] = 1
#Plot scatter
plt.cla()
plt.scatter(carDf.PRICE, carDf.WEIGHT, color='r')

#Plot bar charts
plt.cla()
plt.bar(carDf.ID, carDf.PRICE)
plt.cla()
plt.barh(carDf.ID, carDf.WEIGHT)
plt.yticks(carDf.ID, carDf.MODEL)

#Plot pie chart
plt.cla()
plt.pie(carDf.PRICE, labels=carDf.MODEL, shadow=True, autopct='%1.1f')

#Plot a histogram
plt.cla()
plt.hist(data3, color='g')
plt.title('Demo Histogram')
plt.xlabel('Sin Weights')
plt.ylabel('Frequency')

#Plot a boxplot
plt.cla()
#pass a list with one 1-D array of weights per make (wrapping each Series
#in a further list yields 3-D input, which current matplotlib rejects)
plt.boxplot([carDf.WEIGHT[carDf.MAKE=='Toyota'].values,
             carDf.WEIGHT[carDf.MAKE=='Ford'].values],
            labels=('Toyota','Ford'))
#shows how weights change by make of car
iris_data.head()

"""
No Cleansing is Required
"""

#Exploratory Data Analysis
plt.scatter(iris_data['Petal.Length'],iris_data['Petal.Width'])
plt.cla()
plt.scatter(iris_data['Sepal.Length'],iris_data['Sepal.Width'])

#One box per species for each measurement. Every group is passed as a plain
#1-D array (wrapping each Series in a further list yields 3-D input, which
#current matplotlib rejects).
plt.cla()
plt.boxplot([iris_data['Petal.Length'][iris_data.Species=='setosa'].values,
             iris_data['Petal.Length'][iris_data.Species=='versicolor'].values,
             iris_data['Petal.Length'][iris_data.Species=='virginica'].values],
            labels=('setosa','versicolor','virginica'))

plt.cla()
plt.boxplot([iris_data['Petal.Width'][iris_data.Species=='setosa'].values,
             iris_data['Petal.Width'][iris_data.Species=='versicolor'].values,
             iris_data['Petal.Width'][iris_data.Species=='virginica'].values],
            labels=('setosa','versicolor','virginica'))

plt.cla()
plt.boxplot([iris_data['Sepal.Length'][iris_data.Species=='setosa'].values,
             iris_data['Sepal.Length'][iris_data.Species=='versicolor'].values,
             iris_data['Sepal.Length'][iris_data.Species=='virginica'].values],
            labels=('setosa','versicolor','virginica'))
#Note that sepal width is all over the place, a lot of overlap. Not a great predictor.
def feature_by_age_boxplot(age2vals, age2label, outfn, title='', xlabel='Age', ylabel='Value', scale_x = False, output_png=False, methods_str=''):
    """Draw one box per age and save the figure to `outfn`.

    The ages are the sorted keys of `age2label`. An age that is missing from
    `age2vals` gets an empty box; for the others the values are box-plotted
    (no fliers) and their average is marked with a red 'x'. Boxes sit at the
    age itself when `scale_x` is true, otherwise at their rank 0, 1, 2, ...

    Tick labels read "<label> (<age>)"; negative ages get an empty label.
    `methods_str` is printed at the bottom of the figure as "[DB: ...]".
    When `output_png` is true the figure is saved a second time under
    `outfn` with '.pdf' replaced by '.png'.
    """
    fig = plt.figure()
    if len(age2vals.keys()) > 1:
        # Enlarge the default canvas when there is more than one age group.
        fig.set_figwidth(fig.get_figwidth() * 1.7)
        fig.set_figheight(fig.get_figheight() * 1.2)

    ax1 = fig.add_subplot(111)
    plt.subplots_adjust(bottom=0.26)

    ages = sorted(age2label.keys())
    box_pos = [age if scale_x else slot for slot, age in enumerate(ages)]
    box_data = [age2vals[age] if age in age2vals else [] for age in ages]

    bp = plt.boxplot(box_data, widths=.6, sym='', patch_artist=True, positions=box_pos)
    plt.setp(bp['boxes'], color='#99CCFF', edgecolor="black", lw=1)
    for part, width in (('whiskers', 1), ('medians', 1.5), ('caps', 1)):
        plt.setp(bp[part], color='black', lw=width)

    # Mark the average of every populated age group.
    for x_pos, age in zip(box_pos, ages):
        if age in age2vals:
            ax1.plot(x_pos, numpy.average(age2vals[age]), 'x', color='red', markersize=6, markeredgewidth=1.5)

    if age2label:
        labels = []
        for age in ages:
            if age < 0 or age not in age2label:
                labels.append('')
                continue
            # Group size, evaluated as before; the label text does not use it.
            _n_values = len(age2vals[age]) if age in age2vals else 0
            labels.append('%s (%d)' % (age2label[age], age))
        xtickNames = plt.setp(ax1, xticklabels = labels)
        plt.setp(xtickNames, fontsize=10, rotation=45, horizontalalignment='right')

    # Pad the y range by 2% on both sides ...
    ymin = ax1.viewLim.ymin
    ymax = ax1.viewLim.ymax
    y_pad = .02 * (ymax - ymin)
    ax1.set_ylim(ymin - y_pad, ymax + y_pad)

    # ... and the x range likewise.
    xmin = ax1.viewLim.xmin
    xmax = ax1.viewLim.xmax
    x_pad = .02 * (xmax - xmin)
    ax1.set_xlim(xmin - x_pad, xmax + x_pad)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.figtext(0.1, 0.01, '[DB: '+methods_str+']', size=10)

    plt.savefig(outfn)
    if output_png:
        plt.savefig(outfn.replace('.pdf', '.png'))
    return
def _fill_boxplot_box(ax, box, median, facecolor):
    """Paint the interior of one box and redraw its median line on top."""
    corners = list(zip(box.get_xdata()[:5], box.get_ydata()[:5]))
    ax.add_patch(plt.Polygon(corners, facecolor=facecolor))
    # The filled patch hides the median drawn by boxplot(), so draw it again.
    ax.plot(median.get_xdata()[:2], median.get_ydata()[:2], 'k')


def plot_boxplot(data_without_orig, data_with_orig, labels, filename):
    """Plot paired box plots (uninstrumented vs. instrumented) and save as EPS.

    For every index the median of both series and their relative change are
    printed, then the samples are divided by 1000 and drawn side by side:
    a white box for ``data_without_orig`` and a grey one for
    ``data_with_orig``.

    Parameters
    ----------
    data_without_orig : sequence of sequences of numbers
        Samples measured without instrumentation, one inner sequence per
        x position.  Presumably KB/sec, given the division by 1000 and the
        "MB/sec" axis caption -- confirm against the caller.
    data_with_orig : sequence of sequences of numbers
        Samples measured with instrumentation; same layout.
    labels : sequence
        One value per x position; each is converted with ``int`` and
        floor-divided by 1024 to produce the tick label.
    filename : str
        Output path; the figure is written in EPS format.

    Notes
    -----
    The input sequences are not modified.  Scaling uses true division, so
    integer samples are no longer truncated under Python 2.
    """
    data_without = []
    data_with = []
    print("Medians (%s):" % filename)
    big_y = False
    for idx, with_vals in enumerate(data_with_orig):
        without_vals = data_without_orig[idx]
        median_without = numpy.median(without_vals)
        median_with = numpy.median(with_vals)
        # Large values widen the y tick labels, so the legend boxes are
        # nudged to the right further down.
        if median_without / 1000 > 1000:
            big_y = True
        print(" * %d: With: %f, Without: %f" % (idx, median_with, median_without))
        prct_change = (median_without - median_with) / median_without
        print(" * %d: Percent Change: %f" % (idx, prct_change))
        # Scale into fresh lists instead of overwriting the caller's data.
        data_without.append([x / 1000.0 for x in without_vals])
        data_with.append([x / 1000.0 for x in with_vals])

    fig, ax1 = plt.subplots(figsize=(10, 6))
    try:
        index = numpy.arange(len(data_without)) + 1
        bar_width = .1
        widths = numpy.ones(len(data_without)) * bar_width * 2
        bp = pylab.boxplot(data_without, positions=index - bar_width,
                           widths=widths, sym='')
        bp2 = pylab.boxplot(data_with, positions=index + bar_width,
                            widths=widths, sym='')
        for artists in (bp, bp2):
            plt.setp(artists['boxes'], color='black')
            plt.setp(artists['whiskers'], color='black')
            plt.setp(artists['fliers'], color='grey', marker='+')

        boxColors = ['white', 'grey']
        for i in range(len(data_without)):
            _fill_boxplot_box(ax1, bp['boxes'][i], bp['medians'][i], boxColors[0])
            _fill_boxplot_box(ax1, bp2['boxes'][i], bp2['medians'][i], boxColors[1])

        plt.grid(True)
        plt.xlim(0, len(labels) + 1)
        # Floor division keeps the integer tick labels on Python 2 and 3.
        plt.xticks(index, [int(x) // 1024 for x in labels])
        plt.xlabel("File Size (MB)", fontsize=20)
        plt.ylabel("Disk Throughput (MB/sec)", fontsize=20)
        ax1.tick_params(axis='both', which='major', labelsize=15)

        # Legend boxes; shifted right when the y tick labels are wide.
        x_without, x_with = (0.16, 0.38) if big_y else (0.13, 0.35)
        plt.figtext(x_without, 0.18, 'Uninstrumented',
                    backgroundcolor=boxColors[0], color='black',
                    weight='roman', size=15,
                    bbox=dict(facecolor=boxColors[0], edgecolor='black',
                              boxstyle='round,pad=1'))
        plt.figtext(x_with, 0.18, 'With Instrumentation',
                    backgroundcolor=boxColors[1], color='white',
                    weight='roman', size=15,
                    bbox=dict(facecolor=boxColors[1], edgecolor='black',
                              boxstyle='round,pad=1'))

        plt.tight_layout()
        plt.savefig(filename, format='eps', dpi=1000)
    finally:
        # Release the figure; pyplot otherwise keeps it alive indefinitely.
        plt.close(fig)
else: # Just in case, replace Missing Values with zero: data[column].fillna(0, inplace=True) print 'Missing values replaced with zeros.' print ' ' Col = preprocessing.scale(data[column]) skness = skew(Col) xlabel = str(skness) figure = plt.figure() print 'Skewness =', skness figure.add_subplot(121) plt.hist(Col, facecolor='lightblue', alpha=0.75) plt.xlabel( " Skewness greater than zero shows large skewed distribution --> ") plt.title(column) plt.text(2, 100000, "Skewness: {0:.2f}".format(skness)) figure.add_subplot(122) plt.boxplot(Col) plt.title("Skewed Distribution") plt.xlabel(xlabel) plt.show() print '\nHasta la vista, human.\n'
for i in range(n_controls) if i!=n] test = control_covs[n] control_model.fit(train) control_fit_cv.append(control_model.log_lik(test)) patient_fit_cv += np.array([control_model.log_lik(p) for p in patient_covs]) patient_fit_cv /= n_controls import matplotlib.pylab as pl pl.rcParams['text.usetex'] = True pl.rcParams['text.latex.preamble'] = r'\usepackage{amsfonts}' pl.figure(1, figsize=(1, 3)) pl.clf() ax = pl.axes([.2, .2, .5, .7]) pl.boxplot([control_fit_cv, patient_fit_cv], widths=.25) pl.plot(1.26*np.ones(len(control_fit_cv)), control_fit_cv, '+k', markeredgewidth=1) pl.plot(2.26*np.ones(len(patient_fits)), patient_fit_cv, '+k', markeredgewidth=1) pl.xticks((1.13, 2.13), ('controls', 'patients'), size=13) if WHITEN: title = 'Tangent\nspace' else: title = r'$\mathbb{R}^{n\times n}$' pl.text(.1, .1, title, transform=ax.transAxes, horizontalalignment='left', verticalalignment='bottom', size=12)
def plot_all_metrics(metrics, gene_names, all_learn_options, save, plots=None, bottom=0.19):
    """Draw bar charts and box plots comparing methods across metrics.

    Parameters
    ----------
    metrics : dict
        ``metrics[method][metric]`` holds the values of one metric for one
        method.  Metrics whose name contains ``'global'`` are drawn as one
        bar per method; all others as one bar per gene and method, and are
        additionally summarised in a box plot.
    gene_names : sequence
        Tick labels for the per-gene bar charts.
    all_learn_options : dict
        Only its first key is used here (as the output file stem); the whole
        dict is pickled next to the plots when saving.
    save : bool
        When equal to ``True``, pickle the inputs and save every figure as
        PNG under ``../results/``.
    plots : container of str or None
        Which plot families to draw: ``'gene level'`` and/or ``'boxplots'``.
        ``None`` draws both.
    bottom : float
        Bottom margin passed to ``subplots_adjust`` for the box plots.

    Raises
    ------
    AssertionError
        If the median of a non-global metric is NaN.

    Notes
    -----
    Figures are left open (they are addressed by name via ``plt.figure``),
    so the caller decides when to show or close them.
    """
    methods = list(metrics)
    num_methods = len(methods)
    metrics_names = list(metrics[methods[0]])
    num_genes = len(gene_names)

    width = 0.9 / num_methods
    ind = np.arange(num_genes)

    # Kept as an equality test so that truthy non-boolean values
    # (e.g. a string) still do not trigger saving, exactly as before.
    do_save = (save == True)  # noqa: E712
    gene_level = plots is None or 'gene level' in plots
    want_boxplots = plots is None or "boxplots" in plots

    if do_save:
        first_key = list(all_learn_options)[0]
        # Build the path with the platform separator: a literal backslash
        # path has an empty dirname on POSIX and makedirs('') raises.
        basefile = os.path.join(os.pardir, "results", "%s" % (first_key,))
        d = os.path.dirname(basefile)
        if d and not os.path.exists(d):
            os.makedirs(d)
        with open(basefile + ".plot.pickle", "wb") as f:
            pickle.dump([metrics, all_learn_options, gene_names], f)

    # Create the named figures up front so later calls re-activate them.
    for metric in metrics_names:
        if 'global' not in metric:
            plt.figure(metric, figsize=(20, 8))
        elif gene_level:
            plt.figure(metric, figsize=(12, 12))

    boxplot_labels = []
    boxplot_columns = {}  # metric -> list of (n, 1) arrays, one per method
    boxplot_median = {}   # metric -> list of medians, one per method

    for i, method in enumerate(methods):
        boxplot_labels.append(method)
        colour = plt.cm.Paired(1. * i / num_methods)
        for metric in metrics[method].keys():
            values = metrics[method][metric]
            if 'global' in metric:
                plt.figure(metric)
                plt.bar([i], values, 0.9, color=colour, label=method)
                continue

            if gene_level:
                plt.figure(metric)
                plt.bar(ind + (i * width), values, width, color=colour, label=method)

            median_metric = np.median(values)
            print("%s %s %s" % (method, metric, median_metric))
            assert not np.isnan(median_metric), "found nan for %s, %s" % (method, metric)
            boxplot_columns.setdefault(metric, []).append(np.array(values)[:, None])
            boxplot_median.setdefault(metric, []).append(median_metric)

    for metric in metrics_names:
        is_global = 'global' in metric

        if gene_level:
            plt.figure(metric)
            plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
            plt.ylabel(metric)

            if is_global:
                plt.xticks(list(range(num_methods)), methods, rotation=70)
            else:
                plt.xticks(ind + width, gene_names)
            plt.grid(True, which='both')
            plt.subplots_adjust(left=0.05, right=0.8)

            if do_save:
                # NOTE(review): this also relabels the 'global' charts with
                # gene names when saving -- kept as in the original; confirm
                # whether that is intended.
                plt.xticks(ind + 0.5, gene_names)
                if metric == 'AUC':
                    plt.ylim([0.5, 1.0])
                plt.savefig(basefile + "_" + metric + "_bar" + ".png")

        if want_boxplots and not is_global:
            plt.figure('Boxplot %s' % metric)
            # Order the boxes by decreasing median.
            sorted_boxplot = np.argsort(boxplot_median[metric])[::-1]
            columns = np.concatenate(boxplot_columns[metric], axis=1)
            plt.boxplot(columns[:, sorted_boxplot])
            plt.ylabel(metric)
            plt.xticks(list(range(1, num_methods + 1)),
                       np.array(boxplot_labels)[sorted_boxplot], rotation=70)
            plt.subplots_adjust(top=0.97, bottom=bottom)
            if metric == 'RMSE':
                plt.ylim((1.0, 2.0))

            if do_save:
                plt.savefig(basefile + "_" + metric + ".png")
def plot(et_name, iv_measure, single_eyetracker_results):
    """Box-plot one measure across window types and save it as a PNG.

    One box is drawn per key of ``single_eyetracker_results``.  Each box
    gets its median redrawn in black, a green star at the mean of its data,
    and the median value (rounded to three decimals) printed near the top
    of the axes.  The figure is written to ``'<et_name>_<iv_measure>.png'``
    in the current directory and then closed.

    Parameters
    ----------
    et_name : str
        Name used in the window title, plot title and output file name.
    iv_measure : str
        Name of the plotted measure; used as y label, in the titles and in
        the output file name.
    single_eyetracker_results : dict
        Maps a window-type name to the sequence of values for that box.

    Returns
    -------
    None
    """
    import numpy as np
    import matplotlib.pyplot as plt

    # Materialise names and data together so both share one ordering and
    # can be indexed on Python 2 and 3 alike.
    distNames = list(single_eyetracker_results.keys())
    data = [single_eyetracker_results[name] for name in distNames]
    numBoxes = len(distNames)
    print('distNames: %s' % (distNames,))

    fig = plt.figure(figsize=(10, 6))
    try:
        fig.canvas.manager.set_window_title(et_name + ' : ' + iv_measure)
        ax1 = fig.add_subplot(111)
        plt.subplots_adjust(left=0.075, right=0.95, top=0.9, bottom=0.25)

        bp = plt.boxplot(data, notch=0, sym='', vert=1, whis=1.5)
        plt.setp(bp['boxes'], color='black')
        plt.setp(bp['whiskers'], color='blue')
        plt.setp(bp['fliers'], color='red', marker='+')

        # Light horizontal grid, kept behind the plot objects, for reading
        # values without distracting from the boxes.
        ax1.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
                       alpha=0.5)
        ax1.set_axisbelow(True)
        ax1.set_title('Comparison of Sample Selection Window Algorithms\n'
                      + et_name + ' : ' + iv_measure)
        ax1.set_xlabel('Window Type')
        ax1.set_ylabel(iv_measure)

        label_color = 'darkkhaki'
        medians = []
        for med, values in zip(bp['medians'], data):
            median_x = list(med.get_xdata()[:2])
            median_y = list(med.get_ydata()[:2])
            plt.plot(median_x, median_y, 'k')
            medians.append(median_y[0])
            # Mark the mean of the sample at the centre of the box.
            plt.plot([np.average(med.get_xdata())], [np.average(values)],
                     color='g', marker='*', markeredgecolor='k')

        # Axis ranges: small vertical margin, upper limit capped at 10.
        ax1.set_xlim(0.5, numBoxes + 0.5)
        bottom, top = ax1.get_ylim()
        top = min(top + 0.1, 10.0)
        bottom = bottom - 0.1
        ax1.set_ylim(bottom, top)
        xtickNames = plt.setp(ax1, xticklabels=distNames)
        plt.setp(xtickNames, rotation=45, fontsize=8)

        # The y scale can make medians hard to compare by eye, so print
        # each median above its box.
        pos = np.arange(numBoxes) + 1
        upperLabels = [str(np.round(s, 3)) for s in medians]
        for x, text in zip(pos, upperLabels):
            ax1.text(x, top - (top * 0.05), text,
                     horizontalalignment='center', size='x-small',
                     weight='bold', color=label_color)

        plt.savefig('%s_%s.png' % (et_name, iv_measure), bbox_inches='tight')
    finally:
        # Close this figure even if drawing or saving failed.
        plt.close(fig)
# Rank the tags for every test title.  The statement runs through the
# IPython ``%time`` magic, which times it and binds ``predicted_tags`` in
# the interactive namespace.
get_ipython().magic(u'time predicted_tags = [np.array(tagrank.get_ranking(title))[:,0] for title in product_log_test.title_tokens]')


def calculate_nhits(pred_tags,true_tags):
    """Return how many distinct predicted tags also occur in ``true_tags``."""
    common = set(pred_tags).intersection(true_tags)
    return len(common)


# One hit count per test row: predicted tags against the row's query tokens.
n_hits = [calculate_nhits(p, t)
          for p, t in zip(predicted_tags, product_log_test.query_tokens.values)]

import matplotlib.pylab as plt
import seaborn as sns
get_ipython().magic(u'matplotlib inline')

plt.boxplot(n_hits)
plt.title("Number of hits at top 5 tags")

print("average number of hits at Top 5:  %s" % np.average(n_hits))


# ##### Print some results for test data

def print_ranking(i):
    """Print a blank line followed by the product name of test row ``i``."""
    print("")
    print(test_data.product_name.iloc[i])
for v, dist in zip(mse_t2_avg, mse_avg): m_ = np.count_nonzero(v >= dist) p2_.append(m_) p2_ = np.array(p2_)/2000. p3_ = [] mse_avg = mse_.mean(1) mse_t3_avg = mse_t3.mean(1) for v, dist in zip(mse_t3_avg, mse_avg): m_ = np.count_nonzero(v >= dist) p3_.append(m_) p3_ = np.array(p3_)/2000. pl.boxplot(mse_avg.T, showmeans=True, showfliers=False) pl.scatter(np.arange(1,79), mse_t1_avg, c='b') pl.scatter(np.arange(1,79)[p1_<=0.05], mse_t1_avg[p1_<=0.05], c='b', s=45) pl.scatter(np.arange(1,79), mse_t2_avg, c='g') pl.scatter(np.arange(1,79)[p2_<=0.05], mse_t2_avg[p2_<=0.05], c='g', s=45) pl.scatter(np.arange(1,79), mse_t3_avg, c='r') pl.scatter(np.arange(1,79)[p3_<=0.05], mse_t3_avg[p3_<=0.05], c='r', s=45) ############### Controls ################### gm_can = np.genfromtxt('/home/robbis/Share/CAN_NET_GMperc.csv', skip_header=1, delimiter=',') can_labels = ['MCC','R_aINS','L_pINS','L_AMY'] repetitions = 200 n_permutation = 2000 arg_ = np.argsort(np.abs(corr))[::-1] mse_can = np.zeros((arg_.shape[0], len(algorithms_), repetitions, gm_can.shape[1]))