def UseSeaborn(palette='deep'):
    """Call to use seaborn plotting package"""
    import seaborn as sns
    # No background fill, legend font scale, frame on legend
    sns.set(style='whitegrid', font_scale=1.5, rc={'legend.frameon': True})
    # Mark ticks with border on all four sides (overrides 'whitegrid')
    sns.set_style('ticks')
    # Ticks point in
    sns.set_style({"xtick.direction": "in", "ytick.direction": "in"})
    # sns.choose_colorbrewer_palette('q')
    # sns.set_palette('colorblind')  # nice blue, green, red
    if palette == 'xkcd':
        # Nice blue, purple, green; `xkcdcolors` is an assumed module-level
        # list of xkcd color names (not defined in this snippet)
        sns.set_palette(sns.xkcd_palette(xkcdcolors))
    else:
        sns.set_palette(palette)
    # Other palettes tried: 'deep', 'Accent_r', 'Set2', 'Spectral_r', 'spectral'
    # FIX INVISIBLE MARKER BUG
    sns.set_context(rc={'lines.markeredgewidth': 0.1})
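# A minimal usage sketch for the helper above, assuming `xkcdcolors` has been
# defined at module level before the call (the names below are illustrative):
import numpy as np
import matplotlib.pyplot as plt

xkcdcolors = ["windows blue", "dusty purple", "faded green"]  # hypothetical
UseSeaborn(palette='xkcd')
x = np.linspace(0, 2 * np.pi, 50)
plt.plot(x, np.sin(x), marker='o')
plt.show()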
def plot_morph(good_spikes, cluster, morph_dim, spacing=.02, ymax=.04):
    # `plot_fr_se` and `stim_length` are assumed to be defined elsewhere
    plt.figure(figsize=(20, 20))
    with sns.color_palette(sns.xkcd_palette(["twilight blue", "kermit green"]), 2):
        plt.subplot(222)
        stim_name = morph_dim[1] + "_rec"
        spks2plot = good_spikes[(good_spikes['cluster'] == cluster) &
                                (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values,
                   spks2plot["stim_presentation"].values, label=stim_name)
        stim_name = morph_dim + '128'
        spks2plot = good_spikes[(good_spikes['cluster'] == cluster) &
                                (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values,
                   spks2plot["stim_presentation"].values, label=stim_name)
        plt.legend(loc=1)
        ax = plt.gca()
        ax.plot((0, 0), (0, ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, ymax), c=".2", alpha=.5)
        plt.xlim(-.5, 1)
        plt.ylim(0, ymax)
        plt.xticks([0, .5])
        plt.yticks([0, .5 * ymax, ymax])
        plt.title('cell: %d morph dim: %s' % (cluster, morph_dim))

        plt.subplot(224)
        stim_name = morph_dim[0] + "_rec"
        spks2plot = good_spikes[(good_spikes['cluster'] == cluster) &
                                (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values,
                   spks2plot["stim_presentation"].values, label=stim_name)
        stim_name = morph_dim + '001'
        spks2plot = good_spikes[(good_spikes['cluster'] == cluster) &
                                (good_spikes['stim_name'].str.contains(stim_name))]
        plot_fr_se(spks2plot["stim_aligned_time"].values,
                   spks2plot["stim_presentation"].values, label=stim_name)
        plt.legend(loc=1)
        ax = plt.gca()
        ax.plot((0, 0), (0, ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, ymax), c=".2", alpha=.5)
        plt.xlim(-.5, 1)
        plt.ylim(0, ymax)
        plt.xticks([0, .5])
        plt.yticks([0, .5 * ymax, ymax])

    with sns.color_palette(sns.diverging_palette(262, 359, s=99, l=43, sep=1,
                                                 n=128, center="dark"), 128):
        plt.subplot(121)
        spks_morph = good_spikes[(good_spikes['cluster'] == cluster) &
                                 (good_spikes['morph_dim'] == morph_dim)]
        morph_ymax = 128 * spacing + ymax
        for morph_pos in np.unique(spks_morph['morph_pos'].values):
            stim_name = morph_dim + str(int(morph_pos))
            spks2plot = spks_morph[spks_morph['morph_pos'] == morph_pos]
            plot_fr_se(spks2plot["stim_aligned_time"].values,
                       spks2plot["stim_presentation"].values,
                       offset=morph_pos * spacing, label=stim_name)
        ax = plt.gca()
        ax.plot((0, 0), (0, morph_ymax), c=".2", alpha=.5)
        ax.plot((stim_length, stim_length), (0, morph_ymax), c=".2", alpha=.5)
        plt.xlim(-.5, 1)
        plt.ylim(0, morph_ymax)
        plt.xticks([0, .5])
        plt.yticks([])
        plt.tick_params(axis='y', which='both', bottom='off', top='off',
                        labelbottom='off')
    sns.despine()
def create_impDum_barplot(ginfo):
    # runs to loop through
    suffix_list = ["_impDums", "", "_dumsOnly"]
    # labels to appear in graph legend
    list_desc = ["Clinical values + imputation indicators",
                 "Clinical values only",
                 "Imputation indicators only"]
    predictor_desc = "covarlist_all"
    figName = ginfo.FileNamePrefix + '_' + predictor_desc + ginfo.patient_sample + '_ImpAnalysis'
    tableName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + ginfo.patient_sample + '.txt'
    resultsDF = pd.read_csv(outDir + 'R_' + tableName, sep=",")
    alg_names = resultsDF['Unnamed: 0']  # algorithm names
    print("alg_names: ", alg_names)
    initial_pos = np.arange(len(alg_names)) * (len(suffix_list) + 1) + len(suffix_list) + 1
    bar_width = 1
    colors = ["amber", "windows blue", "greyish"]
    mycolors = sns.xkcd_palette(colors)
    plt.figure(figsize=(6.7, 8))
    # cycle through each patient list
    plots = []
    for counter, suffix in enumerate(suffix_list):
        tableName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + \
            ginfo.patient_sample + suffix + '.txt'
        resultsDF = pd.read_csv(outDir + 'R_' + tableName, sep=",")
        measurements = np.array(resultsDF['cvAUC'])
        z = stats.norm.ppf(.95)
        SEs = [(np.array(resultsDF['cvAUC']) - np.array(resultsDF['ci_low'])) / z,
               (np.array(resultsDF['ci_up']) - np.array(resultsDF['cvAUC'])) / z]
        alg_pos = initial_pos - counter
        print("measurements: ", measurements)
        print("alg_pos: ", alg_pos)
        plot = plt.barh(bottom=alg_pos, width=measurements, height=bar_width,
                        xerr=SEs,
                        error_kw=dict(ecolor='.1', lw=1, capsize=1, capthick=1),
                        align='center', alpha=1,
                        color=mycolors[counter], label=list_desc[counter])
        plots.append(plot)
    plt.xlabel("cvAUC")
    plt.xlim(.5, 1)
    plt.ylim(0, max(initial_pos) + 2)
    print("counter: ", counter)
    plt.yticks(initial_pos - counter / 2, alg_names)
    plt.legend(prop={'size': 8})
    plt.tight_layout()
    plt.savefig(outDir + figName + '.eps', dpi=1200)
    plt.close()
def iterateAlignment(self):
    # Setup drawing; earlier color lists kept for reference:
    # colors = ["purple", "light purple", "blue", "cyan", "neon blue",
    #           "red", "rose", "green", "bright green", "mint"]  # roygbv
    co = ['lightish red', 'yellowish orange', 'canary yellow', 'lime', 'cyan']  # , 'lavender'
    co = random.sample(list(sns.xkcd_rgb.keys()), self.n)
    pal = sns.xkcd_palette(co)
    for i in range(self.nIters):
        print(i)
        f, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=True)
        # Calculate mean shape
        self.calcMeanShape()
        ax1.plot(self.meanShape.xs, self.meanShape.ys, c='k', lw=1)
        # Normalize mean shape
        self.normShape(self.meanShape)
        for t in self.allShapes:
            t.draw(pal, ax1)
        # Realign
        self.alignAllShapes()
        for t in self.allShapes:
            t.draw(pal, ax2)
        ax2.plot(self.meanShape.xs, self.meanShape.ys, c='k', lw=1)
        # Draw change
        self.calcMeanShape()
        f.savefig("C:/Users/Valerie/Desktop/stars/plots%d/%d.png" % (self.n, i))
        f.clear()
        plt.close()
def iterateAlignment(self):
    # Setup drawing
    f, (ax1, ax2) = plt.subplots(1, 2)  # , sharex=True, sharey=True
    colors = ["purple", "light purple", "blue", "cyan", "neon blue",
              "red", "rose", "green", "bright green", "mint"]
    pal = sns.xkcd_palette(colors)
    it = 0  # renamed from `iter`, which shadows the builtin
    # Draw no change
    self.calcMeanShape()
    self.drawAll(ax1, pal)
    # 1. Align to first shape (instantiation)
    self.alignAllShapes(self.allShapes[0])
    while it < 500:
        print(it)
        if it > 0:
            f, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=True)
            self.drawAll(ax1, pal)  # previous iter
        # Calculate mean shape
        self.calcMeanShape()
        # Normalize mean shape to first shape
        self.normTrans(self.allShapes[0])
        # Realign
        self.alignAllShapes(self.meanShape)
        # Draw change
        self.calcMeanShape()
        self.drawAll(ax2, pal)
        plt.legend()
        f.savefig("C:/Users/Valerie/Desktop/stars/plots5/%d.png" % it)
        f.clear()
        plt.close()
        it += 1
def __init__(self, df, var1, var2, classvar, nn_range=range(1, 101),
             granularity=50., buffer_denom=15., figsize=(9, 7), dotsize=70,
             point_colors=sns.xkcd_palette(['windows blue', 'amber']),
             mesh_colors=['#8FCCFF', '#FFED79']):
    self.df = df
    self.var1 = var1
    self.var2 = var2
    self.classvar = classvar
    self.nn_range = nn_range
    self.granularity = granularity
    self.buffer_denom = buffer_denom
    self.figsize = figsize
    self.dotsize = dotsize
    self.point_colors = point_colors
    self.mesh_colors = mesh_colors
def iterateAlignment(self):
    # Setup drawing; earlier color list kept for reference:
    # colors = ["purple", "light purple", "blue", "cyan", "neon blue",
    #           "red", "rose", "green", "bright green", "mint"]  # roygbv
    co = ['lightish red', 'yellowish orange', 'canary yellow', 'lime', 'cyan']  # , 'lavender'
    pal = sns.xkcd_palette(co)
    for i in range(self.nIters):
        f, (ax1, ax2) = plt.subplots(1, 2)  # , sharex=True, sharey=True
        # Calculate mean shape
        self.calcMeanShape()
        ax1.plot(self.meanShape.xs, self.meanShape.ys, 'k')
        # Normalize mean shape
        self.normMeanShape()
        for sh in self.allShapes:
            sh.draw(pal, ax1)
        # Realign
        self.alignAllShapes()
        for sh in self.allShapes:
            sh.draw(pal, ax2)
        ax2.plot(self.meanShape.xs, self.meanShape.ys, 'k')
        # Draw change
        self.calcMeanShape()
        f.savefig("C:/Users/Valerie/Desktop/stars/plots5/%d.png" % i)
        f.clear()
        plt.close()
def alignTrainingSet(self):
    # Setup drawing
    co = random.sample(list(sns.xkcd_rgb.keys()), self.n)
    pal = sns.xkcd_palette(co)
    for i in range(self.nIters):
        start = time.time()
        # Calculate mean shape
        self.asm.meanShape = self.asm.calcMeanShape()
        if i == 0:
            for t in self.asm.allShapes:
                t.draw(pal, plt)
            plt.plot(self.asm.meanShape.xs, self.asm.meanShape.ys, c='k', lw=1)
            plt.gca().invert_yaxis()
            plt.savefig(os.path.join(self.out, "no-alignment-%d.png" % i))
            plt.close()
        # Normalize mean shape
        self.asm.normMeanShape = self.asm.normShape(self.asm.meanShape)
        # Align all shapes to normalized mean shape
        self.asm.allShapes = self.alignAllShapes()
        for t in self.asm.allShapes:
            t.draw(pal, plt)
        plt.plot(self.asm.normMeanShape.xs, self.asm.normMeanShape.ys, c='k', lw=1)
        plt.gca().invert_yaxis()
        plt.savefig(os.path.join(self.out, "alignment-%d.png" % i))
        plt.close()
        with open(os.path.join(self.out, 'log.txt'), 'a') as of:
            of.write("AlignIter: %f\n" % (time.time() - start))
            of.write('%d\n\n' % i)
        print(i)
    return self.asm
def prepare_and_plot_1_2(dataset_name):
    deltas = ['1.0E-5', '1.0E-6', '1.0E-7', '1.0E-8', '1.0E-9', '1.0E-10',
              '1.0E-11', '1.0E-12', '1.0E-13', '1.0E-14', '1.0E-15',
              '1.0E-16', '1.0E-17', '1.0E-18', '1.0E-19', '1.0E-20']
    deltas.reverse()
    if dataset_name == 'adult' or dataset_name == 'housing':
        ks = list(range(200, 4701, 300))
    else:
        raise RuntimeError("Does not recognise dataset", dataset_name)
    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)
    file_path_safepub = path + 'data/result/safepub_test/1_2/' + dataset_name
    file_path_sc = path + 'data/result/sc_test/1_2/' + dataset_name
    pattern = "^norm_result_"
    plot_path = path + 'data/result/plots/needs_editing/' \
                       '1_2_' + dataset_name + '.jpg'
    files_safepub = [file for file in os.listdir(file_path_safepub)
                     if re.match(pattern, file)]
    files_sc = [file for file in os.listdir(file_path_sc)
                if re.match(pattern, file)]
    information_loss = []
    models = []
    parameters = []
    for file in files_sc:
        k = int(re.split(pattern + "k_|.csv", file)[1])
        res_data = pd.read_csv(file_path_sc + '/' + file)
        data = res_data['sse']
        information_loss += list(data)
        models += ['MicroDP'] * len(data)
        parameters += [k] * len(data)
    for file in files_safepub:
        delta = re.split(pattern + "delta_|.csv", file)[1]
        res_data = pd.read_csv(file_path_safepub + '/' + file)
        data = res_data['sse']
        information_loss += list(data)
        models += ['SafePub'] * len(data)
        parameters += [ks[deltas.index(delta)]] * len(data)
    array = np.array([information_loss, parameters]).T
    df = pd.DataFrame(data=array, columns=['Information loss', 'k/𝛿'])
    df['Model'] = models
    ax = sns.lineplot(x='k/𝛿', y='Information loss', hue='Model', style='Model',
                      data=df,
                      palette=sns.xkcd_palette(['windows blue', 'amber']),
                      markers=['o', 'o'], dashes=False)
    ax.set(ylim=(0.0, 0.75))
    # plt.show()
    plt.savefig(plot_path)
    plt.clf()
def prepare_and_plot_2_1(dataset_name):
    match = "^norm_result_"
    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)
    safepub_path = path + "data/result/safepub_test/2_1/" + dataset_name
    sc_path = path + "data/result/sc_test/2_1/" + dataset_name
    plot_path = path + "data/result/plots/2_1_" + dataset_name + ".jpg"
    if dataset_name == 'adult':
        attribute_range = list(range(2, 9))
    elif dataset_name == 'housing':
        attribute_range = list(range(2, 10))
    elif dataset_name == 'musk':
        attribute_range = list(range(2, 21))
    else:
        raise RuntimeError("Does not recognise dataset", dataset_name)
    num_attributes = []
    information_loss = []
    models = []
    for a in attribute_range:
        sc_files = [file for file in os.listdir(sc_path + '/' + str(a))
                    if re.match(match, file)]
        safepub_files = [file for file in os.listdir(safepub_path + '/' + str(a))
                         if re.match(match, file)]
        for file in sc_files:
            df = pd.read_csv(sc_path + '/' + str(a) + '/' + file)
            data = list(df['sse'])
            information_loss += data
            num_attributes += [a] * len(data)
            models += ['MicroDP'] * len(data)
        for file in safepub_files:
            df = pd.read_csv(safepub_path + '/' + str(a) + '/' + file)
            data = list(df['sse'])
            information_loss += data
            num_attributes += [a] * len(data)
            models += ['SafePub'] * len(data)
    array = np.array([num_attributes, information_loss]).T
    df = pd.DataFrame(array, columns=['Number of attributes', 'Information loss'])
    df['Model'] = models
    ax = sns.lineplot(x='Number of attributes', y='Information loss', hue='Model',
                      data=df,
                      palette=sns.xkcd_palette(['windows blue', 'amber']))
    # ax.set(ylim=(0.0, 1.0))
    ax.set(yscale='log')
    plt.show()
    # plt.savefig(plot_path)
    plt.clf()
    return
def create_LCMS_barplot(ginfo, LCMScompare, outcome, FileNameSuffix2):
    """Bar plot with bars grouped by predictor set and colors indicating LCMS run

    LCMScompare = "NPbins_v_RPbins" to compare NP vs. RP using binned data
    LCMScompare = "NPbins_v_MassHuntNP" to compare NP binned vs. NP mass hunter
    """
    if LCMScompare == "NPbins_v_RPbins":
        inLCMSData_list = ['NPbins50x50', 'RPbins50x50']  # datafile names
        inLCMSData_desc = ['Normal phase, 50x50 intensity grid',
                           'Reverse phase, 50x50 intensity grid']  # graph labels
        color_list = ["taupe", "plum"]  # xkcd colors
    elif LCMScompare == "NPbins_v_MassHuntNP":
        inLCMSData_list = ['NPbins50x50', 'MassHuntNP']
        inLCMSData_desc = ['Normal phase, 50x50 intensity grid',
                           'Normal phase, Mass Hunter']
        color_list = ["taupe", "dark teal"]
    elif LCMScompare == "NonInvasives":
        inLCMSData_list = ['SalivaMH', 'UrineMH']
        inLCMSData_desc = ['Saliva', 'Urine']
        color_list = ["sky blue", "marine blue"]
    elif LCMScompare == "MassHunt _RPvNP":
        inLCMSData_list = ['MassHuntRP_noFill', 'MassHuntNP']
        inLCMSData_desc = ['Normal phase, Mass Hunter',
                           'Reverse phase, Mass Hunter']
        color_list = ["plum", "teal"]
    elif LCMScompare == "RP_noFillvFill":
        # eventually may instead want MassHuntRP_fill vs. MassHuntRP_isotope
        inLCMSData_list = ['MassHuntRP_noFill', 'MassHuntRP_fill']
        inLCMSData_desc = ['RP Mass Hunter - no Fill', 'RP Mass Hunter - with Fill']
        color_list = ["plum", "mauve"]  # 'sea blue'
    # first name listed will appear closest to bottom of y-axis
    predcat_names = ['Clinical+LCMS', 'LCMS only', 'Clinical only']
    alg_list = ['Super Learner', 'Gradient Boost', 'AdaBoost', 'Random Forests']
    figName = ginfo.FileNamePrefix + '_' + LCMScompare
    plt.figure(figsize=(6.7, 8))

    ## Prepare data to be graphed
    df_list = []
    for inLCMSData in inLCMSData_list:
        for predcat in predcat_names:
            if predcat == 'Clinical+LCMS':
                resultsDF = pd.read_csv(outDir + 'R_' + ginfo.FileNamePrefix +
                                        "_covarlist_all_" + inLCMSData +
                                        FileNameSuffix2 + '.txt', sep=",")
            elif predcat == 'Clinical only':
                resultsDF = pd.read_csv(outDir + 'R_' + ginfo.FileNamePrefix +
                                        "_covarlist_all_" + inLCMSData +
                                        'patients' + FileNameSuffix2 + '.txt',
                                        sep=",")
            elif predcat == 'LCMS only':
                resultsDF = pd.read_csv(outDir + 'R_' + ginfo.FileNamePrefix +
                                        "_covarlist_" + inLCMSData +
                                        FileNameSuffix2 + '.txt', sep=",")
            df_list.append(resultsDF)

    ## To fill in during loop
    positions = []
    measurements = []
    colors = []
    method_labels = []
    colors_legend = []
    ytick_labels = []
    ytick_positions = []
    SEs = []
    ymax = 0
    bar_width = 1
    mycolor_list = sns.xkcd_palette(color_list)
    # loop thru predcat_names ("clinical only", "lcms only" etc.)
    for p, predcat in enumerate(predcat_names):
        # cycle through algorithm list ('adaboost', 'RF', etc.)
        for a, alg in enumerate(alg_list):
            # cycle LCMS methods ('urine', 'RP', 'NP', 'masshunt' etc.)
            for d, dataType in enumerate(inLCMSData_list):
                df = df_list[d * len(predcat_names) + p]
                # text section headings
                if a == len(alg_list) - 1 and d == len(inLCMSData_list) - 1:
                    plt.text(.52, ymax + 1, predcat_names[p], weight='bold')
                # append to running list of values
                myrow = df.loc[df['Unnamed: 0'] == alg]
                measurement = float(myrow['cvAUC'])
                measurements.append(measurement)
                z = stats.norm.ppf(.95)
                SE = float(myrow['se'])
                # SE = [(float(myrow['cvAUC']) - float(myrow['ci_low'])) / z,
                #       (float(myrow['ci_up']) - float(myrow['cvAUC'])) / z]
                SEs.append(SE)
                positions.append(ymax)
                colors.append(mycolor_list[d])
                # add numeric values to plot
                xpos = float(myrow['ci_low']) - .05
                ypos = ymax - .3
                mytext = "%.2f" % measurement
                plt.text(xpos, ypos, mytext, color="white", fontsize=10)
                if d == 0:
                    ytick_labels.append(alg)
                    ytick_positions.append(ymax + .5)
                ymax += bar_width
        # add space between groups of bars segmented by predcat values
        ymax += bar_width * 3
    print(np.array(SEs))
    plt.barh(bottom=positions, width=measurements, height=bar_width,
             xerr=np.array(SEs),
             error_kw=dict(ecolor='.1', lw=1, capsize=1, capthick=1),
             align='center', alpha=1, color=colors)
    plt.yticks(ytick_positions, ytick_labels)  # size=16
    plt.xlim(.5, 1)
    plt.ylim(-2, ymax)
    # make left spacing large enough for labels. Default is .1, .9, .9, .1
    plt.subplots_adjust(left=.22, right=.9, top=.9, bottom=.1)
    lhandles = []
    for mycolor in mycolor_list[::-1]:
        hand = mpatches.Patch(color=mycolor)
        lhandles.append(hand)
    leg = plt.legend(lhandles, inLCMSData_desc[::-1])
    plt.tight_layout()
    plt.savefig(outDir + figName + '.eps', dpi=1200)
    plt.close()
# plt.pcolor(dist, cmap=current_cmap)
masked_array = np.ma.array(dist, mask=np.isnan(dist))
np.savetxt('distance_matrix.txt', dist, delimiter=',', fmt='%1.4e')
# cmap = matplotlib.colors.ListedColormap(['black', 'grey', 'green', 'red',
#                                          'blue', 'black', 'black'])
# cmap.set_bad('black', 0.8)
# boundaries = [0, 0.001, 0.4, 0.5, 0.55, 0.65, 0.8, 1]
# norm = matplotlib.colors.BoundaryNorm(boundaries, cmap.N, clip=True)
# plt.pcolor(masked_array, cmap='gist_rainbow', vmin=0.3, vmax=0.6)
plt.axvline(x=78, label='-OSA-'.format(0.3), c='w', linewidth=4)
plt.axhline(y=78, label='-OSA-'.format(0.3), c='w', linewidth=4)
# plt.colorbar()
# plt.show()
l2 = masked_array
l2 = l2 + 0.001
l2 = l2 / l2.max()
# uneven bounds changes the colormapping
sns.set()
colors = ["black", "blue", "brown", "red", "yellow", "white"]
sns.heatmap(l2, cmap=sns.xkcd_palette(colors), norm=PowerNorm(gamma=1),
            vmin=0.6, vmax=l2.max())
# sns.heatmap(l2)
# plt.show()
plt.savefig('/home/milad/geodesic_l2.png', dpi=1000)
# SAMPLE_ANNOTATION_FILE = 'sample_annotation_file'
COUNT_FILE = 'count_file'
DISPLAY_COUNT = 'display_count'
DGE_FILE = 'dge_file'
SEQ_DEPTH_FILE = 'seq_depth_file'
FDR_THRESHOLD = 'fdr_threshold'
GROUP_1 = 'g1'
GROUP_2 = 'g2'
DEFAULT_FDR = 0.05
DEFAULT_COLORS = sns.xkcd_palette(["windows blue", "amber", "greyish",
                                   "faded green", "dusty purple", "pale blue",
                                   "green yellow", "pumpkin"])
cc = mpl.colors.ColorConverter()
DEFAULT_COLORS = cc.to_rgba_array(DEFAULT_COLORS, alpha=0.5)


class MakeAbsolutePathAction(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, os.path.realpath(os.path.abspath(values)))


def parse_cl_args():
    '''
    Parses the command line args
def plot_hexbin(type, path, cmap):  # signature reconstructed from the calls below
    plt.figure(figsize=0.75 * np.array(snakemake.config["plots"]["figsize"]))
    plt.subplot(111, aspect="equal")
    # plt.scatter(counts["known"], counts[type], s=1, c="k", alpha=0.3,
    #             rasterized=True, edgecolors="face", marker="o")
    plt.hexbin(counts["known"], counts[type], cmap=cmap, gridsize=25,
               clip_on=True)
    maxv = max(plt.xlim()[1], plt.ylim()[1])
    plt.plot([0, maxv], [0, maxv], "--k")
    plt.xlim((0, maxv))
    plt.ylim((0, maxv))
    plt.ylabel("predicted")
    plt.xlabel("truth")
    sns.despine()
    plt.savefig(path, bbox_inches="tight")


colors = sns.xkcd_palette(["grey", "light red"])
plot_hexbin("raw", snakemake.output.scatter_raw, colors[0])
plot_hexbin("posterior", snakemake.output.scatter_posterior, colors[1])

errors = pd.concat(errors)
x, y = snakemake.config["plots"]["figsize"]
plt.figure(figsize=(x * 1.5, y))
pred_errors = errors[(errors["type"] == "raw") | (errors["type"] == "posterior")]
# bins = pd.cut(pred_errors["known"],
#               [0, 6, 11, 16, 21, 26, 30, 100000],
#               right=False,
#               labels=["0-5", "6-10", "11-15", "16-20", "21-25", "26-30", "≥30"])
# pred_errors["bin"] = bins
from ActiveShapeModels import ASM, Point, Shape
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np

s1 = Shape([Point(200, 300), Point(100, 200), Point(300, 50)])
s2 = Shape([Point(150, 250), Point(50, 100), Point(250, 0)])

f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex=True, sharey=True)
s1.draw(sns.xkcd_palette(["light blue"]), ax1)
s2.draw(sns.xkcd_palette(["light blue"]), ax2)

cmShape = ASM.centroid(s1)
cmMeanShape = ASM.centroid(s2)
ax1.scatter(cmShape.x, cmShape.y, c='r')
ax2.scatter(cmMeanShape.x, cmMeanShape.y, c='r')
ax1.plot([s1.shapePoints[0].x, s1.shapePoints[1].x],
         [s1.shapePoints[0].y, s1.shapePoints[1].y],
         color='r', ls='-')
ax2.plot([s2.shapePoints[0].x, s2.shapePoints[1].x],
         [s2.shapePoints[0].y, s2.shapePoints[1].y],
         color='r', lw=1, ls='-')
load_growth_grouped_total = load_growth.groupby(temp_bins).agg(['sum'])
load_growth_grouped_total['temp_bin'] = temp_labels

# determine max usage by temperature bin
load_growth_grouped_max = load_growth.groupby(temp_bins).agg(['max'])
load_growth_grouped_max['temp_bin'] = temp_labels

# PLOT 1
fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True, sharey=False)
colors = ["windows blue", "amber", "greyish", "faded green"]

# subplot 1: mean
load_growth_grouped_mean.plot.bar(x='temp_bin',
                                  y=['AEV', 'CCHP', 'PHEV', 'eBike'],
                                  label=['AEV', 'CCHP', 'PHEV', 'eBike'],
                                  color=sns.xkcd_palette(colors),
                                  ax=axes[0], legend=False)
axes[0].tick_params(rotation=0)
axes[0].set_ylabel('mean kWh growth')
fig.legend(loc="center right")

# subplot 2: total
load_growth_grouped_total.plot.bar(x='temp_bin',
                                   y=['AEV', 'CCHP', 'PHEV', 'eBike'],
                                   label=['AEV', 'CCHP', 'PHEV', 'eBike'],
                                   color=sns.xkcd_palette(colors),
                                   ax=axes[1], legend=False)
axes[1].tick_params(rotation=0)
axes[1].set_ylabel('total kWh growth')
sns.palplot(sns.color_palette("husl", 8))


# Let me explain these qualitative (or categorical) palettes. These are best
# when you want to distinguish discrete chunks of data that do not have an
# inherent ordering. When Seaborn is imported, the default color cycle is
# changed to a set of six colors that evoke the standard matplotlib color
# cycle. But when we have more than six categories to distinguish, say eight,
# the most common approach is to use the `hls` color space, a simple
# transformation of *RGB* values.
#
# There is also the `hls_palette()` function, which lets you control the
# *lightness* and *saturation* of the colors.
#
# All of the above is just the basic Seaborn aesthetics. Let us now look at
# the *xkcd_rgb* dictionary, which has 954 colors in it. Let us pull a few out:

# In[9]:

sample_colors = ["windows blue", "amber", "greyish", "faded green",
                 "dusty purple", "pale red", "medium green", "denim blue"]
sns.palplot(sns.xkcd_palette(sample_colors))


# Another style is the `cubehelix` color palette, which makes sequential
# palettes with a linear increase or decrease in brightness and some variation
# in [hue](https://en.wikipedia.org/wiki/Hue). Let us plot this color palette
# in a density contour plot:

# In[15]:

# Default Matplotlib cubehelix version:
sns.palplot(sns.color_palette("cubehelix", 8))

# In[16]:

# Default Seaborn cubehelix version:
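# A brief sketch of the `hls_palette()` control mentioned above: `l` sets
# lightness and `s` sets saturation (the values here are illustrative, not
# from the original notebook):
sns.palplot(sns.hls_palette(8, l=.3, s=.8))  # darker, fairly saturated
sns.palplot(sns.hls_palette(8, l=.7, s=.5))  # lighter, more muted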
import matplotlib.pyplot as plt
import os
import pandas as pd
from scipy import stats
import seaborn as sns
import statsmodels
import statsmodels.api as sm
import subprocess

colors = ["amber", "faded green"]
palette = sns.xkcd_palette(colors)
sns.palplot(palette)

colors = ["dusty blue", "greyish"]
es = sns.xkcd_palette(colors)
sns.palplot(es)

colors = ["dusty purple", "grey"]
pur = sns.xkcd_palette(colors)
sns.palplot(pur)

colors = ["amber", "greyish", "faded green", "grey"]
enhpal = sns.xkcd_palette(colors)
sns.palplot(enhpal)

colors = ["amber", "greyish", "dusty purple", "brown grey",
          "windows blue", "bluey grey"]
archpal = sns.xkcd_palette(colors)
sns.palplot(archpal)

FANTOMPATH = "/dors/capra_lab/projects/enhancer_ages/fantom/data/all_fantom_enh/ages/"
FANTOMFILE = "syn_breaks_all_fantom_enh_ages.bed"
    losses.append(float(w[0]))
    return losses, nfes


mnist_singlescale_loss, mnist_singlescale_nfes = get_values(MNIST_SINGLESCALE)
mnist_multiscale_loss, mnist_multiscale_nfes = get_values(MNIST_MULTISCALE)

import brewer2mpl
line_colors = brewer2mpl.get_map('Set2', 'qualitative', 4).mpl_colors
dark_colors = brewer2mpl.get_map('Dark2', 'qualitative', 4).mpl_colors

import seaborn as sns
sns.set_style("whitegrid")
colors = ["windows blue", "amber", "greyish", "faded green", "dusty purple"]
sns.palplot(sns.xkcd_palette(colors))

plt.figure(figsize=(4, 2.6))
plt.scatter(mnist_singlescale_nfes[::10], mnist_singlescale_loss[::10],
            color=line_colors[1], label="Single FFJORD")
plt.scatter(mnist_multiscale_nfes[::10], mnist_multiscale_loss[::10],
            color=line_colors[2], label="Multiscale FFJORD")
plt.ylim([0.9, 1.25])
plt.legend(frameon=True, fontsize=10.5)
plt.xlabel("NFE", fontsize=18)
plt.ylabel("Bits/dim", fontsize=18)
""" import random import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt import numpy as np import pandas as pd import seaborn as sns from mpl_toolkits.axes_grid.inset_locator import inset_axes sns.set_style('white') sns.set_context('paper', font_scale=1.5) from glob import glob from figtools import * from fusco import SFS RED, BLUE, GREEN = sns.xkcd_palette(["amber", "dusty purple", "faded green"]) sns.set_context('paper', font_scale=1.5) pi = np.pi mu = 0.02 alpha = 30 fusco_alpha = 0.55 fusco_beta = 2.3 mu_drivers = 2 * 2e-5 """ Allele Frequency Spectra. Figure 1 """ root_folder = '../model/experiments/u0.01/' death_rate = '005'
def plotCategoricalPerformance(x, y, legendstrs=[], plottype='scatter',
                               color_palette=sns.xkcd_palette(colors),
                               dotsize=5, shift=1):
    # `colors` in the default argument is an assumed module-level list of
    # xkcd color names
    fontsize = 16
    num_trends = len(y)
    xlen = x.shape[0]
    assert (xlen == y[0].shape[0])
    Ns = []
    for i in range(num_trends):
        Ns.append(y[i].shape[1])
    maxN = max(Ns)
    sizes = dotsize * np.ones((1, ))
    # set up legend
    if (len(legendstrs) > 0):
        for i in range(num_trends):
            color = np.tile(np.array([color_palette[i]]), [1, 1])
            if (plottype == 'scatter'):
                plt.scatter(x[0], y[i][0, 0], np.array([dotsize]), c=color)
            elif (plottype == 'errorBar'):
                plt.scatter(x[0], np.mean(y[i][0, :]), np.array([dotsize]),
                            c=color)
        plt.legend(legendstrs, fontsize=fontsize)
    if (plottype == 'scatter'):
        xvals = np.zeros((num_trends * xlen * maxN, ))
        yvals = np.zeros((num_trends * xlen * maxN, ))
        colors = np.zeros((num_trends * xlen * maxN, 3))
        sizes = dotsize * np.ones((num_trends * xlen * maxN, ))
        ind = 0
        sawzorn = False
        for i in range(num_trends):
            if (plottype == 'scatter'):
                xshift_i = (i - (num_trends - 1) / 2) * shift
            else:
                xshift_i = 0
            N = Ns[i]
            for j in range(xlen):
                for n in range(N):
                    yval = y[i][j, n]
                    if (not sawzorn and (yval == 0 or np.isnan(yval))):
                        print('saw a zero or nan')
                        sawzorn = True
                        continue
                    yvals[ind] = yval
                    colors[ind, :] = np.array([color_palette[i]])
                    xvals[ind] = x[j] + xshift_i
                    ind += 1
        plt.scatter(xvals[:ind], yvals[:ind], sizes[:ind], c=colors[:ind])
    elif (plottype == 'errorBar'):
        sizes = dotsize * np.ones((xlen, ))
        means = np.zeros((num_trends, xlen))
        stds = np.zeros((num_trends, xlen))
        for i in range(num_trends):
            # make sure at the end there are no nans!
            means_i = np.nanmean(y[i], 1)
            means[i] = means_i
            stds_i = np.nanstd(y[i], 1) / np.sqrt(Ns[i])
            stds[i] = stds_i
            plt.plot(x, means_i, '-', c=color_palette[i], lw=2)
        for i in range(num_trends):
            for j in range(xlen):
                plt.plot([x[j], x[j]],
                         [means[i, j] - stds[i, j], means[i, j] + stds[i, j]],
                         '-', c=color_palette[i], lw=2)
    return None
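# A minimal usage sketch for plotCategoricalPerformance; since `colors` in the
# function's default argument is assumed to exist at module level, the palette
# is passed explicitly here (all values below are illustrative):
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

pal = sns.xkcd_palette(["windows blue", "amber"])
x = np.array([1., 2., 3., 4.])          # 4 categories on the x-axis
y = [np.random.rand(4, 10),             # trend 1: 10 samples per category
     np.random.rand(4, 10) + 0.5]       # trend 2
plotCategoricalPerformance(x, y, legendstrs=['method A', 'method B'],
                           plottype='errorBar', color_palette=pal)
plt.show()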
col_wrap=3, data=all_data13, order=1, palette=palette, size=4).set(ylim=(0, 1))

# ==============================================================================
# Unsupervised Learning - Cluster analysis on Shell data
# ==============================================================================
from sklearn.cluster import KMeans

shell = pd.DataFrame()
shell = all_data13[all_data13['name'] == 'RDSB.L']
# We need to scale the oil price too, so clustering is not influenced by the
# relative size of one axis.
shell['oil_price_scaled'] = scaler.fit_transform(shell['oil_price'].to_frame())
shell['cluster'] = KMeans(n_clusters=6, random_state=1).fit_predict(
    shell[['share_price_scaled', 'oil_price_scaled']])

# The 954 most common RGB monitor colors https://xkcd.com/color/rgb/
colors = ['baby blue', 'amber', 'scarlet', 'grey', 'milk chocolate',
          'windows blue']
palette = sns.xkcd_palette(colors)
sns.lmplot(x='oil_price', y='share_price_scaled', ci=None, palette=palette,
           hue='cluster', fit_reg=0, data=shell)

# ==============================================================================
# Supervised learning linear regression
# ==============================================================================
from sklearn import linear_model

# 1.- Data preparation
shell15 = pd.DataFrame()
# Extract data from years 2016/17
shell15 = all_data13[(all_data13['name'] == 'RDSB.L') & (all_data13['year'] > 2015)]
# Just using 1 variable for linear regression. To try with more variables use randomforest
shell15 = shell15[['share_price', 'oil_price']].reset_index()
def prepare_and_plot_1_1_a(count_records=True):
    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)
    file_path_adult = path + 'data/result/safepub_test/1_1/adult'
    file_path_housing = path + 'data/result/safepub_test/1_1/housing'
    if count_records:
        filename = "num_suppressed_records_granularity.csv"
    else:
        filename = "num_suppressed_attributes_granularity.csv"
    dataset_adult = pd.read_csv(path + 'data/adult/adult.csv')
    dataset_res_adult = pd.read_csv(file_path_adult + '/' + filename)
    dataset_housing = pd.read_csv(path + 'data/housing/housing.csv')
    dataset_res_housing = pd.read_csv(file_path_housing + '/' + filename)
    if count_records:
        denom_adult = len(dataset_adult.values)
        denom_housing = len(dataset_housing.values)
        y_label = 'Suppressed records'
    else:
        denom_adult = len(dataset_adult.columns)
        denom_housing = len(dataset_housing.columns)
        y_label = 'Suppressed attributes'
    plot_path = path + "data/result/plots/1_1_a_" + y_label + ".jpg"
    frequencies = []
    epsilons = []
    datasets = []
    for eps in dataset_res_adult.columns:
        freqs_adult = dataset_res_adult[eps] / denom_adult
        frequencies += list(freqs_adult)
        epsilons += [float(eps)] * len(freqs_adult)
        datasets += ['Adult'] * len(freqs_adult)
        freqs_housing = dataset_res_housing[eps] / denom_housing
        frequencies += list(freqs_housing)
        epsilons += [float(eps)] * len(freqs_housing)
        datasets += ['Housing'] * len(freqs_housing)
    array = np.array([epsilons, frequencies]).T
    df = pd.DataFrame(array, columns=['ε', y_label])
    df['Dataset'] = datasets
    ax = sns.lineplot(x='ε', y=y_label, hue='Dataset', data=df,
                      palette=sns.xkcd_palette(['teal', 'orange', 'deep pink']))
    ax.set(ylim=(0, 1.05))
    # plt.show()
    plt.savefig(plot_path)
    plt.clf()
    return
            pyplot.savefig('./plots/distributions_' + strict_name +
                           '/distribution_augmentation_' + str(crop_metric) +
                           kk + SMALL + '.pdf', dpi=1000)

all_nets = [experiments.opt[i + 6].name for i in range(5)]
name_nets = ['Non Regularized', 'Data augment.', 'Dropout', 'Weight Decay',
             'All Regularizers']
colors = ["amber", "greyish", "orange", "black"]
for idx_metric, crop_metric in enumerate(crops):
    cc = itertools.cycle(sns.xkcd_palette(colors))
    fig, ax = pyplot.subplots()
    for idx_net, nets in enumerate(all_nets):
        if SMALL == '':
            tmp = np.load(PATH_TO_DATA + '/tmp_results_' + kk + nets + '.npy')
        else:
            tmp = np.load(PATH_TO_DATA + '/tmp_results_' + kk + nets + '_' +
                          SMALL + '.npy')
        mm = np.zeros([TOTAL])
        for image_id in range(TOTAL):
            mm[image_id] += tmp[idx_metric][STRICT][0][image_id][0]
            mm[image_id] += tmp[idx_metric][STRICT][0][image_id][1]
        if idx_net == 0:
from ActiveShapeModels import ASM, Point, Shape
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np

# s1 = Shape([Point(200, 300), Point(100, 200), Point(300, 50)])
# s2 = Shape([Point(150, 250), Point(50, 100), Point(250, 0)])
s1 = Shape([Point(857, -129), Point(89, -409), Point(-404, 254),
            Point(96, 957), Point(877, 712)])

f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
s1.draw(sns.xkcd_palette(["light blue"]), 0, ax1)
# s2.draw(sns.xkcd_palette(["light blue"]), ax2)

cmShape = ASM.centroid(s1)
# cmMeanShape = ASM.centroid(s2)
ax1.scatter(cmShape.x, cmShape.y, c='r')
# ax2.scatter(cmMeanShape.x, cmMeanShape.y, c='r')
ax1.plot([s1.shapePoints[0].x, s1.shapePoints[1].x],
         [s1.shapePoints[0].y, s1.shapePoints[1].y],
         color='r', ls='-')
# ax2.plot([s2.shapePoints[0].x, s2.shapePoints[1].x],
#          [s2.shapePoints[0].y, s2.shapePoints[1].y],
#          color='r', lw=1, ls='-')
def plot_kdes(labels=None, results=None, category=None, df=None,
              label_col=None, result_col=None, colors=None, **kwargs):
    """
    Plots KDEs and Cumulative KDEs

    Requires seaborn for plotting

    Can either pass in arrays of labels/results or else df

    Parameters
    -----------
    labels : array_like
        categorical values
    results : array_like
        numerical values
    category : string, optional
        name of label category for plotting, e.g. 'Gender'
    df : pandas DataFrame, optional
    label_col : string, optional
        name of labels column in df
    result_col : string, optional
        name of results column in df
    colors : list of strings, optional
        takes xkcd hue labels, e.g. ['red', 'blue', 'mustard yellow']
        more here: https://xkcd.com/color/rgb/

    Returns
    --------
    ax : numpy array of matplotlib axes

    Plots
    ------
    (1,2) subplots: KDE and cumulative KDE by group in `labels`
    """
    import seaborn as sns
    if df is None:
        df = pd.DataFrame(list(zip(labels, results)),
                          columns=['label', 'result'])
    else:
        df = df.rename(columns={label_col: 'label', result_col: 'result'})
    unique_labels = df.label.dropna().unique()
    nlabels = len(unique_labels)
    # Check if there is a distribution to plot in each group
    stds = df.groupby('label')[['result']].std()
    if 0 in stds.values:
        groups = stds.index[stds['result'] == 0].values
        print('No distribution of results in groups: %s'
              % ', '.join([str(i) for i in groups]))
        return
    if not colors:
        base_colors = ['red', 'blue']
        others = list(set(sns.xkcd_rgb.keys()) - set(base_colors))
        extra_colors = list(np.random.choice(others, nlabels, replace=False))
        colors = list(base_colors + extra_colors)[:nlabels]
    sns.set_palette(sns.xkcd_palette(colors))
    fig, ax = plt.subplots(1, 2, figsize=(16, 6))
    if not category:
        category = '_vs_'.join(map(str, unique_labels))
    ax[0].set_title("%s KDEs" % category)
    ax[1].set_title("%s Cumulative KDEs" % category)
    ax[0].set_ylabel('Frequency')
    ax[1].set_ylabel('Group Fraction Below')
    ax[0].set_xlabel('Threshold')
    ax[1].set_xlabel('Threshold')
    for lab in unique_labels:
        sns.kdeplot(df.loc[df.label == lab].result, shade=True,
                    label=lab, ax=ax[0], **kwargs)
        sns.kdeplot(df.loc[df.label == lab].result, shade=False,
                    label=lab, ax=ax[1], cumulative=True, **kwargs)
    ax0_max_y = max([max(i.get_data()[1]) for i in ax[0].get_lines()])
    ax[0].set_ylim(0, ax0_max_y * 1.1)
    plt.show()
    return ax
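# A small usage sketch for plot_kdes with synthetic data (the column names and
# values below are illustrative, not from the original module):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
demo = pd.DataFrame({
    'gender': ['F'] * 200 + ['M'] * 200,
    'score': np.concatenate([rng.normal(0.6, 0.1, 200),
                             rng.normal(0.5, 0.1, 200)]),
})
plot_kdes(df=demo, label_col='gender', result_col='score',
          category='Gender', colors=['red', 'blue'])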
"windows blue", "medium green", "dusty purple", "orange", "amber", "clay", "pink", "greyish", "light cyan", "steel blue", "forest green", "pastel purple", "mint", "salmon", "dark brown"] colors = sns.xkcd_palette(color_names) cmap = gradient_cmap(colors) except: from matplotlib.cm import get_cmap colors = ['b', 'r', 'y', 'g', 'purple'] cmap = get_cmap("jet") from pybasicbayes.util.text import progprint_xrange from pylds.util import random_rotation from pyslds.models import DefaultSLDS npr.seed(0) # Set parameters K = 5
import matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors
from mpl_toolkits.mplot3d import Axes3D

matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

sns.set(color_codes=True, context="poster")
sns.set_style("white", {'font.family': 'serif',
                        'font.serif': 'Times New Roman'})
cc = ["light red", "cyan", "apricot"]
sns.set_palette(sns.xkcd_palette(cc), desat=.9)

GEN = 1000  # 500
EXP_NAME = "XENO_3"  # "XENO_Quad_Big"
GET_FRESH_PICKLES = False
PLOT_TRACE = True
RUNS = 100
# NUM_INDS_TO_PLOT_PER_POP = 10
N_ROWS = 10  # 5
N_COLS = 10  # 5

PICKLE_DIR = "/home/sam/Projects/research_code/evosoro/data_analysis/results/{0}_Gen_{1}".format(
def lc_plot(train_loss_list, train_acc_list, test_loss_list, test_acc_list, name):
    iter_list = list(np.arange(0, len(train_loss_list))) + \
                list(np.arange(0, len(test_loss_list)))
    type_list = ['Train'] * len(train_loss_list) + ['Test'] * len(test_loss_list)
    colors = ['windows blue', 'watermelon']
    palette = sns.xkcd_palette(colors)
    pdf = PdfPages('plot' + name + '.pdf')
    plt.figure(figsize=(20, 6.5))
    sns.set(style="whitegrid")

    ax1 = plt.subplot(1, 2, 1)
    loss_frame = {'Iteration': iter_list,
                  'Loss': train_loss_list + test_loss_list,
                  'Dataset': type_list}
    loss_frame = DataFrame(loss_frame)
    g = sns.lineplot(x="Iteration", y="Loss", hue='Dataset', style='Dataset',
                     data=loss_frame, legend='full', err_style='bars',
                     palette=palette, linewidth=2,
                     err_kws={'elinewidth': 2}, ax=ax1)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel("Iteration", fontsize=12)
    plt.ylabel("Loss", fontsize=12)
    leg = g.legend(loc='lower left', fontsize=12)
    for legobj in leg.legendHandles:
        legobj.set_linewidth(2.0)

    ax1 = plt.subplot(1, 2, 2)
    loss_frame = {'Iteration': iter_list,
                  'Acc': train_acc_list + test_acc_list,
                  'Dataset': type_list}
    loss_frame = DataFrame(loss_frame)
    g = sns.lineplot(x="Iteration", y="Acc", hue='Dataset', style='Dataset',
                     data=loss_frame, legend='full', err_style='bars',
                     palette=palette, linewidth=2,
                     err_kws={'elinewidth': 2}, ax=ax1)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.xlabel("Iteration", fontsize=12)
    plt.ylabel("Acc", fontsize=12)
    leg = g.legend(loc='lower right', fontsize=12)
    for legobj in leg.legendHandles:
        legobj.set_linewidth(2.0)

    pdf.savefig(bbox_inches='tight')
    pdf.close()
    plt.show()
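# A quick usage sketch for lc_plot with synthetic curves; it assumes the
# module already imports numpy as np, seaborn as sns, matplotlib.pyplot as
# plt, DataFrame from pandas, and PdfPages from
# matplotlib.backends.backend_pdf (as the function body requires):
train_loss = list(np.exp(-np.linspace(0, 3, 100)))
test_loss = list(np.exp(-np.linspace(0, 3, 100)) + 0.05)
train_acc = list(1 - np.exp(-np.linspace(0, 3, 100)))
test_acc = list(1 - np.exp(-np.linspace(0, 3, 100)) - 0.05)
lc_plot(train_loss, train_acc, test_loss, test_acc, '_demo')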
# generate multi level index for columns for precision
spstd = pd.concat([sstd, pstd], axis=1)
multi_cols = zip(sens_prec_names, spstd.columns)
multi_cols = pd.MultiIndex.from_tuples(multi_cols, names=['SP', 'Method'])
spstd.columns = multi_cols

# ------------------------------------------------------------------------------
#
#    P L O T T I N G   O F   R E S U L T S
#
# ------------------------------------------------------------------------------

# setup seaborn style
colors = ["windows blue", "faded green"]
sns.set_palette(sns.xkcd_palette(colors))
sns.set_style("whitegrid", {"grid.color": ".9"})
sns.set_context("talk")

nrows = 1
ncols = 2
fig, ax = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, sharey=True)
fig.set_figheight(3.5)
fig.set_figwidth(8.27)

i = 0
ratios_to_plot = range(2)
for c in range(ncols):
    ratio = ratios_to_plot[c]
    means = spmean.iloc[ratio, ].T.unstack().T
    means.index.name = ""
    error_bars = spstd.iloc[ratio, ].T.unstack().T
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.ticker as ticker
import numpy as np
import os, sys
from scipy import stats
import seaborn as sns
import statsmodels
import statsmodels.api as sm

RE = "/dors/capra_lab/projects/enhancer_ages/fantom/results/for_publication/age_breaks/"

colors = ["amber", "faded green", "dusty purple", "windows blue", "greyish"]
palette = sns.xkcd_palette(colors)
sns.palplot(palette)

shuf_colors = ["amber", "greyish"]
shuf_pal = sns.xkcd_palette(shuf_colors)

# %% Files

path = "/dors/capra_lab/projects/enhancer_ages/fantom/data/"
enh = "%sFANTOM_enh_age_arch_full_matrix.tsv" % path
summaryEnh = "%sFANTOM_enh_age_arch_summary_matrix.tsv" % path
def prepare_and_plot_3_1_info_loss(dataset_name):
    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    result_path = re.search(pattern, path_in).group(0) + 'data/result/'
    epsilons = ['1.0', '2.0']
    record_linkage = []
    sse = []
    model = []
    epsis = []
    plot_path = result_path + '/plots/needs_editing/3_1_b_' + dataset_name + '.jpg'
    for eps in epsilons:
        sc_result_sse = pd.read_csv(result_path + 'sc_test/3_1/' + dataset_name +
                                    '/norm_result_eps-' + eps + '.csv')['sse']
        sc_result_rl = pd.read_csv(result_path + 'sc_test/3_1/' + dataset_name +
                                   '/result_eps-' + eps + '.csv')['record_linkage']
        safepub_result_sse = pd.read_csv(result_path + 'safepub_test/3_1/' +
                                         dataset_name +
                                         '/norm_result_granularity_eps-' +
                                         eps + '.csv')['sse']
        safepub_result_rl = pd.read_csv(result_path + 'safepub_test/3_1/' +
                                        dataset_name +
                                        '/result_granularity_eps-' +
                                        eps + '.csv')['record_linkage']
        record_linkage += list((sc_result_rl.values * 1000).astype(int))
        sse += list(sc_result_sse.values)
        model += ['MicroDP'] * len(sc_result_rl.values)
        epsis += [float(eps)] * len(sc_result_rl.values)

        record_linkage += list((safepub_result_rl.values * 1000).astype(int))
        sse += list(safepub_result_sse.values)
        model += ['SafePub'] * len(safepub_result_rl.values)
        epsis += [float(eps)] * len(safepub_result_rl.values)
    array = np.array([record_linkage, sse, epsis]).T
    df = pd.DataFrame(array,
                      columns=['Record linkage', 'Information loss', 'ε'])
    df['Model'] = model
    ax = sns.lineplot(x='Record linkage', y='Information loss', hue='Model',
                      data=df,
                      palette=sns.xkcd_palette(['windows blue', 'amber']))
    # k_ax = sns.scatterplot(x='Record linkage', y='Information loss',
    #                        hue='Model', data=k_df,
    #                        palette=sns.xkcd_palette(['faded green']))
    # ax.set(ylim=(0.0, 1))
    # k_ax.set(xlim=(0.0, 10))
    plt.show()
    # plt.savefig(plot_path)
    plt.clf()
    'axes.titlesize': 10})
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from hips.plotting.layout import create_figure, create_axis_at_location
import seaborn as sns

color_names = ["windows blue", "amber", "crimson",
               "faded green", "dusty purple", "greyish"]
colors = sns.xkcd_palette(color_names)
sns.set(style="white", palette=sns.xkcd_palette(color_names))

from hips.plotting.colormaps import harvard_colors, gradient_cmap
# colors = harvard_colors()

T = 1000
D = 50
n = T // D


def sample_mixture_model(lmbda, p):
    """
    Simple mixture model example
    """
    # Simulate latent states
    l1, l2 = l_size[0], l_size[1]
    num_orbitals = file_input['NOrbitals'].value
    return num_orbitals, l1, l2


# ************************************
# keep these definitions for kite website
import seaborn as sns

mpl.rcParams['figure.dpi'] = 100
mpl.rcParams['savefig.dpi'] = 100
sns.set_style("white")
# Kite color scheme
colors = ["dusty purple", "faded green", "windows blue", "amber", "greyish"]
current_palette = sns.xkcd_palette(colors)
sns.set_palette(current_palette)
sns.set_style("ticks")
sns.set_context("talk", font_scale=1.3)
# ************************************

# read h5 just to know the number of moments; enter the name of the DOS file
file_name = 'phmag.h5'
moments_KITE, a_scale, b_scale = get_moments_and_scales(file_name)
num_orbitals, _, _ = get_size(file_name)

# this was the grid where the DOS was evaluated
num_points = 5000
energy1 = np.linspace(0.335, 0.37, num_points)
energy2 = np.linspace(-1.17, -1.2, num_points)
rcParams['xtick.major.width'] = 1.25
rcParams['xtick.minor.width'] = 1.25
rcParams['ytick.major.size'] = 2.5
rcParams['ytick.minor.size'] = 1.5
rcParams['ytick.major.width'] = 1.25
rcParams['ytick.minor.width'] = 1.25
rcParams['text.usetex'] = True
rcParams['xtick.major.pad'] = 6
rcParams['ytick.major.pad'] = 6
rcParams['ytick.direction'] = 'in'
rcParams['xtick.direction'] = 'in'
rcParams['figure.figsize'] = 3.5, 3.5 / sc.golden

# Colours.
snscols = sns.xkcd_palette(["windows blue", "amber", "faded green",
                            "greyish", "dusty purple", "pale red"])

# Colourmaps.
whblbk = sns.cubehelix_palette(light=1., dark=0.2, start=0.1, hue=1.0,
                               rot=-0.3, as_cmap=True)
bkblwh = sns.cubehelix_palette(light=1., dark=0.2, start=0.1, hue=1.0,
                               rot=-0.3, as_cmap=True, reverse=True)


# Clip the data for plotting.
def clip(arr, maxval=None, minval=None, maskNaN=None, log=False,
         minNaN=None, maxNaN=None):
    if log:
        arr = np.log10(arr)
    if minNaN is not None:
        maskNaN = minNaN
def drawShape(axis, shape):
    shape.draw(sns.xkcd_palette(["light blue"]), 0, axis)
def create_dataCorrect_barplot(ginfo, inLCMSData):
    # runs to loop through
    suffix_list = ["", "_C1", "_C2"]
    # labels to appear in graph legend
    list_desc = ["Original Data",
                 "Correction of false positive",
                 "Correction of false positive and false negative"]
    predictor_desc = "covarlist_all"  # "covarlist" or "covarlist_all" or "clinOnly"
    if predictor_desc == "covarlist":
        title = "LC-MS features only"
    elif predictor_desc == "covarlist_all":
        title = "Clinical + LC-MS features"
    if predictor_desc == "clinOnly":
        figName = ginfo.FileNamePrefix + '_covarlist_all_' + \
            inLCMSData + 'patients_dataCorrect'
        tableName = ginfo.FileNamePrefix + '_covarlist_all_' + \
            inLCMSData + 'patients.txt'
        title = "Clinical features only"
    else:
        figName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + inLCMSData + '_dataCorrect'
        tableName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + inLCMSData + '.txt'
    resultsDF = pd.read_csv(outDir + 'R_' + tableName, sep=",")
    # eliminate LDA+shrinkage since it behaves strangely
    # resultsDF = resultsDF[resultsDF['Unnamed: 0'] != "LDA+shrinkage"]
    alg_names = resultsDF['Unnamed: 0']  # algorithm names
    print("alg_names: ", alg_names)
    initial_pos = np.arange(len(alg_names)) * (len(suffix_list) + 1) + len(suffix_list) + 1
    bar_width = 1
    colors = ["taupe", "teal", "salmon"]
    mycolors = sns.xkcd_palette(colors)
    plt.figure(figsize=(6.7, 8))  # to place next to one another
    # cycle through each patient list
    plots = []
    for counter, suffix in enumerate(suffix_list):
        if predictor_desc == "clinOnly":
            tableName = ginfo.FileNamePrefix + '_covarlist_all_' + \
                inLCMSData + 'patients' + suffix + '.txt'
        else:
            tableName = ginfo.FileNamePrefix + '_' + predictor_desc + '_' + \
                inLCMSData + suffix + '.txt'
        resultsDF = pd.read_csv(outDir + 'R_' + tableName, sep=",")
        # eliminate LDA+shrinkage since it behaves strangely
        # resultsDF = resultsDF[resultsDF['Unnamed: 0'] != "LDA+shrinkage"]
        measurements = np.array(resultsDF['cvAUC'])
        z = stats.norm.ppf(.95)
        SEs = [(np.array(resultsDF['cvAUC']) - np.array(resultsDF['ci_low'])) / z,
               (np.array(resultsDF['ci_up']) - np.array(resultsDF['cvAUC'])) / z]
        alg_pos = initial_pos - counter
        print("measurements: ", measurements)
        print("alg_pos: ", alg_pos)
        plot = plt.barh(bottom=alg_pos, width=measurements, height=bar_width,
                        xerr=SEs,
                        error_kw=dict(ecolor='.1', lw=1, capsize=1, capthick=1),
                        align='center', alpha=1,
                        color=mycolors[counter], label=list_desc[counter])
        # add numeric values to plot
        xpos = np.array(resultsDF['ci_low']) - .05
        ypos = alg_pos - .3
        mytext = ["%.2f" % x for x in measurements]
        for place, text in enumerate(mytext):
            plt.text(xpos[place], ypos[place], text, color="white", fontsize=10)
        plots.append(plot)
    plt.xlabel("cvAUC")
    plt.title(title)
    plt.xlim(.5, 1)
    plt.ylim(0, max(initial_pos) + 2)
    print("counter: ", counter)
    plt.yticks(initial_pos - counter / 2, alg_names)
    plt.legend(prop={'size': 8})
    plt.tight_layout()
    plt.savefig(outDir + figName + '.eps', dpi=1200)
    plt.close()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Takes a combined dataframe of all the results
# G-mean plot
colors = ["windows blue", "orange red", "light brown", "amber",
          'purple', 'jade', 'grey']
myPalette = sns.xkcd_palette(colors)  # passing colors to xkcd_palette function
sns.set(style="white")  # white background
g = sns.factorplot(x="samp_technique", y="g_mean", hue="classifier",
                   data=combined_df1, saturation=5, size=5, aspect=2.4,
                   kind="bar", palette=myPalette,
                   legend=False)  # removes legend
g.set(ylim=(0, 1))
g.despine(right=False)
g.set_xlabels("")
g.set_ylabels("G-mean Score")
g.set_yticklabels("")

# Matplotlib -- legend creation
# bbox_to_anchor: left, bottom, width, height
myLegend = plt.legend(bbox_to_anchor=(0., 1.2, 1., .102),
                      prop={'size': 7.5}, loc=10,
                      ncol=3,  # 3 columns in the legend
                      title=r'ROC Score per sampling technique and classifier')
import json
import os
import h5py
import numpy as np
import subprocess as sub
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter

# rc_params = {'lines.linewidth': 1.1,
#              'text.latex.preamble': [r'\usepackage{siunitx}',
#                                      r'\sisetup{detect-all}',
#                                      r'\renewcommand*\sfdefault{lcmss}',
#                                      r'\usepackage{sansmath}',
#                                      r'\sansmath'],
#              'text.usetex': True}
rc_params = {'lines.linewidth': 1.5, 'text.usetex': True}
palette = sns.color_palette(sns.xkcd_palette(['denim blue', 'orange red',
                                              'golden', 'medium green',
                                              'fuchsia', 'aquamarine',
                                              'burnt sienna']), n_colors=10)
sns.set(style='ticks', font='serif', palette='Set1', context='paper',
        font_scale=1.4, rc=rc_params)


class Trajectory(object):
    """This class defines the object of a gromacs trajectory"""

    def __init__(self, traj_path, basename, basename_trr=None,
                 tracking_dir=os.path.expanduser('~/HiWi/WW/tracking/')):
        """Init sets the path in which the trajectory is found and will
        define respective files for python

        :traj_path: path of trajectory
        """
        if basename_trr is None:
# -*- coding: utf-8 -*-
"""
Created on Wed Sep  8 21:03:11 2021

@author: kevin
"""
import torch
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns

color_names = ["windows blue", "red", "amber", "faded green"]
colors = sns.xkcd_palette(color_names)
sns.set_style("white")
sns.set_context("talk")

from torch.nn.functional import binary_cross_entropy, binary_cross_entropy_with_logits
# from torch.utils.data import DataLoader
# from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid

# %% Simple rate RNN generative model
dt, T, N, tau, s = 0.01, 100, 20, 1, .5
v1, v2 = np.random.randn(N), np.random.randn(N)
Jij = s * np.sqrt(N) * np.random.randn(N, N)
# + np.outer(v1, v1) + np.outer(v2, v2) + np.outer(v1, v2)

### Mask for sparsity
sparsity = 0.6
def evaluation(dataset, data_dir, plot_dir):
    plt.rcdefaults()
    # Styles
    sns.set_style('whitegrid', {'axes.linewidth': 1.25, 'axes.edgecolor': '0.15',
                                'grid.linewidth': 1.5, 'grid.color': 'gray'})
    sns.set_color_codes()
    plt.rcParams['figure.figsize'] = (12.0, 9.0)
    plt.rc('text', usetex=False)
    plt.rc('font', size=14.0, family='sans-serif')

    # Data location and scenario
    preprocessor = 'all'

    # Load configurations
    reader = cr.ConfigReader(data_dir=data_dir, dataset=dataset)
    tdf = reader.load_validation_trajectories(preprocessor=preprocessor,
                                              load_config=True)

    # Decode number of layers
    tdf.loc[:, ('classifier', 'num_layers')] = \
        tdf['classifier']['num_layers'].apply(lambda X: ord(X) - ord('a'))

    ## Plot average best architectures
    top5 = tdf.sort_values([('smac', 'test_performance')]).head(1)
    lays = np.int(np.ceil(np.array(top5['classifier']['num_layers']).mean()))
    labels_list = ['Layer_' + str(i) for i in range(1, 7)]
    pre_m = top5['preprocessor']['choice'].describe().top
    activations = []
    n_layers = []
    weights = []
    for i in np.arange(1, lays):
        activations.append(top5['classifier']['activation_layer_' + str(i)].describe().top)
        n_layers.append(top5['classifier']['num_units_layer_' + str(i)].mean())
        weights.append(top5['classifier']['weight_init_' + str(i)].describe().top)
    tab = top5.classifier.T.dropna()
    table_list = ['batch_size', 'dropout_output', 'learning_rate', 'lambda2',
                  'number_epochs', 'solver']
    t = tab.loc[table_list]
    t = t.append(top5['preprocessor']['choice'])
    a = pd.Series(np.array(n_layers))
    botoms = np.fabs(a.sub(a.max())) / 2
    activ_list = ['relu', 'elu', 'leaky', 'sigmoid', 'tanh', 'scaledTanh', 'linear']
    colr_list = sns.xkcd_palette(["windows blue", "pastel blue", "grey blue",
                                  "red orange", "emerald", "pine green", "amber"])
    activation_color_codes = dict(zip(activ_list, colr_list))
    bar_width = 0.1
    colors_bars = [activation_color_codes.get(i) for i in activations]
    with sns.axes_style('ticks'):
        fig_arch = plt.figure(1, figsize=(15., 9.))
        ax_arch = plt.subplot(111)
        bars = ax_arch.bar(np.arange(lays - 1) - (bar_width / 2), a,
                           bottom=botoms, width=bar_width, color=colors_bars)
        sns.despine(left=True)
        ax_arch.set_ylabel('Number of units in Layer')
        ax_arch.set_yticklabels([])
        ax_arch.set_yticks([])
        ax_arch.set_xticks(np.arange(lays - 1))
        ax_arch.set_xticklabels(labels_list[:lays - 1])
        ax_arch = autolabel(bars, ax_arch)
        table_ax(ax_arch, t)
        ax_arch.legend([b for b in bars], activations, loc='best')
        ax_arch.set_title('Single best architecture found for dataset %s' % dataset)
        ax_arch.set_xlim(-0.5, lays - 1)
    fig_arch.savefig(plot_dir + "Best_architecture_on_%s.pdf" % dataset)

    # Start filtering the error
    temp_df = tdf.copy()
    temp_df.columns = tdf.columns.droplevel(0)
    min_perf = temp_df['test_performance'].min()
    mean_perf = temp_df['test_performance'].mean()
    std_perf = temp_df['test_performance'].std()
    qtil_10 = temp_df['test_performance'].quantile(0.1)
    del temp_df
    m = tdf[('smac', 'test_performance')] <= qtil_10

    # Setting values to log scale and categorical values
    log_columns = ['beta1', 'beta2', 'gamma', 'lambda2', 'learning_rate',
                   'momentum', 'num_units_layer_1', 'num_units_layer_2',
                   'num_units_layer_3', 'num_units_layer_4', 'num_units_layer_5',
                   'num_units_layer_6', 'power', 'std_layer_1', 'std_layer_2',
                   'std_layer_3', 'std_layer_4', 'std_layer_5', 'std_layer_6']
    for lc in log_columns:
        try:
            tdf.loc[:, ('classifier', lc)] = np.log10(tdf.loc[:, ('classifier', lc)])
        except KeyError:
            continue

    ## After setting the frames, start with the plotting
    plt.clf()
    # Plot the empirical CDF
    sorted_train = (tdf['smac']['train_performance']
                    .sort_values(ascending=True).values)
    sorted_test = (tdf['smac']['test_performance']
                   .sort_values(ascending=True).values)
    ytrain = np.arange(len(sorted_train)) / float(len(sorted_train))
    ytest = np.arange(len(sorted_test)) / float(len(sorted_test))
    plt.step(sorted_train, ytrain, label="Train Performance", lw=2.5)
    plt.step(sorted_test, ytest, label="Test Performance", lw=2.5)
    plt.xlabel("Cross-validation error $y(x)$")
    plt.ylabel(r"Number of Configs (%)")
    plt.xlim(0.0, min(1.0, sorted_test.max() + 0.01))
    plt.title("Empirical CDF of configurations based on error")
    plt.legend(loc='best')
    plt.tight_layout()
    plt.savefig(plot_dir + 'CDF_Error_%s.pdf' % dataset)

    categories = ['solver', 'lr_policy', 'num_layers']
    mask_filter = tdf[('smac', 'test_performance')] <= qtil_10
    filtered = tdf[mask_filter]
    for category in categories:
        fig_f, axs = plt.subplots(ncols=2, nrows=1, figsize=(15.0, 10.5))
        ax0, ax1 = axs.flat
        sns.boxplot(x=('classifier', category), y=('smac', 'test_performance'),
                    data=filtered.sort_values(by=[('classifier', category)]),
                    ax=ax0)
        ax0.set_xlabel(category)
        ax0.set_ylabel('Test error performance')
        ax0.set_title('Error distribution based on %s' % category)
        sns.countplot(x=('classifier', category),
                      data=filtered.sort_values(by=[('classifier', category)]),
                      ax=ax1)
        ax1.set_xlabel(category)
        ax1.set_ylabel('Times used')
        ax1.set_title('Bar plot of frequency of %s' % category)
        fig_f.suptitle("Descriptive stats of %s on dataset %s using 10%% of configurations"
                       % (category, dataset), y=0.98)
        # fig_f.tight_layout()
        fig_f.savefig(plot_dir + 'Descriptive_plots_over_%s_on_%s.pdf'
                      % (category, dataset))
        fig_f.show()

    ## Plot distro over learning rates
    # Create the grouping of the filtered DF
    classifier_df = tdf[m]['classifier']
    solver_filt = classifier_df.groupby('solver')
    # with sns.color_palette('Set1', 8):
    #     for name, groups in solver_filt:
    #         plt.hist(groups.learning_rate.values, alpha=0.5, bins=20, label=name)
    #     plt.legend()
    col_hist = sns.color_palette('Paired', 8, desat=0.8)
    rows_to_plot = np.int(np.ceil(len(solver_filt) / 2.))
    fig2, axs = plt.subplots(nrows=rows_to_plot, ncols=2, figsize=(12., 17.))
    fig2.suptitle('Distribution of learning rate values for each '
                  'solver on dataset %s \n (based on 50%% best configurations)'
                  % dataset, y=1.02)
    for ax, (name, groups) in zip(axs.flat, solver_filt):
        ax.hist(groups.learning_rate.values, bins=5, histtype='bar', fill=True,
                label=name, alpha=0.9, color=col_hist.pop())
        ax.set_xlabel('learning rate values (log scale)')
        ax.set_ylabel('# of Configs')
        ax.legend(loc='best')
    # plt.tight_layout()
    ax = axs.flat[-1]
    ax.set_visible(False)
    fig2.savefig(plot_dir + 'Histogram_of_learning_rate_solver_on_dataset_%s.pdf'
                 % dataset)

    ## Plot over different preprocessing methods
    # Create the grouping of the filtered DF
    prepro_filt = tdf[m].groupby([('preprocessor', 'choice')])
    prepro_color = sns.color_palette('Paired', 14, desat=0.8)
    fig4, axs = plt.subplots(nrows=3, ncols=5, sharex='col', figsize=(22., 12.))
    fig4.suptitle('Distribution of learning rate for each preprocessor on dataset %s'
                  % dataset, y=1.02)
    for ax, (name, grops) in zip(axs.flat, prepro_filt):
        groups = grops['classifier']
        ax.hist(groups.learning_rate.values, bins=5, histtype='bar', fill=True,
                label=name, color=prepro_color.pop())
        ax.set_xlabel('learning rate values (log scale)')
        ax.set_ylabel('# of Configs')
        ax.legend(loc='best')
    # plt.tight_layout()
    fig4.savefig(plot_dir + 'Histogram_of_learning_rate_prepro_on_dataset_%s.pdf'
                 % dataset)
    ax.annotate('', xy=(X[i], y), xytext=(X[j], y), arrowprops=props)


# Call the function
label_diff(0, 1, 'p=0.0370', X, means)
label_diff(0, 2, 'p<0.0001', X, means)
label_diff(0, 3, 'p=0.0025', X, means)
label_diff(0, 4, 'p=0.0000', X, means)

sns.set(font_scale=2)
sns.set_style("whitegrid")
plt.show()
quit()

col_list = ["red", "green", "blue", "purple", "coral"]
col_list_palette = sns.xkcd_palette(col_list)
sns.set_palette(col_list_palette)
sns.despine(offset=10, trim=True)

# labels = ['1_0_0', '0.95_0.05_0', '0.85_0.10_0.05',
#           '0.70_0.20_0.10', '0.50_0.30_0.20', '0.25_0.50_0.25']
labels = ['1_0_0', '0.95_0.05_0', '0.85_0.10_0.05',
          '0.7_0.2_0.1', '0.25_0.50_0.25']
expDataLabels = ['Passage 11', 'Passage 15', 'Passage 19', 'Passage 28',
                 'Parental']
# ax = sns.violinplot(data=frames_LCStrans_mod, cut=0, inner='box')
# ax = sns.violinplot(data=expData, cut=0, inner='box')
# ax = sns.boxplot(data=expData, showfliers=False)
# ax = sns.pointplot(data=frames_LCStrans_mod, estimator=median)
# fig, ax = plt.subplots()
# CURVE_P0 = (1.0, 1.0, 1.0, 1.0)
# def sigmoid(x, a0, a1, a2, a3):
#     return (a0 + a1 * x) / (1.0 + a2 * np.exp(-a3 * x))

CURVE_A0 = np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=float)

def sigmoid(x, a0, a1, a2, a3, a4):
    return a0 * (1.0 - np.exp(a1 - a2 * x)) / (1.0 + np.exp(a3 - a4 * x))

compounds, maxisos = zip(*[('G6P', 6), ('F6P', 6), ('FBP', 6),
                           ('DHAP', 3), ('xPG', 3), ('PEP', 3)])
compound2maxiso = dict(zip(compounds, maxisos))
colors = ["dusty purple", "windows blue", "teal green",
          "scarlet", "purplish pink", "orange"]
colpalette = sns.xkcd_palette(colors)
# colpalette = sns.color_palette("PuBuGn_d", len(compounds))
exps = ['eca', 'bsa', 'wt']
titles = [r'$\Delta$pfkA$\Delta$pfkB + pfkA from E. coli',
          r'$\Delta$pfkA$\Delta$pfkB + pfkA from B. subtilis',
          'Wild-type E. coli']

# pd.DataFrame.from_csv was removed in pandas 1.0; pd.read_csv is the replacement
count_df = pd.read_csv('integration_results.txt', sep='\t', index_col=0)
count_df.fillna(0, inplace=True)
samples_df = pd.read_csv('samples.csv', index_col=0)

# remove time point - 45 min, seems to be a mistake
count_df = count_df.loc[samples_df['time (min)'] != 45, :]
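# The generalized sigmoid above is presumably fit per compound with
# scipy.optimize.curve_fit, using CURVE_A0 as the initial guess. A minimal
# sketch (the names `fit_labeling_curve`, `t`, and `frac` are hypothetical):
from scipy.optimize import curve_fit

def fit_labeling_curve(t, frac):
    """Fit the 5-parameter sigmoid to a labeled-fraction time course."""
    popt, pcov = curve_fit(sigmoid, t, frac, p0=CURVE_A0, maxfev=10000)
    return popt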
# NOTE: the head of this loop (loading raw_counts per sample) was truncated
# in the source; the surviving tail is kept as-is.
raw_counts = raw_counts.reindex(known_counts.index, fill_value=0)
raw_errors.append(raw_counts - known_counts["count"])
raw_error_mean = pd.concat(raw_errors).mean()

errors = []
for uncertainty in [0, 5, 10, 20, 30]:
    u = "err-{}%".format(uncertainty) if uncertainty > 0 else "default"
    for mean, posterior_counts, known_counts in zip(
            snakemake.params.means, snakemake.input.get(u), all_known_counts):
        posterior_estimates = pd.read_table(posterior_counts, index_col=[0, 1])
        posterior_estimates = posterior_estimates.reindex(known_counts.index,
                                                          fill_value=0)
        errors.append(pd.DataFrame({
            "error": posterior_estimates["expr_map"] - known_counts["count"],
            "mean": mean,
            "uncertainty": uncertainty}))
errors = pd.concat(errors)

x, y = snakemake.config["plots"]["figsize"]
plt.figure(figsize=(x * 1.5, y))
colors = sns.xkcd_palette(["light red"])
sns.violinplot(x="uncertainty", y="error", data=errors, bw=1,
               inner="quartile", palette=colors, linewidth=1)
plt.plot(plt.xlim(), [0, 0], "-k", linewidth=1, zorder=-5)
plt.plot(plt.xlim(), [raw_error_mean] * 2, ":k", linewidth=1, zorder=-5)
sns.despine()
plt.xlabel("error rate underestimation (%)")
plt.ylabel("predicted - truth")
plt.savefig(snakemake.output[0], bbox_inches="tight")
# imports assumed by this function
import os.path as osp

import pandas
import matplotlib.pyplot as plt
import seaborn


def learning_curve(log_file):
    print('==> Plotting log file: %s' % log_file)

    df = pandas.read_csv(log_file)

    colors = ['red', 'green', 'blue', 'purple', 'orange']
    colors = seaborn.xkcd_palette(colors)

    plt.figure(figsize=(20, 6), dpi=500)

    row_min = df.min()
    row_max = df.max()

    # initialize DataFrame for train
    columns = [
        'epoch',
        'iteration',
        'train/loss',
        'train/acc',
        'train/acc_cls',
        'train/mean_iu',
        'train/fwavacc',
    ]
    df_train = df[columns]
    # pandas.rolling_mean was removed; DataFrame.rolling is the replacement
    df_train = df_train.rolling(window=10).mean()
    df_train = df_train.dropna()
    iter_per_epoch = df_train.query('epoch == 1')['iteration'].values[0]
    df_train['epoch_detail'] = df_train['iteration'] / iter_per_epoch

    # initialize DataFrame for val
    columns = [
        'epoch',
        'iteration',
        'valid/loss',
        'valid/acc',
        'valid/acc_cls',
        'valid/mean_iu',
        'valid/fwavacc',
    ]
    df_valid = df[columns]
    df_valid = df_valid.dropna()
    df_valid['epoch_detail'] = df_valid['iteration'] / iter_per_epoch

    data_frames = {'train': df_train, 'valid': df_valid}
    n_row = 2
    n_col = 3
    for i, split in enumerate(['train', 'valid']):
        df_split = data_frames[split]

        # loss
        plt.subplot(n_row, n_col, i * n_col + 1)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        plt.plot(df_split['epoch_detail'], df_split['%s/loss' % split], '-',
                 markersize=1, color=colors[0], alpha=.5,
                 label='%s loss' % split)
        plt.xlim((0, row_max['epoch']))
        plt.ylim((min(row_min['train/loss'], row_min['valid/loss']),
                  max(row_max['train/loss'], row_max['valid/loss'])))
        plt.xlabel('epoch')
        plt.ylabel('%s loss' % split)

        # loss (log)
        plt.subplot(n_row, n_col, i * n_col + 2)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        plt.semilogy(df_split['epoch_detail'], df_split['%s/loss' % split],
                     '-', markersize=1, color=colors[0], alpha=.5,
                     label='%s loss' % split)
        plt.xlim((0, row_max['epoch']))
        plt.ylim((min(row_min['train/loss'], row_min['valid/loss']),
                  max(row_max['train/loss'], row_max['valid/loss'])))
        plt.xlabel('epoch')
        plt.ylabel('%s loss (log)' % split)

        # lbl accuracy
        plt.subplot(n_row, n_col, i * n_col + 3)
        plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
        plt.plot(df_split['epoch_detail'], df_split['%s/acc' % split], '-',
                 markersize=1, color=colors[1], alpha=.5,
                 label='%s accuracy' % split)
        plt.plot(df_split['epoch_detail'], df_split['%s/acc_cls' % split],
                 '-', markersize=1, color=colors[2], alpha=.5,
                 label='%s accuracy class' % split)
        plt.plot(df_split['epoch_detail'], df_split['%s/mean_iu' % split],
                 '-', markersize=1, color=colors[3], alpha=.5,
                 label='%s mean IU' % split)
        plt.plot(df_split['epoch_detail'], df_split['%s/fwavacc' % split],
                 '-', markersize=1, color=colors[4], alpha=.5,
                 label='%s fwav accuracy' % split)
        plt.legend()
        plt.xlim((0, row_max['epoch']))
        plt.ylim((0, 1))
        plt.xlabel('epoch')
        plt.ylabel('%s label accuracy' % split)

    out_file = osp.splitext(log_file)[0] + '.png'
    plt.savefig(out_file)
    print('==> Wrote figure to: %s' % out_file)
def plot_surf_label(coords, faces, labels=None,
                    elev=0, azim=0, cpal='bright',
                    threshold=None, bg_map=None,
                    bg_on_labels=False, alpha='auto',
                    darkness=1, figsize=None, **kwargs):
    '''
    - labels requires a tuple of label/s, each a list/array of node indices
    - cpal takes either the name of a seaborn color palette or matplotlib
      color map, or a list of rgb values or color names from
      http://xkcd.com/color/rgb/
    '''
    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.tri as tri
    from mpl_toolkits.mplot3d import Axes3D
    import seaborn as sns

    # load mesh and derive axes limits
    faces = np.array(faces, dtype=int)
    limits = [coords.min(), coords.max()]

    # set alpha if in auto mode
    if alpha == 'auto':
        alpha = .5 if bg_map is None else 1

    # if cpal is given as string, translate to seaborn color palette
    if isinstance(cpal, str):
        cpal = sns.color_palette(cpal, len(labels))
    if isinstance(cpal, list):
        if len(cpal) < len(labels):
            raise ValueError('There are not enough colors in the color list.')
        try:
            cpal = sns.color_palette(cpal)
        except ValueError:  # fall back to xkcd color names
            cpal = sns.xkcd_palette(cpal)

    # initiate figure and 3d axes
    if figsize is not None:
        fig = plt.figure(figsize=figsize)
    else:
        fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d', xlim=limits, ylim=limits)
    ax.view_init(elev=elev, azim=azim)
    ax.set_axis_off()

    # plot mesh without data
    p3dcollec = ax.plot_trisurf(coords[:, 0], coords[:, 1], coords[:, 2],
                                triangles=faces, linewidth=0.,
                                antialiased=False, color='white')

    if bg_map is not None or labels is not None:
        face_colors = np.ones((faces.shape[0], 4))
        face_colors[:, :3] = .5 * face_colors[:, :3]

        if bg_map is not None:
            bg_data = bg_map
            if bg_data.shape[0] != coords.shape[0]:
                raise ValueError('The bg_map does not have the same number '
                                 'of vertices as the mesh.')
            bg_faces = np.mean(bg_data[faces], axis=1)
            bg_faces = bg_faces - bg_faces.min()
            bg_faces = bg_faces / bg_faces.max()
            bg_faces *= darkness
            face_colors = plt.cm.gray_r(bg_faces)

        # modify alpha values of background
        face_colors[:, 3] = alpha * face_colors[:, 3]

        # color the labels, either overriding or overlaying bg_map
        if labels is not None:
            for n_label, label in enumerate(labels):
                for n_face, face in enumerate(faces):
                    count = len(set(face).intersection(set(label)))
                    if count > 1:
                        if bg_on_labels:
                            face_colors[n_face, 0:3] = cpal[n_label] * face_colors[n_face, 0:3]
                        else:
                            face_colors[n_face, 0:3] = cpal[n_label]

        p3dcollec.set_facecolors(face_colors)

    return fig
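# A minimal smoke test for plot_surf_label (a sketch only: the tetrahedron
# mesh and the single-label tuple below are made up for illustration):
import numpy as np

coords = np.array([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
faces = np.array([[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]])
# faces sharing at least two of nodes {0, 1, 2} get the first palette color
fig = plot_surf_label(coords, faces, labels=([0, 1, 2],), cpal='bright')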
# imports assumed by this function
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns


def _do_classifyplot(df, out_file, title=None, size=None):
    """Plot using classification-based plot using seaborn.
    """
    metric_labels = {"fdr": "False discovery rate",
                     "fnr": "False negative rate"}
    metrics = [("fnr", "tpr"), ("fdr", "spc")]
    colors = ["light grey", "greyish"]
    data_dict = df.set_index(["sample", "caller", "vtype"]).T.to_dict()
    plt.ioff()
    sns.set(style='white')
    vtypes = sorted(df["vtype"].unique(), reverse=True)
    callers = sorted(df["caller"].unique())
    samples = sorted(df["sample"].unique())
    fig, axs = plt.subplots(len(vtypes) * len(callers), len(metrics))
    fig.text(.5, .95, title if title else "",
             horizontalalignment='center', size=14)
    for vi, vtype in enumerate(vtypes):
        sns.set_palette(sns.xkcd_palette([colors[vi]]))
        for ci, caller in enumerate(callers):
            for j, (metric, label) in enumerate(metrics):
                # each vtype contributes len(callers) rows; the original
                # indexed with len(vtypes), which only works when the two
                # counts happen to be equal
                cur_plot = axs[vi * len(callers) + ci][j]
                vals, labels = [], []
                for sample in samples:
                    cur_data = data_dict[(sample, caller, vtype)]
                    vals.append(cur_data[metric])
                    labels.append(cur_data[label])
                cur_plot.barh(np.arange(len(samples)), vals)
                all_vals = []
                for k, d in data_dict.items():
                    if k[-1] == vtype:
                        for m in metrics:
                            all_vals.append(d[m[0]])
                metric_max = max(all_vals)
                cur_plot.set_xlim(0, metric_max)
                pad = 0.1 * metric_max
                for ai, (val, label) in enumerate(zip(vals, labels)):
                    cur_plot.annotate(
                        label,
                        (pad + (0 if max(vals) > metric_max / 2.0 else max(vals)),
                         ai + 0.35),
                        va='center', size=7)
                if j == 0:
                    cur_plot.tick_params(axis='y', which='major', labelsize=8)
                    cur_plot.locator_params(nbins=len(samples) + 2, axis="y",
                                            tight=True)
                    cur_plot.set_yticklabels(samples, size=8, va="bottom")
                    cur_plot.set_title("%s: %s" % (vtype, caller),
                                       fontsize=12, loc="left")
                else:
                    cur_plot.get_yaxis().set_ticks([])
                if ci == len(callers) - 1:
                    cur_plot.tick_params(axis='x', which='major', labelsize=8)
                    cur_plot.get_xaxis().set_major_formatter(
                        FuncFormatter(lambda v, p: "%s%%" % (int(v) if round(v) == v else v)))
                    if vi == len(vtypes) - 1:
                        cur_plot.get_xaxis().set_label_text(metric_labels[metric], size=12)
                else:
                    cur_plot.get_xaxis().set_ticks([])
                    cur_plot.spines['bottom'].set_visible(False)
                cur_plot.spines['left'].set_visible(False)
                cur_plot.spines['top'].set_visible(False)
                cur_plot.spines['right'].set_visible(False)
    x, y = (6, len(vtypes) * len(callers) + 1 * 0.5 * len(samples)) if size is None else size
    fig.set_size_inches(x, y)
    fig.tight_layout(rect=(0, 0, 1, 0.95))
    plt.subplots_adjust(hspace=0.6)
    fig.savefig(out_file)
def prepare_and_plot_1_1(dataset_name):
    all_epsilons = [
        2.0, 1.5, 1.25, 1.0986122886681098, 1.0, 0.75,
        0.6931471805599453, 0.5, 0.1, 0.01
    ]
    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)
    safepub_path = path + "data/result/safepub_test/1_1/" + dataset_name
    safe_pub_match = "^norm_result_granularity_eps-"
    safepub_files = [
        file for file in os.listdir(safepub_path)
        if re.match(safe_pub_match, file)
    ]
    sc_path = path + "data/result/sc_test/1_1/" + dataset_name
    sc_match = "^norm_result_eps-"
    sc_files = [
        file for file in os.listdir(sc_path) if re.match(sc_match, file)
    ]
    sc_spec_path = path + "data/result/sc_spec_test/1_1/" + dataset_name
    sc_spec_match = "^norm_result_eps-"
    sc_spec_files = [
        file for file in os.listdir(sc_spec_path)
        if re.match(sc_spec_match, file)
    ]
    k_file = (path + "data/result/k-anonym_test/1_1/" + dataset_name
              + "/norm_result_k5_suppression.csv")
    plot_path = path + "data/result/plots/1_1_" + dataset_name + "_spec.jpg"

    models = []
    information_loss = []
    epsilons = []
    for file in sc_files:
        epsilon = float(re.split(sc_match + "|" + ".csv", file)[1])
        df = pd.read_csv(sc_path + '/' + file)
        data = list(df['sse'])
        information_loss += data
        models += ['MicroDP'] * len(data)
        epsilons += [epsilon] * len(data)
    for file in sc_spec_files:
        epsilon = float(re.split(sc_spec_match + "|" + ".csv", file)[1])
        df = pd.read_csv(sc_spec_path + '/' + file)
        data = list(df['sse'])
        information_loss += data
        models += ['MicroDP-800'] * len(data)
        epsilons += [epsilon] * len(data)
    for file in safepub_files:
        epsilon = float(re.split(safe_pub_match + "|" + ".csv", file)[1])
        df = pd.read_csv(safepub_path + '/' + file)
        data = list(df['sse'])
        information_loss += data
        models += ['SafePub'] * len(data)
        epsilons += [epsilon] * len(data)
    df = pd.read_csv(k_file)
    information_loss += [list(df['sse'])[0]] * len(all_epsilons)
    models += ['k-anonymisation'] * len(all_epsilons)
    epsilons += all_epsilons

    array = np.array([epsilons, information_loss]).T
    df = pd.DataFrame(array, columns=['ε', 'Information loss'])
    df['Model'] = models
    ax = sns.lineplot(x='ε', y='Information loss', hue='Model', data=df,
                      palette=sns.xkcd_palette([
                          'windows blue', 'dark blue', 'amber', 'faded green'
                      ]))
    ax.set(ylim=(0, 1.05))
    plt.show()
    # plt.savefig(plot_path)
    plt.clf()
# In[6]:

denver_loc = (-104.9903, 39.7392)
miami_loc = (-80.2089, 25.7753)
denver = daymet.get_daymet_singlepixel(longitude=denver_loc[0],
                                       latitude=denver_loc[1],
                                       years=[2012, 2013, 2014])
miami = daymet.get_daymet_singlepixel(longitude=miami_loc[0],
                                      latitude=miami_loc[1],
                                      years=[2012, 2013, 2014])


# In[9]:

sns.set_context("talk")
fig, ax1 = plt.subplots(1, figsize=(18, 10))

den_15day = denver.rolling(center=False, window=15).mean()
ax1.fill_between(den_15day.index, den_15day.tmin, den_15day.tmax,
                 alpha=0.4, lw=0, label='Denver',
                 color=sns.xkcd_palette(['faded green'])[0])
ax1.set_title('Denver vs Miami temps (15 day rolling mean)', fontsize=20)

miami_15day = miami.rolling(center=False, window=15).mean()
ax1.fill_between(miami_15day.index, miami_15day.tmin, miami_15day.tmax,
                 alpha=0.4, lw=0, label='Miami',
                 color=sns.xkcd_palette(['dusty purple'])[0])
ax1.set_ylabel(u'Temp. (°C)', fontsize=20)
fig.tight_layout()
plt.legend(fontsize=20)
df.loc[:, 'cap_BATT'] = df.apply(
    lambda x: x['cap_ELC_DIST'] + x['cap_ELC_CENTRAL'], axis=1)

#-----------------------------------------------------
# Aesthetics (style + context)
# https://seaborn.pydata.org/tutorial/aesthetics.html
#-----------------------------------------------------
resolution = 1000  # Resolution (DPI - dots per inch)
style = 'white'    # options: "white", "whitegrid", "dark", "darkgrid", "ticks"
context = 'talk'   # options: "paper", "notebook", "talk", "poster" (smallest -> largest)

# Series palette options
colorblind_palette = sns.color_palette('colorblind')  # https://seaborn.pydata.org/tutorial/color_palettes.html
xkcd_palette = sns.xkcd_palette(
    ["royal blue", "tangerine", "greyish", "faded green", "raspberry"])  # https://xkcd.com/color/rgb/
custom_palette = [(0.380, 0.380, 0.380), (0.957, 0.451, 0.125),
                  (.047, 0.149, 0.361), (0.847, 0.000, 0.067)]  # Custom palette

#-----------------------------------------------------
# Plotting Inputs
#-----------------------------------------------------
# x variables; all lists are expected to be the same length
x_var = "year"        # Needs to be a column in the DataFrame
x_label = "Year (-)"  # Note: keep short
x_convert = 1.0       # Multiplier to convert to display units
x_tick = []           # Ok to leave empty
x_lim = []            # Ok to leave empty
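# A sketch of how these settings would typically be consumed before plotting
# (sns.set_style / sns.set_context / sns.set_palette are the standard seaborn
# calls; the dpi at save time would come from `resolution`):
sns.set_style(style)
sns.set_context(context)
sns.set_palette(custom_palette)
# ... build the figure ...
# plt.savefig('figure.png', dpi=resolution)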
import logging
from typing import *

import seaborn

logger = logging.getLogger(__name__)

seaborn.set_palette(
    seaborn.xkcd_palette(
        ["windows blue", "amber", "faded green", "dusty purple"]))


def _init_mapping_ancestors():
    colors = ["windows blue", "amber", "faded green", "dusty purple"]
    ancestors = ["Archaea", "Actinobacteria", "Enterobacterales", "FCB group"]
    palette = seaborn.xkcd_palette(colors)
    return {x[0]: x[1] for x in zip(ancestors, palette)}

# def _init_mapping_ancestors():
#     colors = ["windows blue", "amber", "faded green", "dusty purple"]
#     ancestors = ["Archaea", "Actinobacteria", "Enterobacterales", "FCB group"]
#     color_pal = seaborn.color_palette("colorblind", 6).as_hex()
#     colors = ','.join(color_pal)
#     palette = seaborn.color_palette(color_pal)
#     return {x[0]: x[1] for x in zip(ancestors, palette)}


def _init_mapping_verified():
    colors = [  # NOTE: the source is truncated at this point
def prepare_and_plot_1_1_safepub(dataset_name):
    all_epsilons = [
        2.0, 1.5, 1.25, 1.0986122886681098, 1.0, 0.75,
        0.6931471805599453, 0.5, 0.1, 0.01
    ]
    path_in = os.getcwd()
    pattern = '^.*/thesis-data-anonymisation/'
    path = re.search(pattern, path_in).group(0)
    safepub_path = path + "data/result/safepub_test/1_1/" + dataset_name
    safe_pub_match = "^norm_result_granularity_eps-"
    safepub_files = [
        file for file in os.listdir(safepub_path)
        if re.match(safe_pub_match, file)
    ]
    k_file = (path + "data/result/k-anonym_test/1_1/" + dataset_name
              + "/norm_result_k5_suppression.csv")
    plot_path = path + "data/result/plots/1_1_safepub_" + dataset_name + ".jpg"

    models = []
    information_loss = []
    epsilons = []
    metrics = []
    for file in safepub_files:
        epsilon = float(re.split(safe_pub_match + "|" + ".csv", file)[1])
        df = pd.read_csv(safepub_path + '/' + file)
        data_disc = list(df['discernibility'])
        information_loss += data_disc
        metrics += ['Discernibility'] * len(data_disc)
        models += ['SafePub'] * len(data_disc)
        epsilons += [epsilon] * len(data_disc)
        data_ent = list(df['entropy'])
        information_loss += data_ent
        metrics += ['Non-uniform entropy'] * len(data_ent)
        models += ['SafePub'] * len(data_ent)
        epsilons += [epsilon] * len(data_ent)
    df = pd.read_csv(k_file)
    information_loss += [list(df['discernibility'])[0]] * len(all_epsilons)
    metrics += ['Discernibility'] * len(all_epsilons)
    models += ['k-anonymisation'] * len(all_epsilons)
    epsilons += all_epsilons
    information_loss += [list(df['entropy'])[0]] * len(all_epsilons)
    metrics += ['Non-uniform entropy'] * len(all_epsilons)
    models += ['k-anonymisation'] * len(all_epsilons)
    epsilons += all_epsilons

    array = np.array([epsilons, information_loss]).T
    df = pd.DataFrame(array, columns=['ε', 'Information loss'])
    df['Model'] = models
    df['Metric'] = metrics
    ax = sns.lineplot(x='ε', y='Information loss', hue='Model',
                      style='Metric', data=df,
                      palette=sns.xkcd_palette(['amber', 'faded green']))
    ax.set(ylim=(0, 1.05))
    # plt.show()
    plt.savefig(plot_path)
    plt.clf()
    return
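# prepare_and_plot_1_1 and prepare_and_plot_1_1_safepub repeat the same
# "parse epsilon from filename, read CSV, extend the long-format lists" loop;
# a shared helper could factor it out. A sketch only (the name `collect_runs`
# and its signature are hypothetical, not part of the original code):
def collect_runs(result_dir, files, match, column, model_name,
                 models, information_loss, epsilons):
    """Append one model's results to the long-format lists, in place."""
    for file in files:
        epsilon = float(re.split(match + "|" + ".csv", file)[1])
        df = pd.read_csv(result_dir + '/' + file)
        data = list(df[column])
        information_loss += data
        models += [model_name] * len(data)
        epsilons += [epsilon] * len(data)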
import random

import numpy as np
import seaborn as sns  # needed for sns.xkcd_palette below
from matplotlib import colors

import hilbert

color_list = ['cyan', 'goldenrod', 'seafoam green', 'light yellow', 'scarlet',
              'neon blue', 'barney purple', 'reddish orange', 'lemon', 'cerise',
              'light lime green', 'teal blue', 'bubblegum pink',
              'black', 'black', 'black', 'black', 'black', 'black', 'black',
              'black', 'black', 'black', 'black', 'black', 'black', 'black',
              'black', 'black', 'black', 'black', 'black', 'black', 'black',
              'black', 'black',
              'vermillion', 'amber', 'melon', 'purpleish', 'bright light blue',
              'strawberry', 'celadon']
n_colors = len(color_list)
palette = sns.xkcd_palette(color_list)
cmap = colors.ListedColormap(palette)
bounds = range(n_colors + 1)
norm = colors.BoundaryNorm(bounds, cmap.N)


def generate_random_color():
    idx = random.choice(range(n_colors))
    return idx


def plot_hilbert_curve(n, ax):
    locs = hilbert.generate_locations(n)
    for i in range(len(locs) - 1):  # xrange is Python 2 only
        start, finish = locs[i], locs[i + 1]
        xs, ys = zip(start, finish)
        ax.plot(xs, ys, 'white', alpha=0.4, lw=1)  # lw expects a number, not '1'
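# A sketch of how the ListedColormap/BoundaryNorm pair above might color a
# grid of color indices (the 8x8 random grid is made up for illustration;
# generate_random_color returns such indices one at a time):
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
grid = np.random.randint(0, n_colors, size=(8, 8))
ax.imshow(grid, cmap=cmap, norm=norm, interpolation='nearest')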
def _init_mapping_independence_conditions():
    colors = ["windows blue", "amber", "faded green"]
    conditions = ["Random", "Independent", "Fully dependent"]
    palette = seaborn.xkcd_palette(colors)
    return {x[0]: x[1] for x in zip(conditions, palette)}
def _init_mapping_archea_bacteria():
    colors = ["magenta", "windows blue"]
    name = ["Archaea", "Bacteria"]
    palette = seaborn.xkcd_palette(colors)
    return {x[0]: x[1] for x in zip(name, palette)}
def _init_mapping_stop_codons():
    colors = ["windows blue", "amber", "faded green"]
    conditions = ["TAG", "TGA", "TAA"]
    palette = seaborn.xkcd_palette(colors)
    return {x[0]: x[1] for x in zip(conditions, palette)}
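# The mapping helpers above all share one pattern; a generic version (a sketch
# only, the name `_init_mapping` is hypothetical) shows the idea, and
# dict(zip(...)) is a simpler equivalent of the comprehension:
def _init_mapping(names, colors):
    """Map each name to its xkcd color."""
    return dict(zip(names, seaborn.xkcd_palette(colors)))

# e.g. _init_mapping(["TAG", "TGA", "TAA"],
#                    ["windows blue", "amber", "faded green"])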
def plot_surf_stat_map(coords, faces, stat_map=None,
                       elev=0, azim=0, cmap='coolwarm',
                       threshold=None, bg_map=None,
                       bg_on_stat=False, alpha='auto',
                       darkness=1, vmax=None, symmetric_cbar="auto",
                       figsize=None, labels=None, label_cpal=None,
                       mask=None, mask_lenient=None, **kwargs):
    import numpy as np
    import matplotlib.pyplot as plt
    import matplotlib.tri as tri
    from mpl_toolkits.mplot3d import Axes3D
    import seaborn as sns

    # load mesh and derive axes limits
    faces = np.array(faces, dtype=int)
    limits = [coords.min(), coords.max()]

    # set alpha if in auto mode
    if alpha == 'auto':
        alpha = .5 if bg_map is None else 1

    # if cmap is given as string, translate to matplotlib cmap
    if isinstance(cmap, str):
        cmap = plt.cm.get_cmap(cmap)

    # initiate figure and 3d axes
    if figsize is not None:
        fig = plt.figure(figsize=figsize)
    else:
        fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d', xlim=limits, ylim=limits)
    ax.view_init(elev=elev, azim=azim)
    ax.set_axis_off()

    # plot mesh without data
    p3dcollec = ax.plot_trisurf(coords[:, 0], coords[:, 1], coords[:, 2],
                                triangles=faces, linewidth=0.,
                                antialiased=False, color='white')

    # where mask is indices of nodes to include:
    if mask is not None:
        cmask = np.zeros(len(coords))
        cmask[mask] = 1
        cutoff = 2  # include triangles in cortex only if ALL nodes in mask
        if mask_lenient:  # include triangles in cortex if ANY node is in mask
            cutoff = 0
        fmask = np.where(cmask[faces].sum(axis=1) > cutoff)[0]

    # If depth_map and/or stat_map are provided, map these onto the surface.
    # set_facecolors function of Poly3DCollection is used as passing the
    # facecolors argument to plot_trisurf does not seem to work.
    if bg_map is not None or stat_map is not None:
        face_colors = np.ones((faces.shape[0], 4))
        face_colors[:, :3] = .5 * face_colors[:, :3]

        if bg_map is not None:
            bg_data = bg_map
            if bg_data.shape[0] != coords.shape[0]:
                raise ValueError('The bg_map does not have the same number '
                                 'of vertices as the mesh.')
            bg_faces = np.mean(bg_data[faces], axis=1)
            bg_faces = bg_faces - bg_faces.min()
            bg_faces = bg_faces / bg_faces.max()
            bg_faces *= darkness
            face_colors = plt.cm.gray_r(bg_faces)

        # modify alpha values of background
        face_colors[:, 3] = alpha * face_colors[:, 3]

        if stat_map is not None:
            stat_map_data = stat_map
            stat_map_faces = np.mean(stat_map_data[faces], axis=1)

            # Call _get_plot_stat_map_params to derive symmetric vmin and vmax
            # and colorbar limits depending on symmetric_cbar settings
            cbar_vmin, cbar_vmax, vmin, vmax = \
                _get_plot_stat_map_params(stat_map_faces, vmax,
                                          symmetric_cbar, kwargs)

            if threshold is not None:
                kept_indices = np.where(abs(stat_map_faces) >= threshold)[0]
                stat_map_faces = stat_map_faces - vmin
                stat_map_faces = stat_map_faces / (vmax - vmin)
                if bg_on_stat:
                    face_colors[kept_indices] = cmap(stat_map_faces[kept_indices]) * face_colors[kept_indices]
                else:
                    face_colors[kept_indices] = cmap(stat_map_faces[kept_indices])
            else:
                stat_map_faces = stat_map_faces - vmin
                stat_map_faces = stat_map_faces / (vmax - vmin)
                if bg_on_stat:
                    if mask is not None:
                        face_colors[fmask] = cmap(stat_map_faces)[fmask] * face_colors[fmask]
                    else:
                        face_colors = cmap(stat_map_faces) * face_colors
                else:
                    if mask is not None:
                        face_colors[fmask] = cmap(stat_map_faces)[fmask] * face_colors[fmask]
                    else:
                        face_colors = cmap(stat_map_faces)

        if labels is not None:
            '''
            labels requires a tuple of label/s, each a list/array of node
            indices.
            Color palette for the labels:
            - if label_cpal is None, outlines will be black
            - if it is a color palette name, a different color for each label
              will be generated
            - if it is a list of rgb values or color names, these will be used
              (valid color names from http://xkcd.com/color/rgb/)
            '''
            if label_cpal is not None:
                if isinstance(label_cpal, str):
                    cpal = sns.color_palette(label_cpal, len(labels))
                if isinstance(label_cpal, list):
                    if len(label_cpal) < len(labels):
                        raise ValueError('There are not enough colors in the color list.')
                    try:
                        cpal = sns.color_palette(label_cpal)
                    except ValueError:  # fall back to xkcd color names
                        cpal = sns.xkcd_palette(label_cpal)

            for n_label, label in enumerate(labels):
                for n_face, face in enumerate(faces):
                    count = len(set(face).intersection(set(label)))
                    if (count > 0) & (count < 3):
                        if label_cpal is None:
                            face_colors[n_face, 0:3] = sns.xkcd_palette(["black"])[0]
                        else:
                            face_colors[n_face, 0:3] = cpal[n_label]

        p3dcollec.set_facecolors(face_colors)

    return fig
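# Smoke-test sketch for plot_surf_stat_map on a toy tetrahedron mesh (all
# values made up; this assumes _get_plot_stat_map_params is defined alongside
# the function, as referenced in its body):
import numpy as np

coords = np.array([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
faces = np.array([[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]])
stat = np.array([0.1, -0.5, 0.8, 0.3])  # one value per vertex
fig = plot_surf_stat_map(coords, faces, stat_map=stat, cmap='coolwarm')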
experiment = "drosophila-4-rdpg-sbm"
run = 2
config = utils.load_config(base_path, experiment, run)
sbm_df = utils.load_pickle(base_path, experiment, run, "sbm_master_df")
tsbm_df = utils.load_pickle(base_path, experiment, run, "tsbm_df")
tsbm_df["sim_ind"] = 0

#%% [markdown]
# ### Plot the noise observed in SBM model fitting
#%%
# Plotting setup
plt.style.use("seaborn-white")
sns.set_context("talk", font_scale=1.5)
plt_kws = dict(s=75, linewidth=0, legend="brief")
sbm_cmap = sns.light_palette("purple", as_cmap=True)
rdpg_cmap = sns.xkcd_palette(["grass green"])

# Plot 1
plt.figure(figsize=(22, 12))
sns.scatterplot(
    data=sbm_df,
    x="n_params_gmm",
    y="mse",
    hue="n_block_try",
    size="n_components_try",
    alpha=0.5,
    palette=sbm_cmap,
    **plt_kws,
)
plt.xlabel("# Params (GMM params for SBMs)")
plt.ylabel("MSE")
import itertools

import seaborn as sns
from matplotlib import rcParams

# These are the colors. Notice how this is programmed:
# you initialize your colors with
#   colorset = palette()
# then you can cycle through them:
#   color = next(colorset)
# If you want your set to be reset, just create a new palette() instance!
# This way the color cycles do not interfere with each other.
color_names = ['windows blue', "pale red", "faded green", "amber",
               'dark green', 'dark fuchsia', 'browny orange', 'puke green',
               'dark royal blue', 'dusty purple', 'red orange', 'dark grey',
               'blue grey', 'bright purple', 'chocolate brown', 'shit',
               'pistachio', 'stone', 'asparagus', 'butter']
colors = sns.xkcd_palette(color_names)
palette = lambda: itertools.cycle(sns.xkcd_palette(color_names))

fontsize_labels = 26  # size used in latex document
# newer matplotlib expects a plain string here, not a list
rcParams['text.latex.preamble'] = r'\usepackage[cmbright]{sfmath}'
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = 'cmbright'
rcParams['font.weight'] = "light"
rcParams['text.usetex'] = True
rcParams['figure.autolayout'] = True
rcParams['font.size'] = fontsize_labels
rcParams['axes.labelsize'] = fontsize_labels
rcParams['xtick.labelsize'] = fontsize_labels
rcParams['ytick.labelsize'] = fontsize_labels
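# Usage sketch of the palette() factory described above: each call returns a
# fresh, independent cycle (the series labels here are hypothetical):
colorset = palette()
for label in ['a', 'b', 'c']:
    color = next(colorset)  # successive xkcd colors, wrapping around at the end
    # plt.plot(x, series[label], color=color, label=label)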
import matplotlib
matplotlib.rcParams.update({'font.sans-serif': 'Helvetica',
                            'axes.labelsize': 10,
                            'xtick.labelsize': 6,
                            'ytick.labelsize': 6,
                            'axes.titlesize': 10})
import matplotlib.pyplot as plt
import seaborn as sns

color_names = ["windows blue", "amber", "crimson",
               "faded green", "dusty purple", "greyish"]
colors = sns.xkcd_palette(color_names)
sns.set(style="white", palette=sns.xkcd_palette(color_names),
        color_codes=False)

# +
X_columns = ['total_travel_time', 'total_travel_cost',
             # 'total_travel_distance', 'cross_bay',
             'household_size', 'num_kids',
             'cars_per_licensed_drivers', 'gender']
y_column = data['mode_id']
# -