def displacement_plot(centered, limits=None, style=None):
    u"""Draw a displacement plot of centered object tracks using ggplot.

    Every object's track is drawn as a path through its (cX, cY) positions,
    ordered by frame. Tracks whose last frame is earlier than the last frame
    in the data get a black dot at their final position, unless that is
    switched off through ``style``.

    params:
        centered (pd.DataFrame): needs cX, cY, Object, Frame columns,
            probably produced by calling center(). The frame passed in is
            left unmodified.
        limits (real): Sets the limits of the scales to a square window
            showing ±limits on each axis. A falsy value (None, 0) leaves
            the scales automatic.
        style (Iterable): Collection of strings. The only value acted on is
            'no-terminal-dot' (which does not label the end of tracks which
            terminate early). 'theme-bw' is accepted but currently has no
            effect: theme_bw is always applied.

    Returns:
        g (gg.ggplot): Plot object
    """
    style = {} if style is None else style

    # Work on a new frame so the caller's DataFrame is not altered, and use
    # sort_values(): DataFrame.sort() no longer exists in current pandas.
    centered = centered.assign(Object=centered['Object'].map(str))
    centered = centered.sort_values(['Frame', 'Object'])

    g = (gg.ggplot(centered, gg.aes(x='cX', y='cY', color='Object')) +
         gg.geom_path(size=0.3))
    g += gg.theme_bw()  # if 'theme-bw' in style else gg.theme_seaborn()

    if limits:
        g = g + gg.ylim(-limits, limits) + gg.xlim(-limits, limits)

    if 'no-terminal-dot' not in style:
        max_frame = centered['Frame'].max()
        # Last frame of every object; keep only the objects that stop early.
        endframe = centered.groupby('Object')['Frame'].max()
        endframe = endframe[endframe != max_frame].reset_index()
        # Recover the cX/cY position at each early-terminating track's end.
        endframe = endframe.merge(centered, on=['Object', 'Frame'])
        # we should check if endframe is empty before adding it:
        # https://github.com/yhat/ggplot/issues/425
        if not endframe.empty:
            g += gg.geom_point(data=endframe, color='black', size=1)

    return g
def get_plot(X, ids, cluster_dct):
    """Print a 2-D t-SNE scatter plot of the rows of X that have a cluster label.

    Rows of ``X`` whose id is not a key of ``cluster_dct`` are dropped. The
    remaining rows are embedded with t-SNE and drawn as points coloured by
    their (stringified) cluster label. Shapes, embedding extents and the
    label vector are printed along the way, and the chart itself is printed.

    Args:
        X: 2-D array indexed as ``X[i, :]``; row ``i`` belongs to ``ids[i]``.
        ids: Iterable of ids, aligned with the rows of ``X``.
        cluster_dct: Mapping from id to cluster label.

    Returns:
        None.

    Raises:
        ValueError: If none of ``ids`` is present in ``cluster_dct``.
    """
    # Keep only the (row index, id) pairs that have a cluster assignment.
    kept = [(i, nid) for i, nid in enumerate(ids) if nid in cluster_dct]
    if not kept:
        # Fail with a clear message instead of an IndexError on X.shape[1].
        raise ValueError(
            "none of the given ids appear in cluster_dct; nothing to plot")

    X = np.array([X[i, :] for i, _ in kept])
    kept_ids = [nid for _, nid in kept]
    ids = np.array(kept_ids)
    print(X.shape, ids.shape)

    feat_cols = ['pixel' + str(i) for i in range(X.shape[1])]
    df_tsne = pd.DataFrame(X, columns=feat_cols)

    tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    tsne_results = tsne.fit_transform(X)
    df_tsne['x-tsne'] = tsne_results[:, 0]
    df_tsne['y-tsne'] = tsne_results[:, 1]
    print(max(tsne_results[:, 0]), min(tsne_results[:, 0]),
          max(tsne_results[:, 1]), min(tsne_results[:, 1]))

    # Look labels up with the original ids: converting ids to a numpy array
    # may coerce their type and no longer match the dictionary keys.
    y = np.array([cluster_dct[nid] for nid in kept_ids])
    print("y", y, y.shape)

    # Labels are stringified so they are treated as discrete colours.
    df_tsne['label'] = y
    df_tsne['label'] = df_tsne['label'].apply(str)

    chart = (ggplot(df_tsne, aes(x='x-tsne', y='y-tsne', color='label'))
             + geom_point(size=5, alpha=0.5)
             + theme_bw()
             + ggtitle("tSNE dimensions of steam graph embeddings"))
    print(chart)
def _setup_ggplot(self): try: from ggplot import theme_bw t = theme_bw() for k, v in t.get_rcParams().iteritems(): mpl.rcParams[k] = v def plot_post_hook(): for ax in self.figure.axes: t.post_plot_callback(ax) self._post_plot_hook = plot_post_hook except ImportError: pass except AttributeError: pass
def plotAlignmentStat(input, output):
    """Plot an alignment summary as a flipped bar chart using ggplot.

    Reads the CSV at ``input`` (numbers may use "," as thousands separator),
    keeps the rows at positions 2, 4 and 5, melts them to long format keyed
    by the ``category`` column, and saves a bar chart with one bar per
    sample to ``output``. The intermediate frames and the split output path
    are printed for inspection.

    Args:
        input: CSV source accepted by ``pandas.read_csv``. The name shadows
            the builtin but is kept so keyword callers keep working.
        output: Destination passed to the plot's ``save`` method.

    Returns:
        None.
    """
    df = pd.read_csv(input, thousands=",")
    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(df.dtypes)
    print(df)

    # Get certain rows.
    # NOTE(review): presumably the "Uniquely mapped reads %", "Number of
    # reads mapped to multiple loci %" and "Reads unmapped: too short %"
    # rows -- confirm against the input layout.
    df = df.iloc[[2, 4, 5]]
    dfm = pd.melt(df,
                  id_vars=['category'],
                  var_name='sampleName',
                  value_name='Value')
    print(dfm)

    from ggplot import ggplot, geom_bar, aes, theme_bw, ggtitle, coord_flip

    p = (ggplot(dfm, aes(x='sampleName', weight='Value', fill='category'))
         + geom_bar()
         + theme_bw()
         + ggtitle("Alignment Summary stats")
         + coord_flip())

    dirname, filename = os.path.split(output)
    print(dirname)
    print(filename)
    p.save(output)
# Reshape the bias percentiles to long format, one chunk per estimation
# method, then plot them. Relies on df, brks, colors, title, species and gg
# being defined earlier in the script.
import pandas as pd  # for pd.concat below; harmless if already imported

# (method label, percentile columns belonging to that method)
method_columns = [
    #total-based
    ('(Total-Expected Total)/Expected Total', brks[:5]),
    #enhancement-based
    ('(Enhanc-Expected Enhanc)/Expected Enhanc', brks[5:10]),
    #enhancements + full sample background
    ('(Enhanc+Expected Backgr-Expected Total)/Expected Total', brks[10:]),
]

stacked_parts = []
for method_label, stat_cols in method_columns:
    dftmp = df[['n_sub'] + stat_cols].melt(id_vars=['n_sub'],
                                           value_vars=stat_cols,
                                           var_name='stat',
                                           value_name='value')
    dftmp['method'] = method_label
    stacked_parts.append(dftmp)

# DataFrame.append was removed in pandas 2.0; concat stacks the chunks the
# same way (row order and per-chunk index are preserved).
df_stacked = pd.concat(stacked_parts)
# Characters 1-2 of the stat column name are used as the percentile number.
df_stacked['percentile'] = ['{0}th%'.format(a[1:3]) for a in df_stacked['stat']]

#plots
#compare all 3
plt1 = (gg.ggplot(df_stacked, gg.aes(x='n_sub', y='value', color='percentile'))
        + gg.geom_line()
        + gg.xlab('N drives')
        + gg.ylab('Bias (%)')
        + gg.theme_bw()
        + gg.scale_color_manual(values=colors)
        + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
        + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
        + gg.facet_wrap('method')
        + gg.ggtitle('Bias comparison {0}'.format(title)))
plt1.save(filename=r'..\charts\drivebias_laqn_{0}.png'.format(species),
          width=None, height=None, dpi=300)

#plot total alone for presentation
plt2 = (gg.ggplot(df_stacked[df_stacked['method'] == '(Total-Expected Total)/Expected Total'],
                  gg.aes(x='n_sub', y='value', color='percentile'))
        + gg.geom_line()
        + gg.xlab('N drives')
        + gg.ylab('Bias (%)')
        + gg.ylim(-100, 100)
        + gg.scale_color_manual(values=colors)
        + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
        + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
        + gg.ggtitle('Bias comparison {0}'.format(title)))
# Larger font for slides; _rcParams is a private attribute of the theme.
t = gg.theme_bw()
t._rcParams['font.size'] = 16
plt2 = plt2 + t
plt2.save(filename=r'..\charts\drivebias_laqn_{0}_total.png'.format(species),
          width=None, height=None, dpi=300)

#plot enhancement alone for presentation
# NOTE(review): this selects the "enhancement + expected background" method,
# not the enhancement-only one -- confirm that is intended.
plt3 = (gg.ggplot(df_stacked[df_stacked['method'] == '(Enhanc+Expected Backgr-Expected Total)/Expected Total'],
                  gg.aes(x='n_sub', y='value', color='percentile'))
        + gg.geom_line()
        + gg.xlab('N drives')
        + gg.ylab('Bias (%)')
        + gg.ylim(-100, 100)
        + gg.scale_color_manual(values=colors)
        + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
        + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
        + gg.ggtitle('Bias comparison {0}'.format(title)))
t = gg.theme_bw()
t._rcParams['font.size'] = 16
plt3 = plt3 + t
plt3.save(filename=r'..\charts\drivebias_laqn_{0}_enhanc.png'.format(species),
          width=None, height=None, dpi=300)
) group by pod_id_location """) qry_job = bqclient.query(qry_str, location='EU', job_config=job_config) #save result as dataframe df = qry_job.to_dataframe() df_long = df.melt(id_vars=['pod_str', 'pod_idx'], value_vars=['p05', 'p25', 'med', 'p75', 'p95'], var_name='yparam', value_name='value') #plots #plt1 = gg.ggplot(df, gg.aes(x='date_UTC',y='no2_ppb'))+gg.geom_line()+gg.xlab('Time')+gg.ylab('NO2 (ppb)')+gg.theme_bw()+gg.facet_wrap('pod_id_location',scales='free_y') #plt1.save(filename = r'.\charts\ulezpodts.png', width=None, height=None, dpi=200) plt2 = gg.ggplot(df_long, gg.aes( x='pod_str', y='value', color='yparam')) + gg.geom_point() + gg.xlab( 'pod') + gg.ylab('NO2 (as % of median)') + gg.theme_bw() + gg.theme( figure_size=(12, 6)) + gg.scale_x_discrete() plt2.save(filename=r'.\charts\ulezpodvar.png', width=10, height=6, dpi=200) #repeat for mobile data using segid instead of podid where N = 10 and N = 40 #repeat for stationary data at mobile times qry_str = (""" with cte0 as ( --all data, ULEZ pods with 6000 hrs select date_UTC, a.pod_id_location, no2_ppb from AQMesh.NO2_scaled_hightimeres_ppb_20180901_20190630 a join AQMesh.NO2_site_metadata_v2_1_20180901_20190630 b on a.pod_id_location=b.pod_id_location where ULEZ = true and no2_ppb <> -999 and a.pod_id_location in --limit to pods with at least 6000 hours
(vcfdf['TestBias']=='Pass') & (vcfdf['CHROM']==reference) ]['Pi'])) return testwindows # Generate new dataframe with analyses performed per window if options.graphics == True: print "Analysing by "+ str(windowsize) +"sliding windows and generating plots" windowed_df = pd.DataFrame({'window':sorted(list(set(vcfdf['window']))), 'MaxMinor':windowMax(sorted(list(set(vcfdf['window'])))), 'Pi':windowPi(sorted(list(set(vcfdf['window']))))}) # Now try and plot graph p_MaxMinor = gg.ggplot(gg.aes('window', 'MaxMinor'),data=windowed_df) +gg.geom_point() +gg.theme_bw() +gg.labs(x="Genome Position (bp; windowsize="+ str(windowsize) +")", y="Minor Variant Frequency (%)") +gg.ggtitle(vcfoutput + "\n Valid Minor Variant Sites :" + str(len(minorvar))) # Plot Nucleotide Diversity (Pi) along genome p_pi =gg.ggplot(gg.aes('window', 'Pi'),data=windowed_df) +gg.geom_point() +gg.theme_bw() +gg.labs(x="Genome Position (bp; windowsize="+ str(windowsize) +")", y="Mean nucleotide diversity (" + u"\u03c0" +")") +gg.scale_y_continuous(expand=(0,0),limits=(0, windowed_df['Pi'].max(axis=0)+0.001)) +gg.ggtitle(vcfoutput + "\n Genome-wide Mean Nucleotide Diversity (" +u"\u03c0"+ ") :" +str(round(gw_Pi,6))) #p_pi # Facetted plot (still not sorted y axes labels yet) windowed_df_melt = pd.melt(windowed_df, id_vars=['window']) p_combi = gg.ggplot(gg.aes('window', 'value',colour='variable'),data=windowed_df_melt) p_combi = p_combi + gg.geom_point(colour='variable') + gg.facet_grid('variable',scales='free_y')+gg.theme_bw() +gg.labs(x="Genome Position (bp; windowsize="+ str(windowsize) +")") # Print graphs to .png p_combi.save(vcfinput + ".MinorVar_combo.png") p_MaxMinor.save(vcfinput + ".MinorVar.png")
import sys
from pandas.plotting import register_matplotlib_converters

# Let matplotlib handle pandas datetime values on plot axes.
register_matplotlib_converters()

species = 'no2'

# Load the melted background data; every column is read with an explicit
# dtype and the timestamp is parsed in a separate step below.
column_types = {
    'timestamp': 'str',
    'vidperiod': 'str',
    'type': 'str',
    'param': 'str',
    'value': 'float64',
}
df = pd.read_csv(r'.\charts\background_data_melted.csv',
                 index_col='idx',
                 dtype=column_types)
print(df[:10])
df['timestamp'] = pd.to_datetime(df['timestamp'],
                                 format="%Y-%m-%d %H:%M:%S")

# One line per 'type', one facet per 'vidperiod', y axis limited to 0-100.
plot_title = 'Regional background comparison {0}'.format(species)
plt1 = gg.ggplot(df, gg.aes(x='timestamp', y='value', color='type'))
plt1 = plt1 + gg.geom_line()
plt1 = plt1 + gg.xlab('Time')
plt1 = plt1 + gg.ylab('Concentration')
plt1 = plt1 + gg.theme_bw()
plt1 = plt1 + gg.ylim(0, 100)
plt1 = plt1 + gg.facet_wrap('vidperiod', scales='free')
plt1 = plt1 + gg.ggtitle(plot_title)

# The output name carries the species and today's date (e.g. 2019Jul04).
date_tag = dt.datetime.today().strftime('%Y%b%d')
chart_file = r'.\charts\background_{0}_ggtest_{1}.png'.format(species, date_tag)
plt1.save(filename=chart_file, width=None, height=None, dpi=300)
for x in repeatedKnnResults], columns = ['p', 'k', 'cvAccuracy', 'testAccuracy']) ggdata = pandas.concat( [DataFrame({'log10(p)' : log10(knnResultsSimplified.p), 'k' : knnResultsSimplified.k.apply(int), 'type' : 'cv', 'Accuracy' : knnResultsSimplified.cvAccuracy}), DataFrame({'log10(p)' : log10(knnResultsSimplified.p), 'k' : knnResultsSimplified.k.apply(int), 'type' : 'test', 'Accuracy' : knnResultsSimplified.testAccuracy})], axis = 0 ) ggobj = ggplot.ggplot( data = ggdata, aesthetics = ggplot.aes(x='log10(p)', y='Accuracy', color='type', group='type', linetype='type') ) ggobj += ggplot.theme_bw() # ggobj += ggplot.scale_x_log() ggobj += ggplot.geom_point(alpha=0.6) ggobj += ggplot.stat_smooth() ggobj += ggplot.facet_wrap('k') print(ggobj)
def _firms_png_path(stem, decision, title_pop_val):
    """Return the output PNG path for the plot identified by *stem*."""
    file_name = '%s_%s_%s_%s.png' % (stem, decision, title_pop_val,
                                     parameters.final_Time)
    return os.path.join(parameters.OUTPUT_PATH, file_name)


def _firms_remove_stale(path):
    """Delete a plot file left by a previous run, if there is one."""
    if os.path.isfile(path):
        os.remove(path)


def _firms_save_time_series(data, column, title, ylabel, path, x_breaks,
                            x_labels, width, height):
    """Save a ggplot line chart of one column of *data* against time."""
    plot_data = (gg.ggplot(data, gg.aes('time', column))
                 + gg.geom_line()
                 + gg.scale_y_continuous(breaks=11)
                 + gg.scale_x_discrete(breaks=x_breaks, labels=x_labels)
                 + gg.ggtitle(title)
                 + gg.xlab('Years')
                 + gg.ylab(ylabel)
                 + gg.theme_bw())
    _firms_remove_stale(path)
    gg.ggsave(plot_data, path, width=width, height=height, units="in")


def _firms_save_column_comparison(data, columns, title, ylabel, path,
                                  x_ticks, x_labels, width, height, dpi):
    """Save a pandas/matplotlib line chart comparing several columns."""
    axes = data[list(columns)].plot(title=title)
    axes.set_xlabel('Years')
    axes.set_ylabel(ylabel)
    axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    axes.set_xticks(x_ticks)
    axes.set_xticklabels(x_labels)
    # set_axis_bgcolor() was removed in matplotlib 2.2; set_facecolor() is
    # its replacement. Fall back for matplotlib versions that predate it.
    if hasattr(axes, 'set_facecolor'):
        axes.set_facecolor('w')
    else:
        axes.set_axis_bgcolor('w')
    fig = axes.get_figure()
    fig.set_size_inches(width, height)
    _firms_remove_stale(path)
    fig.savefig(path, dpi=dpi)


def firms_dynamics_plot(decision):
    """Plot the firm-level time series of a simulation run and save them as PNGs.

    Reads ``temp_general_firms_pop_<pop_redutor>_decision_<decision>_time_
    <final_Time>.txt`` from ``parameters.OUTPUT_PATH``, optionally drops the
    rows before ``parameters.time_to_cut_plots``, and writes eight charts to
    the same directory: six single-series ggplot line charts and two
    matplotlib charts comparing a pair of columns. An existing file with the
    same name is removed before each chart is saved.

    Args:
        decision: Value interpolated into the input and output file names.

    Returns:
        None.
    """
    data = pd.read_csv(os.path.join(
        parameters.OUTPUT_PATH,
        "temp_general_firms_pop_%s_decision_%s_time_%s.txt" %
        (parameters.pop_redutor, decision, parameters.final_Time)),
                       sep=",",
                       header=None,
                       decimal=",").astype(float)

    # The file has no header row; name the columns by position.
    data.columns = [
        'time', 'total_firms', 'average_output', 'average_age',
        'average_size', 'new_firms', 'exit_firms', 'max_size',
        'total_effort', 'average_effort'
    ]

    # Optionally drop the initial (burn-in) periods.
    if parameters.time_to_cut_plots > 0:
        data = data.loc[
            (data['time']).astype(int) >= parameters.time_to_cut_plots, :]

    # Part of every output file name.
    title_pop_val = float(parameters.pop_redutor) * 100

    # X-axis breaks every 12 time steps, labelled with the step divided by 12.
    list_of_years_division = list(
        range(int(data['time'].min()), int(data['time'].max()),
              12)) + [data['time'].max() + 1]
    list_of_years = [int(i / 12) for i in list_of_years_division]

    # Figure geometry shared by all charts; dpi is used by matplotlib only.
    dpi_var_plot = 700
    width_var_plot = 15
    height_var_plot = 10

    # (column, plot title, y-axis label, output file stem)
    time_series = [
        ('total_firms', 'Total firms', 'Total of Firms',
         'temp_general_total_firms'),
        ('average_output', 'Average of output', 'Units',
         'temp_general_average_output'),
        ('average_age', 'Average of age of firms', 'Age of Firms',
         'temp_general_average_age'),
        ('average_size', 'Average of size of firms', 'Units',
         'temp_general_average_size'),
        ('new_firms', 'Number of new firms', 'Units',
         'temp_general_number_of_new_firms'),
        ('exit_firms', 'Number of firms out', 'Units',
         'temp_general_number_of_firms_out'),
    ]
    for column, title, ylabel, stem in time_series:
        _firms_save_time_series(
            data, column, title, ylabel,
            _firms_png_path(stem, decision, title_pop_val),
            list_of_years_division, list_of_years,
            width_var_plot, height_var_plot)

    # (columns, plot title, y-axis label, output file stem)
    # NOTE(review): the first title says "maximum effort" but the column
    # plotted is total_effort -- confirm which is intended.
    comparisons = [
        (('average_effort', 'total_effort'),
         'Average and maximum effort of employees',
         'Values units of effort',
         'temp_average_and_maximum_effort_of_firms_out'),
        (('average_size', 'max_size'),
         'Average and maximum size firms',
         'Number of employees',
         'temp_average_size_and_maximum_of_firms_out'),
    ]
    for columns, title, ylabel, stem in comparisons:
        _firms_save_column_comparison(
            data, columns, title, ylabel,
            _firms_png_path(stem, decision, title_pop_val),
            list_of_years_division, list_of_years,
            width_var_plot, height_var_plot, dpi_var_plot)
df_along = df_a.melt(id_vars=['site_str', 'n_passes'], value_vars=['p05', 'p25', 'p50', 'p75', 'p95'], var_name='yparam', value_name='value') print(c['name']) #print(df_a) #plots #split percentiles into different charts, all sites #plt1 = gg.ggplot(df_along, gg.aes(x='n_passes',y='value',color='site_str'))+gg.geom_point()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.xlim(0,100)+gg.facet_wrap('yparam',scales='free_y') #plt1.save(filename = r'..\charts\bias_{0}.png'.format(c['name']), width=None, height=None, dpi=200) #n_segments plt2 = gg.ggplot( df_a, gg.aes(x='n_passes', y='n_segments', color='site_str') ) + gg.geom_line() + gg.xlab('n, number drive periods') + gg.ylab( 'Sample size (number of drive patterns)') + gg.theme_bw() + gg.xlim( 0, 35) + gg.ylim(0, 2000) plt2.save(filename=r'..\charts\n_segments_{0}_{1}.png'.format( c['name'], dtstamp), width=None, height=None, dpi=200) #combine percentiles, split sites plt3 = gg.ggplot( df_along, gg.aes(x='n_passes', y='value', color='yparam') ) + gg.geom_line() + gg.xlab('n, number of drive periods') + gg.ylab( 'Sample error (%)') + gg.theme_bw() + gg.xlim(0, 35) + gg.ylim( -100, 100) + gg.geom_hline( y=25, linetype="dashed", color="gray") + gg.geom_hline( y=-25, linetype="dashed", color="gray") + gg.geom_vline( x=[10, 15], linetype="dashed",
for x in repeatedKnnResults], columns = ['p', 'k', 'cvAccuracy', 'testAccuracy']) ggdata = pandas.concat( [DataFrame({'log10(p)' : log10(knnResultsSimplified.p), 'k' : knnResultsSimplified.k.apply(int), 'type' : 'cv', 'Accuracy' : knnResultsSimplified.cvAccuracy}), DataFrame({'log10(p)' : log10(knnResultsSimplified.p), 'k' : knnResultsSimplified.k.apply(int), 'type' : 'test', 'Accuracy' : knnResultsSimplified.testAccuracy})], axis = 0 ) ggobj = ggplot.ggplot( data = ggdata, aesthetics = ggplot.aes(x='log10(p)', y='Accuracy', color='type', group='type', linetype='type') ) ggobj += ggplot.theme_bw() # ggobj += ggplot.scale_x_log() ggobj += ggplot.geom_point(alpha=0.6) ggobj += ggplot.stat_smooth() ggobj += ggplot.facet_wrap('k') print ggobj