def test_ndim_2_facet_wrap(self):
    """Check the facet layout produced by wrapping diamonds on 'cut' and 'clarity'.

    Expects ``nrow`` 7, ``ncol`` 6 and ``ndim`` 40 on ``p.facets``.
    """
    base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
    p = base + gg.facet_wrap('cut', 'clarity')
    layout = p.facets
    self.assertEqual(layout.nrow, 7)
    self.assertEqual(layout.ncol, 6)
    self.assertEqual(layout.ndim, 40)
def test_ndim_2_facet_wrap_subplots(self):
    """Check that ``make_facets`` returns a 7x6 subplot array for cut x clarity."""
    base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
    p = base + gg.facet_wrap('cut', 'clarity')
    _, axes_grid = p.make_facets()
    rows, cols = axes_grid.shape
    self.assertEqual(rows, 7)
    self.assertEqual(cols, 6)
def plot_bin_dists(df, bin_def="distance_bin <= 500"):
    """Build a histogram of ``R2`` faceted by ``distance_bin``.

    Args:
        df: DataFrame queried with *bin_def*; must provide the ``R2`` and
            ``distance_bin`` columns used by the plot.
        bin_def: Query string passed to ``df.query``; also used as the title.

    Returns:
        The assembled ggplot object (not rendered or saved here).

    Side effects:
        Overwrites the global matplotlib ``figure.figsize`` rcParam.
    """
    plt.rcParams['figure.figsize'] = np.array([16, 12]) * 0.65
    selected = df.query(bin_def)
    chart = gp.ggplot(gp.aes(x='R2'), data=selected)
    chart = chart + gp.geom_histogram(fill='coral')
    chart = chart + gp.facet_wrap("distance_bin")
    chart = chart + gp.theme_seaborn(context='talk')
    chart = chart + gp.ggtitle(bin_def)
    return chart
def _ggplot(df, out_file):
    """Plot faceted items with the ggplot wrapper on top of matplotlib.

    Relabels the ``variant.type``, ``category`` and ``caller`` columns of
    *df* in place, then saves a bar chart faceted by variant type and
    category to *out_file*.

    XXX Not yet functional
    """
    import ggplot as gg

    # Unknown variant types / categories raise KeyError; unknown callers map to None.
    translators = (
        ("variant.type", lambda key: vtype_labels[key]),
        ("category", lambda key: cat_labels[key]),
        ("caller", lambda key: caller_labels.get(key, None)),
    )
    for column, translate in translators:
        df[column] = [translate(value) for value in df[column]]

    chart = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    chart = chart + gg.geom_bar()
    chart = chart + gg.facet_wrap("variant.type", "category")
    chart = chart + gg.theme_seaborn()
    gg.ggsave(chart, out_file)
def plotHistogramMeans(hist, fileName):
    """Plot every row of *hist* as a line in its own facet and save the figure.

    Args:
        hist: 2-D array-like with a ``shape`` attribute; one row per cluster,
            one column per histogram bin.
        fileName: File name of the image, written under ``./IMG/``.

    The rows are numbered 0..n-1 in an ``ID`` column, the table is melted to
    long form (``ID``, ``variable``, ``value``) and drawn with one facet per ID.
    """
    num_clust = hist.shape[0]
    # Column vector of row ids; plain ndarray because np.mat no longer exists
    # in NumPy 2.0.
    ids = np.arange(num_clust).reshape(num_clust, 1)
    histD = np.concatenate((ids, hist), axis=1)
    # list(...) is needed on Python 3, where range() cannot be added to a list.
    column_names = ['ID'] + list(range(0, hist.shape[1]))
    Data = pd.DataFrame(histD, columns=column_names)
    Melted = pd.melt(Data, id_vars=['ID'])
    pv = (ggplot.ggplot(ggplot.aes(x='variable', y='value'), data=Melted)
          + ggplot.geom_line()
          + ggplot.facet_wrap("ID"))
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
def plot_after_transmission_results(data, path_names):
    """Plot life months per scenario for each varied transmission input.

    Reads ``transmission_rate_multiplier_required_inputs.xlsx`` from the
    ``Input files`` folder under ``path_names['transmission']``, collects the
    input values that differ from the base case, pairs them with the
    ``LMs_`` / ``RUN_NAME_`` columns of ``data[var]['popstats']`` and saves a
    line chart faceted by variable.

    Args:
        data: Mapping iterated by key; keys containing 'HIV+', 'HIV-' or
            'Incidence' are used, and ``data[key]['popstats']`` must expose
            ``LMs_`` and ``RUN_NAME_`` columns.
        path_names: Mapping with a 'transmission' entry holding a directory path.

    Returns:
        None. The chart is written under
        ``<transmission>/Input files/Plots for final runs``.
    """
    # import input data for transmission analysis
    var_and_val = pd.DataFrame(columns=['x', 'Variable'], index=range(0, 12))
    plot_lm = pd.DataFrame(
        columns=['x', 'Life Months', 'Scenario', 'Variable'],
        index=range(0, 24))
    data_in = pd.read_excel(
        os.path.join(path_names['transmission'], 'Input files',
                     'transmission_rate_multiplier_required_inputs.xlsx'))
    col = [
        'Yearly incidence in MSM',
        'Number of HIV uninfected individuals (HRG size)',
        'Number of HIV infected individuals in primary cohort at t=0'
    ]
    # Short labels, index-aligned with `col` and `base_val`.
    col_adj = ['Incidence', 'Uninfected', 'Infected']
    base_val = [0.009, 2960000, 136400]
    for i in range(len(col)):
        # First row label whose value differs from the base value of this column.
        idx = data_in.loc[data_in.loc[:, col[i]] != base_val[i],
                          col[i]].index.values[0]
        # .loc slices are inclusive: rows idx..idx+3 of the input are copied
        # into rows idx-1..idx+2 of var_and_val.
        var_and_val.loc[idx - 1:idx + 3 - 1,
                        'x'] = data_in.loc[idx:idx + 3, col[i]].values
        var_and_val.loc[idx - 1:idx + 3 - 1, 'Variable'] = col_adj[i]
    row_idx = -2
    # Per-variable counters, ordered like col_adj; incremented before use.
    var_idx = [-1, -1, -1]
    for var in data:
        if 'HIV+' in var:
            var_idx[2] += 1
            plot_lm.loc[row_idx:row_idx + 1, 'x'] = var_and_val.loc[
                var_and_val['Variable'] == col_adj[2], 'x'].values[var_idx[2]]
            plot_lm.loc[row_idx:row_idx + 1, 'Variable'] = var_and_val.loc[
                var_and_val['Variable'] == col_adj[2],
                'Variable'].values[var_idx[2]]
            plot_lm.loc[
                row_idx:row_idx + 1,
                'Life Months'] = data[var]['popstats'].loc[:, 'LMs_'].values
            plot_lm.loc[
                row_idx:row_idx + 1,
                'Scenario'] = data[var]['popstats'].loc[:, 'RUN_NAME_'].values
        elif 'HIV-' in var:
            var_idx[1] += 1
            plot_lm.loc[row_idx:row_idx + 1, 'x'] = var_and_val.loc[
                var_and_val['Variable'] == col_adj[1], 'x'].values[var_idx[1]]
            plot_lm.loc[row_idx:row_idx + 1, 'Variable'] = var_and_val.loc[
                var_and_val['Variable'] == col_adj[1],
                'Variable'].values[var_idx[1]]
            plot_lm.loc[
                row_idx:row_idx + 1,
                'Life Months'] = data[var]['popstats'].loc[:, 'LMs_'].values
            plot_lm.loc[
                row_idx:row_idx + 1,
                'Scenario'] = data[var]['popstats'].loc[:, 'RUN_NAME_'].values
        elif 'Incidence' in var:
            var_idx[0] += 1
            plot_lm.loc[row_idx:row_idx + 1, 'x'] = var_and_val.loc[
                var_and_val['Variable'] == col_adj[0], 'x'].values[var_idx[0]]
            plot_lm.loc[row_idx:row_idx + 1, 'Variable'] = var_and_val.loc[
                var_and_val['Variable'] == col_adj[0],
                'Variable'].values[var_idx[0]]
            plot_lm.loc[
                row_idx:row_idx + 1,
                'Life Months'] = data[var]['popstats'].loc[:, 'LMs_'].values
            plot_lm.loc[
                row_idx:row_idx + 1,
                'Scenario'] = data[var]['popstats'].loc[:, 'RUN_NAME_'].values
        # NOTE(review): nesting of this statement was reconstructed from a
        # flattened source. With row_idx starting at -2 and the increment at
        # the end of the loop body, the first key addresses rows -2..-1 of a
        # 0..23 index -- confirm the intended position/start value.
        row_idx += 2
    # plot
    save_path = os.path.join(path_names['transmission'], r'Input files',
                             r'Plots for final runs')
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    # NOTE(review): the output name 'Comparison of ' ends with a space and has
    # no subject or extension -- presumably unfinished; confirm.
    (ggplot(aes(x='x', y='Life Months', color='Scenario'), plot_lm) +
     geom_line() + facet_wrap('Variable', scales='free')).save(
         os.path.join(save_path, 'Comparison of '))
    return
import pandas as pd
import numpy as np
# from source import view_and_print_output
import ggplot as gg

# Gather the coarse lambda/dropout search results of every network
# configuration (num_layers, num_nodes) into one long table.
frames = []
for num_layers, num_nodes in [(2, 50), (2, 100), (2, 150), (2, 200),
                              (4, 50), (4, 100), (4, 150), (4, 200)]:
    file_coarse = '../../data/coarse_lambda_dropout_' + str(num_layers) + '_' + str(num_nodes) + '.txt'
    newdata = pd.read_csv(file_coarse)
    newdata = newdata.sort_values(by='validation error', ascending=True)
    newdata['lambda'] = np.log10(newdata['lambda'])
    # Rank by validation error, scaled to [0, 1) and cubed; used as the colour.
    newdata['index'] = (np.arange(len(newdata), dtype='float')/len(newdata))**3
    # Facet label: sortable numeric prefix followed by the two settings.
    newdata['config'] = str(num_layers * 100 + num_nodes) + ' ' + str(num_layers) + ' ' + str(num_nodes)
    frames.append(newdata)

# DataFrame.append was removed in pandas 2.0 (and re-copied the whole frame
# on every iteration); a single concat gives the same rows and index.
df = pd.concat(frames)

print(df.sort_values(by='validation error', ascending=False).head(20))

p = gg.ggplot(gg.aes(x='lambda', y='dropout prob', color='index'), data=df) + \
    gg.geom_point() + \
    gg.xlab('lambda') + \
    gg.ylab('dropout prob') + \
    gg.scale_x_continuous(limits=(-5, 2)) + \
    gg.facet_wrap('config')
print(p)

# Conclusion: ignore dropout
"""Plot target variable as time series."""
import get_data
from ggplot import aes, geom_line, facet_wrap, ggplot

if __name__ == "__main__":
    # 'cap' over 'datetime', one translucent line per 'date' group,
    # one panel per 'name'; written out as a PDF.
    frame = get_data.get_all_data()
    mapping = aes('datetime', 'cap', group='date')
    chart = ggplot(frame, mapping)
    chart = chart + geom_line(alpha=0.2)
    chart = chart + facet_wrap('name')
    chart.save('../output/time_series.pdf')
def plot_transmission_results(tx_results, percentage_decline, save_path,
                              path_names):
    """Save percentage-decline and transmission charts for the sensitivity runs.

    Args:
        tx_results: Mapping keyed by run-folder name; each value is indexed
            with ``['monthly'][run]['transmissions']``. A deep copy is used.
        percentage_decline: Mapping with the same kind of keys, giving one
            percentage-decline value per folder.
        save_path: Directory the images are written to.
        path_names: Mapping with a 'transmission' entry (directory holding
            ``Input files/transmission_rate_multiplier_required_inputs.xlsx``).

    Returns:
        None; all output is written as image files under *save_path*.
    """
    #%% what are inputs?
    # transmission results
    # There'll be a folder called 'Runs prepared for ...'
    # all the folders inside that folder will have a CEPAC results folder.
    # tx_data is a dictionary and will have two keys, 'monthly' and 'popstats'
    # 'monthly' key will only have primary transmissions data
    tx_data = deepcopy(tx_results)
    t = 120  # number of leading rows taken from each 'transmissions' series
    total_var = 3  # number of varied inputs (see col_adj)
    total_val = 4  # values per varied input

    # percentage decline
    # this is also dictionary of percentage decline values for each folder
    # having cepac results

    # save_path exact folder where you want to save your images

    # path_names will have paths to transmissions and sensitivity directories

    #%% plot percentage decline
    # generate an environment object first
    # lets go for line plot
    data_plot = pd.DataFrame(
        columns=['x', 'Percentage decline', 'Transmissions', 'Variable'],
        index=range(0, total_var * total_val))
    data_in = pd.read_excel(
        os.path.join(path_names['transmission'], 'Input files',
                     'transmission_rate_multiplier_required_inputs.xlsx'))
    col = [
        'Incidence rate per 100 PY specific to high-risk group 1',
        'HIV uninfected individuals in high-risk group 1',
        'HIV infected individuals in high-risk group 1'
    ]
    # Short labels, index-aligned with `col` and `base_val`.
    col_adj = ['Incidence', 'Uninfected', 'Infected']
    # Rounded so the comparison against the 0.9 base value below is exact.
    data_in[col[0]] = data_in[col[0]].round(1)
    base_val = [np.float64(0.9), 2960000, 136400]
    # Percentage-decline values grouped by the input column they belong to.
    y1_values = {col[0]: [], col[1]: [], col[2]: []}
    for var in percentage_decline:
        if 'HIV+' in var:
            y1_values[col[2]].append(percentage_decline[var])
        elif 'HIV-' in var:
            y1_values[col[1]].append(percentage_decline[var])
        elif 'Incidence' in var:
            y1_values[col[0]].append(percentage_decline[var])
    for i in range(len(col)):
        # First row label whose value differs from the base value of this column.
        idx = data_in.loc[data_in.loc[:, col[i]] != base_val[i],
                          col[i]].index.values[0]
        # .loc slices are inclusive: input rows idx..idx+3 land in rows
        # idx-1..idx+2 of data_plot.
        data_plot.loc[idx - 1:idx + 3 - 1,
                      'x'] = data_in.loc[idx:idx + 3, col[i]].values
        data_plot.loc[idx - 1:idx + 3 - 1, 'Variable'] = col_adj[i]
        data_plot.loc[idx - 1:idx + 3 - 1,
                      'Percentage decline'] = y1_values[col[i]]

    # plot
    # Only rows with a percentage decline of at most 200 are drawn.
    df_float = data_plot.loc[data_plot.loc[:, 'Percentage decline'] <= 200, :]
    (ggplot(aes(x='x', y='Percentage decline'), df_float) + geom_line() +
     facet_wrap('Variable', scales='free')).save(
         os.path.join(save_path, 'Percentage decline'))
    del df_float

    #%% visualizing transmissions
    # index = range(time * number of values for each variable * number of variables)
    def set_abc(run, var_idx, var_name, var_value_idx):
        # Fills one block of t rows of data_plot_tx for a single run.
        # `var` is not a parameter: it is read from the enclosing loop
        # variable at call time, and data_plot_tx is the enclosing local.
        # NOTE(review): the block starts at (var_idx - 1) * t, which is
        # negative for var_idx == 0 -- confirm the intended offset.
        # set variable names
        data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                         'Variable'] = var_name
        # set variable value
        data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                         'Value'] = data_plot.loc[
                             data_plot.loc[:, 'Variable'] == var_name,
                             'x'].values[var_value_idx]
        if 'RunA' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunA tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values
        elif 'RunB' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunB tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values
        elif 'RunC' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunC tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values

    data_plot_tx = pd.DataFrame(
        index=range(t * total_var * total_val),
        columns=['Variable', 'Value', 'RunA tx', 'RunB tx', 'RunC tx'])
    var_idx = -1
    # Per-variable value counters, ordered like col_adj; incremented before use.
    var_val_idx = [-1, -1, -1]
    for var in tx_data:
        # Incremented for every key, including keys matching no branch below.
        var_idx += 1
        if 'HIV+' in var:
            var_val_idx[2] += 1
            var_name = col_adj[2]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[2])
        elif 'HIV-' in var:
            var_val_idx[1] += 1
            var_name = col_adj[1]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[1])
        elif 'Incidence' in var:
            var_val_idx[0] += 1
            var_name = col_adj[0]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[0])
        else:
            continue
    # Time column: counts 0..t-1 and restarts for every block of t rows.
    data_plot_tx['t'] = 0
    t_float = -1
    for row in data_plot_tx.index:
        if t_float == t - 1:
            t_float = -1
        t_float += 1
        data_plot_tx.loc[row, 't'] = t_float

    #%% plots for individual runs
    run_col = ['RunA tx', 'RunB tx', 'RunC tx']
    inci = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Incidence', :]
    inf = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Infected', :]
    uninf = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] ==
                             'Uninfected', :]
    # Four images per run column: all variables together, then one per variable.
    for i in run_col:
        (ggplot(aes(x='t', y=i, color='Value'), data_plot_tx) + geom_line() +
         facet_wrap('Variable', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_transmissions for all variable all values')))
        (ggplot(aes(x='t', y=i), inci) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_plots for individual values of incidence')))
        (ggplot(aes(x='t', y=i), inf) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i +
                     r'_plots for individual values of infected population')))
        (ggplot(aes(x='t', y=i), uninf) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i +
                     '_plots for individual values of uninfected population')))

    #%% compare runs ABC
    # Long-format table per variable: one block of t rows per (value, run).
    data_plot_abc = {}
    for var in col_adj:
        float_df = pd.DataFrame(index=range(0, t * total_var * total_val),
                                columns=['t', 'Value', 'Transmissions', 'Run'])
        insert_idx = -1
        for val in data_plot.loc[data_plot.loc[:, 'Variable'] == var, 'x']:
            var_df = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] ==
                                      var, :]
            var_df = var_df.reset_index(drop=True)
            var_val_df = var_df.loc[var_df.loc[:, 'Value'] == val, :]
            var_val_df = var_val_df.reset_index(drop=True)
            for c in ['RunA tx', 'RunB tx', 'RunC tx']:
                insert_idx += 1
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Run'] = c
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Transmissions'] = var_val_df.loc[:, c].values
                # Same assignment as two statements above (kept as in the original).
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Run'] = c
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Value'] = val
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             't'] = np.arange(t)
        # The stored copy drops rows containing NaN; the plot below uses the
        # un-dropped frame.
        data_plot_abc[var] = float_df.dropna()
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line() + facet_wrap('Value', scales='free') +
         ggtitle(var)).save(
             os.path.join(
                 save_path,
                 str(var + '_comparison of transmissions in runs ABC')))

    #%% compare runs BC
    for var in data_plot_abc:
        # Everything except run A.
        float_df = data_plot_abc[var].loc[
            data_plot_abc[var].loc[:, 'Run'] != 'RunA tx', :]
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line(alpha=0.2) + facet_wrap('Value', scales='free') +
         stat_smooth(method='loess', se=False) + ggtitle(var)).save(
             os.path.join(save_path,
                          str(var + '_comparison of transmissions in runs BC')))
    return
import sys
from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

species = 'no2'

# Column types for the melted background data; 'idx' becomes the index.
column_types = {
    'timestamp': 'str',
    'vidperiod': 'str',
    'type': 'str',
    'param': 'str',
    'value': 'float64'
}
df = pd.read_csv(r'.\charts\background_data_melted.csv',
                 index_col='idx',
                 dtype=column_types)
print(df[:10])
df['timestamp'] = pd.to_datetime(df['timestamp'], format="%Y-%m-%d %H:%M:%S")

# plots: value over time, coloured by type, one free-scaled panel per vidperiod
plt1 = gg.ggplot(df, gg.aes(x='timestamp', y='value', color='type'))
plt1 = plt1 + gg.geom_line()
plt1 = plt1 + gg.xlab('Time')
plt1 = plt1 + gg.ylab('Concentration')
plt1 = plt1 + gg.theme_bw()
plt1 = plt1 + gg.ylim(0, 100)
plt1 = plt1 + gg.facet_wrap('vidperiod', scales='free')
plt1 = plt1 + gg.ggtitle('Regional background comparison {0}'.format(species))
# optional, currently disabled: +gg.theme(axis_text_x=gg.element_text(angle=20))

# File name carries the species and today's date, e.g. 2020Jan31.
date_tag = dt.datetime.today().strftime('%Y%b%d')
out_name = r'.\charts\background_{0}_ggtest_{1}.png'.format(species, date_tag)
plt1.save(filename=out_name, width=None, height=None, dpi=300)
geom_point(color='steelblue') + xlab('Engine Displacement') + ylab('Average MPG') + ggtitle('Gasoline cars')) #%% step 10 grouped_by_year = vehicles_non_hybrid.groupby(['year']) avg_grouped_by_year = grouped_by_year['displ', 'comb08'].agg([np.mean]) #%% step 11 avg_grouped_by_year['year'] = avg_grouped_by_year.index melted_avg_grouped_by_year = pd.melt(avg_grouped_by_year, id_vars='year') from ggplot import facet_wrap p = ggplot(aes(x='year', y='value', color='variable_0'), data=melted_avg_grouped_by_year) p + geom_point() + facet_wrap('variable_0') #%% Section Investigating the makes and models of automobiles with Python # ------ step 1, 2 ------------------ pd.unique(vehicles_non_hybrid.cylinders) vehicles_non_hybrid.cylinders = vehicles_non_hybrid.cylinders.astype('float') pd.unique(vehicles_non_hybrid.cylinders) vehicles_non_hybrid_4 = vehicles_non_hybrid[(vehicles_non_hybrid.cylinders == 4.0)] #%% step 3 import matplotlib.pyplot as plt %matplotlib inline grouped_by_year_4_cylinder = vehicles_non_hybrid_4.groupby(['year']).make.nunique() fig = grouped_by_year_4_cylinder.plot()
def test_facet_wrap_ncol(self):
    """Wrapping diamonds on 'cut' with ``ncol=2`` must give a 3x2 layout."""
    base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
    p = base + gg.facet_wrap('cut', ncol=2)
    layout = p.facets
    self.assertEqual(layout.nrow, 3)
    self.assertEqual(layout.ncol, 2)
tile(w_from_figure_wh_ratio, norm(data)), '%s-layer-acts-%s-%s-(i=%s)' % (img_desc, layer, show_tuple_tight(data.shape), batch_i), ) conv_layers = filter(lambda (layer, acts): len(acts.data.shape) == 4, net.blobs.items()) fc_layers = filter(lambda (layer, acts): len(acts.data.shape) != 4, net.blobs.items()) # Plot conv acts for layer, acts in conv_layers: plot_conv_acts(layer, acts) # Plot fc acts df = pd.concat([ pd.DataFrame({'act': acts.data[batch_i], 'layer': layer}).reset_index() for layer, acts in fc_layers ]) plot_gg(gg_layer( gg.ggplot(df, gg.aes(y='act', x='index')), gg.geom_point(alpha=.5), gg.facet_wrap(x='layer', scales='free'), gg.ggtitle('%s layer acts fc/prob points (i=%s)' % (img_desc, batch_i)), )) plot_gg(gg_layer( gg.ggplot(df, gg.aes(x='act')), gg.geom_histogram(bins=25, size=0), gg.facet_wrap(x='layer', scales='free'), gg.scale_y_log(), gg.ylim(low=0.1), gg.ggtitle('%s layer acts fc/prob histo (i=%s)' % (img_desc, batch_i)), ))
# Installing ggplot requires numpy and scipy; the installation is error-prone.
# Upgrade pip first so .whl installs do not fail. Note: a .whl file must keep
# its original name, and should not be downloaded with Xunlei (Thunder).
# pip install --upgrade setuptools
# Install numpy and scipy; on Windows they need compiling, so prebuilt .whl
# packages can be downloaded from http://www.lfd.uci.edu/~gohlke/pythonlibs/
# pip install .whl
# On Windows, VC++ 14.0 is required: download "Visual C++ Build Tools 2015"
# from http://landinghub.visualstudio.com/visual-cpp-build-tools
# Install ggplot:
# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ggplot

# Scatter / line plots
import ggplot as gp

meat = gp.meat  # sample data bundled with ggplot
beef_mapping = gp.aes(
    x='date',       # x-axis column
    y='beef',       # y-axis column
    color='beef')   # colour column
p = gp.ggplot(beef_mapping, data=meat)  # data: the data set to plot
p + gp.geom_line()   # line chart
p + gp.geom_point()  # scatter plot

# Faceted plot
diamond_scatter = gp.ggplot(gp.aes(x='carat', y='price', color='color'),
                            data=gp.diamonds)
diamond_scatter + gp.geom_point() + gp.facet_wrap('cut')

# Histogram
price_plot = gp.ggplot(gp.aes(x='price'), data=gp.diamonds)
price_plot + gp.geom_histogram()
for x in repeatedKnnResults], columns = ['p', 'k', 'cvAccuracy', 'testAccuracy']) ggdata = pandas.concat( [DataFrame({'log10(p)' : log10(knnResultsSimplified.p), 'k' : knnResultsSimplified.k.apply(int), 'type' : 'cv', 'Accuracy' : knnResultsSimplified.cvAccuracy}), DataFrame({'log10(p)' : log10(knnResultsSimplified.p), 'k' : knnResultsSimplified.k.apply(int), 'type' : 'test', 'Accuracy' : knnResultsSimplified.testAccuracy})], axis = 0 ) ggobj = ggplot.ggplot( data = ggdata, aesthetics = ggplot.aes(x='log10(p)', y='Accuracy', color='type', group='type', linetype='type') ) ggobj += ggplot.theme_bw() # ggobj += ggplot.scale_x_log() ggobj += ggplot.geom_point(alpha=0.6) ggobj += ggplot.stat_smooth() ggobj += ggplot.facet_wrap('k') print ggobj
def test_ndim_2_facet_wrap(self):
    """Check the facet layout produced by wrapping diamonds on 'cut' and 'clarity'.

    Expects ``nrow`` 7, ``ncol`` 6 and ``ndim`` 40 on ``p.facets``.
    """
    base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
    p = base + gg.facet_wrap('cut', 'clarity')
    layout = p.facets
    self.assertEqual(layout.nrow, 7)
    self.assertEqual(layout.ncol, 6)
    self.assertEqual(layout.ndim, 40)
for x in repeatedKnnResults], columns = ['p', 'k', 'cvAccuracy', 'testAccuracy']) ggdata = pandas.concat( [DataFrame({'log10(p)' : log10(knnResultsSimplified.p), 'k' : knnResultsSimplified.k.apply(int), 'type' : 'cv', 'Accuracy' : knnResultsSimplified.cvAccuracy}), DataFrame({'log10(p)' : log10(knnResultsSimplified.p), 'k' : knnResultsSimplified.k.apply(int), 'type' : 'test', 'Accuracy' : knnResultsSimplified.testAccuracy})], axis = 0 ) ggobj = ggplot.ggplot( data = ggdata, aesthetics = ggplot.aes(x='log10(p)', y='Accuracy', color='type', group='type', linetype='type') ) ggobj += ggplot.theme_bw() # ggobj += ggplot.scale_x_log() ggobj += ggplot.geom_point(alpha=0.6) ggobj += ggplot.stat_smooth() ggobj += ggplot.facet_wrap('k') print(ggobj)
ax.legend(["Entries", "Exits"])
ax.set_ylabel("Entries/exits per hour (1e6 is a million)")
ax.set_xlabel("Hour (0 is midnight, 12 is noon, 23 is 11pm)")
ax.set_xlim(0, 23)

# Summary statistics split by rain / no rain.
# .copy() makes the subset an independent frame, so adding columns below
# does not raise SettingWithCopyWarning.
turnstile_rain = turnstile_weather[["rain", "ENTRIESn_hourly", "EXITSn_hourly"]].copy()
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining", "not raining")
turnstile_rain.groupby("rain2").describe()

# Density of log10(entries + 1), one panel per rain state.
turnstile_rain = turnstile_weather[["rain", "ENTRIESn_hourly", "EXITSn_hourly"]].copy()
# +1 keeps hours with zero entries finite under log10.
turnstile_rain["ENTRIESn_hourly_log10"] = np.log10(turnstile_rain["ENTRIESn_hourly"] + 1)
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining", "not raining")
set1 = brewer2mpl.get_map('Set1', 'qualitative', 3).mpl_colors
plot = gg.ggplot(turnstile_rain, gg.aes(x="ENTRIESn_hourly_log10", color="rain2")) + \
    gg.geom_density() + \
    gg.facet_wrap("rain2", scales="fixed") + \
    gg.scale_colour_manual(values=set1) + \
    gg.xlab("log10(entries per hour)") + \
    gg.ylab("Number of turnstiles") + \
    gg.ggtitle("Entries per hour whilst raining and not raining")
# Bare expression: relies on the interactive session to display the plot.
plot

# Reference histogram of normally distributed samples (fixed seed).
np.random.seed(42)
data = pd.Series(np.random.normal(loc=180, scale=40, size=600))
data.hist()

p = turnstile_weather["ENTRIESn_hourly"].hist()
pylab.suptitle("Entries per hour across all stations")
pylab.xlabel("Entries per hour")
pylab.ylabel("Number of occurrences")
"insignificant": coefficients[feature]["unsignificant"]
}
# NOTE(review): this fragment starts inside a dict literal (presumably
# `values_dict`) and uses `feature`, so it presumably sits in a per-feature
# loop outside this view -- the nesting could not be reconstructed; confirm.
# Turn the per-feature dict into a long (variable, value) table tagged with
# the feature name.
df = pd.DataFrame.from_dict(values_dict, orient='index')
df = df.transpose()
df = pd.melt(df)
df['feature'] = feature
dfs_to_concat.append(df)
master_df = pd.concat(dfs_to_concat)

# histogram: coefficient values, coloured by variable, one panel per feature
p = ggplot(aes(x='value', fill='variable', color='variable'), data=master_df)
p += geom_histogram(bins=25, alpha=0.5)
p += scale_x_continuous(limits=(-25, 25))
p += ggtitle("sarimax coefficient magnitude distribution")
p += facet_wrap("feature", ncol=3, scales="free")
p += labs(x=" ", y=" ")

# visuals
t = theme_gray()
# Font settings are written into the theme's private _rcParams mapping.
t._rcParams['font.size'] = 10
t._rcParams['font.family'] = 'monospace'
p += t
p.save("arima_1/" + "histogram.png")

# boxplot: coefficient values per variable
p = ggplot(aes(x='variable', y='value'), data=master_df)
p += geom_boxplot()
p += scale_y_continuous(limits=(-25, 25))
p += ggtitle("sarimax coefficient magnitudes")
]] turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining", "not raining") turnstile_rain.groupby("rain2").describe() turnstile_rain = turnstile_weather[[ "rain", "ENTRIESn_hourly", "EXITSn_hourly" ]] turnstile_rain["ENTRIESn_hourly_log10"] = np.log10( turnstile_rain["ENTRIESn_hourly"] + 1) turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining", "not raining") set1 = brewer2mpl.get_map('Set1', 'qualitative', 3).mpl_colors plot = gg.ggplot(turnstile_rain, gg.aes(x="ENTRIESn_hourly_log10", color="rain2")) + \ gg.geom_density() + \ gg.facet_wrap("rain2", scales="fixed") + \ gg.scale_colour_manual(values=set1) + \ gg.xlab("log10(entries per hour)") + \ gg.ylab("Number of turnstiles") + \ gg.ggtitle("Entries per hour whilst raining and not raining") plot np.random.seed(42) data = pd.Series(np.random.normal(loc=180, scale=40, size=600)) data.hist() p = turnstile_weather["ENTRIESn_hourly"].hist() pylab.suptitle("Entries per hour across all stations") pylab.xlabel("Entries per hour") pylab.ylabel("Number of occurrences")
def test_ndim_2_facet_wrap_subplots(self):
    """Check that ``make_facets`` returns a 7x6 subplot array for cut x clarity."""
    base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
    p = base + gg.facet_wrap('cut', 'clarity')
    _, axes_grid = p.make_facets()
    rows, cols = axes_grid.shape
    self.assertEqual(rows, 7)
    self.assertEqual(cols, 6)
# Disabled chart: split percentiles into different charts, all sites
#plt1 = gg.ggplot(df_along, gg.aes(x='n_passes',y='value',color='site_str'))+gg.geom_point()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.xlim(0,100)+gg.facet_wrap('yparam',scales='free_y')
#plt1.save(filename = r'..\charts\bias_{0}.png'.format(c['name']), width=None, height=None, dpi=200)

# n_segments: sample size against the number of drive periods, one line per site
plt2 = gg.ggplot(df_a, gg.aes(x='n_passes', y='n_segments', color='site_str'))
plt2 = plt2 + gg.geom_line()
plt2 = plt2 + gg.xlab('n, number drive periods')
plt2 = plt2 + gg.ylab('Sample size (number of drive patterns)')
plt2 = plt2 + gg.theme_bw()
plt2 = plt2 + gg.xlim(0, 35)
plt2 = plt2 + gg.ylim(0, 2000)
segments_file = r'..\charts\n_segments_{0}_{1}.png'.format(c['name'], dtstamp)
plt2.save(filename=segments_file, width=None, height=None, dpi=200)

# combine percentiles, split sites: sample error per percentile, one panel
# per site, with dashed guides at +/-25 % and at n = 10 and n = 15
plt3 = gg.ggplot(df_along, gg.aes(x='n_passes', y='value', color='yparam'))
plt3 = plt3 + gg.geom_line()
plt3 = plt3 + gg.xlab('n, number of drive periods')
plt3 = plt3 + gg.ylab('Sample error (%)')
plt3 = plt3 + gg.theme_bw()
plt3 = plt3 + gg.xlim(0, 35)
plt3 = plt3 + gg.ylim(-100, 100)
plt3 = plt3 + gg.geom_hline(y=25, linetype="dashed", color="gray")
plt3 = plt3 + gg.geom_hline(y=-25, linetype="dashed", color="gray")
plt3 = plt3 + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
plt3 = plt3 + gg.scale_color_manual(values=colors)
plt3 = plt3 + gg.facet_wrap('site_str')
percentiles_file = r'..\charts\percentiles_{0}_{1}.png'.format(c['name'], dtstamp)
plt3.save(filename=percentiles_file, width=None, height=None, dpi=200)
import pandas as pd  # pd.concat replaces the removed DataFrame.append below

# Reshape the three groups of bias columns (brks[:5], brks[5:10], brks[10:])
# into one long table with 'stat', 'value' and a 'method' label per group.
#total-based
dftmp = df[['n_sub']+brks[:5]].melt(id_vars=['n_sub'],value_vars=brks[:5], var_name = 'stat',value_name = 'value')
dftmp['method']=['(Total-Expected Total)/Expected Total']*dftmp['n_sub'].size
stacked_parts = [dftmp]
#enhancement-based
dftmp = df[['n_sub']+brks[5:10]].melt(id_vars=['n_sub'],value_vars=brks[5:10], var_name = 'stat',value_name = 'value')
dftmp['method']=['(Enhanc-Expected Enhanc)/Expected Enhanc']*dftmp['n_sub'].size
stacked_parts.append(dftmp)
#enhancements + full sample background
dftmp = df[['n_sub']+brks[10:]].melt(id_vars=['n_sub'],value_vars=brks[10:], var_name = 'stat',value_name = 'value')
dftmp['method']=['(Enhanc+Expected Backgr-Expected Total)/Expected Total']*dftmp['n_sub'].size
stacked_parts.append(dftmp)
# DataFrame.append was removed in pandas 2.0; one concat keeps the same row
# order and (non-reset) index as the chained appends did.
df_stacked = pd.concat(stacked_parts)
# Percentile label built from characters 1-2 of the stat name.
df_stacked['percentile']=['{0}th%'.format(a[1:3]) for a in df_stacked['stat']]

#plots
#compare all 3
plt1 = gg.ggplot(df_stacked, gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.facet_wrap('method')+gg.ggtitle('Bias comparison {0}'.format(title))
plt1.save(filename = r'..\charts\drivebias_laqn_{0}.png'.format(species), width=None, height=None, dpi=300)

#plot total alone for presentation
plt2 = gg.ggplot(df_stacked[df_stacked['method']=='(Total-Expected Total)/Expected Total'], gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.ylim(-100,100)+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.ggtitle('Bias comparison {0}'.format(title))
# Larger font for slides, set through the theme's private _rcParams mapping.
t = gg.theme_bw()
t._rcParams['font.size']=16
plt2 = plt2+t
plt2.save(filename = r'..\charts\drivebias_laqn_{0}_total.png'.format(species), width=None, height=None, dpi=300)

#plot enhancement alone for presentation
plt3 = gg.ggplot(df_stacked[df_stacked['method']=='(Enhanc+Expected Backgr-Expected Total)/Expected Total'], gg.aes(x='n_sub',y='value',color='percentile'))+gg.geom_line()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.ylim(-100,100)+gg.scale_color_manual(values=colors)+gg.geom_hline(y=[-25,25],linetype="dashed",color="gray")+gg.geom_vline(x=[10,15],linetype="dashed",color="gray")+gg.ggtitle('Bias comparison {0}'.format(title))
t = gg.theme_bw()
t._rcParams['font.size']=16
plt3 = plt3+t
plt3.save(filename = r'..\charts\drivebias_laqn_{0}_enhanc.png'.format(species), width=None, height=None, dpi=300)
def plotHistogramMeans(hist,fileName):
    """Plot every row of *hist* as a line in its own facet and save the figure.

    Args:
        hist: 2-D array-like with a ``shape`` attribute; one row per cluster,
            one column per histogram bin.
        fileName: File name of the image, written under ``./IMG/``.

    The rows are numbered 0..n-1 in an ``ID`` column, the table is melted to
    long form (``ID``, ``variable``, ``value``) and drawn with one facet per ID.
    """
    num_clust = hist.shape[0]
    # Column vector of row ids; plain ndarray because np.mat no longer exists
    # in NumPy 2.0.
    ids = np.arange(num_clust).reshape(num_clust, 1)
    histD = np.concatenate((ids, hist), axis=1)
    # list(...) is needed on Python 3, where range() cannot be added to a list.
    column_names = ['ID'] + list(range(0, hist.shape[1]))
    Data = pd.DataFrame(histD, columns=column_names)
    Melted = pd.melt(Data, id_vars=['ID'])
    pv = (ggplot.ggplot(ggplot.aes(x='variable', y='value'), data=Melted)
          + ggplot.geom_line()
          + ggplot.facet_wrap("ID"))
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
import pandas as pd

meat = gp.meat

# Beef over time: scatter plot.
p = gp.ggplot(gp.aes(x='date', y='beef'), data=meat)
p = p + gp.geom_point(color='red')
p = p + gp.ggtitle(u'散点图')
print (p)

# Beef over time: line chart.
p = gp.ggplot(gp.aes(x='date', y='beef'), data=meat)
p = p + gp.geom_line(color='blue')
p = p + gp.ggtitle(u'折线图')
print (p)

# Beef over time: points and line together.
p = gp.ggplot(gp.aes(x='date', y='beef'), data=meat)
p = p + gp.geom_point(color='red')
p = p + gp.geom_line(color='blue')
p = p + gp.ggtitle(u'散点图+折线图')
print (p)

# Stack the variables of interest into a single column.
# meat_lng holds date, value (the variables' values) and variable (the
# variables' names).
meat_columns = meat[['date', 'beef', 'pork', 'broilers']]
meat_lng = pd.melt(meat_columns, id_vars='date')
p = gp.ggplot(gp.aes(x='date', y='value', colour='variable'), data=meat_lng)
p = p + gp.geom_point()
p = p + gp.geom_line()
print (p)

# Same long table, one panel per variable.
meat_columns = meat[['date', 'beef', 'pork', 'broilers']]
meat_lng = pd.melt(meat_columns, id_vars='date')
p = gp.ggplot(gp.aes(x='date', y='value', colour='variable'), data=meat_lng)
p = p + gp.geom_point()
p = p + gp.facet_wrap('variable')
print (p)

# Histogram of beef values.
p = gp.ggplot(gp.aes(x='beef'), data=meat)
p = p + gp.geom_histogram()
print (p)

# Histograms of beef and pork values, one panel each.
meat_columns = meat[['date', 'beef', 'pork']]
meat_lng = pd.melt(meat_columns, id_vars='date')
p = gp.ggplot(gp.aes(x='value'), data=meat_lng)
p = p + gp.facet_wrap('variable')
p = p + gp.geom_histogram()
print (p)