def render_slp_component_element_values():
    """Save maps and a histogram of the largest absolute component values.

    Loads the pickled dictionary stored at FILE_NAME_COMPS, reduces its
    'mean' array to the maximum absolute value along axis 1, and maps the
    result onto the grid of 'data/pres.mon.mean.nc' after
    slice_spatial(None, [20, 89]).

    Three PDFs are written below 'figs/':
      * slp_nh_component_maxima_sameaxis.pdf - map rendered with (0, 0.15)
        as the fourth argument of render_component_single
      * slp_nh_component_maxima.pdf - same map with None in that slot
      * slp_nh_compmax_hist.pdf - 40-bin histogram of the maxima

    Returns:
        The histogram figure (the last figure created).
    """
    # Pickle payloads are binary data; text mode can corrupt them on
    # platforms that translate line endings.
    with open(FILE_NAME_COMPS, 'rb') as comp_file:
        d = cPickle.load(comp_file)

    mn = d['mean']
    # Keep a trailing singleton axis so the column can be handed to
    # reshape_flat_field.
    cid = np.amax(np.abs(mn), axis=1)[:, np.newaxis]

    gf = GeoField()
    gf.load('data/pres.mon.mean.nc', 'pres')
    gf.slice_spatial(None, [20, 89])
    mnd = gf.reshape_flat_field(cid)

    # NOTE(review): (0, 0.15) is presumably a fixed colour range (hence
    # "sameaxis" in the file name) - confirm against render_component_single.
    fig = render_component_single(mnd[0, :, :], gf.lats, gf.lons, (0, 0.15), None,
                                  'NH Extratropical Components - max values')
    fig.savefig('figs/slp_nh_component_maxima_sameaxis.pdf')

    fig = render_component_single(mnd[0, :, :], gf.lats, gf.lons, None, None,
                                  'NH Extratropical Components - max values')
    fig.savefig('figs/slp_nh_component_maxima.pdf')

    fig = plt.figure()
    plt.hist(cid, bins=40)
    plt.title('Histogram of max values @ grid points across components')
    plt.xlabel('Maximum value [-]')
    plt.ylabel('Frequency [-]')
    fig.savefig('figs/slp_nh_compmax_hist.pdf')

    return fig
def render_slp_component_element_values():
    """Render the per-grid-point maxima of the component means.

    The dictionary pickled at FILE_NAME_COMPS supplies the 'mean' array; its
    maximum absolute value along axis 1 is reshaped onto the grid of
    'data/pres.mon.mean.nc' (after slice_spatial(None, [20, 89])) and
    rendered twice: once with (0, 0.15) and once with None as the fourth
    argument of render_component_single.  A 40-bin histogram of the same
    values is produced as well.  All three figures are saved as PDFs under
    'figs/'.

    Returns:
        The histogram figure.
    """
    # Binary mode: pickle data is not text, and text-mode newline
    # translation can damage it on some platforms.
    with open(FILE_NAME_COMPS, 'rb') as pickle_file:
        d = cPickle.load(pickle_file)

    mn = d['mean']
    cid = np.amax(np.abs(mn), axis=1)[:, np.newaxis]

    gf = GeoField()
    gf.load('data/pres.mon.mean.nc', 'pres')
    gf.slice_spatial(None, [20, 89])
    mnd = gf.reshape_flat_field(cid)

    title = 'NH Extratropical Components - max values'

    # Same map, two settings of the fourth render argument.
    # NOTE(review): presumably a colour range - confirm in geo_rendering.
    fixed_fig = render_component_single(mnd[0, :, :], gf.lats, gf.lons,
                                        (0, 0.15), None, title)
    fixed_fig.savefig('figs/slp_nh_component_maxima_sameaxis.pdf')

    auto_fig = render_component_single(mnd[0, :, :], gf.lats, gf.lons,
                                       None, None, title)
    auto_fig.savefig('figs/slp_nh_component_maxima.pdf')

    hist_fig = plt.figure()
    plt.hist(cid, bins=40)
    plt.title('Histogram of max values @ grid points across components')
    plt.xlabel('Maximum value [-]')
    plt.ylabel('Frequency [-]')
    hist_fig.savefig('figs/slp_nh_compmax_hist.pdf')

    return hist_fig
def render_slp_components():
    """Show a map assigning each grid point to its dominant component.

    Loads the 'mean' array from
    'results/slp_nh_var_bootstrap_results_b1000.bin', zeroes every entry
    whose magnitude does not exceed 1 / sqrt(mn.shape[0]), and for each row
    takes the (1-based) index of the largest remaining magnitude along
    axis 1.  The number of rows with no entry above the threshold is printed.
    The index map is rendered with the 'gist_ncar' colormap and displayed.
    """
    # Pickle payloads are binary data; text mode can corrupt them on
    # platforms that translate line endings.
    with open('results/slp_nh_var_bootstrap_results_b1000.bin', 'rb') as result_file:
        d = cPickle.load(result_file)

    mn = d['mean']
    mn_mask = (np.abs(mn) > 1.0 / mn.shape[0]**0.5)

    # Count of rows in which nothing passes the threshold.  For those rows
    # argmax below runs over all zeros and therefore yields index 0 (id 1).
    print(np.sum(np.sum(mn_mask, axis=1) == 0))

    # +1 turns the zero-based argmax into a one-based identifier.
    cid = np.argmax(np.abs(mn) * mn_mask, axis=1)[:, np.newaxis] + 1

    gf = GeoField()
    gf.load('data/pres.mon.mean.nc', 'pres')
    gf.slice_spatial(None, [20, 89])
    mnd = gf.reshape_flat_field(cid)

    render_component_single(mnd[0, :, :], gf.lats, gf.lons, False, None,
                            'NH Extratropical Components',
                            cmap=plt.get_cmap('gist_ncar'))
    plt.show()
def render_frequency_prevalence(gf, period, templ):
    """Map the strength of a single periodic cycle at every grid point.

    For each grid point (i, j) of gf.d a Lomb-Scargle periodogram of the
    series gf.d[:, i, j] is evaluated at the single angular frequency
    2*pi/period, and sqrt(4 * power / number_of_samples) is stored.  The
    resulting map is rendered and saved to
    'figs/<templ>_<period>yr_cycle_prevalence.pdf'.

    Args:
        gf: field object exposing d (indexed [time, i, j]), lats and lons.
        period: cycle length in the units of the time axis built here, i.e.
            sample index / 12 (presumably years for monthly data - confirm).
            The file name formats it with %d, so a fractional period is
            truncated there.
        templ: prefix used in the output file name.
    """
    num_samples = gf.d.shape[0]

    # Build the time axis from an integer range so it always has exactly
    # num_samples entries.  np.arange with a fractional step (the previous
    # form) may return one element too many because of floating-point
    # rounding, which would no longer match the length of the data series.
    tm = np.arange(num_samples, dtype=np.float64) / 12.0

    # The frequency of interest does not depend on the grid point.
    ang_freq = np.array([2.0 * np.pi / period])

    ff = np.zeros((gf.d.shape[1], gf.d.shape[2]), dtype=np.float64)
    for i in range(gf.d.shape[1]):
        for j in range(gf.d.shape[2]):
            pg = lombscargle(tm, gf.d[:, i, j].astype(np.float64), ang_freq)
            ff[i, j] = np.sqrt(pg[0] * 4.0 / num_samples)

    f = render_component_single(ff, gf.lats, gf.lons, None, None,
                                '%gyr period' % period)
    f.savefig('figs/%s_%dyr_cycle_prevalence.pdf' % (templ, period))
def render_frequency_prevalence(gf, period, templ):
    """Render how strongly a cycle of the given period shows at each point.

    Evaluates a Lomb-Scargle periodogram of every series gf.d[:, i, j] at
    the angular frequency 2*pi/period, converts the power to
    sqrt(4 * power / N) with N the number of time samples, renders the
    resulting 2-D map and saves it as
    'figs/<templ>_<period>yr_cycle_prevalence.pdf'.

    Args:
        gf: field object providing d (indexed [time, i, j]), lats and lons.
        period: cycle length, expressed in the same units as the time axis
            constructed below (sample index divided by 12; presumably years
            for monthly samples - confirm with callers).
        templ: file-name prefix for the saved figure.
    """
    n_time, n_rows, n_cols = gf.d.shape[0], gf.d.shape[1], gf.d.shape[2]

    # One time stamp per sample.  Dividing an integer range guarantees
    # exactly n_time values; a float-stepped np.arange can overshoot by one
    # element due to rounding and break the length match with the data.
    tm = np.arange(n_time, dtype=np.float64) / 12.0

    # Loop-invariant: the single angular frequency being probed.
    omega = np.array([2.0 * np.pi / period])

    ff = np.zeros((n_rows, n_cols), dtype=np.float64)
    for i in range(n_rows):
        for j in range(n_cols):
            series = gf.d[:, i, j].astype(np.float64)
            pg = lombscargle(tm, series, omega)
            ff[i, j] = np.sqrt(pg[0] * 4.0 / n_time)

    f = render_component_single(ff, gf.lats, gf.lons, None, None,
                                '%gyr period' % period)
    f.savefig('figs/%s_%dyr_cycle_prevalence.pdf' % (templ, period))
def plot_slp_model_orders():
    """Display a map of the 'orders' array stored with the surrogate results.

    Loads the pickled dictionary from
    'results/slp_eigvals_multi_surrogates_nh_var.bin', prints its keys and
    the shape of the 'orders' entry (after a new axis is inserted at
    position 1), and renders o[:, 0, :] on the grid of
    'data/pres.mon.mean.nc' after slice_spatial(None, [20, 89]).
    """
    # Pickle payloads are binary data; text mode can corrupt them on
    # platforms that translate line endings.
    with open('results/slp_eigvals_multi_surrogates_nh_var.bin', 'rb') as result_file:
        d = cPickle.load(result_file)

    print(d.keys())
    o = d['orders'][:, np.newaxis]
    print(o.shape)

    gf = GeoField()
    gf.load('data/pres.mon.mean.nc', 'pres')
    gf.slice_spatial(None, [20, 89])

    # Indexing [:, 0, :] drops the axis inserted above again, so the array
    # is passed to the renderer in its stored layout (no reshape_flat_field).
    render_component_single(o[:, 0, :], gf.lats, gf.lons, False, None,
                            'NH Extratropical Components - AR model order')
    plt.show()
def render_slp_component_element_values():
    """Display a map of the largest absolute component value per grid point.

    Loads the 'mean' array from
    'results/slp_nh_var_bootstrap_results_b1000_cosweights.bin', takes its
    maximum absolute value along axis 1, reshapes it onto the grid of
    'data/pres.mon.mean.nc' (after slice_spatial(None, [20, 89])) and shows
    the rendered map interactively.
    """
    # Pickle payloads are binary data; text mode can corrupt them on
    # platforms that translate line endings.
    with open('results/slp_nh_var_bootstrap_results_b1000_cosweights.bin', 'rb') as result_file:
        d = cPickle.load(result_file)

    mn = d['mean']
    # Trailing singleton axis so the column can go through reshape_flat_field.
    cid = np.amax(np.abs(mn), axis=1)[:, np.newaxis]

    gf = GeoField()
    gf.load('data/pres.mon.mean.nc', 'pres')
    gf.slice_spatial(None, [20, 89])
    mnd = gf.reshape_flat_field(cid)

    render_component_single(mnd[0, :, :], gf.lats, gf.lons, False, None,
                            'NH Extratropical Components - max values')
    plt.show()
def render_slp_model_orders():
    """Save a map of the 'orders' array stored at FILE_NAME_EIGS.

    Loads the pickled dictionary from FILE_NAME_EIGS, prints its keys and
    the shape of the 'orders' entry (after a new axis is inserted at
    position 1), renders o[:, 0, :] on the grid of 'data/pres.mon.mean.nc'
    after slice_spatial(None, [20, 89]), and writes the figure to
    'figs/slp_nh_model_order.pdf'.
    """
    # Pickle payloads are binary data; text mode can corrupt them on
    # platforms that translate line endings.
    with open(FILE_NAME_EIGS, 'rb') as eig_file:
        d = cPickle.load(eig_file)

    print(d.keys())
    o = d['orders'][:, np.newaxis]
    print(o.shape)

    gf = GeoField()
    gf.load('data/pres.mon.mean.nc', 'pres')
    gf.slice_spatial(None, [20, 89])

    # [:, 0, :] removes the inserted axis again; the array is rendered in
    # its stored layout rather than via reshape_flat_field.
    fig = render_component_single(o[:, 0, :], gf.lats, gf.lons, False, None,
                                  'NH Extratropical Components - AR model order')
    fig.savefig('figs/slp_nh_model_order.pdf')
def render_slp_model_orders():
    """Render the stored 'orders' array as a map and save it as a PDF.

    The dictionary pickled at FILE_NAME_EIGS provides the 'orders' array.
    Its keys and the shape of the array (with a new axis inserted at
    position 1) are printed, then o[:, 0, :] is drawn on the grid of
    'data/pres.mon.mean.nc' (after slice_spatial(None, [20, 89])) and saved
    to 'figs/slp_nh_model_order.pdf'.
    """
    # Open in binary mode: pickle data is not text, and newline translation
    # in text mode can damage it on some platforms.
    with open(FILE_NAME_EIGS, 'rb') as pickle_file:
        d = cPickle.load(pickle_file)

    print(d.keys())
    o = d['orders'][:, np.newaxis]
    print(o.shape)

    gf = GeoField()
    gf.load('data/pres.mon.mean.nc', 'pres')
    gf.slice_spatial(None, [20, 89])

    order_fig = render_component_single(
        o[:, 0, :], gf.lats, gf.lons, False, None,
        'NH Extratropical Components - AR model order')
    order_fig.savefig('figs/slp_nh_model_order.pdf')
# Keep only the first NUM_EIGVALS entries of what pca_eigvals_gf returns
# for the data set.
dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]
print("[%s] Data analysis DONE." % (str(datetime.now())))

# <markdowncell>

# **Show the variance of the data (filtered)**

# <markdowncell>

# **Show a plot of the model orders**

# <codecell>

mo = sgf.model_orders()
# NOTE(review): this rebinds the name `plt`.  If `plt` is the
# matplotlib.pyplot alias elsewhere in this file, later plt.* calls will act
# on the object returned here instead - confirm this is intended.
plt = render_component_single(mo, gf.lats, gf.lons, plt_name = 'Model orders of AR surrogates')

# <codecell>

pool = Pool(POOL_SIZE)
# Log file name carries a minute-resolution timestamp.  The handle is not
# closed anywhere in this excerpt.
log = open('geodata_estimate_component_count-%s.log' % datetime.now().strftime('%Y%m%d-%H%M'), 'w')

# storage for three types of surrogates
# (one row per surrogate, one column per retained eigenvalue)
slam_ar = np.zeros((NUM_SURR, NUM_EIGVALS))
slam_w1 = np.zeros((NUM_SURR, NUM_EIGVALS))
slam_f = np.zeros((NUM_SURR, NUM_EIGVALS))

surr_completed = 0

# construct the job queue
job_list = []
# Single-linkage hierarchical clustering of ytri.
# NOTE(review): ytri is presumably a condensed distance matrix - confirm
# where it is built.
Z = fastcluster.linkage(ytri, method = 'single')

print("Plotting dendrogram ...")
dendrogram(Z, 7, 'level')

# Column 2 of the linkage matrix is read as the merge distance.
max_d = np.amax(Z[:,2])
print("Maximum distance is %g" % max_d)

# Search for a cut distance at which the largest value in the flat cluster
# assignment lies between 10 and 30 (inclusive), starting at half the
# maximum distance.
# NOTE(review): nothing bounds the number of iterations; if the two update
# rules keep overshooting each other the loop will not terminate.
my_d = max_d / 2
cont = True
while cont:
    f = fcluster(Z, my_d, 'distance')
    print f.shape, my_d
    if np.amax(f) > 30:
        # Largest label above 30: move the cut halfway towards max_d.
        my_d = (max_d + my_d) * 0.5
    elif np.amax(f) < 10:
        # Largest label below 10: lower the cut distance.
        my_d = my_d - (max_d - my_d) / max_d
    else:
        cont = False

# now plot the clusters
# (flat assignment folded back onto the num_lats x num_lons grid)
f_grid = np.reshape(f, (num_lats, num_lons))

plt.figure()
plt.imshow(f_grid)
plt.colorbar()
plt.title('Clustering of the data')

plt.figure()
render_component_single(f_grid, gf.lats, gf.lons, False, None, "Cluster assignment")
plt.show()
print("[%s] Data analysis DONE." % (str(datetime.now()))) # <markdowncell> # **Show the variance of the data (filtered)** # <markdowncell> # **Show a plot of the model orders** # <codecell> mo = sgf.model_orders() plt = render_component_single(mo, gf.lats, gf.lons, plt_name='Model orders of AR surrogates') # <codecell> pool = Pool(POOL_SIZE) log = open( 'geodata_estimate_component_count-%s.log' % datetime.now().strftime('%Y%m%d-%H%M'), 'w') # storage for three types of surrogates slam_ar = np.zeros((NUM_SURR, NUM_EIGVALS)) slam_w1 = np.zeros((NUM_SURR, NUM_EIGVALS)) slam_f = np.zeros((NUM_SURR, NUM_EIGVALS)) surr_completed = 0
# NOTE(review): the first two statements look like the tail of a conditional
# block that starts before this excerpt - confirm their nesting in the full
# file before relying on this layout.
sgf.d = sgf.sd.copy()
log("** WARNING ** Replaced synth model with surrogate model to check false positives."
    )

# analyze data & obtain eigvals and surrogates
log("Computing eigenvalues of dataset ...")
d = gf.data()
if COSINE_REWEIGHTING:
    # In-place scaling of the array returned by gf.data().
    d *= gf.qea_latitude_weights()
# Keep only the first NUM_EIGVALS entries.
dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]

log("Rendering orders of fitted AR models.")
mo = sgf.model_orders()
# Unlike an interactive render, a file name is supplied here via fname.
render_component_single(mo,
                        gf.lats,
                        gf.lons,
                        plt_name='Model orders of AR surrogates',
                        fname='%s_ar_model_order%s.png' % (DATA_NAME, SUFFIX))

# construct the job queue
log("Constructing pool")
pool = Pool(WORKER_COUNT)

# construct the surrogates in parallel
# we can duplicate the list here without worry as it will be copied into new python processes
# thus creating separate copies of sd
log("Running parallel generation of surrogates and analysis.")

# generate and compute eigenvalues for 20000 surrogates
# NOTE(review): the count "20000" is not visible in this excerpt - confirm
# it still matches the configured number of surrogates.
t_start = datetime.now()
if USE_SURROGATE_MODEL:
    # HACK to replace original data with surrogates
    # Both the data field and the surrogate field receive their own copy of
    # sgf.sd.
    gf.d = sgf.sd.copy()
    sgf.d = sgf.sd.copy()
    log("** WARNING ** Replaced synth model with surrogate model to check false positives.")

# analyze data & obtain eigvals and surrogates
log("Computing eigenvalues of dataset ...")
d = gf.data()
if COSINE_REWEIGHTING:
    # In-place scaling of the array returned by gf.data().
    d *= gf.qea_latitude_weights()
# Keep only the first NUM_EIGVALS entries.
dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]

log("Rendering orders of fitted AR models.")
mo = sgf.model_orders()
render_component_single(mo, gf.lats, gf.lons, plt_name = 'Model orders of AR surrogates',
                        fname='%s_ar_model_order%s.png' % (DATA_NAME, SUFFIX))

# construct the job queue
jobq = Queue()
resq = Queue()
# One work token per surrogate to generate ...
for i in range(NUM_SURR):
    jobq.put(1)
# ... followed by one None per worker.
# NOTE(review): None presumably acts as the stop sentinel inside
# compute_surrogate_cov_eigvals - confirm there.
for i in range(WORKER_COUNT):
    jobq.put(None)

log("Starting workers")
# The Process objects are only created here; they are not started within
# this excerpt.
workers = [Process(target = compute_surrogate_cov_eigvals,
                   args = (sgf,jobq,resq)) for i in range(WORKER_COUNT)]

# construct the surrogates in parallel
# we can duplicate the list here without worry as it will be copied into new python processes
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 1 11:17:39 2012

@author: martin

Loads the 'pres' variable from data/pres.mon.mean.nc, converts it to
anomalies with normalized monthly variance, restricts it to the date range
1948-01-01 .. 2012-01-01 and to slice_spatial(None, [-89, 89]), then renders
the first and the last time slice.
"""

from datetime import date
from geo_field import GeoField
from var_model import VARModel
from geo_rendering import render_component_single
import matplotlib.pyplot as plt

slp = GeoField()
slp.load("data/pres.mon.mean.nc", 'pres')

# Preprocessing is applied before any slicing, in this order.
slp.transform_to_anomalies()
slp.normalize_monthly_variance()
slp.slice_date_range(date(1948, 1, 1), date(2012, 1, 1))
#slp.slice_months([12, 1, 2])
slp.slice_spatial(None, [-89, 89])

# First and last time slice of the prepared field, each in its own render.
snapshots = (
    (0, 'SLP anomalies Jan 1948'),
    (-1, 'SLP anomalies Jan 2012'),
)
for time_index, caption in snapshots:
    render_component_single(slp.d[time_index, :, :], slp.lats, slp.lons,
                            False, None, caption)

plt.show()