from datetime import datetime, date
from surr_geo_field_ar import SurrGeoFieldAR
from geo_field import GeoField
from multiprocessing import Pool

# Load the 'pres' variable from the netCDF SLP file and restrict it to the
# date range passed to slice_date_range().
d = GeoField()
d.load("/home/martin/Work/Geo/data/netcdf/pres.mon.mean.nc", 'pres')
d.slice_date_range(date(1948, 1, 1), date(2012, 1, 1))
#d.slice_months([12, 1, 2])
# First argument left unconstrained, second limited to [-89, 89]
# (presumably longitudes / latitudes -- confirm against GeoField.slice_spatial).
d.slice_spatial(None, [-89, 89])

# Copy the loaded field into the surrogate generator.
sd = SurrGeoFieldAR()
sd.copy_field(d)

# Worker pool handed to the surrogate preparation step.
pool = Pool(4)

# Time the preparation step.
t1 = datetime.now()
sd.prepare_surrogates(pool)
print("Prep: elapsed time %s" % str(datetime.now() - t1))

# Time two consecutive surrogate constructions.
for _ in range(2):
    t1 = datetime.now()
    sd.construct_surrogate()
    print("Gen: elapsed time %s" % str(datetime.now() - t1))
# Build a test system: a 20x50 structural matrix with three labelled regions.
S = np.zeros(shape=(20, 50), dtype=np.int32)
S[10:18, 25:45] = 1
S[0:3, 6:12] = 2
S[8:15, 2:12] = 3
v, Sr = constructVAR(S, [0.0, 0.4, 0.8, 0.7], [-0.5, 0.5], [0.0, 0.0])
# v, Sr = constructVAR(S, [0.0, 0.001, 0.01], [-0.1, 0.1], [0.00, 0.00], [0.01, 0.01])
ts = v.simulate(200)
gf = make_model_geofield(S, ts)

# initialize a parallel pool
pool = Pool(POOL_SIZE)

# Replace the model field with a surrogate field: from here on `gf` is the
# surrogate object and its data is a copy of the generated surrogate data.
sgf = SurrGeoFieldAR()
sgf.copy_field(gf)
sgf.prepare_surrogates(pool)
sgf.construct_surrogate_with_noise()
gf = sgf
gf.d = gf.surr_data().copy()

#
# construct "components" from the structural matrix
Uopt = np.zeros((len(Sr), np.amax(Sr)))
for i in range(Uopt.shape[1]):
    # `col` is a view into Uopt, so every in-place update below lands in Uopt.
    col = Uopt[:, i]
    col[:] = np.where(Sr == (i + 1), 1.0, 0.0)
    # remove the first element (it's the driver which is not included in the optimal component)
    col[np.nonzero(col)[0][0]] = 0.0
    # scale the column to unit Euclidean length
    col /= np.sum(col**2) ** 0.5

print("Analyzing data ...")
# Build the testing model from a structural specification with two regions.
S = np.zeros(shape=(20, 50), dtype=np.int32)
S[10:18, 25:45] = 1
S[0:3, 6:12] = 2
v, Sr = constructVAR(S, [0.0, 0.8, 0.8], [-0.1, 0.1], [0.0, 0.0])
#v, Sr = constructVAR2(S, [-0.2, 0.2], [0.0, 0.9, 0.9], 0.8)
#S = np.zeros(shape = (5, 10), dtype = np.int32)
#S[1:4, 0:2] = 1
#S[0:3, 6:9] = 2v, Sr = constructVAR(S, [0.0, 0.191, 0.120], [-0.1, 0.1], [0.00, 0.00], [0.01, 0.01])

# Simulate 768 steps of the model and wrap the series in a geo field.
ts = v.simulate(768)
gf = make_model_geofield(S, ts)

# Generate one surrogate of the model field and keep its data in ts2.
sgf = SurrGeoFieldAR()
sgf.copy_field(gf)
sgf.prepare_surrogates()
sgf.construct_surrogate_with_noise()
ts2 = sgf.surr_data()

# Figure 1: the structural matrix itself.
plt.figure(figsize=(8, 8))
plt.imshow(S, interpolation='nearest')
plt.title('Structural matrix')

# Figure 2: the matrix stored as v.A, with a colour bar.
plt.figure()
plt.imshow(v.A, interpolation='nearest')
plt.colorbar()
plt.title('AR structural')

# Open the next (still empty) figure.
plt.figure()
# print("[%s] Loading SAT geo field..." % (str(datetime.now())))
# gf = load_monthly_sat_all()
print("[%s] Field loaded." % (str(datetime.now())))

# <codecell>

# Report the data shape, the first/last entries of the lon/lat axes and the
# product of the last two data dimensions.
# Each print takes exactly one argument, so the output is the same whether
# `print` is the Python 2 statement or the Python 3 function.
print(gf.d.shape)
print("%s %s" % (gf.lons[0], gf.lons[-1]))
print("%s %s" % (gf.lats[0], gf.lats[-1]))
print(gf.d.shape[1] * gf.d.shape[2])

# <codecell>

if USE_SURROGATE_MODEL:
    # Swap the loaded field for a single surrogate realisation of it.
    pool = Pool(POOL_SIZE)
    sgf = SurrGeoFieldAR([0, MAX_AR_ORDER], 'sbc')
    print("Running preparation of surrogates ...")
    sgf.copy_field(gf)
    sgf.prepare_surrogates(pool)
    sgf.construct_surrogate_with_noise()
    sgf.d = sgf.sd  # hack to replace original data with surrogate
    print("Max AR order is %d ..." % sgf.max_ord)
    gf = sgf
    print("Replaced field with surrogate field.")
    # The pool is only needed for the preparation step above.
    pool.close()
    del pool

print("Analyzing data ...")
d = gf.data()
if COSINE_REWEIGHTING:
    # NOTE(review): in-place multiply -- if gf.data() returns a reference to
    # the field's own array this also rescales the field itself; confirm
    # that this is intended.
    d *= gf.qea_latitude_weights()
# <codecell>

os.chdir('/home/martin/Projects/ndw-climate/')

# Load the monthly 'hgt' geo-field from the netCDF file for the given dates.
print("[%s] Loading geo field..." % (str(datetime.now())))
gf = load_monthly_data_general(
    "data/hgt.mon.mean.nc",
    "hgt",
    date(1948, 1, 1),
    date(2012, 1, 1),
    None,
    None,
    None,
    5,
)

# Optionally replace the loaded field with a Fourier (F2) surrogate of it,
# keeping the original field reachable as orig_gf.
if USE_MUVAR:
    print("[%s] Constructing F2 surrogate ..." % (str(datetime.now())))
    sgf = SurrGeoFieldAR()
    sgf.copy_field(gf)
    sgf.construct_fourier2_surrogates()
    sgf.d = sgf.sd.copy()
    # slide in fourier2 surrogate
    orig_gf = gf
    gf = sgf

# Report the shape of whichever field is now in use.
print("[%s] Done loading, data hase shape %s." % (str(datetime.now()), gf.d.shape))

# <markdowncell>

# **Surrogate construction**
#
from datetime import datetime, date
from surr_geo_field_ar import SurrGeoFieldAR
from geo_field import GeoField
from multiprocessing import Pool

# load netCDF SLP field ('pres' variable) and cut it to the requested dates
d = GeoField()
d.load("/home/martin/Work/Geo/data/netcdf/pres.mon.mean.nc", 'pres')
d.slice_date_range(date(1948, 1, 1), date(2012, 1, 1))
#d.slice_months([12, 1, 2])
d.slice_spatial(None, [-89, 89])

# copy into surrogate field
sd = SurrGeoFieldAR()
sd.copy_field(d)

# create the Pool
pool = Pool(4)

# --- timed: preparation of the surrogate model ---
t1 = datetime.now()
sd.prepare_surrogates(pool)
print("Prep: elapsed time %s" % str(datetime.now() - t1))

# --- timed: first surrogate construction ---
t1 = datetime.now()
sd.construct_surrogate()
print("Gen: elapsed time %s" % str(datetime.now() - t1))

# --- second surrogate construction (timer restarted) ---
t1 = datetime.now()
sd.construct_surrogate()
S[0:3, 6:12] = 2 S[8:15, 2:12] = 3 v, Sr = constructVAR(S, [0.0, 0.6, 0.9, 0.7], [0.3, 0.5], [0.0, 0.0]) ts = v.simulate(200) gf = make_model_geofield(S, ts) # initialize a parallel pool pool = Pool(POOL_SIZE) # compute the eigenvalues/eigenvectos of the covariance matrix of Ud, dlam, _ = pca_components_gf(gf.data()) drdims = np.zeros((NUM_EIGS, )) for i in range(NUM_EIGS): drdims[i] = dlam[i] / np.sum(dlam[i:]**2)**0.5 sd = SurrGeoFieldAR([0, 30], 'sbc') sd.copy_field(gf) sd.prepare_surrogates(pool) srdims = np.zeros((NUM_SURR, NUM_EIGS)) # generate and compute eigenvalues for 20000 surrogates t1 = datetime.now() # construct the surrogates in parallel # we can duplicate the list here without worry as it will be copied into new python processes # thus creating separate copies of sd print("Running parallel generation of surrogates and SVD") slam_list = pool.map(compute_surrogate_cov_eigvals, [Ud] * NUM_SURR) # rearrange into numpy array (can I use vstack for this?) for i in range(len(slam_list)):
S[0:3, 6:12] = 2 S[8:15, 2:12] = 3 v, Sr = constructVAR(S, [0.0, 0.6, 0.9, 0.7], [0.3, 0.5], [0.0, 0.0]) ts = v.simulate(200) gf = make_model_geofield(S, ts) # initialize a parallel pool pool = Pool(POOL_SIZE) # compute the eigenvalues/eigenvectos of the covariance matrix of Ud, dlam, _ = pca_components_gf(gf.data()) drdims = np.zeros((NUM_EIGS,)) for i in range(NUM_EIGS): drdims[i] = dlam[i] / np.sum(dlam[i:]**2)**0.5 sd = SurrGeoFieldAR([0, 30], 'sbc') sd.copy_field(gf) sd.prepare_surrogates(pool) srdims = np.zeros((NUM_SURR, NUM_EIGS)) # generate and compute eigenvalues for 20000 surrogates t1 = datetime.now() # construct the surrogates in parallel # we can duplicate the list here without worry as it will be copied into new python processes # thus creating separate copies of sd print("Running parallel generation of surrogates and SVD") slam_list = pool.map(compute_surrogate_cov_eigvals, [Ud] * NUM_SURR) # rearrange into numpy array (can I use vstack for this?) for i in range(len(slam_list)):
# Now we must filter the data in frequency, the data loading has thus been moved here.

# <codecell>

os.chdir('/home/martin/Projects/ndw-climate/')

# load the monthly 'hgt' geo-field from data/hgt.mon.mean.nc
print("[%s] Loading geo field..." % (str(datetime.now())))
gf = load_monthly_data_general("data/hgt.mon.mean.nc", "hgt",
                               date(1948, 1, 1), date(2012, 1, 1),
                               None, None, None, 5)

# when USE_MUVAR is set, analyse a Fourier (F2) surrogate instead of the
# loaded field; the loaded field stays available as orig_gf
if USE_MUVAR:
    print("[%s] Constructing F2 surrogate ..." % (str(datetime.now())))
    sgf = SurrGeoFieldAR()
    sgf.copy_field(gf)
    sgf.construct_fourier2_surrogates()
    # slide in fourier2 surrogate
    sgf.d = sgf.sd.copy()
    orig_gf = gf
    gf = sgf

# report the shape of the field that will be analysed
print("[%s] Done loading, data hase shape %s." % (str(datetime.now()), gf.d.shape))

# <markdowncell>

# **Surrogate construction**
# with open('data/test_gf.bin', 'r') as f:
#     d = cPickle.load(f)

# initialize a parallel pool
pool = Pool(POOL_SIZE)

# PCA of the field data (multiplied by the latitude weights when
# COSINE_REWEIGHTING is set); only the leading NUM_EIGS components and
# eigenvalues are kept.
d = gf.data()
if COSINE_REWEIGHTING:
    d = d * gf.qea_latitude_weights()
Ud, dlam, _ = pca_components_gf(d)
Ud, dlam = Ud[:, :NUM_EIGS], dlam[:NUM_EIGS]

# Surrogate generator prepared on a copy of the field.
sd = SurrGeoFieldAR([0, 30], 'sbc')
sd.copy_field(gf)
sd.prepare_surrogates(pool)

# Per-surrogate result buffers, one row per surrogate.
slam = np.zeros((NUM_SURR, NUM_EIGS))
maxU = np.zeros((NUM_SURR, NUM_EIGS))

# generate and compute eigenvalues for NUM_SURR surrogates
t1 = datetime.now()

# construct the surrogates in parallel
# we can duplicate the list here without worry as it will be copied into new python processes
# thus creating separate copies of sd
print("Running parallel generation of surrogates and SVD")
jobs = [(sd, Ud)] * NUM_SURR
slam_list = pool.map(compute_surrogate_cov_eigvals, jobs)

# rearrange into numpy array (can I use vstack for this?)
# construct the testing model from a spec (two labelled regions on a 20x50 grid)
S = np.zeros(shape=(20, 50), dtype=np.int32)
S[10:18, 25:45] = 1
S[0:3, 6:12] = 2
v, Sr = constructVAR(S, [0.0, 0.8, 0.8], [-0.1, 0.1], [0.0, 0.0])
#v, Sr = constructVAR2(S, [-0.2, 0.2], [0.0, 0.9, 0.9], 0.8)
#S = np.zeros(shape = (5, 10), dtype = np.int32)
#S[1:4, 0:2] = 1
#S[0:3, 6:9] = 2v, Sr = constructVAR(S, [0.0, 0.191, 0.120], [-0.1, 0.1], [0.00, 0.00], [0.01, 0.01])

# simulate the model for 768 steps and turn the result into a geo field
ts = v.simulate(768)
gf = make_model_geofield(S, ts)

# build a surrogate of the model field; ts2 is the surrogate data
sgf = SurrGeoFieldAR()
sgf.copy_field(gf)
sgf.prepare_surrogates()
sgf.construct_surrogate_with_noise()
ts2 = sgf.surr_data()

# plot the structural matrix
plt.figure(figsize=(8, 8))
plt.imshow(S, interpolation='nearest')
plt.title('Structural matrix')

# plot v.A with a colour bar
plt.figure()
plt.imshow(v.A, interpolation='nearest')
plt.colorbar()
plt.title('AR structural')

# new figure for whatever is plotted next
plt.figure()
# construct a test system (three labelled regions on a 20x50 grid)
S = np.zeros(shape=(20, 50), dtype=np.int32)
S[10:18, 25:45] = 1
S[0:3, 6:12] = 2
S[8:15, 2:12] = 3
v, Sr = constructVAR(S, [0.0, 0.4, 0.8, 0.7], [-0.5, 0.5], [0.0, 0.0])
# v, Sr = constructVAR(S, [0.0, 0.001, 0.01], [-0.1, 0.1], [0.00, 0.00], [0.01, 0.01])
ts = v.simulate(200)
gf = make_model_geofield(S, ts)

# initialize a parallel pool
pool = Pool(POOL_SIZE)

# replace field with surrogate field; gf.d becomes a copy of the surrogate data
sgf = SurrGeoFieldAR()
sgf.copy_field(gf)
sgf.prepare_surrogates(pool)
sgf.construct_surrogate_with_noise()
gf = sgf
gf.d = gf.surr_data().copy()

#
# construct "components" from the structural matrix
Uopt = np.zeros((len(Sr), np.amax(Sr)))
for i in range(Uopt.shape[1]):
    # indicator column: 1.0 where Sr carries label i + 1, 0.0 elsewhere
    in_component = (Sr == (i + 1))
    Uopt[:, i] = np.where(in_component, 1.0, 0.0)

    # remove the first element (it's the driver which is not included in the optimal component)
    driver = np.nonzero(Uopt[:, i])[0][0]
    Uopt[driver, i] = 0.0

    # normalise the column to unit Euclidean length
    Uopt[:, i] /= np.sum(Uopt[:, i]**2)**0.5

print("Analyzing data ...")