def estimate_components_meng(d):
    """
    Estimate sparse components from the data d using the method of Meng.

    The first matrix returned by pca_components_gf(d) is handed to
    extract_sparse_components twice (as the first and the last positional
    argument), together with SPCA_SPARSITY and NUM_COMPONENTS.  The result
    of that call is returned unchanged.
    """
    pca_basis, _unused_s, _unused_vt = pca_components_gf(d)
    return extract_sparse_components(pca_basis, SPCA_SPARSITY,
                                     NUM_COMPONENTS, pca_basis)
def estimate_components_orthomax(d):
    """
    Estimate orthomax-rotated PCA/FA components of the data d.

    d is the input data as returned by the GeoField bootstrap constructor.
    The leading NUM_COMPONENTS columns of the PCA basis are rotated with
    orthomax (gamma=GAMMA, row normalization on).  Unless ROTATE_NORMALIZED
    is set, each column is first scaled by the matching entry of the second
    value returned by pca_components_gf.

    Returns the rotated components with every column scaled to unit
    Euclidean norm.
    """
    basis, scales, _ = pca_components_gf(d)
    basis = basis[:, :NUM_COMPONENTS]

    if not ROTATE_NORMALIZED:
        # in-place scaling of the selected columns
        basis *= scales[np.newaxis, :NUM_COMPONENTS]

    rotated, _, _ = orthomax(basis, gamma=GAMMA, norm_rows=True)

    # normalize each rotated column to unit length (in place)
    column_norms = np.sum(rotated**2, axis=0) ** 0.5
    rotated /= column_norms
    return rotated
def estimate_components_orthomax(d):
    """
    Compute rotated PCA/FA components for d (data as returned by the
    GeoField bootstrap constructor).

    Steps: PCA via pca_components_gf, truncation to NUM_COMPONENTS columns,
    optional column scaling (skipped when ROTATE_NORMALIZED is true),
    orthomax rotation with gamma=GAMMA and norm_rows=True, and finally a
    rescaling of every column to unit Euclidean norm.
    """
    pca_result = pca_components_gf(d)
    comps, weights, _ = pca_result
    comps = comps[:, :NUM_COMPONENTS]

    if not ROTATE_NORMALIZED:
        comps *= weights[np.newaxis, :NUM_COMPONENTS]

    rot_comps, _, _ = orthomax(comps, gamma=GAMMA, norm_rows=True)

    # unit-norm columns; the division is done in place on the rotated array
    rot_comps /= np.sum(rot_comps**2, axis=0) ** 0.5
    return rot_comps
def estimate_components_ica(d):
    """
    Estimate ICA components from the input data d.

    The data is decomposed by pca_components_gf; a FastICANode is run on
    the first NUM_COMPONENTS columns of the transposed third factor and its
    projection matrix is applied to the first NUM_COMPONENTS columns of the
    first factor.

    Returns the projected components with unit-norm columns.
    """
    spatial, _, temporal_t = pca_components_gf(d)
    spatial = spatial[:, :NUM_COMPONENTS]
    temporal = np.transpose(temporal_t)[:, :NUM_COMPONENTS]

    ica_node = FastICANode(whitened=True,
                           max_it=10000,
                           g='tanh',
                           fine_g='tanh',
                           max_it_fine=1000)
    # only the node's projection matrix is used; the output is discarded
    ica_node.execute(temporal)
    projection = ica_node.get_projmatrix()

    rotated = np.dot(spatial, projection)
    rotated /= np.sum(rotated**2, axis=0) ** 0.5
    return rotated
def estimate_components_ica(d):
    """
    Estimate ICA components from the input data d.

    pca_components_gf is called with True as its second argument
    (NOTE(review): the meaning of this flag is not visible here; confirm
    against pca_components_gf).  A FastICANode is executed on the first
    NUM_COMPONENTS columns of the transposed third PCA factor, and its
    projection matrix is applied to the first NUM_COMPONENTS columns of
    the first factor.

    Returns the projected components, each column scaled to unit norm.
    """
    left, _, right_t = pca_components_gf(d, True)
    left = left[:, :NUM_COMPONENTS]

    right = np.transpose(right_t)
    right = right[:, :NUM_COMPONENTS]

    node = FastICANode(whitened=True, max_it=10000, g='tanh',
                       fine_g='tanh', max_it_fine=1000)
    # the transformed signal itself is not needed, only the fitted node
    node.execute(right)

    components = np.dot(left, node.get_projmatrix())
    col_norms = np.sum(components**2, axis=0) ** 0.5
    components /= col_norms
    return components
def compute_surrogate_cov_eigvals(x):
    """
    Generate one white-noise surrogate and run PCA on it.

    x is a pair (surrogate field object, reference components); the second
    element is currently unused because the component matching step is
    disabled.

    Returns a tuple of
      - the first NUM_EIGS entries of the second PCA output, and
      - the maximum absolute value in each of the first NUM_EIGS columns of
        the first PCA output.
    """
    surr_field, _reference = x

    # alternative generators: construct_surrogate_with_noise(),
    # construct_fourier1_surrogates()
    surr_field.construct_white_noise_surrogates()

    surr = surr_field.surr_data()
    if COSINE_REWEIGHTING:
        surr = surr * surr_field.qea_latitude_weights()

    surr_comps, surr_eigs, _ = pca_components_gf(surr)

    # disabled: matching of surrogate components to the reference ones
    # perm, sf = match_components_munkres(U, Ur)
    # Ur = Ur[:, perm[:NUM_EIGS]]
    # Ur *= sf
    # return sr[perm[:NUM_EIGS]]
    leading_eigs = surr_eigs[:NUM_EIGS]
    max_abs_loading = np.amax(np.abs(surr_comps[:, :NUM_EIGS]), axis=0)
    return leading_eigs, max_abs_loading
def estimate_components_orthomax(d):
    """
    Estimate orthomax-rotated PCA/FA components of d (data as returned by
    the GeoField bootstrap constructor).

    The first NUM_COMPONENTS PCA columns are optionally scaled (when
    ROTATE_NORMALIZED is false) and rotated by orthomax with at most 500
    iterations.  The rotated columns are normalized to unit length.

    Returns the rotated components, or None (after printing a warning) when
    orthomax reports 499 or more iterations.
    """
    basis, scales, _ = pca_components_gf(d)
    basis = basis[:, :NUM_COMPONENTS]
    if not ROTATE_NORMALIZED:
        basis *= scales[np.newaxis, :NUM_COMPONENTS]

    tolerance = np.finfo(np.float32).eps ** 0.5
    rotated, _, n_iters = orthomax(basis,
                                   rtol=tolerance,
                                   gamma=GAMMA,
                                   maxiter=500,
                                   norm_rows=ROTATE_NORM_ROWS)

    # normalization happens before the convergence check, as in-place op
    rotated /= np.sum(rotated**2, axis=0) ** 0.5

    if iters_exhausted := (n_iters >= 499):
        print('Warning: max iters reached.')
        return None
    return rotated
def estimate_components_orthomax(d):
    """
    Compute the orthomax-rotated PCA/FA components of the input data d as
    returned by GeoField bootstrap constructor.

    The first NUM_COMPONENTS PCA columns are rotated by orthomax (at most
    500 iterations, gamma=GAMMA).

    Returns:
        (Ur, T): the rotated components and the second value returned by
        orthomax, or None when the rotation used 499 or more iterations or
        when an error occurred (the error is printed, not raised, so that
        a single failed sample does not abort a batch run).
    """
    try:
        U, _, _ = pca_components_gf(d)
        U = U[:, :NUM_COMPONENTS]
        Ur, T, iters = orthomax(U,
                                rtol=np.finfo(np.float32).eps ** 0.5,
                                gamma=GAMMA,
                                maxiter=500)
    except LinAlgError as e:
        # LinAlgError carries no errno/strerror attributes; report its message
        print("**LINALG ERROR** %s" % e)
        return None
    except Exception:
        # best-effort: report and give up on this sample, but let
        # KeyboardInterrupt/SystemExit propagate
        print("**UNEXPECTED ERROR** %s" % sys.exc_info()[0])
        return None

    # the rotation hit (or nearly hit) the iteration limit: treat as failure
    if iters >= 499:
        return None
    return Ur, T
def estimate_components_orthomax(d):
    """
    Compute rotated PCA/FA components for d (data as returned by the
    GeoField bootstrap constructor).

    Runs PCA, keeps NUM_COMPONENTS columns, scales them unless
    ROTATE_NORMALIZED is set, applies orthomax (maxiter=500,
    norm_rows=ROTATE_NORM_ROWS) and normalizes the columns of the result.

    Returns None and prints a warning if orthomax reports at least 499
    iterations; otherwise returns the normalized rotated components.
    """
    comps, weights, _ = pca_components_gf(d)
    comps = comps[:, :NUM_COMPONENTS]

    if not ROTATE_NORMALIZED:
        comps *= weights[np.newaxis, :NUM_COMPONENTS]

    rot_comps, _, used_iters = orthomax(
        comps,
        rtol=np.finfo(np.float32).eps**0.5,
        gamma=GAMMA,
        maxiter=500,
        norm_rows=ROTATE_NORM_ROWS,
    )
    norms = np.sum(rot_comps**2, axis=0)**0.5
    rot_comps /= norms

    if used_iters < 499:
        return rot_comps

    print('Warning: max iters reached.')
    return None
def compute_lno_sample_components(x):
    """
    Compute rotated components for one leave-n-out sample and align them
    with the reference components.

    x is a tuple (gf, Urd, i, j): gf provides the data via gf.data(), Urd
    holds the reference components as columns, and entries i..j-1 along the
    first axis of the data are left out of the sample.

    Returns the sample components as columns, reordered and sign-flipped so
    that column k is the best match of column k of Urd.
    """
    gf, Urd, i, j = x

    # drop the slab [i, j) along the first axis
    b = gf.data()
    b = np.vstack([b[:i, ...], b[j:, ...]])

    U, _, _ = pca_components_gf(b)
    Ur, _, _ = orthomax(U[:, :NUM_COMPONENTS])

    # compute closeness of components: C[a, b] relates sample column a
    # to reference column b
    C = np.dot(Ur.T, Urd)

    # find optimal matching of components; pairs are (sample, reference)
    match = Munkres().compute(1.0 - np.abs(C))

    perm = np.zeros((NUM_COMPONENTS, ), dtype=int)
    for sample_ndx, ref_ndx in match:
        # Ur[:, perm] places old column perm[k] at position k, so the
        # permutation is indexed by the reference component
        perm[ref_ndx] = sample_ndx

        # flip the sign of the matched sample component (a column of Ur)
        # if its correlation with the reference component is negative
        if C[sample_ndx, ref_ndx] < 0.0:
            Ur[:, sample_ndx] = -Ur[:, sample_ndx]

    # reorder the sample components according to the best matching
    return Ur[:, perm]
def compute_lno_sample_components(x):
    """
    Estimate the components of a leave-n-out sample and match them to the
    reference components.

    x unpacks to (gf, Urd, i, j).  The sample is gf.data() with the entries
    i..j-1 of the first axis removed; Urd contains the reference components
    in its columns.

    Returns the sample components (columns), permuted and sign-corrected so
    that each column corresponds to the same-numbered column of Urd.
    """
    gf, Urd, i, j = x

    # build the sample by removing the block [i, j) along the first axis
    full = gf.data()
    sample = np.vstack([full[:i, ...], full[j:, ...]])

    U, _, _ = pca_components_gf(sample)
    Ur, _, _ = orthomax(U[:, :NUM_COMPONENTS])

    # closeness of components: rows index sample columns, columns index
    # reference columns
    C = np.dot(Ur.T, Urd)

    # optimal one-to-one assignment, as (sample index, reference index)
    m = Munkres()
    match = m.compute(1.0 - np.abs(C))

    perm = np.zeros((NUM_COMPONENTS,), dtype=int)
    for sample_col, ref_col in match:
        # position ref_col of the result must receive sample column
        # sample_col when indexing with Ur[:, perm]
        perm[ref_col] = sample_col

        # a negative correlation means the sample component has opposite
        # sign; components are columns, so negate the column
        if C[sample_col, ref_col] < 0.0:
            Ur[:, sample_col] *= -1

    # reorder the sample components according to the best matching
    Ur = Ur[:, perm]
    return Ur
# Replace the working field with the surrogate field and overwrite its data
# with a copy of the surrogate data.
sgf.construct_surrogate_with_noise()
gf = sgf
gf.d = gf.surr_data().copy()

# construct "components" from the structural matrix: column i is the
# indicator of the entries of Sr equal to i+1
Uopt = np.zeros((len(Sr), np.amax(Sr)))
for i in range(Uopt.shape[1]):
    Uopt[:,i] = np.where(Sr == (i+1), 1.0, 0.0)
    # remove the first element (it's the driver which is not included in the optimal component)
    Uopt[np.nonzero(Uopt[:,i])[0][0],i] = 0.0
    # scale the column to unit Euclidean norm
    Uopt[:,i] /= np.sum(Uopt[:,i]**2) ** 0.5

print("Analyzing data ...")

# compute the eigenvalues and eigenvectors of the (spatial) covariance matrix
Ud, sd, Vtd = pca_components_gf(gf.data())
Ud = Ud[:, :NUM_COMPONENTS]
if not ROTATE_NORMALIZED:
    # scale each retained column by the matching entry of sd (in place)
    Ud *= sd[np.newaxis, :NUM_COMPONENTS]

# estimate the components
Ur = COMPONENT_ESTIMATOR(gf.data())

print("Running bootstrap analysis [%d samples]" % NUM_BOOTSTRAPS)

# initialize maximal and minimal bootstraps with |Ur|, and the running
# mean/variance accumulators with zeros
max_comp = np.abs(Ur.copy())
min_comp = np.abs(Ur.copy())
mean_comp = np.zeros_like(Ur)
var_comp = np.zeros_like(Ur)
#gf.slice_spatial(None, [20, 87]) # northern hemisphere, extratropical gf.slice_spatial(None, [-88, 88]) #gf.slice_months([12, 1, 2]) #S = np.zeros(shape = (5, 10), dtype = np.int32) #S[1:4, 0:2] = 1 #S[0:3, 6:9] = 2 #v, Sr = constructVAR(S, [0.0, 0.191, 0.120], [-0.1, 0.1], [0.00, 0.00], [0.01, 0.01]) #ts = v.simulate(768) #gf = make_model_geofield(S, ts) # initialize a parallel pool pool = Pool(POOL_SIZE) # compute components for data Ud, sd, Vtd = pca_components_gf(gf.data()) Ud = Ud[:, :NUM_COMPONENTS] Ur, _, its = orthomax(Ud) print("Finished after %d iterations." % its) t_start = datetime.now() LNO_COUNT = len(gf.tm) // LNO_PAR #LNO_COUNT = 4 print("Running leave one out analysis [%d samples] at %s" % (LNO_COUNT, str(t_start))) # initialize maximal and minimal boostraps EXTREMA_MEMORY = math.ceil(DISCARD_RATE * LNO_COUNT) max_comp = np.tile(np.abs(Ur.copy()), (EXTREMA_MEMORY + BULK_STEP, 1, 1)) min_comp = np.tile(np.abs(Ur.copy()), (EXTREMA_MEMORY + BULK_STEP, 1, 1))
def estimate_components_tpca(d):
    """
    Compute spatial PCA components of d without any rotation.

    pca_components_gf is called with False as its second argument
    (NOTE(review): the meaning of this flag is not visible here; confirm
    against pca_components_gf).

    Returns the first NUM_COMPONENTS columns of the first PCA output.
    """
    components, _, _ = pca_components_gf(d, False)
    leading = components[:, :NUM_COMPONENTS]
    return leading
#sgf.copy_field(gf) #sgf.prepare_surrogates(pool) #mo = sgf.model_orders() #render_component_single(mo, gf.lats, gf.lons, plt_name = 'Model orders of AR surrogates', # fname='%s_ar_model_order%s.png' % (DATA_NAME, SUFFIX), # cbticks = np.arange(0,np.amax(mo)+1,2)) #pool.close() #del pool log("Analyzing data ...") d = gf.data() if COSINE_REWEIGHTING: d *= gf.qea_latitude_weights() # note: s2 is not S from USV, it is already squared and scaled to represent variance Ud, s2, Vt = pca_components_gf(d) s_orig = ((Vt.shape[1] - 1) * s2) ** 0.5 du = np.reshape(d, (768, d.shape[1]*d.shape[2])).transpose() dm = du - np.mean(du, axis=1)[:, np.newaxis] log("**DEBUG**: reconstruction check, diff from original SVD %g" % np.sum( (np.dot(np.dot(Ud, np.diag(s_orig)), Vt) - dm)**2)) Ud = Ud[:, :NUM_COMPONENTS] Vt = Vt[:NUM_COMPONENTS, :] s2n = s2[:NUM_COMPONENTS] s_orign = s_orig[:NUM_COMPONENTS] log("Total variance %g explained by selected components %g." % (np.sum(s2n), np.sum(s2n) / np.sum(s2))) # estimate the components and their variance Ur, Rot = COMPONENT_ESTIMATOR(d)
# Prepare surrogates from the field, generate one surrogate realization and
# continue the analysis on it in place of the original field.
print("Running preparation of surrogates ...")
sgf.copy_field(gf)
sgf.prepare_surrogates(pool)
sgf.construct_surrogate_with_noise()
sgf.d = sgf.sd # hack to replace original data with surrogate
print("Max AR order is %d ..." % sgf.max_ord)
gf = sgf
print("Replaced field with surrogate field.")
# the pool is not used past this point in this fragment
pool.close()
del pool

print("Analyzing data ...")
d = gf.data()
if COSINE_REWEIGHTING:
    # NOTE(review): in-place multiply; if gf.data() returns a reference to the
    # field's own array this also reweights the stored data -- confirm
    d *= gf.qea_latitude_weights()
Ud, sd, Vtd = pca_components_gf(d)
Ud = Ud[:, :NUM_COMPONENTS]
if not ROTATE_NORMALIZED:
    # scale each retained column by the matching entry of sd (in place)
    Ud *= sd[np.newaxis, :NUM_COMPONENTS]

# estimate the components
Ur = COMPONENT_ESTIMATOR(d)
print("DONE.")

# <codecell>

# share of sum(sd) carried by the first NUM_COMPONENTS entries, followed by
# the share of components retained
print(np.sum(sd[:NUM_COMPONENTS]) / np.sum(sd))
print(1.0 * NUM_COMPONENTS / len(sd))

# <codecell>
print("Estimate PCA components script version 1.0")

# structural matrix: a 20x50 integer grid with three rectangular regions
# labelled 1, 2 and 3 (0 elsewhere)
S = np.zeros(shape=(20, 50), dtype=np.int32)
S[10:18, 25:45] = 1
S[0:3, 6:12] = 2
S[8:15, 2:12] = 3
# build a VAR model from the structural matrix, simulate 200 samples and wrap
# the result in a geofield
v, Sr = constructVAR(S, [0.0, 0.6, 0.9, 0.7], [0.3, 0.5], [0.0, 0.0])
ts = v.simulate(200)
gf = make_model_geofield(S, ts)

# initialize a parallel pool
pool = Pool(POOL_SIZE)

# compute the eigenvalues/eigenvectors of the covariance matrix of the data
Ud, dlam, _ = pca_components_gf(gf.data())
# drdims[i] is dlam[i] relative to the Euclidean norm of dlam[i:]
drdims = np.zeros((NUM_EIGS, ))
for i in range(NUM_EIGS):
    drdims[i] = dlam[i] / np.sum(dlam[i:]**2)**0.5

# surrogate generator seeded from the data field
# NOTE(review): presumably [0, 30] is the AR order range and 'sbc' the order
# selection criterion -- confirm against SurrGeoFieldAR
sd = SurrGeoFieldAR([0, 30], 'sbc')
sd.copy_field(gf)
sd.prepare_surrogates(pool)
# one row per surrogate, one column per eigenvalue
srdims = np.zeros((NUM_SURR, NUM_EIGS))

# generate and compute eigenvalues for the surrogates (NUM_SURR rows are
# allocated above)
t1 = datetime.now()

# construct the surrogates in parallel
# we can duplicate the list here without worry as it will be copied into new python processes
# thus creating separate copies of sd
print("Estimate PCA components script version 1.0")

# structural matrix: a 20x50 integer grid with three rectangular regions
# labelled 1, 2 and 3 (0 elsewhere)
S = np.zeros(shape = (20, 50), dtype = np.int32)
S[10:18, 25:45] = 1
S[0:3, 6:12] = 2
S[8:15, 2:12] = 3
# build a VAR model from the structural matrix, simulate 200 samples and wrap
# the result in a geofield
v, Sr = constructVAR(S, [0.0, 0.6, 0.9, 0.7], [0.3, 0.5], [0.0, 0.0])
ts = v.simulate(200)
gf = make_model_geofield(S, ts)

# initialize a parallel pool
pool = Pool(POOL_SIZE)

# compute the eigenvalues/eigenvectors of the covariance matrix of the data
Ud, dlam, _ = pca_components_gf(gf.data())
# drdims[i] is dlam[i] relative to the Euclidean norm of dlam[i:]
drdims = np.zeros((NUM_EIGS,))
for i in range(NUM_EIGS):
    drdims[i] = dlam[i] / np.sum(dlam[i:]**2)**0.5

# surrogate generator seeded from the data field
# NOTE(review): presumably [0, 30] is the AR order range and 'sbc' the order
# selection criterion -- confirm against SurrGeoFieldAR
sd = SurrGeoFieldAR([0, 30], 'sbc')
sd.copy_field(gf)
sd.prepare_surrogates(pool)
# one row per surrogate, one column per eigenvalue
srdims = np.zeros((NUM_SURR, NUM_EIGS))

# generate and compute eigenvalues for the surrogates (NUM_SURR rows are
# allocated above)
t1 = datetime.now()

# construct the surrogates in parallel
# we can duplicate the list here without worry as it will be copied into new python processes
# thus creating separate copies of sd
# plt.subplot(1,2,2) # plt.imshow(S, interpolation = 'nearest') # plt.colorbar() # with open('data/test_gf.bin', 'r') as f: # d = cPickle.load(f) # initialize a parallel pool pool = Pool(POOL_SIZE) # compute the eigenvalues/eigenvectos of the covariance matrix of d = gf.data() if COSINE_REWEIGHTING: d = d * gf.qea_latitude_weights() Ud, dlam, _ = pca_components_gf(d) Ud = Ud[:, :NUM_EIGS] dlam = dlam[:NUM_EIGS] sd = SurrGeoFieldAR([0, 30], 'sbc') sd.copy_field(gf) sd.prepare_surrogates(pool) slam = np.zeros((NUM_SURR, NUM_EIGS)) maxU = np.zeros((NUM_SURR, NUM_EIGS)) # generate and compute eigenvalues for 20000 surrogates t1 = datetime.now() # construct the surrogates in parallel # we can duplicate the list here without worry as it will be copied into new python processes # thus creating separate copies of sd
# Prepare surrogates from the field, generate one surrogate realization and
# continue the analysis on it in place of the original field.
print("Running preparation of surrogates ...")
sgf.copy_field(gf)
sgf.prepare_surrogates(pool)
sgf.construct_surrogate_with_noise()
sgf.d = sgf.sd # hack to replace original data with surrogate
print("Max AR order is %d ..." % sgf.max_ord)
gf = sgf
print("Replaced field with surrogate field.")
# the pool is not used past this point in this fragment
pool.close()
del pool

print("Analyzing data ...")
d = gf.data()
if COSINE_REWEIGHTING:
    # NOTE(review): in-place multiply; if gf.data() returns a reference to the
    # field's own array this also reweights the stored data -- confirm
    d *= gf.qea_latitude_weights()
Ud, sd, Vtd = pca_components_gf(d)
Ud = Ud[:, :NUM_COMPONENTS]
if not ROTATE_NORMALIZED:
    # scale each retained column by the matching entry of sd (in place)
    Ud *= sd[np.newaxis, :NUM_COMPONENTS]

# estimate the components
Ur = COMPONENT_ESTIMATOR(d)
print("DONE.")

# <codecell>

# share of sum(sd) carried by the first NUM_COMPONENTS entries, followed by
# the share of components retained
print(np.sum(sd[:NUM_COMPONENTS]) / np.sum(sd))
print(1.0*NUM_COMPONENTS/len(sd))

# <codecell>