import math
import sys
from datetime import datetime
from multiprocessing import Pool

import numpy as np
from numpy.linalg import LinAlgError
from munkres import Munkres
from mdp.nodes import FastICANode  # assumed source of FastICANode (MDP toolkit)
# project-local helpers (pca_components_gf, orthomax, extract_sparse_components,
# constructVAR, make_model_geofield, SurrGeoFieldAR, log, ...) and the module
# constants (NUM_COMPONENTS, GAMMA, ...) are assumed imported from the repo's own modules

def estimate_components_meng(d):
    """
    Compute components using the method of Meng.
    """
    U, _, _ = pca_components_gf(d)
    C = extract_sparse_components(U, SPCA_SPARSITY, NUM_COMPONENTS, U)
    return C
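# The analysis steps below call whichever estimator is bound to the module-level
# COMPONENT_ESTIMATOR callable; a minimal wiring sketch (the binding itself is an
# assumption, the actual choice lives elsewhere in the repo):
# COMPONENT_ESTIMATOR = estimate_components_meng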
def estimate_components_orthomax(d):
    """
    Compute the PCA/FA components based on the input data d
    as returned by GeoField bootstrap constructor.
    """
    U, s, _ = pca_components_gf(d)
    U = U[:, :NUM_COMPONENTS]
    if not ROTATE_NORMALIZED:
        U *= s[np.newaxis, :NUM_COMPONENTS]
    Ur, _, _ = orthomax(U, gamma=GAMMA, norm_rows=True)
    Ur /= np.sum(Ur**2, axis=0) ** 0.5
    return Ur
def estimate_components_ica(d):
    """
    Compute the ICA based on the input data d.
    """
    U, s, Vt = pca_components_gf(d, True)
    U = U[:, :NUM_COMPONENTS]
    V = np.transpose(Vt)
    V = V[:, :NUM_COMPONENTS]
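    # note (assumption): V holds the right singular vectors, which are orthonormal
    # and hence already white, so whitened=True below skips MDP's internal whitening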
    f = FastICANode(whitened=True, max_it=10000, g='tanh', fine_g='tanh', max_it_fine=1000)
    f.execute(V)
    P = f.get_projmatrix()
    Ur = np.dot(U, P)
    Ur /= np.sum(Ur**2, axis=0) ** 0.5
    return Ur
def compute_surrogate_cov_eigvals(x):
    sd, U = x
#    sd.construct_surrogate_with_noise()
    sd.construct_white_noise_surrogates()
#    sd.construct_fourier1_surrogates()
    
    d = sd.surr_data()
    if COSINE_REWEIGHTING:
        d = d * sd.qea_latitude_weights()
    
    Ur, sr, _ = pca_components_gf(d)
    
#    perm, sf = match_components_munkres(U, Ur)
#    Ur = Ur[:, perm[:NUM_EIGS]]
#    Ur *= sf
    
#    return sr[perm[:NUM_EIGS]]
    return sr[:NUM_EIGS], np.amax(np.abs(Ur[:, :NUM_EIGS]), axis=0)
def estimate_components_orthomax(d):
    """
    Compute the PCA/FA components based on the input data d
    as returned by GeoField bootstrap constructor.  Returns the rotated
    components together with the rotation matrix.
    """
    try:
        U, s, _ = pca_components_gf(d)
        U = U[:, :NUM_COMPONENTS]
        Ur, T, iters = orthomax(U,
                                rtol=np.finfo(np.float32).eps ** 0.5,
                                gamma=GAMMA,
                                maxiter=500)
        if iters >= 499:
            return None
        else:
            return Ur, T
    except LinAlgError as e:
        # LinAlgError has no errno/strerror attributes; report the message itself
        print("**LINALG ERROR**: %s" % str(e))
    except:
        print("**UNEXPECTED ERROR** %s" % sys.exc_info()[0])
def estimate_components_orthomax(d):
    """
    Compute the PCA/FA components based on the input data d
    as returned by GeoField bootstrap constructor.
    """
    U, s, _ = pca_components_gf(d)
    U = U[:, :NUM_COMPONENTS]
    if not ROTATE_NORMALIZED:
        U *= s[np.newaxis, :NUM_COMPONENTS]
    Ur, _, iters = orthomax(U,
                            rtol=np.finfo(np.float32).eps**0.5,
                            gamma=GAMMA,
                            maxiter=500,
                            norm_rows=ROTATE_NORM_ROWS)
    Ur /= np.sum(Ur**2, axis=0)**0.5
    if iters >= 499:
        print('Warning: max iters reached.')
        return None
    else:
        return Ur
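# For reference, a minimal varimax/orthomax rotation sketch. This is an assumption
# about what the project-local orthomax() computes (gamma=1.0 is varimax), not the
# repository's actual implementation; the name and defaults here are illustrative.
def orthomax_sketch(U, gamma=1.0, maxiter=500, rtol=1e-6):
    p, k = U.shape
    R = np.eye(k)
    obj_old = 0.0
    iters = 0
    for iters in range(maxiter):
        L = np.dot(U, R)
        # gradient of the orthomax criterion with respect to the rotation
        G = np.dot(U.T, L**3 - (gamma / p) * np.dot(L, np.diag(np.sum(L**2, axis=0))))
        W, s, Vt = np.linalg.svd(G)
        R = np.dot(W, Vt)
        obj = np.sum(s)
        if obj < obj_old * (1.0 + rtol):
            break
        obj_old = obj
    return np.dot(U, R), R, iters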
def compute_lno_sample_components(x):
    gf, Urd, i, j = x
    b = gf.data()
    b = np.vstack([b[:i, ...], b[j:, ...]])
    U, _, _ = pca_components_gf(b)
    Ur, _, _ = orthomax(U[:, :NUM_COMPONENTS])

    # compute closeness of components
    C = np.dot(Ur.T, Urd)

    # find optimal matching of components
    m = Munkres()
    match = m.compute(1.0 - np.abs(C))
    perm = np.zeros((NUM_COMPONENTS,), dtype=int)
    for k in range(len(match)):
        m_k = match[k]
        perm[m_k[0]] = m_k[1]
        # flip the sign of the matched bootstrap component (a column of Ur)
        # if its correlation with the data component was negative
        if C[m_k[0], m_k[1]] < 0.0:
            Ur[:, m_k[1]] = -Ur[:, m_k[1]]

    # reorder the bootstrap components according to the best matching
    Ur = Ur[:, perm]

    return Ur
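# A usage sketch for the leave-N-out jobs (assumption: the hypothetical list
# `jobs` pairs the field and the data components with consecutive left-out windows):
# jobs = [(gf, Ur, k * LNO_PAR, (k + 1) * LNO_PAR) for k in range(LNO_COUNT)]
# lno_components = pool.map(compute_lno_sample_components, jobs)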
    sgf.construct_surrogate_with_noise()
    gf = sgf
    gf.d = gf.surr_data().copy()
    
    # construct "components" from the structural matrix
    Uopt = np.zeros((len(Sr), np.amax(Sr)))
    for i in range(Uopt.shape[1]):
        Uopt[:, i] = np.where(Sr == (i + 1), 1.0, 0.0)
        # remove the first element (it's the driver, which is not included in the optimal component)
        Uopt[np.nonzero(Uopt[:, i])[0][0], i] = 0.0
        Uopt[:, i] /= np.sum(Uopt[:, i]**2) ** 0.5

    print("Analyzing data ...")
    
    # compute the eigenvalues and eigenvectors of the (spatial) covariance matrix 
    Ud, sd, Vtd = pca_components_gf(gf.data())
    Ud = Ud[:, :NUM_COMPONENTS]
    if not ROTATE_NORMALIZED:
        Ud *= sd[np.newaxis, :NUM_COMPONENTS]
        
    # estimate the components
    Ur = COMPONENT_ESTIMATOR(gf.data())
    
    print("Running bootstrap analysis [%d samples]" % NUM_BOOTSTRAPS)

    # initialize maximal and minimal bootstraps
    max_comp = np.abs(Ur.copy())
    min_comp = np.abs(Ur.copy())
    mean_comp = np.zeros_like(Ur)
    var_comp = np.zeros_like(Ur)
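    # a minimal sketch of the bootstrap accumulation loop (assumptions: the
    # surrogate field sgf is regenerated per sample, COMPONENT_ESTIMATOR returns
    # plain components here, and the Munkres matching/sign-flipping step against
    # the data components is omitted for brevity)
    for _ in range(NUM_BOOTSTRAPS):
        sgf.construct_surrogate_with_noise()
        Urb = np.abs(COMPONENT_ESTIMATOR(sgf.surr_data()))
        max_comp = np.maximum(max_comp, Urb)
        min_comp = np.minimum(min_comp, Urb)
        mean_comp += Urb
        var_comp += Urb**2
    mean_comp /= NUM_BOOTSTRAPS
    var_comp = var_comp / NUM_BOOTSTRAPS - mean_comp**2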
    
#gf.slice_spatial(None, [20, 87])                           # northern hemisphere, extratropical
gf.slice_spatial(None, [-88, 88])
#gf.slice_months([12, 1, 2])

#S = np.zeros(shape = (5, 10), dtype = np.int32)
#S[1:4, 0:2] = 1
#S[0:3, 6:9] = 2
#v, Sr = constructVAR(S, [0.0, 0.191, 0.120], [-0.1, 0.1], [0.00, 0.00], [0.01, 0.01])
#ts = v.simulate(768)
#gf = make_model_geofield(S, ts)

# initialize a parallel pool
pool = Pool(POOL_SIZE)

# compute components for data
Ud, sd, Vtd = pca_components_gf(gf.data())
Ud = Ud[:, :NUM_COMPONENTS]
Ur, _, its = orthomax(Ud)
print("Finished after %d iterations." % its)

t_start = datetime.now()

LNO_COUNT = len(gf.tm) // LNO_PAR
#LNO_COUNT = 4
print("Running leave one out analysis [%d samples] at %s" %
      (LNO_COUNT, str(t_start)))

# initialize maximal and minimal bootstraps
EXTREMA_MEMORY = int(math.ceil(DISCARD_RATE * LNO_COUNT))
max_comp = np.tile(np.abs(Ur.copy()), (EXTREMA_MEMORY + BULK_STEP, 1, 1))
min_comp = np.tile(np.abs(Ur.copy()), (EXTREMA_MEMORY + BULK_STEP, 1, 1))
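# a minimal sketch of the bulk extrema update (assumptions: the hypothetical
# `jobs` list is as in the usage sketch above; each batch overwrites the least
# extreme slots, and re-sorting keeps the EXTREMA_MEMORY most extreme values)
# for pos in range(0, LNO_COUNT, BULK_STEP):
#     batch = np.abs(np.array(pool.map(compute_lno_sample_components, jobs[pos:pos + BULK_STEP])))
#     max_comp[:batch.shape[0], ...] = batch        # overwrite the smallest slots
#     max_comp = np.sort(max_comp, axis=0)          # ascending: maxima drift to the end
#     min_comp[-batch.shape[0]:, ...] = batch       # overwrite the largest slots
#     min_comp = np.sort(min_comp, axis=0)          # ascending: minima drift to the start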
def estimate_components_tpca(d):
    """
    Compute spatial PCA components.
    """
    U, _, _ = pca_components_gf(d, False)
    return U[:, :NUM_COMPONENTS]
#sgf.copy_field(gf)
#sgf.prepare_surrogates(pool)
#mo = sgf.model_orders()
#render_component_single(mo, gf.lats, gf.lons, plt_name = 'Model orders of AR surrogates',
#                        fname='%s_ar_model_order%s.png' % (DATA_NAME, SUFFIX),
#                        cbticks = np.arange(0,np.amax(mo)+1,2))
#pool.close()
#del pool

log("Analyzing data ...")
d = gf.data()
if COSINE_REWEIGHTING:
    d *= gf.qea_latitude_weights()

# note: s2 is not S from USV, it is already squared and scaled to represent variance
Ud, s2, Vt = pca_components_gf(d)
s_orig = ((Vt.shape[1] - 1) * s2) ** 0.5
du = np.reshape(d, (d.shape[0], d.shape[1] * d.shape[2])).transpose()
dm = du - np.mean(du, axis=1)[:, np.newaxis]
log("**DEBUG**: reconstruction check, diff from original SVD %g"
    % np.sum((np.dot(np.dot(Ud, np.diag(s_orig)), Vt) - dm)**2))


Ud = Ud[:, :NUM_COMPONENTS]
Vt = Vt[:NUM_COMPONENTS, :]
s2n = s2[:NUM_COMPONENTS]
s_orign = s_orig[:NUM_COMPONENTS]
log("Total variance %g explained by selected components %g." % (np.sum(s2n), np.sum(s2n) / np.sum(s2)))

# estimate the components and their variance
Ur, Rot = COMPONENT_ESTIMATOR(d)
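# a minimal sketch of the variance carried by each rotated component (assumption:
# for an orthogonal rotation Rot, the component variances are diag(Rot^T diag(s2n) Rot))
comp_var = np.sum(Rot * (s2n[:, np.newaxis] * Rot), axis=0)
log("Variance of rotated components: %s" % str(comp_var))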
    print("Running preparation of surrogates ...")
    sgf.copy_field(gf)
    sgf.prepare_surrogates(pool)
    sgf.construct_surrogate_with_noise()
    sgf.d = sgf.sd  # hack to replace original data with surrogate
    print("Max AR order is %d ..." % sgf.max_ord)
    gf = sgf
    print("Replaced field with surrogate field.")
    pool.close()
    del pool

print("Analyzing data ...")
d = gf.data()
if COSINE_REWEIGHTING:
    d *= gf.qea_latitude_weights()
Ud, sd, Vtd = pca_components_gf(d)
Ud = Ud[:, :NUM_COMPONENTS]
if not ROTATE_NORMALIZED:
    Ud *= sd[np.newaxis, :NUM_COMPONENTS]

# estimate the components
Ur = COMPONENT_ESTIMATOR(d)
print("DONE.")

# <codecell>

print(np.sum(sd[:NUM_COMPONENTS]) / np.sum(sd))
print(1.0 * NUM_COMPONENTS / len(sd))

# <codecell>
    print("Estimate PCA components script version 1.0")

    S = np.zeros(shape=(20, 50), dtype=np.int32)
    S[10:18, 25:45] = 1
    S[0:3, 6:12] = 2
    S[8:15, 2:12] = 3
    v, Sr = constructVAR(S, [0.0, 0.6, 0.9, 0.7], [0.3, 0.5], [0.0, 0.0])
    ts = v.simulate(200)
    gf = make_model_geofield(S, ts)

    # initialize a parallel pool
    pool = Pool(POOL_SIZE)

    # compute the eigenvalues/eigenvectors of the covariance matrix of the data
    Ud, dlam, _ = pca_components_gf(gf.data())
    drdims = np.zeros((NUM_EIGS, ))
    for i in range(NUM_EIGS):
        drdims[i] = dlam[i] / np.sum(dlam[i:]**2)**0.5

    sd = SurrGeoFieldAR([0, 30], 'sbc')
    sd.copy_field(gf)
    sd.prepare_surrogates(pool)
    srdims = np.zeros((NUM_SURR, NUM_EIGS))

    # generate and compute eigenvalues for 20000 surrogates
    t1 = datetime.now()

    # construct the surrogates in parallel
    # we can duplicate the list here without worry as it will be copied into new python processes
    # thus creating separate copies of sd
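    # a minimal sketch of the parallel loop (assumption: the hypothetical helper
    # compute_surrogate_rdims regenerates one surrogate and returns its
    # dlam[i] / ||dlam[i:]|| profile, mirroring the drdims computation above)
    # srdims[:, :] = np.vstack(pool.map(compute_surrogate_rdims, [sd] * NUM_SURR))
    # print("Surrogate computation finished in %s." % str(datetime.now() - t1))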
    print("Estimate PCA components script version 1.0")
    
    S = np.zeros(shape = (20, 50), dtype = np.int32)
    S[10:18, 25:45] = 1
    S[0:3, 6:12] = 2
    S[8:15, 2:12] = 3
    v, Sr = constructVAR(S, [0.0, 0.6, 0.9, 0.7], [0.3, 0.5], [0.0, 0.0])
    ts = v.simulate(200)
    gf = make_model_geofield(S, ts)
    
    # initialize a parallel pool
    pool = Pool(POOL_SIZE)
    
    # compute the eigenvalues/eigenvectos of the covariance matrix of
    Ud, dlam, _ = pca_components_gf(gf.data())
    drdims = np.zeros((NUM_EIGS,))
    for i in range(NUM_EIGS):
        drdims[i] = dlam[i] / np.sum(dlam[i:]**2)**0.5
    
    sd = SurrGeoFieldAR([0, 30], 'sbc')
    sd.copy_field(gf)
    sd.prepare_surrogates(pool)
    srdims = np.zeros((NUM_SURR, NUM_EIGS))
    
    # generate and compute eigenvalues for 20000 surrogates
    t1 = datetime.now()
    
    # construct the surrogates in parallel
    # we can duplicate the list here without worry as it will be copied into new python processes
    # thus creating separate copies of sd
#    plt.subplot(1,2,2)
#    plt.imshow(S, interpolation = 'nearest')
#    plt.colorbar()

#    with open('data/test_gf.bin', 'r') as f:
#        d = cPickle.load(f)
    
    # initialize a parallel pool
    pool = Pool(POOL_SIZE)
    
    # compute the eigenvalues/eigenvectors of the covariance matrix of the data
    d = gf.data()
    if COSINE_REWEIGHTING:
        d = d * gf.qea_latitude_weights()
        
    Ud, dlam, _ = pca_components_gf(d)
    Ud = Ud[:, :NUM_EIGS]
    dlam = dlam[:NUM_EIGS]
    
    sd = SurrGeoFieldAR([0, 30], 'sbc')
    sd.copy_field(gf)
    sd.prepare_surrogates(pool)
    slam = np.zeros((NUM_SURR, NUM_EIGS))
    maxU = np.zeros((NUM_SURR, NUM_EIGS))
    
    # generate and compute eigenvalues for 20000 surrogates
    t1 = datetime.now()
    
    # construct the surrogates in parallel
    # we can duplicate the list here without worry as it will be copied into new python processes
    # thus creating separate copies of sd
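    # a minimal sketch of the parallel loop, reusing compute_surrogate_cov_eigvals
    # from above (assumption: each worker receives its own copy of (sd, Ud))
    results = pool.map(compute_surrogate_cov_eigvals, [(sd, Ud)] * NUM_SURR)
    for k, (sr, mu) in enumerate(results):
        slam[k, :] = sr
        maxU[k, :] = mu
    print("Surrogate computation finished in %s." % str(datetime.now() - t1))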