# Scale the data in place by the weights returned from
# gf.qea_latitude_weights(), then keep the first NUM_EIGVALS entries of
# whatever pca_eigvals_gf returns for the weighted data.
d *= gf.qea_latitude_weights()
dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]

# %s formatting applies str() to the timestamp, so no explicit str() call
# is needed to get the same text.
print("[%s] Data analysis DONE." % datetime.now())

# <markdowncell>

# **Show the variance of the data (filtered)**

# <markdowncell>

# **Show a plot of the model orders**

# <codecell>

# Render the model orders reported by sgf.model_orders(), passing the
# latitude/longitude attributes of gf along to the plotting helper.
# NOTE(review): the helper's return value is bound to the name `plt`; if
# `plt` is also used as the matplotlib.pyplot alias in this file, it is
# rebound here - confirm that this is intended.
mo = sgf.model_orders()
plt = render_component_single(
    mo,
    gf.lats,
    gf.lons,
    plt_name='Model orders of AR surrogates')

# <codecell>

# Create the worker pool first, then open a log file whose name carries the
# current date and time (minute resolution).
pool = Pool(POOL_SIZE)
log = open(
    'geodata_estimate_component_count-%s.log'
    % datetime.now().strftime('%Y%m%d-%H%M'),
    'w')

# storage for three types of surrogates: one zero-filled
# (NUM_SURR, NUM_EIGVALS) array each, allocated separately so that the
# three arrays never share memory
slam_ar, slam_w1, slam_f = (
    np.zeros((NUM_SURR, NUM_EIGVALS)) for _ in range(3))

# counter starts at zero (presumably the number of finished surrogates -
# confirm against the code that updates it)
surr_completed = 0

# construct the job queue
    # The same (sd, Ud) argument pair is submitted NUM_SURR times. Repeating
    # the reference in the list is fine because the arguments are copied
    # into the worker processes, so each job works on its own copy of sd.
    print("Running parallel generation of surrogates and SVD")
    slam_list = pool.map(compute_surrogate_cov_eigvals, [(sd, Ud)] * NUM_SURR)

    # Each result is a pair; store its two parts in row i of the
    # preallocated slam and maxU arrays.
    for i, (surr_eigvals, surr_max_u) in enumerate(slam_list):
        slam[i, :] = surr_eigvals
        maxU[i, :] = surr_max_u

    # In-place sort along axis 0, i.e. every column is ordered independently.
    maxU.sort(axis = 0)

    print("Saving computed spectra ...")

    # Save the results to file. A pickle stream is binary data, so the file
    # is opened in binary mode ('wb'); text mode can corrupt the stream on
    # platforms that translate newlines and fails outright on Python 3.
    with open('data/slp_eigvals_surrogates.bin', 'wb') as f:
        cPickle.dump([dlam, slam, sd.model_orders(), sd.lons, sd.lats], f)

    # 1-based x axis shared by every curve below.
    # NOTE(review): the plots are sized by NUM_EIGS while other parts of this
    # file use NUM_EIGVALS - confirm that both names are defined and agree.
    eig_index = np.arange(NUM_EIGS) + 1

    # Figure 1: data eigenvalues against the surrogate mean with error bars
    # of three standard deviations.
    plt.figure()
    plt.plot(eig_index, dlam, 'ro-')
    plt.errorbar(eig_index, np.mean(slam, axis = 0), np.std(slam, axis = 0) * 3, fmt = 'g-')

    # Figure 2: column-wise mean (with 3-std error bars), maximum and minimum
    # of maxU, one selected row of the sorted maxU, and the column-wise
    # maximum of |Ud|.
    plt.figure()
    plt.errorbar(eig_index, np.mean(maxU, axis = 0), np.std(maxU, axis = 0) * 3, fmt = 'g-')
    plt.plot(eig_index, np.amax(maxU, axis = 0), 'r-')
    plt.plot(eig_index, np.amin(maxU, axis = 0), 'r-')
    # NOTE(review): row 94 is hard-coded; after the sort above it presumably
    # marks a percentile for one particular NUM_SURR - confirm.
    plt.plot(eig_index, maxU[94, :], 'bo-', linewidth = 2)
    plt.plot(eig_index, np.amax(np.abs(Ud), axis = 0), 'kx-', linewidth = 2)

    plt.show()
    print("DONE.")
    # NOTE(review): this statement is indented although no enclosing block
    # header is visible at this point; elsewhere in this file the same
    # statement sits under `if COSINE_REWEIGHTING:` - confirm the intended
    # guard. It scales d in place by the weights from gf.qea_latitude_weights().
    d *= gf.qea_latitude_weights()
# Keep the first NUM_EIGVALS entries of what pca_eigvals_gf returns for d.
dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]

# Timestamped progress message.
print("[%s] Data analysis DONE." % (str(datetime.now())))

# <markdowncell>

# **Show the variance of the data (filtered)**

# <markdowncell>

# **Show a plot of the model orders**

# <codecell>

# Render the model orders reported by sgf.model_orders(), handing the
# latitude/longitude attributes of gf to the plotting helper.
# NOTE(review): the result is bound to `plt`; if that name is also the
# matplotlib.pyplot alias in this file, it is rebound here - confirm.
mo = sgf.model_orders()
plt = render_component_single(
    mo, gf.lats, gf.lons, plt_name='Model orders of AR surrogates')

# <codecell>

# Create the worker pool first, then open a log file whose name carries the
# current date and time (minute resolution).
pool = Pool(POOL_SIZE)
log = open('geodata_estimate_component_count-%s.log'
           % datetime.now().strftime('%Y%m%d-%H%M'), 'w')

# storage for the surrogate types: separately allocated, zero-filled
# (NUM_SURR, NUM_EIGVALS) arrays (two of them are set up in this span)
slam_ar, slam_w1 = (np.zeros((NUM_SURR, NUM_EIGVALS)) for _ in range(2))
        # Optional check mode: overwrite the data attribute of both gf and
        # sgf, each with its own sgf.sd.copy() result, and record a warning.
        if USE_SURROGATE_MODEL:
            # HACK to replace original data with surrogates
            gf.d = sgf.sd.copy()
            sgf.d = sgf.sd.copy()
            log("** WARNING ** Replaced synth model with surrogate model to check false positives.")

        # analyze data & obtain eigvals and surrogates
        log("Computing eigenvalues of dataset ...")
        d = gf.data()
        if COSINE_REWEIGHTING:
            # in-place scaling by the weights from gf.qea_latitude_weights()
            d *= gf.qea_latitude_weights()
        # keep the first NUM_EIGVALS entries of what pca_eigvals_gf returns
        dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]
            
        log("Rendering orders of fitted AR models.")
        mo = sgf.model_orders()
        # fname is built from DATA_NAME and SUFFIX; presumably the helper
        # writes the figure to that file - confirm in render_component_single
        render_component_single(mo, gf.lats, gf.lons, plt_name = 'Model orders of AR surrogates',
                                fname='%s_ar_model_order%s.png' % (DATA_NAME, SUFFIX))

        # construct the job queue: NUM_SURR entries of 1 (one per surrogate)
        # followed by one None per worker (presumably a stop marker consumed
        # by each worker - confirm in compute_surrogate_cov_eigvals)
        jobq = Queue()
        resq = Queue()
        for i in range(NUM_SURR):
            jobq.put(1)
        for i in range(WORKER_COUNT):
            jobq.put(None)

        log("Starting workers")
        # Build WORKER_COUNT Process objects, each targeting
        # compute_surrogate_cov_eigvals with sgf and both queues as arguments.
        # This statement only constructs them; no start() call appears here.
        workers = [Process(target = compute_surrogate_cov_eigvals,
                           args = (sgf,jobq,resq)) for i in range(WORKER_COUNT)]