예제 #1
0
def aggregate_parameter_sets(pixels_dom, all_cbr_files, parnames, ens_size,
                             n_chains_agg, conv_chains_pkl):
    """Pool parameter sets across dominant representative pixels and subsample.

    For every pixel in ``pixels_dom`` that appears in the ``'pixel'`` column of
    ``conv_chains_pkl``, the cbr file of each of its best chains is read,
    passed through ``autil.modulus_Bday_Fday`` and stacked. Pixels that are not
    listed (or that have no best chains) contribute a NaN block of shape
    ``(ens_size * n_chains_agg, len(parnames))`` instead.

    Args:
        pixels_dom: iterable of pixel identifiers (strings, concatenated into
            file names).
        all_cbr_files: list of cbr file paths to search; the first path
            containing ``<pixel>_<chain>.cbr`` is used.
        parnames: list of parameter names; its length is the parameter count.
        ens_size: number of rows requested per chain.
        n_chains_agg: number of chains the output should represent.
        conv_chains_pkl: DataFrame with ``'pixel'`` and ``'bestchains'``
            columns; the first element of each ``bestchains`` entry is skipped.

    Returns:
        Array of shape ``(ens_size * n_chains_agg, len(parnames))`` holding
        randomly selected rows of the pooled parameter sets.

    Raises:
        IndexError: if no file in ``all_cbr_files`` matches a best chain.
    """
    n_samples = ens_size * n_chains_agg
    n_pars = len(parnames)
    listed_pixels = conv_chains_pkl['pixel'].values

    par_set_agg = []
    for pixel in pixels_dom:
        par_set = []

        if pixel in listed_pixels:
            print(pixel)

            # get pixel's convergent chain numbers
            best_chains = conv_chains_pkl.loc[
                conv_chains_pkl['pixel'] == pixel]['bestchains'].values[0][1:]
            print(best_chains)

            # aggregate bestchains from optimal posteriors
            for chain in best_chains:
                suffix = pixel + '_' + chain + '.cbr'
                file = [i for i in all_cbr_files if suffix in i][0]
                par_set.append(
                    autil.modulus_Bday_Fday(
                        rwb.read_cbr_file(file, {'nopars': n_pars}),
                        parnames))

        if len(par_set) == 0:
            # Unlisted pixel, or a listed pixel without any best chain:
            # contribute a NaN block rather than letting np.vstack([]) raise.
            par_set_agg.append(np.full((n_samples, n_pars), np.nan))
        else:
            par_set_agg.append(np.vstack(par_set))

    par_set_agg = np.vstack(par_set_agg)
    print(par_set_agg.shape)

    # Sampling without replacement needs at least n_samples rows; fall back to
    # sampling with replacement only when the pool is too small, so the output
    # shape stays the same instead of raising ValueError.
    n_rows = par_set_agg.shape[0]
    random_rows = np.random.choice(n_rows,
                                   n_samples,
                                   replace=n_rows < n_samples)
    best_cbrs_sampled = par_set_agg[random_rows, :]
    print(best_cbrs_sampled.shape)
    print(np.nanmedian(best_cbrs_sampled, axis=0))
    return best_cbrs_sampled
예제 #2
0
def main():
    """Find representative pixels per land-cover class and queue PFT-based runs.

    Command-line arguments (read from ``sys.argv``):
        1. model_id
        2. mcmc_id ('119' for normal MCMC, '3' for DEMCMC)
        3. n_iter
        4. nbe_optimization ('OFF' or 'ON'); selects the CARDAMOM code directory
        5. assim_type, a suffix appended to the cbf/cbr/output directory names

    The work is split into stages controlled by the boolean switches set below:
    computing land-cover fractions and representative pixels, aggregating
    parameter sets per class, writing the command list for the initial
    condition (IC) optimization, and writing the command list for forward runs.

    NOTE: the later stages use ``gl_lbls``, ``pixels``, ``types_present`` and
    ``rep_df``, which are only defined when ``find_rep_pixels`` is True.
    """

    # set run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    nbe_optimization = sys.argv[4]  # 'OFF' or 'ON'
    ens_size = 250
    assim_type = sys.argv[5]
    n_chains_agg = 4

    # set directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)
    n_pars = len(parnames)

    # decide which tasks to perform
    find_rep_pixels = True
    agg_parameters = True
    submit_ic_opt = True
    submit_forward = False

    ############################## Identify and save representative pixels #################################################

    n_reps = 5
    if find_rep_pixels:
        # load globcover data
        gl = read_pickle(misc_dir + 'globcover_to_card.pkl')

        # load labels
        gl_lbls = list(
            read_csv(misc_dir + 'Globcover2009_Legend.csv')['Value'].values)
        n_classes = len(gl_lbls)
        print(gl_lbls)

        # load list of land pixels (unique, sorted)
        pixels = sorted(
            {file[-8:-4] for file in glob.glob(cbf_dir + '*.cbf')})

        # get list of average pft fractions by pixel
        av_fracs = np.ones((len(pixels), n_classes)) * np.nan
        types_present = []

        # open csv for save out; the context manager closes it even on error
        with open(misc_dir + 'globcover_fracs.csv', 'w') as frac_file:
            writer = csv.writer(frac_file)
            writer.writerow(['pixel'] + gl_lbls)

            for ind, pixel in enumerate(pixels):
                if np.mod(ind, 100) == 0:
                    print(ind)

                # get lc information
                gl_row = gl.loc[gl['pixel'] == pixel]
                types_at_geos_pixel = gl_row['types'].values[0][0]
                counts_at_geos_pixel = gl_row['counts'].values[0][0]

                types_at_geos_pixel, counts_at_geos_pixel = remove_nodata_pixels(
                    types_at_geos_pixel, counts_at_geos_pixel)
                types_at_geos_pixel, counts_at_geos_pixel = append_all_types(
                    types_at_geos_pixel, counts_at_geos_pixel, gl_lbls)
                # classes 170 and 180 are folded into class 160
                types_at_geos_pixel, counts_at_geos_pixel = merge_types(
                    types_at_geos_pixel, counts_at_geos_pixel, 170, 160)
                types_at_geos_pixel, counts_at_geos_pixel = merge_types(
                    types_at_geos_pixel, counts_at_geos_pixel, 180, 160)
                types_present.append(
                    types_at_geos_pixel[counts_at_geos_pixel > 0])

                total_count = np.sum(counts_at_geos_pixel)
                if total_count > 0:
                    # average class fraction within the coarse pixel
                    av_fracs[ind, :] = counts_at_geos_pixel / total_count

                    writer.writerow([pixel] + list(av_fracs[ind, :]))

                    #plot_pie(av_fracs[ind], pixel, gl_lbls, autil.rowcol_to_latlon([pixel]), plot_dir+'pie/', 'gl')

        reps, mxs, mxdoms = find_rep(av_fracs, pixels, n_reps)
        plot_reps(mxs, mxdoms, gl_lbls, plot_dir + 'pie/',
                  'rep_pix_gl_merge170+180to160')

        rep_df = fill_df(gl_lbls, reps, mxs, mxdoms)
        #rep_df.to_pickle(misc_dir+ 'rep_pixels_globcover.pkl')
        print(rep_df)

    ############################## Generate aggregated parameter sets ######################################################

    ic_inds = autil.get_inds_ic(model_id)
    conv_chains = read_pickle(cbr_dir + model_id + assim_type + '_ALL' +
                              '_MCMC' + mcmc_id + '_' + n_iter +
                              '_best_subset.pkl')
    conv_chains.columns = ['pixel', 'bestchains',
                           'conv']  #rename columns for easier access
    converged_pixels = conv_chains['pixel'].values

    if agg_parameters:

        #f_pft = open(misc_dir + 'pft/par_preds/par_set_agg_'+ model_id + assim_type+'_MCMC'+mcmc_id + '_'+n_iter + '.csv', 'w')
        #w_pft = csv.writer(f_pft)

        # load list of cbrs
        files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter +
                          '_*.cbr')
        files.sort()

        # get aggregated parameter sets from representative pixels
        par_set_agg = []
        for pft in gl_lbls:
            print(pft)
            print('PFT: ' + str(pft))
            # isolate row in dataframe corresponding to given pft
            rep_df_pft = rep_df.loc[rep_df['pft'] == int(pft)]

            # get list of pixels that are dominant
            rep_pixels_pft = [
                rep_df_pft['reppix' + str(i)].values[0]
                for i in range(1, n_reps + 1)
            ]
            doms = [
                rep_df_pft['reppix' + str(i) + 'fracdom'].values[0]
                for i in range(1, n_reps + 1)
            ]
            pixels_dom = [
                pixel for pixel in rep_pixels_pft
                if doms[rep_pixels_pft.index(pixel)] == 1
            ]

            if len(pixels_dom) > 0:
                par_set_agg.append(
                    aggregate_parameter_sets(pixels_dom, files, parnames,
                                             ens_size, n_chains_agg,
                                             conv_chains))
            else:
                par_set_agg.append(
                    np.ones((ens_size * n_chains_agg, n_pars)) * np.nan)

            #w_pft.writerow(np.nanmedian(par_set_agg[gl_lbls.index(pft)], axis=0))

            #if np.sum(~np.isnan(par_set_agg[gl_lbls.index(pft)]))>0: autil.plot_par_histograms(par_set_agg[gl_lbls.index(pft)], parnames, savepath=plot_dir+'dists/', title='globcover_agg_PFT'+str(pft)+'_'+model_id+assim_type+'_'+mcmc_id+'_'+n_iter+'.pdf')

        #f_pft.close()

    ############################################################################################################################################
    ################################### copy cbfs and substitute pars for IC optimization ######################################################

    # set up cbfs for IC assimilation
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir + '/../')

    # set additional directories (relative to the new working directory)
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbf_pft_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_pft_ic/' + model_id + '/'
    cbr_pft_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_pft/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    output_pft_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_pft/' + model_id + '/'

    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iter) / 500))
    elif mcmc_id == '3':
        # n_iterations/ frac_save_out * 100 will be ensemble size
        frac_save_out = str(int(int(n_iter) / 500 * 100))
    else:
        # Only needed by the IC optimization stage, which rejects it there.
        frac_save_out = None

    par_set_csv = read_csv(misc_dir + 'pft/par_preds/par_set_agg_' + model_id +
                           assim_type + '_MCMC' + mcmc_id + '_' + n_iter +
                           '.csv',
                           header=None).values

    if submit_ic_opt:

        if frac_save_out is None:
            raise ValueError(
                "mcmc_id must be '119' or '3' to submit IC optimization, got %r"
                % (mcmc_id,))

        txt_filename = 'pft_ic_assim_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'

        with open(txt_filename, 'w') as txt_file:
            for cbf_file in cbf_files:
                print(cbf_file)

                cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
                cbf_pixel = cbf_file[-8:-4]

                if cbf_pixel not in converged_pixels:
                    continue

                # land-cover types present at this pixel (looked up once)
                pixel_types = types_present[pixels.index(cbf_pixel)]

                for pft in gl_lbls:
                    pft_row = par_set_csv[gl_lbls.index(pft), :]

                    # skip classes absent from the pixel or without parameters
                    if not (int(pft) in pixel_types
                            and not np.isnan(pft_row).all()):
                        continue

                    par_set_agg_cbf = np.copy(pft_row)
                    # re-transform bday, fday to proper range
                    par_set_agg_cbf[11] += 365.25
                    par_set_agg_cbf[14] += 365.25

                    # pad to 50 entries; -9999 marks "no prior"
                    parpriors = np.concatenate(
                        (par_set_agg_cbf, np.ones(50 - n_pars) * -9999.))
                    parpriorunc = np.concatenate(
                        (np.ones(n_pars) * 1.001,
                         np.ones(50 - n_pars) * -9999.))

                    # initial conditions are left free for the optimization
                    for ic_ind in ic_inds:
                        parpriors[ic_ind] = -9999.
                        parpriorunc[ic_ind] = -9999.

                    if nbe_optimization == 'ON':
                        parpriors[n_pars - 1] = -9999
                        parpriorunc[n_pars - 1] = -9999

                    cbf_data['PARPRIORS'] = parpriors.reshape(-1, 1)
                    cbf_data['PARPRIORUNC'] = parpriorunc.reshape(-1, 1)

                    f = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + '_PFT' + str(
                        pft) + '_assim_' + cbf_pixel
                    #rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, cbf_pft_ic_dir + f +'.cbf')

                    txt_file.write(
                        '%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000'
                        % (mdf_dir, cbf_pft_ic_dir[3:], f + '.cbf',
                           cbr_pft_dir, f + '.cbr', n_iter, frac_save_out,
                           mcmc_id))
                    # commands of one pixel are chained with ' && ' on a single
                    # line, which ends after the pixel's last present type
                    txt_file.write(
                        '\n' if pixel_types[-1] == int(pft) else ' && ')

        # closing twice is harmless if fill_in_sh already closes the handle
        with open(txt_filename[:-3] + 'sh', 'w') as sh_file:
            autil.fill_in_sh(sh_file,
                             array_size=len(conv_chains['pixel'].values),
                             n_hours=48,
                             txt_file=txt_filename,
                             combined=True)

    if submit_forward:

        txt_filename = 'pft_ic_forward_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'

        with open(txt_filename, 'w') as txt_file:
            for cbf_file in cbf_files:
                print(cbf_file)

                cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
                cbf_pixel = cbf_file[-8:-4]

                if cbf_pixel not in converged_pixels:
                    continue

                pixel_types = types_present[pixels.index(cbf_pixel)]

                for pft in gl_lbls:
                    pft_row = par_set_csv[gl_lbls.index(pft), :]

                    if not (int(pft) in pixel_types
                            and not np.isnan(pft_row).all()):
                        continue

                    f = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + '_PFT' + str(
                        pft) + '_assim_' + cbf_pixel

                    assim_cbrs = glob.glob(cbr_pft_dir + f + '.cbr')
                    if len(assim_cbrs) == 0:
                        continue

                    cbr_assim = rwb.read_cbr_file(assim_cbrs[0],
                                                  {'nopars': n_pars})

                    ff = cbf_file[:-9] + '_MCMC' + mcmc_id + '_' + n_iter + '_PFT' + str(
                        pft) + '_forward_' + cbf_pixel

                    # Work on a copy: indexing par_set_csv yields a view, and
                    # writing the optimized ICs through it would overwrite the
                    # shared table for every later pixel.
                    cbr_forward = np.copy(pft_row)
                    for ic_ind in ic_inds:
                        cbr_forward[ic_ind] = np.nanmedian(
                            cbr_assim[:, ic_ind])
                    cbr_forward = cbr_forward.reshape(1, n_pars)

                    rwb.write_cbr_file(cbr_forward,
                                       cbr_pft_dir + ff + '.cbr')

                    txt_file.write(
                        '%sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s'
                        % (runmodel_dir, cbf_dir[3:], cbf_file,
                           cbr_pft_dir, ff + '.cbr', output_pft_dir,
                           'fluxfile_' + ff + '.bin', output_pft_dir,
                           'poolfile_' + ff + '.bin', output_pft_dir,
                           'edcdfile_' + ff + '.bin', output_pft_dir,
                           'probfile_' + ff + '.bin'))
                    txt_file.write(
                        '\n' if pixel_types[-1] == int(pft) else ' && ')

        with open(txt_filename[:-3] + 'sh', 'w') as sh_file:
            autil.fill_in_sh(sh_file,
                             array_size=len(conv_chains['pixel'].values),
                             n_hours=1,
                             txt_file=txt_filename,
                             combined=True)

    return
예제 #3
0
def main():
    """Compare ensemble spread (or error) and convergence across MCMC setups.

    ``sys.argv[1]`` selects the metric: ``'spread'`` stores the mean
    interquartile range of each output variable; any other value stores the
    root-mean-square difference between the ensemble median and the
    observations. Results are collected per pixel, variable and
    (model_id, mcmc_id, iterations) combination and passed to the
    ``autil`` plotting helpers.

    NOTE: the plot calls after the loops use ``model_id`` and ``it`` from the
    last combination that was processed.
    """
    combinations = [['811', '119', '40000000'], ['811', '3', '1000000'],
                    ['911', '119', '40000000']]
    assim_type = '_longadapted'
    metric = sys.argv[1]

    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    ens_spread = np.ones(
        (len(pixels), len(vrs), len(combinations))) * float('nan')
    conv = np.ones((len(pixels), len(combinations))) * float('nan')

    cur_dir = os.getcwd() + '/'

    for pixel_ind, pixel in enumerate(pixels):

        for comb_count, (model_id, mcmc_id, it) in enumerate(combinations):

            cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
            cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
            output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
            plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
            parnames = autil.get_parnames(cur_dir + '../../misc/', model_id)
            n_pars = len(parnames)

            # looked up once per model instead of once per chain and variable
            nofluxes_nopools_lma = autil.get_nofluxes_nopools_lma(model_id)

            # chain file names are globbed relative to the cbr directory
            os.chdir(cur_dir + cbr_dir)
            files = glob.glob('*MCMC' + mcmc_id + '_' + it + '_' + pixel +
                              '*.cbr')
            pixel_chains = autil.find_all_chains(files, pixel)
            pixel_chains.sort()  # filenames
            if model_id == '911':
                pixel_chains = pixel_chains[-4:]
            print(pixel_chains)

            cbf_pixel = rwb.read_cbf_file(
                cur_dir + cbf_dir + pixel_chains[0].partition('_MCMC')[0] +
                '_' + pixel + '.cbf')

            cbr_chain_list = []
            cbr_pixel, flux_pixel, pool_pixel = None, None, None
            for pixel_chain in pixel_chains:
                print(pixel_chain)
                cbr_chain = rwb.read_cbr_file(pixel_chain, {'nopars': n_pars})

                flux_chain = rwb.readbinarymat(
                    cur_dir + output_dir + 'fluxfile_' + pixel_chain[:-3] +
                    'bin', [cbf_pixel['nodays'], nofluxes_nopools_lma[0]])
                pool_chain = rwb.readbinarymat(
                    cur_dir + output_dir + 'poolfile_' + pixel_chain[:-3] +
                    'bin', [cbf_pixel['nodays'] + 1, nofluxes_nopools_lma[1]])

                # stack every chain of this pixel along the ensemble axis
                if cbr_pixel is None:
                    cbr_pixel = np.copy(cbr_chain)
                    flux_pixel = np.copy(flux_chain)
                    pool_pixel = np.copy(pool_chain)
                else:
                    cbr_pixel = np.concatenate((cbr_pixel, cbr_chain), axis=0)
                    flux_pixel = np.concatenate((flux_pixel, flux_chain),
                                                axis=0)
                    pool_pixel = np.concatenate((pool_pixel, pool_chain),
                                                axis=0)

                cbr_chain_list.append(cbr_chain)
                print(np.shape(cbr_chain))
                print(np.shape(cbr_pixel))

            gr = autil.gelman_rubin(cbr_chain_list)
            n_converged = sum(gr < 1.2)
            print('%i of %i parameters converged' % (n_converged, n_pars))
            # percentage of parameters below the 1.2 threshold
            conv[pixel_ind, comb_count] = n_converged / n_pars * 100

            for var_ind, var in enumerate(vrs):
                print(var)

                try:
                    obs = cbf_pixel['OBS'][var]
                    obs[obs == -9999] = float('nan')
                except (KeyError, IndexError, TypeError, ValueError):
                    # variable has no usable observations in this cbf file
                    obs = np.ones(cbf_pixel['nodays']) * np.nan
                n_obs = np.sum(np.isfinite(obs))

                fwd_data = autil.get_output(var, model_id, flux_pixel,
                                            pool_pixel, cbr_pixel,
                                            nofluxes_nopools_lma[2])

                if len(fwd_data) > 0:
                    # drop the extra trailing time step when present
                    if fwd_data.shape[1] > cbf_pixel['nodays']:
                        fwd_data = fwd_data[:, :-1]

                    fwd_data = autil.remove_outliers(fwd_data)
                    med = np.nanmedian(fwd_data, axis=0)
                    ub = np.nanpercentile(fwd_data, 75, axis=0)
                    lb = np.nanpercentile(fwd_data, 25, axis=0)

                    if metric == 'spread':
                        value = np.nanmean(abs(ub - lb))
                    else:
                        value = np.sqrt(np.nansum((med - obs)**2) / n_obs)
                    ens_spread[pixel_ind, var_ind, comb_count] = value

    for var_ind, var in enumerate(vrs):
        autil.plot_spread_v_iter(
            ens_spread,
            pixels,
            var_ind,
            var,
            it,
            metric,
            cur_dir + plot_dir + 'spread_v_iter',
            'iter_test_compare_' + assim_type + '_' + model_id + '_' + var +
            '_' + metric,
            single_val=True
        )  #'iter_test_MCMC'+mcmc_id+'_'+model_id+'_'+var + '_' + metric)

    autil.plot_conv_v_iter(conv,
                           pixels,
                           it,
                           cur_dir + plot_dir + 'spread_v_iter',
                           'iter_test_compare' + assim_type + '_' + model_id +
                           '_conv',
                           single_val=True)

    return
def main():
    """Compare model/MCMC configurations against observations at test pixels.

    For each pixel and each (model_id, mcmc_id, n_iter) configuration, the
    best chains listed in the ``*_best_subset.pkl`` file are read together
    with their forward-run flux and pool files. Only chains whose cbr file
    has exactly ``ens_size`` rows are kept. NBE, LAI, ABGB and GPP
    predictions are then plotted against the observations stored in the cbf
    file, and mean absolute errors are collected per pixel.

    NOTE: the observations and ``cbf_pixel`` used for plotting are those read
    for the last configuration in the list.
    """

    # get specifications for run to read
    model_ids = ['811','811','911','911']
    assim_type = '_p25adapted'
    ens_size = 500

    # get pixels, ids and number of iterations to read
    pixels = ['3809','3524','2224','4170','1945','3813','4054','3264','1271','3457']
    mcmc_ids = ['119','3','3','119']
    n_iters = ['40000000','1000000','1000000','40000000']

    cur_dir = os.getcwd() + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    savepath = cur_dir+plot_dir+'demcmc_mcmc/'
    lbls = [model_id+'_MCMC'+mcmc_id for model_id, mcmc_id in zip(model_ids, mcmc_ids)]

    nbe_mae, lai_mae, abgb_mae = [], [], []

    # run through pixels
    for pixel in pixels:

        # get that pixel's outputs for each MCMCID
        nbe_pred, lai_pred, abgb_pred, gpp_pred = [], [], [], []
        for model_id, mcmc_id, n_iter in zip(model_ids, mcmc_ids, n_iters):

            # set directories
            cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/' + model_id + '/'
            cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'/' + model_id + '/'
            output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'/' + model_id + '/'
            parnames = autil.get_parnames('../../misc/', model_id)
            nofluxes_nopools_lma = autil.get_nofluxes_nopools_lma(model_id)

            # read cbf file for that pixel
            cbf_pixel = rwb.read_cbf_file(glob.glob(cbf_dir + '*_' + pixel+'.cbf')[0])

            # read obs and obs unc for that pixel
            nbe_obs, lai_obs, abgb_obs, sif_obs = cbf_pixel['OBS']['NBE'], cbf_pixel['OBS']['LAI'], cbf_pixel['OBS']['ABGB'], cbf_pixel['OBS']['GPP']
            nbe_an_unc, nbe_seas_unc, lai_unc, abgb_unc = cbf_pixel['OBSUNC']['NBE']['annual_unc'], cbf_pixel['OBSUNC']['NBE']['seasonal_unc'], cbf_pixel['OTHER_OBS']['MLAI']['unc'], cbf_pixel['OBSUNC']['ABGB']['unc']

            conv_chains_pkl = read_pickle(glob.glob(cbr_dir + model_id + assim_type + '*_MCMC'+mcmc_id + '_'+n_iter+'_best_subset.pkl')[0])
            conv_chains_pkl.columns = ['pixel','bestchains','conv']# if model_id!='911' else ['pixel','bestchains'] #rename columns for easier access

            # grab cbrs corresponding to that pixel, MCMCID and number of iterations
            files = glob.glob(cbr_dir + '*MCMC'+mcmc_id+'_' + n_iter + '_'+ pixel+'*.cbr')
            files.sort()
            best_chains = conv_chains_pkl.loc[conv_chains_pkl['pixel']==pixel]['bestchains'].values[0][1:]

            # Collect the accepted chains in fresh per-configuration lists.
            # Accumulating onto running arrays would either fail or silently
            # append to the previous configuration's data whenever the first
            # best chain is rejected by the ens_size check.
            cbr_chain_list, flux_chain_list, pool_chain_list = [], [], []
            for chain in best_chains:
                print(chain)

                # read cbr for one file and transform Bday, Fday
                file = [i for i in files if pixel+'_'+chain+'.cbr' in i][0]
                cbr_chain = autil.modulus_Bday_Fday(rwb.read_cbr_file(file, {'nopars': len(parnames)}), parnames)
                print(cbr_chain.shape)

                # read forward run for that cbr
                bin_name = file.partition(cbr_dir)[-1][:-3]+'bin'
                flux_chain = rwb.readbinarymat(output_dir + 'fluxfile_' + bin_name, [cbf_pixel['nodays'], nofluxes_nopools_lma[0]])
                pool_chain = rwb.readbinarymat(output_dir + 'poolfile_' + bin_name, [cbf_pixel['nodays']+1, nofluxes_nopools_lma[1]])

                # keep only chains with the full ensemble size
                if np.shape(cbr_chain)[0]==ens_size:
                    cbr_chain_list.append(cbr_chain)
                    flux_chain_list.append(flux_chain)
                    pool_chain_list.append(pool_chain)

            # compute gelman rubin (needs at least two chains)
            if len(cbr_chain_list)>1:
                gr = autil.gelman_rubin(cbr_chain_list)
                print('%i of %i parameters converged' % (sum(gr<1.2), len(parnames)))

            # raises ValueError when no chain passed the ens_size check
            cbr_pixel = np.vstack(cbr_chain_list)
            flux_pixel = np.concatenate(flux_chain_list, axis=0)
            pool_pixel = np.concatenate(pool_chain_list, axis=0)

            print(pool_pixel.shape)
            print(cbr_pixel.shape)
            # nbe, lai, and abgb predictions at pixel
            # list with elements corresponding to MCMCIDs considered (e.g. first element is MCMCID 119)
            lma_ind = nofluxes_nopools_lma[2]
            nbe_pred.append(autil.get_output('NBE', model_id, flux_pixel, pool_pixel, cbr_pixel, lma_ind))
            # LAI and ABGB outputs carry one extra trailing step, dropped here
            lai_pred.append(autil.get_output('LAI', model_id, flux_pixel, pool_pixel, cbr_pixel, lma_ind)[:,:-1])
            abgb_pred.append(autil.get_output('ABGB', model_id, flux_pixel, pool_pixel, cbr_pixel, lma_ind)[:,:-1])
            gpp_pred.append(autil.get_output('GPP', model_id, flux_pixel, pool_pixel, cbr_pixel, lma_ind))

        # plot time series
        plot_output_ts(cbf_pixel, nbe_pred, nbe_obs, nbe_an_unc, lbls=lbls, var='NBE', savepath=savepath, title='all_models'+'_NBE_'+pixel+'.png')
        plot_output_ts(cbf_pixel, lai_pred, lai_obs, lai_unc, lbls=lbls, var='LAI', savepath=savepath, title='all_models'+'_LAI_'+pixel+'.png')
        plot_output_ts(cbf_pixel, gpp_pred, sif_obs, 0, lbls=lbls, var='GPP', savepath=savepath, title='all_models'+'_GPP_'+pixel+'.png')

        # plot box plots
        plot_dist_compare(nbe_pred, nbe_obs, [nbe_an_unc, nbe_seas_unc], lbls=lbls, var='NBE', savepath=savepath, title='all_models'+'_NBE_'+pixel+'_dist_')
        plot_dist_compare(lai_pred, lai_obs, lai_unc, lbls=lbls, var='LAI', savepath=savepath, title='all_models'+'_LAI_'+pixel+'_dist_')
        plot_dist_compare(abgb_pred, abgb_obs, abgb_unc, lbls=lbls, var='ABGB', savepath=savepath, title='all_models'+'_ABGB_'+pixel+'_dist_')

        # plot obs vs median comparison
        nbe_mae.append([mae_real_numbers_only(f, nbe_obs)[0] for f in nbe_pred])
        lai_mae.append([mae_real_numbers_only(f, lai_obs)[0] for f in lai_pred])
        abgb_mae.append([mae_real_numbers_only(f, abgb_obs)[0] for f in abgb_pred])

        print(rank_mae(nbe_mae, lbls))
        print(rank_mae(lai_mae, lbls))
        print(rank_mae(abgb_mae, lbls))

    plot_maes(nbe_mae, pixels, savepath=savepath, title='all_models_NBE_mae')
    plot_maes(lai_mae, pixels, savepath=savepath, title='all_models_LAI_mae')
    plot_maes(abgb_mae, pixels, savepath=savepath, title='all_models_ABGB_mae')

    return
예제 #5
0
def main():
    """Regress each parameter's per-pixel median on biome fractions.

    Reads ``model_id``, ``mcmc_id`` and ``n_iter`` from ``sys.argv[1:4]``.
    For every pixel that has a cbf file, the feature vector is the mean biome
    fraction over the fine-grid cells mapped to that pixel and the target is
    the median of each parameter across all of the pixel's cbr chains. One
    linear regression per parameter is fit on a 60/40 train/test split and
    the test-set predictions are passed to ``plot_scatter_test_pred``.
    """

    # run information
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    ens_size = 500
    assim_type = '_longadapted'

    # directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '/../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)
    n_pars = len(parnames)

    # map giving, for each mstmip cell, the GEOSCHEM pixel it belongs to
    pixel_nums = np.load(misc_dir + 'mstmip_pixel_nums.npy')

    # biome fractions from mstmip
    with np.load(misc_dir + 'mstmip_biome_frac.npz') as data:
        biome_frac = data['arr_0']
    n_classes = biome_frac.shape[0]

    # unique land pixels, taken from the cbf file names
    cbf_paths = glob.glob(cbf_dir + '*.cbf')
    pixels = list(set([path[-8:-4] for path in cbf_paths]))

    # all cbr files for this MCMC configuration
    files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr')

    # features: (n_samples, n_features); targets: (n_pars, n_samples)
    n_regr_models = n_pars
    X = np.ones((len(pixels), n_classes)) * np.nan
    y = np.ones((n_regr_models, len(pixels))) * np.nan

    for ind, pixel in enumerate(pixels):
        if ind % 10 == 0:
            print(ind)

        # land-cover information: cells that fall inside this coarse pixel
        locs = pixel_nums == float(pixel)
        fracs_at_geos_pixel = no_water_pixels(biome_frac[:, locs])
        # average biome fraction across mstmip pixels within coarse pixel
        X[ind, :] = np.nanmean(fracs_at_geos_pixel, axis=1)

        # parameter information: all chain files of this pixel, by name
        pixel_chains = autil.find_all_chains(files, pixel)
        pixel_chains.sort()

        if len(pixel_chains) == 0:
            continue

        # stack every chain, then take the per-parameter median
        chain_pars = [
            rwb.read_cbr_file(chain_file, {'nopars': n_pars})
            for chain_file in pixel_chains
        ]
        cbr_pixel = np.concatenate(chain_pars, axis=0)
        y[:, ind] = np.nanmedian(cbr_pixel, axis=0)

    # remove nan values so regression runs
    Xr, yr = drop_nan(X, y)

    # one regression model per parameter
    y_test_all_pars = []
    y_pred_all_pars = []
    for par_ind in range(n_regr_models):
        print('running regression for ' + parnames[par_ind] + ' . . . ')

        # split train and test sets, 60-40
        X_train, X_test, y_train, y_test = train_test_split(
            Xr, yr[par_ind, :], test_size=0.4)
        y_test_all_pars.append(y_test)

        # fit on train, predict on test
        fitted = LinearRegression().fit(X_train, y_train)
        y_pred_all_pars.append(fitted.predict(X_test))

    # make summary scatter plot
    plot_title = ('par_preds_' + model_id + '_MCMC' + mcmc_id + '_' + n_iter +
                  assim_type)
    plot_scatter_test_pred(y_test_all_pars, y_pred_all_pars, parnames,
                           plot_dir + 'lc_scat/', plot_title)

    return
def main():
    """Partition prediction variance into structural (Ms) and parametric (Mp) shares.

    Positional command-line arguments:
        sys.argv[1]: MCMC id embedded in the cbr file names.
        sys.argv[2]: iteration count embedded in the cbr file names.
        sys.argv[3]: number of models to randomly leave out of the suite.

    Model ids are taken from the prefixes of the png files in the 'dists/'
    plot directory. For every variable/pixel combination, each retained model
    that has chains at the pixel is read, models failing the Gelman-Rubin
    convergence criterion are skipped, and two quantities are computed per
    time step: Ms, the variance across models of the ensemble-median
    prediction, and Mp, the across-model mean of the within-ensemble variance
    (computed on values inside the 25th-75th percentile range). Their
    normalized time means and the number of contributing models n are stored
    in a DataFrame row labelled '<pixel>_<var>'; the table is printed and
    pickled.

    Side effects: changes the working directory (it is left in the last cbr
    directory visited) and writes one pickle file.
    """

    # run information
    mcmc_id = sys.argv[1]
    n_iter = sys.argv[2]
    nmodels_leave_out = int(sys.argv[3])
    assim_type = '_longadapted'
    nsteps = 228 if assim_type == '_longadapted' else 240

    # directories (relative ones are resolved against cur_dir where needed)
    cur_dir = os.getcwd() + '/'
    scratch_dir = '../../../../../../scratch/users/cfamigli/cardamom/'
    plot_dir = scratch_dir + 'plots/'
    files_dir = scratch_dir + 'files/'
    df_dir = files_dir + 'processed_df/'

    # get list of model ids from the png prefixes
    os.chdir(plot_dir + 'dists/')
    models_full = sorted({el.split('_')[0] for el in glob.glob('*.png')})
    os.chdir(cur_dir)

    # Exclude model '102' from the suite (the original note reads "remove 101,
    # temporary until 102-->101"). Guarded so that a missing id does not raise.
    if '102' in models_full:
        models_full.remove('102')

    # set lists of variables and pixels
    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    # random subset of the model suite
    models = random.sample(models_full, len(models_full) - nmodels_leave_out)
    print(models)

    # dataframe will hold model structural uncertainty (Ms) and model
    # parametric uncertainty (Mp) for each pixel-var combination;
    # n is number of models that make up the suite.
    # A list (not a set) keeps the column order deterministic and is accepted
    # by every pandas version.
    partitioning = DataFrame(columns=['Ms', 'Mp', 'n'])

    gr_thresh = 1.2  # below this value parameters are assumed to be convergent

    for var in vrs:
        print('Variable: ' + var)

        for pixel in pixels:
            print('Pixel: ' + pixel)

            # per-model median prediction through time
            meds = np.full((len(models), nsteps), np.nan)
            Mp, n = 0, 0

            for model_ind, model in enumerate(models):
                print(model)

                cbf_dir = files_dir + 'cbf' + assim_type + '/' + model + '/'
                cbr_dir = files_dir + 'cbr' + assim_type + '/' + model + '/'
                output_dir = files_dir + 'output' + assim_type + '/' + model + '/'
                parnames = autil.get_parnames(cur_dir + '../../misc/', model)

                # chain file names are kept relative to the cbr directory
                # because the output file names below are derived from them
                os.chdir(cur_dir + cbr_dir)
                files = set(
                    glob.glob('*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr'))

                # list of files corresponding to each chain at that pixel
                pixel_chains = autil.find_all_chains(files, pixel)
                pixel_chains.sort()
                if len(pixel_chains) == 0:
                    continue

                cbf_pixel = rwb.read_cbf_file(
                    cur_dir + cbf_dir +
                    pixel_chains[0].partition('_MCMC')[0] + '_' + pixel +
                    '.cbf')

                # indices used below: [0] number of fluxes, [1] number of
                # pools, [2] passed through to autil.get_output
                model_dims = autil.get_nofluxes_nopools_lma(model)

                cbr_chain_list = []  # separate cbrs per chain, for gelman rubin
                flux_chain_list, pool_chain_list = [], []
                for pixel_chain in pixel_chains:
                    print(pixel_chain)
                    cbr_chain_list.append(
                        rwb.read_cbr_file(pixel_chain,
                                          {'nopars': len(parnames)}))

                    # swap the 'cbr' extension for 'bin' (the dot is kept)
                    chain_stem = pixel_chain[:-3]
                    flux_chain_list.append(
                        rwb.readbinarymat(
                            cur_dir + output_dir + 'fluxfile_' + chain_stem +
                            'bin', [cbf_pixel['nodays'], model_dims[0]]))
                    pool_chain_list.append(
                        rwb.readbinarymat(
                            cur_dir + output_dir + 'poolfile_' + chain_stem +
                            'bin', [cbf_pixel['nodays'] + 1, model_dims[1]]))

                gr = autil.gelman_rubin(cbr_chain_list)
                n_converged = sum(gr < gr_thresh)
                print('%i of %i parameters converged with GR<%.1f' %
                      (n_converged, len(parnames), gr_thresh))

                # don't include nonconvergent runs in analysis
                if n_converged / len(parnames) < .9:
                    continue

                # stack all chains once instead of growing arrays per chain
                cbr_pixel = np.concatenate(cbr_chain_list, axis=0)
                flux_pixel = np.concatenate(flux_chain_list, axis=0)
                pool_pixel = np.concatenate(pool_chain_list, axis=0)

                # get forward data for var
                fwd_data = autil.get_output(var, model, flux_pixel,
                                            pool_pixel, cbr_pixel,
                                            model_dims[2])
                if len(fwd_data) == 0:
                    continue

                # drop the trailing step when the series is longer than nsteps
                if fwd_data.shape[1] > nsteps:
                    fwd_data = fwd_data[:, :-1]

                fwd_data = autil.remove_outliers(fwd_data)
                meds[model_ind, :] = np.nanmedian(fwd_data, axis=0)

                # set values outside of 25th-75th range to nan, then add the
                # intra-ensemble variance to the running sum
                fwd_data = autil.remove_below_25_above_75(fwd_data)
                Mp += np.nanvar(fwd_data, axis=0)
                n += 1

            Ms = np.nanvar(meds, axis=0)  # inter-median variance
            Mp = Mp / n if n != 0 else float('nan')

            total = Ms + Mp
            partitioning.loc[pixel + '_' + var] = {
                'Ms': np.nanmean(Ms / total),
                'Mp': np.nanmean(Mp / total),
                'n': n
            }

    print(partitioning.to_string())
    partitioning.sort_index(
        axis=1).to_pickle(cur_dir + df_dir + 'summary' + assim_type + '_MCMC' +
                          mcmc_id + '_' + date.today().strftime("%m%d%y") +
                          '_' + str(len(models)) + '.pkl')

    return
def main():
    """Compute per-pixel NBE and LAI RMSE for one model and map/pickle them.

    Positional command-line argument:
        sys.argv[1]: model id (used in directory names and output names).

    For every cbf file of the model, the first combination of four cbr chains
    that satisfies the Gelman-Rubin criterion (GR < 1.2 for at least 90% of
    the parameters) is selected. The matching flux and pool outputs are read,
    NBE and LAI predictions are derived through autil.get_output, and their
    RMSE against the observations stored in the cbf file is recorded. The
    results are drawn with autil.plot_map and written to a pickle in the misc
    directory.
    """

    ### set specifications
    model_id = sys.argv[1]
    mcmc_id = '119'
    n_iter = '40000000'
    ens_size = 500
    assim_type = '_longadapted'
    n_chains_to_converge = 4

    ### set directories
    cur_dir = os.getcwd() + '/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'/' + model_id + '/'
    output_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames(cur_dir + '../../misc/', model_id)

    # indices used below: [0] number of fluxes, [1] number of pools,
    # [2] passed through to autil.get_output
    model_dims = autil.get_nofluxes_nopools_lma(model_id)

    # get list of cbfs (bare file names, sorted)
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir)

    # pixel names and rmses; the three lists are appended together so they
    # always stay aligned
    pixels_plot = []
    nbe_rmse, lai_rmse = [], []

    for file_ind, cbf_file in enumerate(cbf_files):
        print(cbf_file, file_ind)

        cbf_pixel = rwb.read_cbf_file(cbf_dir + cbf_file)
        pixel = cbf_file[-8:-4]

        # chain files for this pixel, ordered by chain number
        cbr_files = glob.glob(cbr_dir + '*MCMC'+mcmc_id+'_'+n_iter+'_' + pixel + '_*.cbr')
        cbr_files = sorted(cbr_files, key=lambda x: int(x.partition(pixel+'_')[-1].partition('.cbr')[0]))

        # search the chain combinations for the first convergent one
        convergent = None  # list of (cbr file, parameter chain) pairs
        for subset in itertools.combinations(cbr_files, n_chains_to_converge):

            # read parameters, keeping only chains with a full ensemble
            kept = []
            for cbr_file in subset:
                cbr_chain = rwb.read_cbr_file(cbr_file, {'nopars': len(parnames)})
                cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)

                if np.shape(cbr_chain)[0] == ens_size:
                    kept.append((cbr_file, cbr_chain))

            # gelman rubin needs at least two chains
            if len(kept) > 1:
                gr = autil.gelman_rubin([chain for _, chain in kept])

                if sum(gr < 1.2)/len(parnames) >= 0.9:
                    convergent = kept
                    break

        if convergent is None:
            continue

        # read fluxes and pools for the convergent chains; a chain is used
        # only when its parameters, fluxes and pools all have ens_size
        # members, so the three stacks stay aligned row by row
        cbr_kept, flux_kept, pool_kept = [], [], []
        for cbr_file, cbr_chain in convergent:
            filename = os.path.splitext(os.path.basename(cbr_file))[0]
            flux_chain = rwb.readbinarymat(output_dir + 'fluxfile_' + filename+'.bin', [cbf_pixel['nodays'], model_dims[0]])
            pool_chain = rwb.readbinarymat(output_dir + 'poolfile_' + filename+'.bin', [cbf_pixel['nodays']+1, model_dims[1]])

            if flux_chain.shape[0] == ens_size and pool_chain.shape[0] == ens_size:
                cbr_kept.append(cbr_chain)
                flux_kept.append(flux_chain)
                pool_kept.append(pool_chain)

        if not flux_kept:
            # nothing to stack: skip the pixel instead of failing in np.vstack
            print('no complete flux/pool output for pixel ' + pixel + ', skipping')
            continue

        cbr_agg = np.vstack(cbr_kept)
        flux_pixel = np.vstack(flux_kept)
        pool_pixel = np.vstack(pool_kept)

        nbe_pred = autil.get_output('NBE', model_id, flux_pixel, pool_pixel, cbr_agg, model_dims[2])
        lai_pred = autil.get_output('LAI', model_id, flux_pixel, pool_pixel, cbr_agg, model_dims[2])
        nbe_obs, lai_obs = cbf_pixel['OBS']['NBE'], cbf_pixel['OBS']['LAI']

        pixel_nbe_rmse = rmse_real_numbers_only(nbe_pred, nbe_obs)
        pixel_lai_rmse = rmse_real_numbers_only(lai_pred, lai_obs)

        pixels_plot.append(pixel)
        nbe_rmse.append(pixel_nbe_rmse)
        lai_rmse.append(pixel_lai_rmse)
        print(pixel_nbe_rmse, pixel_lai_rmse)

    # maps of rmse over all land pixels
    run_tag = model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter
    land_pixels = [file[-8:-4] for file in glob.glob(cbf_dir + '*.cbf')]
    autil.plot_map(nrows=46, ncols=73, land_pixel_list=land_pixels, pixel_value_list=pixels_plot, value_list=nbe_rmse, savepath=plot_dir+'maps/', savename='rmse_nbe_' + run_tag)
    autil.plot_map(nrows=46, ncols=73, land_pixel_list=land_pixels, pixel_value_list=pixels_plot, value_list=lai_rmse, savepath=plot_dir+'maps/', savename='rmse_lai_' + run_tag)

    # naming the columns in the constructor also works when no pixel converged
    rmse_df = DataFrame(list(zip(pixels_plot, nbe_rmse, lai_rmse)),
                        columns=['pixel', 'nbe_rmse', 'lai_rmse'])
    rmse_df.to_pickle(cur_dir + '../../misc/rmse_' + run_tag + '.pkl')


    #################################################################################################################################################################
    # analyze regionally (disabled: the string below is never executed)

    '''region_mask = Dataset(cur_dir + '../../misc/fourregion_maskarrays.nc')
    region_mask.set_auto_mask(False)
    regionmat, lat, lon = region_mask['4region'][:], region_mask['lat'][:], region_mask['lon'][:]
    lat[0] = -90
    lat[-1] = 90
    
    model_ids = ['811', '911']
    rmse_dfs = []
    for model_id in model_ids:
        rmse_df = read_pickle(cur_dir + '../../misc/rmse_' + model_id + assim_type+ '_MCMC' + mcmc_id + '_' + n_iter + '.pkl')
        rmse_df.columns = ['pixel','nbe_rmse','lai_rmse']
        
        regions = []
        for pixel in rmse_df[rmse_df.columns[0]].tolist():
            pixlat, pixlon = rwb.rowcol_to_latlon(pixel)
            regions.append(regionmat[np.argwhere(lat==pixlat)[0][0], np.argwhere(lon==pixlon)[0][0]])
        
        rmse_df.insert(loc=1, column='region', value=regions)
        rmse_dfs.append(rmse_df)
    
    print(rmse_dfs[0].groupby('region')['nbe_rmse'].mean(), rmse_dfs[0].groupby('region')['lai_rmse'].mean())
    print(rmse_dfs[1].groupby('region')['nbe_rmse'].mean(), rmse_dfs[1].groupby('region')['lai_rmse'].mean())'''

    return
예제 #8
0
def main():

    # set run information to read
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    nbe_optimization = sys.argv[4]  # OFF OR ON
    ens_size = 500
    assim_type = '_p25adapted'
    suffix = '_clipped_'

    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iter) / 500))
        n_chains_agg = 4
    elif mcmc_id == '3':
        frac_save_out = str(int(
            int(n_iter) / 500 *
            100))  # n_iterations/ frac_save_out * 100 will be ensemble size
        n_chains_agg = 2

    # set directories
    cur_dir = os.getcwd() + '/'
    misc_dir = cur_dir + '/../../misc/'
    cbf_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    plot_dir = cur_dir + '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # choose which features to use
    include_soilgrids = True
    include_poolobs = True
    include_gl_fracs = False

    # choose which model formulation to use
    train_full_ensemble = False
    rescale = True
    include_interactions = False
    include_squares = False
    include_all_polys = False
    do_feature_selection = False
    do_PLS = True
    n_features_select = int(sys.argv[5])
    write_to_csv = False

    # choose which tasks to run
    opt_feature_select = True
    submit_ic_opt = True
    submit_forward = False

    ############################################################################################################################################
    ############################# develop and train EF models ###################################################################################

    # load list of land pixels
    pixels = list(set([file[-8:-4] for file in glob.glob(cbf_dir + '*.cbf')]))
    pixels.sort()

    # load list of cbrs
    cbr_files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter +
                          '_*.cbr')

    # load bestchains for cbr_files
    conv_chains = read_pickle(cbr_dir + model_id + assim_type + '_ALL' +
                              '_MCMC' + mcmc_id + '_' + n_iter +
                              '_best_subset.pkl')
    conv_chains.columns = ['pixel', 'bestchains',
                           'conv']  #rename columns for easier access
    ic_inds = autil.get_inds_ic(
        model_id)  # get indices of initial condition parameters

    # load globcover csv for land cover regression comparison
    gl_fracs = read_csv(misc_dir + 'globcover_fracs.csv', header=0)
    n_features_gl = len(gl_fracs.columns) - 1
    suffix_gl = 'gl_'

    # get number of predictors
    n_features = (
        rwb.read_cbf_file(glob.glob(cbf_dir + '*.cbf')[0])['nomet'] - 3
    ) * 2  # remove 3 corresponding to day number and CO2, multiply by 2 (mean and sd)

    if do_PLS:
        suffix += 'PLS_'

    if include_soilgrids:
        soilgrids = read_csv('../../misc/soilgrids_defined_pixels_manual.csv',
                             header=0)
        n_soilgrids = len(soilgrids.columns) - 1
        n_features += n_soilgrids
        suffix += 'soilgrids_'

    if include_poolobs:
        n_poolobs = 4
        n_features += n_poolobs
        suffix += 'poolobs_'

    if include_gl_fracs:
        n_features += n_features_gl
        suffix += suffix_gl

    # fill X and Y
    n_regr_models = len(parnames)
    X = np.ones(
        (len(pixels), n_features)) * np.nan  # shape n_samples, n_features
    y = np.ones(
        (n_regr_models, len(pixels))) * np.nan  # shape n_pars, n_samples
    y_full_ens = np.ones((ens_size, n_regr_models,
                          len(pixels))) * np.nan  # shape n_pars, n_samples

    X_gl = np.ones((len(pixels), n_features_gl)) * np.nan
    y_gl = np.ones((n_regr_models, len(pixels))) * np.nan

    for pixel in pixels:
        if (len(
                glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter + '_' +
                          pixel + '*.cbr')) >
                0) & (pixel in conv_chains['pixel'].values):
            if conv_chains.loc[conv_chains['pixel'] ==
                               pixel]['conv'].values[0] == 0:
                continue
            else:
                ind = pixels.index(pixel)
                print(pixel)

                # get met
                cbf_file = glob.glob(cbf_dir + '*' + pixel + '.cbf')[0]
                met = rwb.read_cbf_file(cbf_file)['MET']
                met = met[:,
                          [1, 2, 3, 6, 7,
                           8]]  # don't use index 0, 5 (day numbers) or 4 (Co2)
                X_end = met.shape[1] * 2
                X[ind, :X_end] = np.concatenate(
                    (np.nanmean(met, axis=0), np.nanstd(met, axis=0)))
                #X[ind,:met.shape[1]*12] = fill_X_met_12mo(X[ind,:met.shape[1]*12], met)#np.nanmean(met, axis=0)

                # append to X if include_soil_canopy_vars
                if include_soilgrids:
                    if (int(pixel) in soilgrids['pixel'].values):
                        X[ind, X_end:(X_end + n_soilgrids)] = soilgrids[
                            soilgrids['pixel'] == int(pixel)].values[0][1:]
                    X_end = X_end + n_soilgrids

                if include_poolobs:
                    lai, agb, som = rwb.read_cbf_file(
                        cbf_file)['OBS']['LAI'], rwb.read_cbf_file(
                            cbf_file)['OBS']['ABGB'], rwb.read_cbf_file(
                                cbf_file)['OBS']['SOM']

                    if (len(lai) > 0) & (len(agb) > 0) & (len(som) > 0):
                        X[ind, X_end:(X_end + n_poolobs)] = np.array([
                            np.nanmean(lai[lai > 0]),
                            np.nanstd(lai[lai > 0]),
                            np.nanmean(agb[agb > 0]),
                            np.nanmean(som[som > 0])
                        ])
                    X_end = X_end + n_poolobs

                if include_gl_fracs:
                    if (int(pixel) in gl_fracs['pixel'].values):
                        X[ind, X_end:(X_end + n_features_gl)] = gl_fracs.loc[
                            gl_fracs['pixel'] == int(pixel)].values[0][1:]
                    X_end = X_end + n_features_gl

                # fill globcover X
                if int(pixel) in gl_fracs['pixel'].values:
                    X_gl[ind, :] = gl_fracs.loc[gl_fracs['pixel'] == int(
                        pixel)].values[0][1:]

                # get parameter information
                # get pixel's convergent chain numbers
                best_chains = conv_chains.loc[
                    conv_chains['pixel'] == pixel]['bestchains'].values[0][1:]
                print(best_chains)

                # aggregate bestchains from optimal posteriors
                cbr_data = []
                for chain in best_chains:

                    file = [
                        i for i in cbr_files
                        if pixel + '_' + chain + '.cbr' in i
                    ][0]
                    cbr_data.append(
                        autil.modulus_Bday_Fday(
                            rwb.read_cbr_file(file, {'nopars': len(parnames)}),
                            parnames))
                    #cbr_data.append(rwb.read_cbr_file(file, {'nopars': len(parnames)}))

                cbr_data = np.vstack(cbr_data)
                y[:, ind] = np.nanmedian(cbr_data, axis=0)
                y_gl[:, ind] = np.nanmedian(cbr_data, axis=0)

                indices = np.random.choice(
                    cbr_data.shape[0], ens_size,
                    replace=False)  # only take a subset of cbr rows

                y_full_ens[:, :, ind] = cbr_data[
                    indices, :]  #reshape_cbr(cbr_data, ens_size*n_chains_agg)

    if not train_full_ensemble:

        f_bic = open(
            misc_dir + 'env_filter_manual/fs/bic_fs' +
            suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' +
            n_iter + assim_type + '.csv', 'a')
        w_bic = csv.writer(f_bic)

        # EF regressions
        reg_test_preds_list, card_test_preds_list, reg_train_preds_list, card_train_preds_list, pixels_r, suffix, k = run_regressions(
            X, y, pixels, rescale, include_interactions, include_squares,
            include_all_polys, do_feature_selection, do_PLS, write_to_csv,
            w_bic, n_features_select, suffix, ens_size, n_regr_models,
            n_features)

        f_bic.close()

        # globcover comparison
        '''gl_reg_test_preds_list, gl_card_test_preds_list, gl_reg_train_preds_list, gl_card_train_preds_list, gl_pixels_r, gl_suffix, gl_k = run_regressions(X_gl, y_gl, pixels, 
            rescale, False, False, False, False, False, False, w_bic, n_features_select, 
            suffix_gl, ens_size, n_regr_models, n_features_gl)'''

    else:
        suffix += 'full_ens_'

        icount = 0
        for i in sample(range(y_full_ens.shape[0]), 100):
            print(icount)
            rtest, ctest, rtrain, ctrain, pixels_r, suffix, k = run_regressions(
                X, y_full_ens[i, :, :], pixels, rescale, include_interactions,
                include_squares, include_all_polys, do_feature_selection,
                n_features_select, suffix, ens_size, n_regr_models, n_features)

            reg_test_preds_list = [np.nanmedian(
                ri, axis=0) for ri in rtest] if icount == 0 else [
                    np.vstack((np.nanmedian(ri, axis=0), rfull))
                    for ri, rfull in zip(rtest, reg_test_preds_list)
                ]
            card_test_preds_list = np.copy(ctest) if icount == 0 else [
                np.vstack((ci, cfull))
                for ci, cfull in zip(ctest, card_test_preds_list)
            ]
            reg_train_preds_list = [np.nanmedian(
                ri, axis=0) for ri in rtrain] if icount == 0 else [
                    np.vstack((np.nanmedian(ri, axis=0), rfull))
                    for ri, rfull in zip(rtrain, reg_train_preds_list)
                ]
            card_train_preds_list = np.copy(ctrain) if icount == 0 else [
                np.vstack((ci, cfull))
                for ci, cfull in zip(ctrain, card_train_preds_list)
            ]

            icount += 1

    # fill csv

    f_test = open(
        misc_dir + 'env_filter_manual/fs/fs_test' + suffix.partition('fs')[0] +
        model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', 'a')
    wr_test = csv.writer(f_test)

    f_train = open(
        misc_dir + 'env_filter_manual/fs/fs_train' +
        suffix.partition('fs')[0] + model_id + '_MCMC' + mcmc_id + '_' +
        n_iter + assim_type + '.csv', 'a')
    wr_train = csv.writer(f_train)

    f_test_preds = open(
        misc_dir + 'env_filter_manual/par_preds/par_preds_test' + suffix +
        model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', 'a')
    wr_test_preds = csv.writer(f_test_preds)

    f_train_preds = open(
        misc_dir + 'env_filter_manual/par_preds/par_preds_train' + suffix +
        model_id + '_MCMC' + mcmc_id + '_' + n_iter + assim_type + '.csv', 'a')
    wr_train_preds = csv.writer(f_train_preds)

    print('TEST:')
    #plot_scatter_test_pred(card_test_preds_list, reg_test_preds_list, k, pixels_r, parnames, wr_test, wr_test_preds, plot_dir+'env_filter/', 'par_preds_test'+suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)
    #plot_scatter_test_pred(gl_card_test_preds_list, gl_reg_test_preds_list, gl_k, gl_pixels_r, parnames, wr_test, wr_test_preds, plot_dir+'env_filter/', 'par_preds_test'+gl_suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)

    print('. . . . . \n\nTRAIN:')
    #plot_scatter_test_pred(card_train_preds_list, reg_train_preds_list, k, pixels_r, parnames, wr_train, wr_train_preds, plot_dir+'env_filter/', 'par_preds_train'+suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)
    #plot_scatter_test_pred(gl_card_train_preds_list, gl_reg_train_preds_list, gl_k, gl_pixels_r, parnames, wr_train, wr_train_preds, plot_dir+'env_filter/', 'par_preds_train'+gl_suffix+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type, train_full_ensemble, write_to_csv)

    f_test.close()
    f_train.close()
    f_test_preds.close()
    f_train_preds.close()

    ############################################################################################################################################
    ################################### find optimal number of features for each parameter #####################################################

    if opt_feature_select:

        test_rmse = read_csv(misc_dir + 'env_filter_manual/fs/fs_test' +
                             suffix.partition('fs')[0] + model_id + '_MCMC' +
                             mcmc_id + '_' + n_iter + assim_type + '.csv',
                             header=None)
        test_rmse.columns = [
            item for sublist in [['n_features_select'], parnames]
            for item in sublist
        ]
        test_rmse.sort_values('n_features_select')

        train_rmse = read_csv(misc_dir + 'env_filter_manual/fs/fs_train' +
                              suffix.partition('fs')[0] + model_id + '_MCMC' +
                              mcmc_id + '_' + n_iter + assim_type + '.csv',
                              header=None)
        train_rmse.columns = [
            item for sublist in [['n_features_select'], parnames]
            for item in sublist
        ]
        train_rmse.sort_values('n_features_select')

        x = test_rmse['n_features_select'].values

        opt_fs = plot_train_test(x,
                                 train_rmse,
                                 test_rmse,
                                 parnames,
                                 savepath=plot_dir + 'train_test/',
                                 savename=model_id + '_MCMC' + mcmc_id +
                                 suffix.partition('fs')[0],
                                 norm=False)
        opt_fs = plot_train_test(x,
                                 train_rmse,
                                 test_rmse,
                                 parnames,
                                 savepath=plot_dir + 'train_test/',
                                 savename=model_id + '_MCMC' + mcmc_id +
                                 suffix.partition('fs')[0],
                                 norm=True)
        print(opt_fs)
        '''bic_data = read_csv(misc_dir +'env_filter_manual/fs/bic_fs_soilgrids_poolobs_'+model_id+'_MCMC'+mcmc_id+'_'+n_iter+assim_type + '.csv', header=None)
        bic_data.columns = [item for sublist in [['n_features_select'],parnames] for item in sublist]
        bic_data.columns.sort_values('n_features_select')
        
        x = bic_data['n_features_select'].values
        
        opt_fs = plot_train_test(x, bic_data, bic_data*np.nan, parnames, savepath=plot_dir+'train_test/', savename='bic_'+model_id+'_MCMC'+mcmc_id+suffix.partition('fs')[0])
        print(opt_fs)'''

    ############################################################################################################################################
    ################################### copy cbfs and substitute pars for IC optimization ######################################################

    # set directories for CARDAMOM runs
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbf_ef_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '_ef_ic/' + model_id + '/'
    cbr_ef_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '_ef/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    output_ef_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '_ef/' + model_id + '/'

    # select which pixels to submit
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir + '/../')

    if submit_ic_opt:

        txt_filename = 'ef_ic_assim_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        txt_file = open(txt_filename, 'w')

        for cbf_file in cbf_files:
            print(cbf_file)

            cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
            cbf_pixel = cbf_file[-8:-4]

            if cbf_pixel in pixels_r:

                parpriors = np.concatenate(
                    (retrieve_preds(cbf_pixel, opt_fs, suffix,
                                    misc_dir + 'env_filter_manual/par_preds/'),
                     np.ones(50 - len(parnames)) * -9999.))
                parpriorunc = np.concatenate(
                    (np.ones(len(parnames)) * 1.001,
                     np.ones(50 - len(parnames)) * -9999.))

                # except ICs
                for ic_ind in ic_inds:
                    parpriors[ic_ind] = -9999.
                    parpriorunc[ic_ind] = -9999.

                # except NBE unc
                if nbe_optimization == 'ON':
                    parpriors[len(parnames) - 1] = -9999.
                    parpriorunc[len(parnames) - 1] = -9999.

                cbf_data['PARPRIORS'] = parpriors.reshape(-1, 1)
                cbf_data['PARPRIORUNC'] = parpriorunc.reshape(-1, 1)

                fp = cbf_file[:-9] + suffix.partition('fs')[0] + cbf_pixel
                fa = cbf_file[:
                              -9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition(
                                  'fs')[0] + 'assim_' + cbf_pixel
                rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(
                    cbf_data, cbf_ef_ic_dir + fp + '.cbf')

                txt_file.write(
                    '%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' %
                    (mdf_dir, cbf_ef_ic_dir[3:], fp + '.cbf', cbr_ef_dir,
                     fa + '.cbr', n_iter, frac_save_out, mcmc_id))
                txt_file.write('\n')

        txt_file.close()

        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file,
                         array_size=len(pixels_r),
                         n_hours=6,
                         txt_file=txt_filename,
                         combined=False)

    if submit_forward:

        txt_filename = 'ef_ic_forward_list_' + model_id + assim_type + '_MCMC' + mcmc_id + '_' + n_iter + '.txt'
        txt_file = open(txt_filename, 'w')

        for cbf_file in cbf_files:
            print(cbf_file)

            cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)
            cbf_pixel = cbf_file[-8:-4]

            if cbf_pixel in pixels_r:

                fa = cbf_file[:
                              -9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition(
                                  'fs')[0] + 'assim_' + cbf_pixel
                cbr_assim = rwb.read_cbr_file(
                    glob.glob(cbr_ef_dir + fa + '.cbr')[0],
                    {'nopars': len(parnames)})

                ff = cbf_file[:
                              -9] + '_MCMC' + mcmc_id + '_' + n_iter + suffix.partition(
                                  'fs')[0] + 'forward_' + cbf_pixel
                cbr_forward = retrieve_preds(
                    cbf_pixel, opt_fs, suffix,
                    misc_dir + 'env_filter_manual/par_preds/')
                for ic_ind in ic_inds:
                    cbr_forward[ic_ind] = np.nanmedian(cbr_assim[:, ic_ind])
                cbr_forward = cbr_forward.reshape(1, len(parnames))

                rwb.write_cbr_file(cbr_forward, cbr_ef_dir + ff + '.cbr')

                txt_file.write(
                    '%sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' %
                    (runmodel_dir, cbf_dir[3:], cbf_file, cbr_ef_dir,
                     ff + '.cbr', output_ef_dir, 'fluxfile_' + ff + '.bin',
                     output_ef_dir, 'poolfile_' + ff + '.bin', output_ef_dir,
                     'edcdfile_' + ff + '.bin', output_ef_dir,
                     'probfile_' + ff + '.bin'))
                txt_file.write('\n')

        txt_file.close()

        sh_file = open(txt_filename[:-3] + 'sh', 'w')
        autil.fill_in_sh(sh_file,
                         array_size=len(pixels_r),
                         n_hours=1,
                         txt_file=txt_filename,
                         combined=False)

    return
예제 #9
0
def main():
    """Summarize one pixel's chains and append a (pixel, value) row to a CSV.

    Command-line arguments (``sys.argv``):
        1: model id
        2: run type (ALL or SUBSET); not read by this function
        3: MCMC id (119 for normal, 3 for DEMCMC)
        4: number of iterations
        5: quantity to write: 'GR', 'PARXX' (XX is a 1-based parameter
           number), or a name passed on to ``autil.get_output``
        6: pixel id

    All chain files found for the pixel are concatenated; only chains with
    exactly ``ens_size`` rows enter the Gelman-Rubin computation. For 'PARXX'
    and ``autil.get_output`` requests the value written is -9999. unless more
    than 90% of the parameters have a Gelman-Rubin statistic below 1.2.

    Raises:
        IndexError: if no chain file is found for the pixel, because the
            first chain's name is needed to locate the .cbf file.
    """
    model_id = sys.argv[1]
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    var_to_plot = sys.argv[5]  # GR, a flux or pool, or PARXX
    ens_size = 500
    assim_type = '_longadapted'

    cur_dir = os.getcwd() + '/'
    if 'scripts' not in cur_dir:
        cur_dir = cur_dir + 'scripts/'

    files_root = '../../../../../../scratch/users/cfamigli/cardamom/files/'
    cbf_dir = files_root + 'cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = files_root + 'cbr' + assim_type + '/' + model_id + '/'
    output_dir = files_root + 'output' + assim_type + '/' + model_id + '/'
    parnames = autil.get_parnames(cur_dir + '../../misc/', model_id)

    # chain files are globbed and read relative to the cbr directory
    os.chdir(cbr_dir)
    files = glob.glob('*MCMC' + mcmc_id + '_' + n_iter + '_*.cbr')
    pixel = sys.argv[6]
    print(pixel)

    pixel_chains = autil.find_all_chains(files, pixel)
    pixel_chains.sort()  # filenames
    print(pixel_chains)

    # the .cbf file shares the chain files' prefix (everything before '_MCMC')
    file_prefix = pixel_chains[0].partition('_MCMC')[0]
    cbf_pixel = rwb.read_cbf_file(cur_dir + cbf_dir + file_prefix + '_' +
                                  pixel + '.cbf')

    # looked up once instead of once per chain
    # NOTE(review): assumes the helper is a pure lookup on model_id - confirm
    nofluxes_nopools_lma = autil.get_nofluxes_nopools_lma(model_id)

    cbr_parts, flux_parts, pool_parts = [], [], []
    cbr_chain_list = []  # only chains with the expected ensemble size
    for pixel_chain in pixel_chains:
        print(pixel_chain)
        cbr_chain = rwb.read_cbr_file(pixel_chain, {'nopars': len(parnames)})
        cbr_parts.append(cbr_chain)

        bin_name = pixel_chain[:-3] + 'bin'
        flux_parts.append(
            rwb.readbinarymat(cur_dir + output_dir + 'fluxfile_' + bin_name,
                              [cbf_pixel['nodays'], nofluxes_nopools_lma[0]]))
        pool_parts.append(
            rwb.readbinarymat(
                cur_dir + output_dir + 'poolfile_' + bin_name,
                [cbf_pixel['nodays'] + 1, nofluxes_nopools_lma[1]]))

        if np.shape(cbr_chain)[0] == ens_size:
            cbr_chain_list.append(cbr_chain)
            print(np.shape(cbr_chain))

    # stack all chains once rather than re-concatenating on every iteration
    cbr_pixel = np.concatenate(cbr_parts, axis=0)
    flux_pixel = np.concatenate(flux_parts, axis=0)
    pool_pixel = np.concatenate(pool_parts, axis=0)

    ### COMPUTE GELMAN RUBIN
    if len(cbr_chain_list) > 1:
        gr = autil.gelman_rubin(cbr_chain_list)
        gr_pixel = sum(gr < 1.2) / len(parnames)
    else:
        gr_pixel = -9999.

    ### DETERMINE DATA TO WRITE TO FILE
    converged = gr_pixel > 0.9
    if var_to_plot == 'GR':
        data = np.copy(gr_pixel)
    elif 'PAR' in var_to_plot:
        parnum = int(var_to_plot.partition('PAR')[-1])
        data = np.nanmedian(cbr_pixel[:, parnum - 1]) if converged else -9999.
    elif converged:
        data = np.nanmean(
            np.nanmedian(autil.get_output(var_to_plot, model_id, flux_pixel,
                                          pool_pixel, cbr_pixel,
                                          nofluxes_nopools_lma[2]),
                         axis=0))
    else:
        data = -9999.

    csv_path = (cur_dir + '../../misc/' + model_id + '_' + file_prefix +
                '_MCMC' + mcmc_id + '_' + n_iter + '_' + var_to_plot + '.csv')
    # newline='' is what the csv module requires of files it writes to
    with open(csv_path, 'a', newline='') as f:
        csv.writer(f).writerow([pixel, data])

    return
def main():
    """Check chain convergence per pixel and write a resubmission job list.

    Command-line arguments (``sys.argv``):
        1: model id
        2: run type: 'ALL', 'SUBSET_RANDOM' or 'SUBSET_INPUT'
        3: MCMC id ('119' for normal, '3' for DEMCMC)
        4: NBE optimization, 'OFF' or 'ON' (selects the CARDAMOM code tree)
        5: number of iterations
        6: integer passed to ``autil.fill_in_sh`` as ``n_hours``
        7: resubmit number (only used in output file names)

    For every .cbf file, each combination of ``n_chains_resubmit`` existing
    chain files is tested in turn until one combination has at least 90% of
    its parameters with a Gelman-Rubin statistic below 1.2. If no combination
    reaches that threshold, or there are no chain files at all, one line
    with ``n_chains_resubmit`` new assimilation + forward-run commands
    (joined by '&&') is written to the text file.

    Outputs: the command text file, a matching .sh file filled in by
    ``autil.fill_in_sh``, a map via ``autil.plot_map`` and a pickled
    DataFrame of (pixel, best chain subset, converged flag).

    Raises:
        ValueError: if the MCMC id or the run type is not one of the values
            listed above.
    """
    model_id = sys.argv[1]
    run_type = sys.argv[2]  # ALL, SUBSET_RANDOM or SUBSET_INPUT
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    nbe_optimization = sys.argv[4]  # 'OFF' or 'ON'
    assim_type = '_p25adapted'

    cur_dir = os.getcwd() + '/'
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization == 'OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr' + assim_type + '/' + model_id + '/'
    output_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output' + assim_type + '/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    n_iterations = sys.argv[5]
    runtime_assim = int(sys.argv[6])
    resubmit_num = sys.argv[7]
    n_chains_resubmit = 4
    ens_size = 500

    # fail early with a clear message instead of a NameError further down
    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iterations) / 500))
    elif mcmc_id == '3':
        # n_iterations / frac_save_out * 100 will be ensemble size
        frac_save_out = str(int(int(n_iterations) / 500 * 100))
    else:
        raise ValueError("mcmc_id must be '119' or '3', got %r" % mcmc_id)

    if run_type not in ('ALL', 'SUBSET_RANDOM', 'SUBSET_INPUT'):
        raise ValueError(
            "run_type must be 'ALL', 'SUBSET_RANDOM' or 'SUBSET_INPUT', "
            "got %r" % run_type)

    # select which pixels to submit
    os.chdir(cbf_dir)
    if run_type == 'ALL':
        cbf_files = glob.glob('*.cbf')
    elif run_type == 'SUBSET_RANDOM':
        cbf_files = sample(glob.glob('*.cbf'), 10)
    else:  # SUBSET_INPUT
        cbf_files = select_cbf_files(glob.glob('*.cbf'), [
            '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264',
            '1271', '3457'
        ])
    os.chdir(cur_dir + '/../')

    cbf_files.sort()

    # create one combined submission file with all assimilation and forward
    # commands for each pixel's chain on one line
    txt_filename = 'combined_assim_forward_list_' + model_id + '_' + run_type + assim_type + '_MCMC' + mcmc_id + '_' + n_iterations + '_resubmit' + resubmit_num + '.txt'

    resubmit_count = 0
    # best converged fraction found per cbf file (NaN until one is computed)
    gr_pixels = np.zeros(len(cbf_files)) * np.nan
    pixels = []  # pixels with at least n_chains_resubmit chain files
    pixel_inds = []  # position of each entry of `pixels` within cbf_files
    best_subset = []
    conv_bool_lst = []

    with open(txt_filename, 'w') as txt_file:
        for cbf_ind, cbf_file in enumerate(cbf_files):
            best_subset_pixel = []
            resubmit = False
            print(cbf_file, cbf_ind)

            pixel = cbf_file[-8:-4]

            cbr_files = glob.glob(cur_dir + '../' + cbr_dir + '*MCMC' +
                                  mcmc_id + '_' + n_iterations + '_' + pixel +
                                  '_*.cbr')
            # order by the integer chain number that follows '<pixel>_'
            cbr_files = sorted(
                cbr_files,
                key=lambda x: int(
                    x.partition(pixel + '_')[-1].partition('.cbr')[0]))

            if len(cbr_files) >= n_chains_resubmit:
                pixels.append(pixel)
                pixel_inds.append(cbf_ind)

            if len(cbr_files) > 0:
                end_chain = int(cbr_files[-1].partition(pixel + '_')
                                [-1].partition('.cbr')[0])
            else:
                end_chain = 0
                resubmit = True

            # try every n_chains_resubmit-member combination of chain files,
            # stopping at the first one that converges
            for subset in itertools.combinations(cbr_files,
                                                 n_chains_resubmit):

                # read parameters and compute gelman rubin
                cbr_chain_list = []
                # leading '0' placeholder is kept so the pickled subsets keep
                # their existing layout
                chain_nums = ['0']

                for cbr_file in subset:
                    cbr_chain = rwb.read_cbr_file(cbr_file,
                                                  {'nopars': len(parnames)})
                    cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)
                    chain_nums.append(
                        cbr_file.partition('.cbr')[0].partition(pixel + '_')
                        [-1])  # append chain number

                    if np.shape(cbr_chain)[0] == ens_size:
                        cbr_chain_list.append(cbr_chain)
                    else:
                        print('incorrect ensemble size)')
                        resubmit = True

                if len(cbr_chain_list) > 1:
                    gr = autil.gelman_rubin(cbr_chain_list)
                    n_converged = sum(gr < 1.2)
                    frac_converged = n_converged / len(parnames)
                    print('%i/%i' % (n_converged, len(parnames)))

                    if np.isnan(gr_pixels[cbf_ind]):
                        gr_pixels[cbf_ind] = frac_converged

                    if frac_converged < 0.9:
                        resubmit = True

                        # remember the best non-converged subset seen so far
                        if frac_converged >= gr_pixels[cbf_ind]:
                            gr_pixels[cbf_ind] = frac_converged
                            best_subset_pixel.append(chain_nums)
                            conv_bool = 0

                    else:
                        resubmit = False
                        gr_pixels[cbf_ind] = frac_converged
                        best_subset_pixel.append(chain_nums)
                        conv_bool = 1
                        break

                else:
                    print('gr undefined')
                    best_subset_pixel.append(chain_nums)
                    conv_bool = 0
                    resubmit = True

            if len(best_subset_pixel) > 0:
                best_subset.append(best_subset_pixel[-1])
                conv_bool_lst.append(conv_bool)

            # write into text file if pixel needs to be resubmitted
            if resubmit:
                first_resubmit_chain = end_chain + 1
                last_resubmit_chain = end_chain + n_chains_resubmit
                commands = []
                for chain in range(first_resubmit_chain,
                                   last_resubmit_chain + 1):
                    # shared stem of the chain's .cbr and .bin file names
                    stem = (cbf_file[:-8] + 'MCMC' + mcmc_id + '_' +
                            n_iterations + '_' + pixel + '_' + str(chain))
                    assim_cmd = (
                        '%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' %
                        (mdf_dir, cbf_dir[3:], cbf_file, cbr_dir,
                         stem + '.cbr', n_iterations, frac_save_out, mcmc_id))
                    forward_cmd = (
                        '%sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s'
                        % (runmodel_dir, cbf_dir[3:], cbf_file, cbr_dir,
                           stem + '.cbr', output_dir,
                           'fluxfile_' + stem + '.bin', output_dir,
                           'poolfile_' + stem + '.bin', output_dir,
                           'edcdfile_' + stem + '.bin', output_dir,
                           'probfile_' + stem + '.bin'))
                    commands.append(assim_cmd + ' && ' + forward_cmd)
                txt_file.write(' && '.join(commands) + '\n')
                resubmit_count += 1

    # closing again is harmless if fill_in_sh already closes the handle
    with open(txt_filename[:-3] + 'sh', 'w') as sh_file:
        autil.fill_in_sh(sh_file,
                         array_size=resubmit_count,
                         n_hours=runtime_assim,
                         txt_file=txt_filename,
                         combined=True)

    # Pass only the values belonging to `pixels`, so both lists stay aligned
    # when pixels with too few chains were skipped.
    # NOTE(review): assumes plot_map pairs pixel_value_list[i] with
    # value_list[i] - confirm against autil.plot_map.
    map_values = gr_pixels[np.array(pixel_inds, dtype=int)] * 100
    autil.plot_map(nrows=46,
                   ncols=73,
                   land_pixel_list=pixels,
                   pixel_value_list=pixels,
                   value_list=map_values,
                   savepath=cur_dir + plot_dir + 'maps/',
                   savename='gr_' + model_id + assim_type + '_' + run_type +
                   '_MCMC' + mcmc_id + '_' + n_iterations + '_resubmit' +
                   resubmit_num)

    print(len(pixels), len(best_subset), len(conv_bool_lst))
    DataFrame(list(
        zip(pixels, best_subset,
            conv_bool_lst))).to_pickle(cur_dir + '../' + cbr_dir + model_id +
                                       assim_type + '_' + run_type + '_MCMC' +
                                       mcmc_id + '_' + n_iterations +
                                       '_best_subset.pkl')

    return
def main():
    """Compare per-pixel median parameters between MCMC and DEMCMC runs.

    For each (MCMC id, iteration count, model id) triple hard-coded below,
    the pickled best-chain table is read and, for every pixel that has both
    chain files and a row in that table, the median of each parameter over
    the pixel's best chains is stored. Pixels whose 'conv' flag is not 1 get
    NaN medians. MCMC id '119' fills ``mcmc_pred`` and '3' fills
    ``demcmc_pred``; both are handed to ``plot_scatter_compare``.

    The prediction lists are pre-sized with the hard-coded ``n_pixels`` (928)
    and vectors of length 34, so more than 928 .cbf files would raise
    IndexError.
    NOTE(review): 34 is presumably the parameter count of model 811 - confirm.
    """
    # get specifications for run to read
    model_ids = ['811', '811']
    assim_type = '_p25adapted'
    ens_size = 500
    mcmc_ids = ['119', '3']
    n_iters = ['40000000', '1000000']

    # set directories
    cur_dir = os.getcwd() + '/'
    files_root = '../../../../../../scratch/users/cfamigli/cardamom/files/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'

    n_pixels = 928
    demcmc_pred = [np.ones(34) * np.nan for _ in range(n_pixels)]
    mcmc_pred = [np.ones(34) * np.nan for _ in range(n_pixels)]

    for mcmc_id, n_iter, model_id in zip(mcmc_ids, n_iters, model_ids):

        # get list of directories
        cbf_dir = files_root + 'cbf' + assim_type + '/' + model_id + '/'
        cbr_dir = files_root + 'cbr' + assim_type + '/' + model_id + '/'
        parnames = autil.get_parnames('../../misc/', model_id)

        # get list of pixels
        pixels = sorted(cbf[-8:-4] for cbf in glob.glob(cbf_dir + '*.cbf'))

        # get best chains
        conv_chains = read_pickle(cbr_dir + model_id + assim_type + '_ALL' +
                                  '_MCMC' + mcmc_id + '_' + n_iter +
                                  '_best_subset.pkl')
        # rename columns for easier access
        conv_chains.columns = ['pixel', 'bestchains', 'conv']

        for ind, pixel in enumerate(pixels):

            # grab cbrs for that pixel, MCMCID and number of iterations
            # (one glob serves both the existence check and the file list)
            cbr_files = glob.glob(cbr_dir + '*MCMC' + mcmc_id + '_' + n_iter +
                                  '_' + pixel + '*.cbr')
            if not cbr_files or pixel not in conv_chains['pixel'].values:
                continue
            cbr_files.sort()

            pixel_rows = conv_chains.loc[conv_chains['pixel'] == pixel]
            # first entry of 'bestchains' is dropped; the rest are chain numbers
            best_chains = pixel_rows['bestchains'].values[0][1:]
            print(pixel, best_chains)

            conv = pixel_rows['conv'].values[0]
            if conv == 1:
                # aggregate bestchains from optimal posteriors
                cbr_data = []
                for chain in best_chains:
                    chain_file = [
                        i for i in cbr_files
                        if pixel + '_' + chain + '.cbr' in i
                    ][0]
                    cbr_data.append(
                        autil.modulus_Bday_Fday(
                            rwb.read_cbr_file(chain_file,
                                              {'nopars': len(parnames)}),
                            parnames))
                cbr_data = np.vstack(cbr_data)
            else:
                cbr_data = np.ones((ens_size, len(parnames))) * np.nan

            if mcmc_id == '119':
                mcmc_pred[ind] = np.nanmedian(cbr_data, axis=0)
            elif mcmc_id == '3':
                demcmc_pred[ind] = np.nanmedian(cbr_data, axis=0)

    plot_scatter_compare(demcmc_pred, mcmc_pred, parnames,
                         cur_dir + plot_dir + 'demcmc_mcmc/',
                         'par_compare_811')

    return
예제 #12
0
def main():
    """Scatter-compare per-pixel median parameters: EF forward run vs. optimum.

    Command-line arguments (``sys.argv``):
        1: model id
        2: MCMC id (119 for normal, 3 for DEMCMC)
        3: number of iterations

    For every .cbf file, the pixel's chain files in the optimal-posterior
    directory and its chain files in the '_ef' directory (matching
    ``ef_spec``) are read, stacked, and reduced to a per-parameter median.
    The two resulting (n_pixels, n_parameters) arrays are passed to
    ``plot_scatter_compare``.

    A pixel with no chain files on one side keeps NaN in that side's row
    rather than inheriting the previous pixel's chains.
    """
    model_id = sys.argv[1]
    mcmc_id = sys.argv[2]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[3]
    assim_type = '_p25adapted'

    # EF comparison
    ef_spec = 'clipped_PLS_soilgrids_poolobs_rescaled_forward'

    # directories
    cur_dir = os.getcwd() + '/'
    files_root = '../../../../../../scratch/users/cfamigli/cardamom/files/'
    cbf_dir = files_root + 'cbf' + assim_type + '/' + model_id + '/'
    cbr_dir = files_root + 'cbr' + assim_type + '/' + model_id + '/'
    cbr_ef_dir = files_root + 'cbr' + assim_type + '_ef/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)

    # get cbfs to run through
    os.chdir(cbf_dir)
    cbf_files = glob.glob('*.cbf')
    cbf_files.sort()
    os.chdir(cur_dir + '/../')

    # rows stay NaN for pixels without chain files
    opt_preds = np.zeros((len(cbf_files), len(parnames))) * np.nan
    ef_preds = np.zeros((len(cbf_files), len(parnames))) * np.nan

    for row, cbf_file in enumerate(cbf_files):

        pixel = cbf_file[-8:-4]
        print(pixel)

        pixel_chains_opt = autil.find_all_chains(
            glob.glob(cbr_dir + '*_MCMC' + mcmc_id + '_' + n_iter + '_' +
                      pixel + '*.cbr'), pixel)
        pixel_chains_opt.sort()  # filenames

        pixel_chains_ef = autil.find_all_chains(
            glob.glob(cbr_ef_dir + '*_MCMC' + mcmc_id + '_' + n_iter + '_' +
                      ef_spec + '_' + pixel + '.cbr'), pixel)
        pixel_chains_ef.sort()

        # chains are collected afresh for every pixel so nothing carries over
        opt_chains = [
            autil.modulus_Bday_Fday(
                rwb.read_cbr_file(pc_opt, {'nopars': len(parnames)}),
                parnames) for pc_opt in pixel_chains_opt
        ]
        ef_chains = [
            autil.modulus_Bday_Fday(
                rwb.read_cbr_file(pc_ef, {'nopars': len(parnames)}), parnames)
            for pc_ef in pixel_chains_ef
        ]

        if opt_chains:
            opt_preds[row, :] = np.nanmedian(np.concatenate(opt_chains,
                                                            axis=0),
                                             axis=0)
        if ef_chains:
            ef_preds[row, :] = np.nanmedian(np.concatenate(ef_chains, axis=0),
                                            axis=0)

    plot_scatter_compare(ef_preds, opt_preds, parnames, plot_dir + 'scatters/',
                         model_id + '_MCMC' + mcmc_id + '_' + n_iter)

    return
def main():
    """Compare ensemble spread (or RMSE) and convergence between two setups.

    Command-line arguments (``sys.argv``):
        1: starting model id
        2: run type (ALL or SUBSET); not read by this function
        3: metric, 'spread' (mean interquartile range) or anything else for
           RMSE of the ensemble median against the observations
        4: what to compare: 'MCMCID', 'MODEL' or 'NBEUNC'

    For each hard-coded pixel and each of the two compared setups, the first
    four chains are read, the percentage of parameters with a Gelman-Rubin
    statistic below 1.2 is stored, and the chosen metric is computed for
    every variable in ``vrs``. Results go to ``autil.plot_spread_v_iter``
    and ``autil.plot_conv_v_iter``.

    In 'MODEL' mode ``model_id_start`` (and in 'NBEUNC' mode ``assim_type``)
    is overwritten by each compared value, so the plot file names carry the
    last compared value.

    Raises:
        ValueError: if the comparison type is not one of the three above.
        IndexError: if no chain file is found for a pixel/setup combination.
    """
    model_id_start = sys.argv[1]
    metric = sys.argv[3]  # spread or RMSE
    assim_type = '_p25adapted'
    compare_between = sys.argv[4]  # MCMCID or MODEL or NBEUNC

    n_iters = [['40000000'], ['40000000']]
    vrs = [
        'NBE', 'cumNBE', 'LAI', 'GPP', 'Reco', 'Rauto', 'Rhet', 'lit', 'root',
        'som', 'wood'
    ]
    pixels = [
        '3809', '3524', '2224', '4170', '1945', '3813', '4054', '3264', '1271',
        '3457'
    ]

    files_root = '../../../../../../scratch/users/cfamigli/cardamom/files/'

    def data_dirs(assim, model):
        # cbf, cbr and output directories for one assimilation type and model
        return tuple(files_root + kind + assim + '/' + model + '/'
                     for kind in ('cbf', 'cbr', 'output'))

    cur_dir = os.getcwd() + '/'
    cbf_dir, cbr_dir, output_dir = data_dirs(assim_type, model_id_start)
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id_start)

    if compare_between == 'MCMCID':
        comps = ['3', '119']
    elif compare_between == 'MODEL':
        comps = [model_id_start, '911']
        mcmc_id = '119'
    elif compare_between == 'NBEUNC':
        comps = [assim_type, '_p25adapted_NBEuncreduced']
        mcmc_id = '119'
    else:
        # previously surfaced as a NameError on `comps` further down
        raise ValueError(
            "compare_between must be 'MCMCID', 'MODEL' or 'NBEUNC', got %r" %
            compare_between)

    # one result array per compared setup
    ens_spread = [
        np.ones((len(pixels), len(vrs), len(n_iters[0]))) * float('nan'),
        np.ones((len(pixels), len(vrs), len(n_iters[1]))) * float('nan')
    ]
    conv = [
        np.ones((len(pixels), len(n_iters[0]))) * float('nan'),
        np.ones((len(pixels), len(n_iters[1]))) * float('nan')
    ]

    for pixel_ind, pixel in enumerate(pixels):

        # positions come from enumerate so two equal entries in comps still
        # fill separate result slots
        for comp_ind, comp in enumerate(comps):
            if compare_between == 'MCMCID':
                mcmc_id = comp
            elif compare_between == 'MODEL':
                model_id_start = comp
                cbf_dir, cbr_dir, output_dir = data_dirs(assim_type, comp)
                parnames = autil.get_parnames(cur_dir + '../../misc/', comp)
            else:  # NBEUNC
                assim_type = comp
                cbf_dir, cbr_dir, output_dir = data_dirs(comp, model_id_start)
                parnames = autil.get_parnames(cur_dir + '../../misc/',
                                              model_id_start)

            os.chdir(cur_dir + cbr_dir)

            # looked up once per setup instead of once per chain and variable
            # NOTE(review): assumes the helper is a pure lookup - confirm
            nofluxes_nopools_lma = autil.get_nofluxes_nopools_lma(
                model_id_start)

            for it_ind, it in enumerate(n_iters[comp_ind]):
                files = glob.glob('*MCMC' + mcmc_id + '_' + it + '_' + pixel +
                                  '*.cbr')
                pixel_chains = autil.find_all_chains(files, pixel)
                pixel_chains.sort()  # filenames
                print(pixel_chains)

                cbf_pixel = rwb.read_cbf_file(
                    cur_dir + cbf_dir + pixel_chains[0].partition('_MCMC')[0] +
                    '_' + pixel + '.cbf')

                cbr_chain_list = []
                for chain_ind, pixel_chain in enumerate(pixel_chains[:4]):
                    print(pixel_chain)
                    cbr_chain = rwb.read_cbr_file(pixel_chain,
                                                  {'nopars': len(parnames)})
                    bin_name = pixel_chain[:-3] + 'bin'
                    flux_chain = rwb.readbinarymat(
                        cur_dir + output_dir + 'fluxfile_' + bin_name,
                        [cbf_pixel['nodays'], nofluxes_nopools_lma[0]])
                    pool_chain = rwb.readbinarymat(
                        cur_dir + output_dir + 'poolfile_' + bin_name,
                        [cbf_pixel['nodays'] + 1, nofluxes_nopools_lma[1]])

                    if chain_ind == 0:
                        cbr_pixel = np.copy(cbr_chain)
                        flux_pixel = np.copy(flux_chain)
                        pool_pixel = np.copy(pool_chain)
                    else:
                        cbr_pixel = np.concatenate((cbr_pixel, cbr_chain),
                                                   axis=0)
                        flux_pixel = np.concatenate((flux_pixel, flux_chain),
                                                    axis=0)
                        pool_pixel = np.concatenate((pool_pixel, pool_chain),
                                                    axis=0)

                    cbr_chain_list.append(cbr_chain)
                    print(np.shape(cbr_chain))
                    print(np.shape(cbr_pixel))

                gr = autil.gelman_rubin(cbr_chain_list)
                n_converged = sum(gr < 1.2)
                print('%i of %i parameters converged' %
                      (n_converged, len(parnames)))
                conv[comp_ind][pixel_ind,
                               it_ind] = n_converged / len(parnames) * 100

                for var_ind, var in enumerate(vrs):
                    print(var)

                    try:
                        obs = cbf_pixel['OBS'][var]
                        obs[obs == -9999] = float('nan')
                    except Exception:
                        # any failure to get usable observations means "no
                        # observations"; unlike a bare except this lets
                        # KeyboardInterrupt and SystemExit through
                        obs = np.ones(cbf_pixel['nodays']) * np.nan
                    n_obs = np.sum(np.isfinite(obs))

                    fwd_data = autil.get_output(var, model_id_start,
                                                flux_pixel, pool_pixel,
                                                cbr_pixel,
                                                nofluxes_nopools_lma[2])

                    if len(fwd_data) > 0:
                        # drop the extra final column when the output has
                        # more columns than there are days
                        if fwd_data.shape[1] > cbf_pixel['nodays']:
                            fwd_data = fwd_data[:, :-1]

                        fwd_data = autil.remove_outliers(fwd_data)
                        med = np.nanmedian(fwd_data, axis=0)
                        ub = np.nanpercentile(fwd_data, 75, axis=0)
                        lb = np.nanpercentile(fwd_data, 25, axis=0)

                        if metric == 'spread':
                            value = np.nanmean(abs(ub - lb))
                        else:
                            value = np.sqrt(
                                np.nansum((med - obs)**2) / n_obs)
                        ens_spread[comp_ind][pixel_ind, var_ind,
                                             it_ind] = value
                        print(ens_spread[comp_ind][pixel_ind, var_ind,
                                                   it_ind])

    print(ens_spread)
    for var_ind, var in enumerate(vrs):
        autil.plot_spread_v_iter(
            ens_spread,
            pixels,
            var_ind,
            var,
            n_iters,
            metric,
            cur_dir + plot_dir + 'spread_v_iter',
            'iter_test' + assim_type + '_' + compare_between + '_' +
            model_id_start + '_' + var + '_' + metric,
            single_val=True)

    autil.plot_conv_v_iter(conv,
                           pixels,
                           n_iters,
                           cur_dir + plot_dir + 'spread_v_iter',
                           'iter_test' + assim_type + '_' + compare_between +
                           '_' + model_id_start + '_conv',
                           single_val=True)

    return
예제 #14
0
def main():
    """Prepare the initial-condition test ("ic_test") assimilation job list.

    Command-line arguments (read from ``sys.argv``):
        1. model_id
        2. run_type: 'ALL' (every cbf in the cbf directory) or 'SUBSET_INPUT'
           (a fixed list of ten pixels)
        3. mcmc_id: '119' or '3' (DEMCMC)
        4. n_iter: number of MCMC iterations
        5. nbe_optimization: 'OFF' selects the CARDAMOM_2.1.6c executables,
           anything else the CARDAMOM_Uma_2.1.6c-master ones
        6. runtime_assim: integer forwarded to ``autil.fill_in_sh`` as n_hours

    Side effects: changes the working directory, writes a text file with one
    combined MDF + RUN_MODEL command per cbf found in the ic_test cbf
    directory, and fills in a matching ``.sh`` file via ``autil.fill_in_sh``.

    Raises:
        ValueError: if run_type or mcmc_id is not one of the supported values.
    """
    # set run information to read
    model_id = sys.argv[1]
    run_type = sys.argv[2]  # 'ALL' or 'SUBSET_INPUT'
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    nbe_optimization = sys.argv[5]  # OFF OR ON
    runtime_assim = int(sys.argv[6])
    ens_size = 500
    assim_type = '_p25adapted'

    # Validate the options first so that a typo fails immediately with a clear
    # message instead of an unbound-variable error part-way through the run.
    if run_type not in ('ALL', 'SUBSET_INPUT'):
        raise ValueError(
            "run_type must be 'ALL' or 'SUBSET_INPUT', got %r" % (run_type,))

    if mcmc_id == '119':
        frac_save_out = str(int(int(n_iter) / ens_size))
    elif mcmc_id == '3':
        # n_iterations / frac_save_out * 100 will be ensemble size
        frac_save_out = str(int(int(n_iter) / ens_size * 100))
    else:
        raise ValueError("mcmc_id must be '119' or '3', got %r" % (mcmc_id,))

    # set directories
    cur_dir = os.getcwd() + '/'
    mdf_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_MDF/' if nbe_optimization=='OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_MDF/'
    runmodel_dir = '../code/CARDAMOM_2.1.6c/C/projects/CARDAMOM_GENERAL/' if nbe_optimization=='OFF' else '../code/CARDAMOM_Uma_2.1.6c-master/C/projects/CARDAMOM_GENERAL/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/' + model_id + '/'
    cbf_ic_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'/ic_test/' + model_id + '/'
    cbr_pft_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'_pft/' + model_id + '/'
    cbr_ic_dir = '../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'_pft/ic_test/' + model_id + '/'
    output_ic_dir = '../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'_pft/ic_test/' + model_id + '/'

    # get model specific information
    parnames = autil.get_parnames('../../misc/', model_id)
    ic_inds = autil.get_inds_ic(model_id)  # get indices of initial condition parameters
    n_pars = len(parnames)

    # select which pixels to submit
    os.chdir(cbf_dir)
    if run_type == 'ALL':
        cbf_files = glob.glob('*.cbf')
    else:  # 'SUBSET_INPUT'
        cbf_files = select_cbf_files(glob.glob('*.cbf'), ['3809','3524','2224','4170','1945','3813','4054','3264','1271','3457'])
    # NOTE: everything below runs from the parent of the starting directory.
    os.chdir(cur_dir + '/../')

    cbf_files.sort()

    # Build per-PFT priors for every pixel cbf. The prior arrays are padded
    # to 50 entries with -9999. (presumably the fixed cbf prior length --
    # TODO confirm).
    padding = np.full(50 - n_pars, -9999.)
    for cbf_file in cbf_files:

        pixel = cbf_file[-8:-4]
        cbf_data = rwb.read_cbf_file(cbf_dir + cbf_file)

        # one cbr per pft for this pixel
        for cbr_file in glob.glob(cbr_pft_dir + '*' + pixel + '*.cbr'):

            cbr_data = rwb.read_cbr_file(cbr_file, {'nopars': n_pars})
            parpriors = np.concatenate((np.nanmedian(cbr_data, axis=0), padding))
            parpriorunc = np.concatenate((np.ones(n_pars) * 1.001, padding))

            # leave the initial-condition parameters without a prior
            parpriors[ic_inds[0]:ic_inds[1]] = -9999.
            parpriorunc[ic_inds[0]:ic_inds[1]] = -9999.

            cbf_data['PARPRIORS'] = parpriors.reshape(-1, 1)
            cbf_data['PARPRIORUNC'] = parpriorunc.reshape(-1, 1)

            # NOTE: writing the modified cbf is currently disabled, so the job
            # list below is built from whatever already exists in cbf_ic_dir.
            #rwb.CARDAMOM_WRITE_BINARY_FILEFORMAT(cbf_data, cbf_ic_dir + cbr_file.partition(cbr_pft_dir)[-1].partition('cbr')[0]+'cbf')

    # write one combined assimilation + forward-run command per ic_test cbf
    txt_filename = 'combined_assim_forward_list_' + model_id + '_' + run_type  + assim_type+ '_MCMC'+mcmc_id + '_'+n_iter + '_ic_test.txt'
    cbf_ic_files = glob.glob(cbf_ic_dir + '*.cbf')

    with open(txt_filename, 'w') as txt_file:
        for cbf_ic_file in cbf_ic_files:
            f = cbf_ic_file.partition(cbf_ic_dir)[-1]
            stem = f[:-4]
            txt_file.write('%sCARDAMOM_MDF.exe %s%s %s%s %s 0 %s 0.001 %s 1000' % (
                mdf_dir, cbf_ic_dir[3:], f, cbr_ic_dir, stem + '.cbr',
                n_iter, frac_save_out, mcmc_id))
            txt_file.write(' && %sCARDAMOM_RUN_MODEL.exe %s%s %s%s %s%s %s%s %s%s %s%s' % (
                runmodel_dir, cbf_ic_dir[3:], f, cbr_ic_dir, stem + '.cbr',
                output_ic_dir, 'fluxfile_' + stem + '.bin',
                output_ic_dir, 'poolfile_' + stem + '.bin',
                output_ic_dir, 'edcdfile_' + stem + '.bin',
                output_ic_dir, 'probfile_' + stem + '.bin'))
            txt_file.write('\n')

    # the context manager guarantees the submit script is flushed and closed
    with open(txt_filename[:-3] + 'sh', 'w') as sh_file:
        autil.fill_in_sh(sh_file, array_size=len(cbf_ic_files), n_hours=runtime_assim, txt_file=txt_filename, combined=True)
예제 #15
0
def main():
    """Map per-pixel median parameters from forward-run cbr chains.

    Command-line arguments (read from ``sys.argv``):
        1. model_id
        2. run_type: 'ALL' uses every pixel that has a cbf file; any other
           value uses a fixed list of ten pixels
        3. mcmc_id: 119 for normal, 3 for DEMCMC
        4. n_iter: number of MCMC iterations

    For each pixel, all matching cbr chains are read and stacked, and the
    per-parameter median is stored. Chains with exactly ``ens_size`` rows are
    additionally passed to ``autil.gelman_rubin`` when more than one is
    available. One map per parameter is then drawn with ``autil.plot_map``.
    Pixels without any chain keep NaN in the map.
    """
    model_id = sys.argv[1]
    run_type = sys.argv[2]  # ALL or SUBSET
    mcmc_id = sys.argv[3]  # 119 for normal, 3 for DEMCMC
    n_iter = sys.argv[4]
    ens_size = 500
    assim_type = '_p25adapted'
    use_bestchains_pkl = False

    cur_dir = os.getcwd() + '/'
    cbf_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbf'+assim_type+'_ef_ic/' + model_id + '/'
    cbr_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/cbr'+assim_type+'_ef/' + model_id + '/'
    output_dir = '../../../../../../scratch/users/cfamigli/cardamom/files/output'+assim_type+'_ef/' + model_id + '/'
    plot_dir = '../../../../../../scratch/users/cfamigli/cardamom/plots/'
    parnames = autil.get_parnames('../../misc/', model_id)
    n_pars = len(parnames)

    # load list of land pixels (the pixel id is the 4 characters before '.cbf')
    if run_type == 'ALL':
        pixels = sorted({path[-8:-4] for path in glob.glob(cbf_dir + '*.cbf')})
    else:
        pixels = sorted(['3809','3524','2224','4170','1945','3813','4054','3264','1271','3457'])

    # load list of cbrs
    cbr_files = glob.glob(cbr_dir+'*MCMC'+mcmc_id+'_'+n_iter+'_*PLS*forward*.cbr')

    # The best-chain table does not depend on the pixel, so read it once.
    conv_chains_pkl = None
    if use_bestchains_pkl:
        conv_chains_pkl = read_pickle(glob.glob(cbr_dir + model_id + assim_type + '*_MCMC'+mcmc_id + '_'+n_iter+'_best_subset.pkl')[0])
        conv_chains_pkl.columns = ['pixel','bestchains','conv']  # rename columns for easier access

    gr_pixels = np.full(len(pixels), np.nan)  # fraction of converged parameters per pixel, for mapping
    par_pixels = np.full((len(pixels), n_pars), np.nan)

    for pixel_idx, pixel in enumerate(pixels):
        print(pixel, pixel_idx)

        pixel_chains = autil.find_all_chains(cbr_files, pixel)
        pixel_chains.sort()  # filenames

        if use_bestchains_pkl:
            if pixel not in conv_chains_pkl['pixel'].values:
                continue
            bestchains = conv_chains_pkl.loc[conv_chains_pkl['pixel']==pixel]['bestchains'].values[0][1:]
            print(bestchains)
            pixel_chains = [pixel_chain for pixel_chain in pixel_chains if pixel_chain.partition(pixel+'_')[-1][:-4] in bestchains]

        #cbf_pixel = rwb.read_cbf_file(cur_dir + cbf_dir + pixel_chains[0].partition('_MCMC')[0]+'_'+pixel+'.cbf')
        cbf_filename = glob.glob(cur_dir + cbf_dir + '*'+pixel+'.cbf')[0]
        cbf_pixel = rwb.read_cbf_file(cbf_filename)

        # Per-pixel accumulators. They are reset for every pixel so that a
        # pixel with missing chains/outputs never inherits the previous
        # pixel's data.
        cbr_chains = []        # every chain; stacked for the pixel median
        cbr_chain_list = []    # only full-size chains; used for Gelman-Rubin
        flux_pixel = None      # only consumed by the optional plots below
        pool_pixel = None

        for pixel_chain in pixel_chains:
            print(pixel_chain)
            cbr_chain = rwb.read_cbr_file(pixel_chain, {'nopars': n_pars})
            cbr_chain = autil.modulus_Bday_Fday(cbr_chain, parnames)
            cbr_chains.append(cbr_chain)
            #autil.plot_par_histograms(cbr_chain, parnames=parnames, savepath=cur_dir+plot_dir+'dists/', title=model_id+'_'+pixel_chain[:-3]+'png')

            # Model output is best-effort: a missing or unreadable file must
            # not abort the parameter maps, but it is reported.
            try:
                out_name = pixel_chain.partition(cbr_dir)[-1][:-3]+'bin'
                model_dims = autil.get_nofluxes_nopools_lma(model_id)
                flux_chain = rwb.readbinarymat(cur_dir + output_dir + 'fluxfile_' + out_name, [cbf_pixel['nodays'], model_dims[0]])
                pool_chain = rwb.readbinarymat(cur_dir + output_dir + 'poolfile_' + out_name, [cbf_pixel['nodays']+1, model_dims[1]])
                #autil.plot_flux_pool_timeseries(cbf_pixel, cbr_chain, flux_chain, pool_chain, autil.get_nofluxes_nopools_lma(model_id)[2], savepath=cur_dir+plot_dir+'timeseries/', title=model_id+'_'+pixel_chain[:-3]+'png')

                flux_pixel = flux_chain if flux_pixel is None else np.concatenate((flux_pixel, flux_chain), axis=0)
                pool_pixel = pool_chain if pool_pixel is None else np.concatenate((pool_pixel, pool_chain), axis=0)

            except Exception as err:
                print('could not read flux/pool output for %s: %s' % (pixel_chain, err))

            if np.shape(cbr_chain)[0] == ens_size:
                cbr_chain_list.append(cbr_chain)

        if not cbr_chains:
            # nothing to summarise; the pixel keeps NaN in par_pixels/gr_pixels
            print('no cbr chains found for pixel %s, skipping' % pixel)
            continue

        cbr_pixel = np.concatenate(cbr_chains, axis=0)

        if len(cbr_chain_list) > 1:
            gr = autil.gelman_rubin(cbr_chain_list)
            n_converged = sum(gr<1.2)
            print('%i of %i parameters converged' % (n_converged, n_pars))
            gr_pixels[pixel_idx] = n_converged/n_pars

        par_pixels[pixel_idx,:] = np.nanmedian(cbr_pixel, axis=0)
        #autil.plot_par_histograms(cbr_pixel, parnames=parnames, savepath=cur_dir+plot_dir+'dists/', title=model_id+assim_type+'_MCMC'+mcmc_id+'_'+cbf_filename.partition(cbf_dir)[-1][:-4]+'.png')
        #autil.plot_flux_pool_timeseries(cbf_pixel, cbr_pixel, flux_pixel, pool_pixel, autil.get_nofluxes_nopools_lma(model_id)[2], savepath=cur_dir+plot_dir+'timeseries/', title=model_id+assim_type+'_MCMC'+mcmc_id+'_'+cbf_filename.partition(cbf_dir)[-1][:-4]+'.png')

    # The set of land pixels is the same for every parameter map.
    land_pixel_list = [path[-8:-4] for path in glob.glob(cur_dir + cbf_dir + '*.cbf')]
    for par in range(n_pars):
        # colour scale is clipped at the 90th percentile of the mapped values
        autil.plot_map(nrows=46, ncols=73, land_pixel_list=land_pixel_list, pixel_value_list=pixels, value_list=par_pixels[:,par], vmax=np.nanpercentile(par_pixels[:,par], 90), savepath=cur_dir+plot_dir+'maps/', savename='par'+str(par)+'_' + model_id +assim_type+ '_MCMC' + mcmc_id +'_'+ n_iter+'_EF_clipped_PLS_soilgrids_poolobs_rescaled_forward')
    #autil.plot_map(nrows=46, ncols=73, land_pixel_list=land_pixel_list, pixel_value_list=pixels, value_list=gr_pixels*100, savepath=cur_dir+plot_dir+'maps/', savename='gr_' + model_id + assim_type+ '_' +run_type+ '_MCMC' + mcmc_id + '_' + n_iter)