Example 1
def run(mod, time_s, time_e, rzsc_type='', pheix='', call='global_30min'):
    t1 = time.time()  # time it
    model = mod.lower()  # model key used to build the input file names

    # time information
    time_ix = [time_s, time_e]

    year_start, year_end = toolkit.se_extract(time_ix[0], time_ix[1])
    ncfiles = toolkit.namestr_long(model, file_syb, forcing, variable,
                                   year_start, year_end)
    if mod.lower() == 'ways':
        path_simu = os.path.join(path_mod_simu,
                                 'simu_' + rzsc_type + '_' + pheix)
    else:
        path_simu = os.path.join(path_mod_simu, 'ISIMIP2a', mod)
    files = toolkit.fullfile(path_simu, ncfiles['mod'])
    ds = xr.open_mfdataset(files)
    years = np.arange(int(time_ix[0][:4]), int(time_ix[1][:4]) + 1, 1)
    # prepare for data
    for year in years:
        ds1 = ds.sel(time=slice(str(year) + '-01-01', str(year) + '-12-31'))
        ds1 = ds1.rename({variable: 'runoff'})
        if mod.lower() != 'ways':
            if ds1.runoff.attrs['units'].lower() == 'kg m-2 s-1':
                prcp_ratio = 24 * 60 * 60  # kg m-2 s-1 -> mm/day
                new_units = 'mm'
            else:
                prcp_ratio = 1
                new_units = ds1.runoff.attrs.get('units', '')
            # assign instead of mutating .values in place, so the scaling
            # also takes effect on dask-backed arrays from open_mfdataset
            ds1['runoff'] = ds1.runoff * prcp_ratio
            ds1.runoff.attrs['units'] = new_units
        ds1.to_netcdf(os.path.join(path_CaMa_inp,
                                   'runoff' + str(year) + '.nc'))

    t2 = time.time()  # time it
    print('Elapsed Time for Data Preparation:', (t2 - t1) / 3600, 'Hours')

    # run CaMa-Flood
    # os.chdir(os.path.join(path_CaMa_Flood, 'gosh'))
    subprocess.call(
        [os.path.join(path_CaMa_Flood, 'gosh', call + '.sh')])

    t3 = time.time()  # time it
    print('Elapsed Time for Calculating:', (t3 - t2) / 3600, 'Hours')

    # move the data to right folder
    # os.chdir(os.path.join('../out', call))

    if mod.lower() == 'ways':
        path_store = os.path.join(
            path_des_, 'discharge_' + rzsc_type + '_' + pheix + '_' + call)
    else:
        path_store = os.path.join(path_des_, mod.lower())

    if not os.path.exists(path_store):
        os.makedirs(path_store)

    for f in glob.glob(os.path.join('./CaMa_Flood/out/', call, '*.nc')):
        shutil.move(f, path_store)

    t4 = time.time()  # time it
    print('Elapsed Time for Moving Data:', (t4 - t3) / 3600, 'Hours')
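
# --- usage sketch (not from the source) ---
# A minimal, hypothetical driver for Example 1: the module name
# `cama_runner` and all argument values below are assumptions for
# illustration only. The call prepares yearly runoff NetCDFs, invokes
# the CaMa-Flood gosh script, and moves the routed output to the store
# folder.
from cama_runner import run  # assumed module name

run('ways', '1971-01-01', '2001-12-31',
    rzsc_type='cru', pheix='phe', call='global_30min')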
Example 2
def run(time_s,
        time_e,
        module,
        rzsc_type,
        phenology='no',
        config_name='WAYS.txt',
        debug=[],
        coord=[]):

    # calibration period: '1986-01-01' to '1995-12-31'
    # if `coord` is given, a single-pixel run is performed
    from multiprocessing import Pool

    global time_ix, rzsc, par_replace, par_replace_deep, dat_pr, dat_tas, dat_pet, sinit, simax_cal, file_config, lon, lat

    # configuration file
    file_config = os.path.join('./configs', config_name)
    # update the time index if in cali mode (based on the warm_t)
    if module == 'cali':
        warm_t = obj_setups['warm_t']
        time_s = (pd.to_datetime(time_s) -
                  pd.DateOffset(months=warm_t)).strftime('%Y-%m-%d')

    # time information
    time_ix = [time_s, time_e]

    if phenology == 'yes':
        simax_cal = True
        phe_type = 'phe'
    elif phenology == 'no':
        simax_cal = False
        phe_type = 'nophe'
    else:
        raise ValueError("phenology must be 'yes' or 'no'")

    if module == 'simu':
        # par
        file_par = os.path.join(path_cf,
                                'par_' + rzsc_type + '_' + phe_type + '.nc4')
        ds_par = xr.open_dataset(file_par)
        var_list = [var_name for var_name in ds_par.data_vars]
        par_replace = np.empty(
            [len(var_list),
             len(ds_par.lat.values),
             len(ds_par.lon.values)])
        for k in range(len(var_list)):
            par_replace[k, :, :] = ds_par[var_list[k]].values

        # par_deep
        if len(par_rix_deep) > 0:
            file_par_deep = os.path.join(
                path_cf, 'par_deep_' + rzsc_type + '_' + phe_type + '.nc4')
            ds_par_deep = xr.open_dataset(file_par_deep)
            var_list = [var_name for var_name in ds_par_deep.data_vars]
            par_replace_deep = np.empty([
                len(var_list),
                len(ds_par_deep.lat.values),
                len(ds_par_deep.lon.values)
            ])
            for k in range(len(var_list)):
                par_replace_deep[k, :, :] = ds_par_deep[var_list[k]].values

    # rzsc
    file_rzsc = os.path.join(path_cf, 'rzsc_xx.nc4').replace('xx', rzsc_type)
    ds_rzsc = xr.open_dataset(file_rzsc)
    rzsc = ds_rzsc.rzsc.values

    # extract domain coordinates
    lon = ds_rzsc.lon.values
    lat = ds_rzsc.lat.values

    # reading data (forcing)
    year_s, year_e = toolkit.se_extract(time_s, time_e, step=step)
    ncfiles = toolkit.namestr(var_id,
                              file_syb,
                              f_name,
                              year_s,
                              year_e,
                              step=step)
    ds_pr = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['pr']))
    ds_tas = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['tas']))
    ds_pet = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['pet']))
    ds_pet['time'] = ds_pr['time'].copy()  # align pet to the pr calendar

    ds_pr = ds_pr.sel(time=slice(time_s, time_e))
    ds_tas = ds_tas.sel(time=slice(time_s, time_e))
    ds_pet = ds_pet.sel(time=slice(time_s, time_e))

    # check the units and derive the conversion factors
    if ds_pr.pr.attrs['units'].lower() == 'kg m-2 s-1':
        prcp_ratio = 24 * 60 * 60  # kg m-2 s-1 -> mm/day
    else:
        prcp_ratio = 1

    if ds_tas.tas.attrs['units'].lower() == 'k':
        tas_ratio = -273.15  # additive offset: K -> degC
    else:
        tas_ratio = 0

    dat_pr = ds_pr.pr.values * prcp_ratio
    dat_tas = ds_tas.tas.values + tas_ratio
    dat_pet = ds_pet.pet.values

    # s_init
    path_sinit = path_sinit_ + '_' + rzsc_type + '_' + phe_type
    file_sinit = os.path.join(path_sinit, 'sinit_' + year_s + '.nc4')
    if os.path.isfile(file_sinit):
        ds = xr.open_dataset(file_sinit)
        sinit = ds[list(ds.data_vars)[0]].values
    else:
        # cold start: zero initial storages (np.empty would leave
        # uninitialized values)
        sinit = np.zeros([5, len(lat), len(lon)])

    if simax_cal:
        global dat_tmin, dat_lu, igs
        # tmin
        ds_tmin = xr.open_mfdataset(toolkit.fullfile(path_i,
                                                     ncfiles['tasmin']))
        ds_tmin = ds_tmin.sel(time=slice(time_s, time_e))
        if ds_tmin.tasmin.attrs['units'].lower() == 'k':
            tmin_ratio = -273.15
        else:
            tmin_ratio = 0
        dat_tmin = ds_tmin.tasmin.values + tmin_ratio

        # lu
        file_lu = os.path.join(path_cf, 'modis_lu2001.nc')
        ds_lu = xr.open_dataset(file_lu)
        dat_lu = ds_lu.lu.values

        # igs
        path_igss = path_igss_ + '_' + rzsc_type + '_' + phe_type
        file_igs = os.path.join(path_igss, 'igs_' + year_s + '.nc4')
        if os.path.isfile(file_igs):
            ds_igs = xr.open_dataset(file_igs)
            igs = ds_igs[list(ds_igs.data_vars)[0]].values
        else:
            igs = ''

    # calibration data (additional)
    if module == 'cali':
        global dat_obs
        ds_obs = xr.open_dataset(file_obs)
        ds_obs = ds_obs.sel(time=slice(time_s, time_e))
        dat_obs = ds_obs.qtot.values

    # time
    pdtime = pd.date_range(start=time_s, end=time_e, freq='d')
    n = len(pdtime)

    # methods
    M = {}
    M['cali'] = cali
    M['simu'] = simu

    # debug code
    if coord:
        return M[module](coord)

    # drop out pixels that are not in the domain: the mask comes from
    # rzsc in 'cali' mode and from the fitted parameters in 'simu' mode
    file_mask = file_rzsc if module == 'cali' else file_par
    ds = xr.open_dataset(file_mask)
    first_var = list(ds.data_vars)[0]
    mask = ds[first_var].where(~np.isnan(ds[first_var]))
    coords = mask.to_dataframe().dropna(how='all').index.values.tolist()
    if debug:
        coords = coords[debug[0]:debug[1]]

    # processing
    t1 = time.time()  # time it
    with Pool() as pool:
        results = pool.map(M[module], coords)
    t2 = time.time()  # time it
    print('Elapsed Time for Calculation:', (t2 - t1) / 3600, 'Hours')

    # debug code
    if debug:
        return

    # control the data extract and write out
    if module == 'cali':
        # output file name
        fname_o = 'par_' + rzsc_type + '_' + phe_type + '.nc4'

        # number of parameters
        n = len(gof_setups['fit_ix'])

        # initialize the matrix
        R = np.empty((n, len(lat), len(lon)))
        R[:] = np.nan

        # extract the results from the mp-pool
        for element in results:
            R[:, element[0], element[1]] = element[2]

        # construct the output netcdf file
        P = {}
        for k in range(n):
            P['par' + str(gof_setups['fit_ix'][k])] = (['lat',
                                                        'lon'], R[k, :, :])

        # xarray dataset
        ds = xr.Dataset(P,
                        coords={
                            'lon': (['lon'], lon),
                            'lat': (['lat'], lat)
                        })

        # write out
        file_o = os.path.join(path_cf, fname_o)
        ds.to_netcdf(file_o, format='netCDF4', engine='netcdf4')

    elif module == 'simu':
        # initialize the matrix
        R = np.empty((5, len(lat), len(lon)))
        R[:] = np.nan
        G = np.empty((21, len(lat), len(lon)))
        G[:] = np.nan
        X0 = np.empty((n, len(lat), len(lon)))
        X0[:] = np.nan
        X1 = X0.copy()
        X2 = X0.copy()

        # extract the results from the mp-pool
        for element in results:
            R[:, element[0], element[1]] = element[2]
            if simax_cal:
                G[:, element[0], element[1]] = element[3]
            X0[:, element[0], element[1]] = element[4][0, :]
            X1[:, element[0], element[1]] = element[4][1, :]
            X2[:, element[0], element[1]] = element[4][2, :]

        # construct the output netcdf files (the 1901 date_range below is
        # a placeholder time axis for the 5 storage layers)
        ds_init = xr.Dataset({'s_init': (['time', 'lat', 'lon'], R)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pd.date_range('1901-01-01', periods=5)
                             })
        ds_evap = xr.Dataset({'evap': (['time', 'lat', 'lon'], X0)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pdtime
                             })
        ds_rzws = xr.Dataset({'rzws': (['time', 'lat', 'lon'], X1)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pdtime
                             })
        ds_qtot = xr.Dataset({'qtot': (['time', 'lat', 'lon'], X2)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pdtime
                             })
        if simax_cal:
            ds_igss = xr.Dataset({'igs': (['time', 'lat', 'lon'], G)},
                                 coords={
                                     'lon': (['lon'], lon),
                                     'lat': (['lat'], lat),
                                     'time':
                                     pd.date_range('1901-01-01', periods=21)
                                 })

        # generate the standard filename
        syb = 'ways_ffff_hist_nosoc_co2_vvvv_global_tttt_ssss_eeee.nc4'
        syb = syb.replace('ffff', config['f_name'])
        syb = syb.replace('tttt', 'daily')
        syb = syb.replace('ssss', year_s)
        syb = syb.replace('eeee', year_e)

        path_o = path_o_ + '_' + rzsc_type + '_' + phe_type
        if len(par_rix_deep) > 0:
            path_o = path_o + '_' + 'deep'
        file_o_evap = os.path.join(path_o, syb.replace('vvvv', 'evap'))
        file_o_rzws = os.path.join(path_o, syb.replace('vvvv', 'rzws'))
        file_o_qtot = os.path.join(path_o, syb.replace('vvvv', 'qtot'))

        # match the name expected when reading the restart ('sinit_<year>.nc4')
        fname_o_init = 'sinit_' + str(int(year_e) + 1) + '.nc4'
        file_o_init = os.path.join(path_sinit, fname_o_init)

        if simax_cal:
            fname_o_igss = 'igs_' + str(int(year_e) + 1) + '.nc4'
            file_o_igss = os.path.join(path_igss, fname_o_igss)

        # path
        if not os.path.exists(path_sinit):
            os.makedirs(path_sinit)
        if not os.path.exists(path_o):
            os.makedirs(path_o)
        if simax_cal:
            if not os.path.exists(path_igss):
                os.makedirs(path_igss)

        # saving
        ds_evap.to_netcdf(file_o_evap,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'evap': {
                              'zlib': True,
                              'complevel': 5
                          }})
        ds_rzws.to_netcdf(file_o_rzws,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'rzws': {
                              'zlib': True,
                              'complevel': 5
                          }})
        ds_qtot.to_netcdf(file_o_qtot,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'qtot': {
                              'zlib': True,
                              'complevel': 5
                          }})
        ds_init.to_netcdf(file_o_init,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'s_init': {
                              'zlib': True,
                              'complevel': 5
                          }})
        if simax_cal:
            ds_igss.to_netcdf(file_o_igss,
                              format='netCDF4',
                              engine='netcdf4',
                              encoding={'igs': {
                                  'zlib': True,
                                  'complevel': 5
                              }})

    t3 = time.time()  # time it
    print('Elapsed Time for Saving:', (t3 - t2) / 3600, 'Hours')

    print('Job Done!')
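
# --- usage sketch (not from the source) ---
# Hypothetical calls for Example 2; the module name `ways_runner` and
# the rzsc label 'cru' are assumptions. A 'cali' run fits parameters
# (prepending `warm_t` months of spin-up) and writes the par_*.nc4 file
# that a later 'simu' run reads; `debug=[i, j]` limits the pixel list
# and `coord` runs a single pixel.
from ways_runner import run  # assumed module name

run('1986-01-01', '1995-12-31', 'cali', 'cru', phenology='yes')
run('1971-01-01', '2001-12-31', 'simu', 'cru', phenology='yes')
run('1971-01-01', '1971-12-31', 'simu', 'cru', debug=[0, 10])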
Example 3
def run(time_s,
        time_e,
        rzsc_type,
        phenology='no',
        mctimes='100',
        par_ch='01',
        config_name='WAYS.txt',
        debug=[],
        coord=[]):

    from multiprocessing import Pool

    global time_ix, rzsc, par_replace, dat_pr, dat_tas, dat_pet, sinit, simax_cal, file_config, n_mc, num_keep

    if len(mctimes) > 0:
        n_mc = int(mctimes)

    if len(par_ch) > 0:
        # indices of the parameters to perturb; the others are kept fixed
        num_ch = [int(i_ch) for i_ch in par_ch]
        num_keep = sorted(set([0, 1]) - set(num_ch))

    # configuration file
    file_config = os.path.join('./configs', config_name)

    # time information
    time_ix = [time_s, time_e]

    if phenology == 'yes':
        simax_cal = True
        phe_type = 'phe'
    elif phenology == 'no':
        simax_cal = False
        phe_type = 'nophe'
    else:
        raise ValueError("phenology must be 'yes' or 'no'")

    # par
    file_par = os.path.join(path_cf,
                            'par_' + rzsc_type + '_' + phe_type + '.nc4')
    ds_par = xr.open_dataset(file_par)
    var_list = [var_name for var_name in ds_par.data_vars]
    par_replace = np.empty(
        [len(var_list),
         len(ds_par.lat.values),
         len(ds_par.lon.values)])
    for k in range(len(var_list)):
        par_replace[k, :, :] = ds_par[var_list[k]].values

    # rzsc
    file_rzsc = os.path.join(path_cf, 'rzsc_xx.nc4').replace('xx', rzsc_type)
    ds_rzsc = xr.open_dataset(file_rzsc)
    rzsc = ds_rzsc.rzsc.values

    # extract domain coordinates (used below for sinit and the output grids)
    lon = ds_rzsc.lon.values
    lat = ds_rzsc.lat.values

    # reading data (forcing)
    year_s, year_e = toolkit.se_extract(time_s, time_e)
    ncfiles = toolkit.namestr(var_id, file_syb, f_name, year_s, year_e)
    ds_pr = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['pr']))
    ds_tas = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['tas']))
    ds_pet = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['pet']))
    ds_pet['time'] = ds_pr['time'].copy()  # align pet to the pr calendar

    ds_pr = ds_pr.sel(time=slice(time_s, time_e))
    ds_tas = ds_tas.sel(time=slice(time_s, time_e))
    ds_pet = ds_pet.sel(time=slice(time_s, time_e))

    # check the units and derive the conversion factors
    if ds_pr.pr.attrs['units'].lower() == 'kg m-2 s-1':
        prcp_ratio = 24 * 60 * 60  # kg m-2 s-1 -> mm/day
    else:
        prcp_ratio = 1

    if ds_tas.tas.attrs['units'].lower() == 'k':
        tas_ratio = -273.15  # additive offset: K -> degC
    else:
        tas_ratio = 0

    dat_pr = ds_pr.pr.values * prcp_ratio
    dat_tas = ds_tas.tas.values + tas_ratio
    dat_pet = ds_pet.pet.values

    # s_init
    path_sinit = path_sinit_ + '_' + rzsc_type + '_' + phe_type
    file_sinit = os.path.join(path_sinit, 'sinit_' + year_s + '.nc4')
    if os.path.isfile(file_sinit):
        ds = xr.open_dataset(file_sinit)
        sinit = ds[list(ds.data_vars)[0]].values
    else:
        # cold start: zero initial storages (np.empty would leave
        # uninitialized values)
        sinit = np.zeros([5, len(lat), len(lon)])

    if simax_cal:
        global dat_tmin, dat_lu, igs
        # tmin
        ds_tmin = xr.open_mfdataset(toolkit.fullfile(path_i,
                                                     ncfiles['tasmin']))
        ds_tmin = ds_tmin.sel(time=slice(time_s, time_e))
        if ds_tmin.tasmin.attrs['units'].lower() == 'k':
            tmin_ratio = -273.15
        else:
            tmin_ratio = 0
        dat_tmin = ds_tmin.tasmin.values + tmin_ratio

        # lu
        file_lu = os.path.join(path_cf, 'modis_lu2001.nc')
        ds_lu = xr.open_dataset(file_lu)
        dat_lu = ds_lu.lu.values

        # igs
        path_igss = path_igss_ + '_' + rzsc_type + '_' + phe_type
        file_igs = os.path.join(path_igss, 'igs_' + year_s + '.nc4')
        if os.path.isfile(file_igs):
            ds_igs = xr.open_dataset(file_igs)
            igs = ds_igs[list(ds_igs.data_vars)[0]].values
        else:
            igs = ''

    # debug code
    if coord:
        return simu(coord)

    # drop out pixels that are not in the domain, based on the fitted
    # parameters
    ds = xr.open_dataset(file_par)
    first_var = list(ds.data_vars)[0]
    mask = ds[first_var].where(~np.isnan(ds[first_var]))
    coords = mask.to_dataframe().dropna(how='all').index.values.tolist()
    if debug:
        coords = coords[debug[0]:debug[1]]

    # processing
    t1 = time.time()  # time it
    with Pool() as pool:
        results = pool.map(simu, coords)
    t2 = time.time()  # time it
    print('Elapsed Time for Calculation:', (t2 - t1) / 3600, 'Hours')

    # debug code
    if debug:
        return

    # initialize the output matrices on the model grid
    R = np.empty((5, len(lat), len(lon)))
    R[:] = np.nan
    G = np.empty((21, len(lat), len(lon)))
    G[:] = np.nan
    X0 = np.empty((len(lat), len(lon)))
    X0[:] = np.nan
    X1 = X0.copy()
    X2 = X0.copy()

    # extract the results from the mp-pool
    for element in results:
        R[:, element[0], element[1]] = element[2]
        if simax_cal:
            G[:, element[0], element[1]] = element[3]
        X0[element[0], element[1]] = element[4]
        X1[element[0], element[1]] = element[5]
        X2[element[0], element[1]] = element[6]

    # construct the output netcdf files (the 1901 date_range below is a
    # placeholder time axis for the 5 storage layers)
    ds_init = xr.Dataset({'s_init': (['time', 'lat', 'lon'], R)},
                         coords={
                             'lon': (['lon'], lon),
                             'lat': (['lat'], lat),
                             'time': pd.date_range('1901-01-01', periods=5)
                         })
    ds_evap = xr.Dataset({'evap': (['lat', 'lon'], X0)},
                         coords={
                             'lon': (['lon'], lon),
                             'lat': (['lat'], lat)
                         })
    ds_rzws = xr.Dataset({'rzws': (['lat', 'lon'], X1)},
                         coords={
                             'lon': (['lon'], lon),
                             'lat': (['lat'], lat)
                         })
    ds_qtot = xr.Dataset({'qtot': (['lat', 'lon'], X2)},
                         coords={
                             'lon': (['lon'], lon),
                             'lat': (['lat'], lat)
                         })
    if simax_cal:
        ds_igss = xr.Dataset({'igs': (['time', 'lat', 'lon'], G)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pd.date_range('1901-01-01',
                                                       periods=21)
                             })

    # generate the standard filename
    syb = 'ways_ffff_hist_nosoc_co2_vvvv_global_tttt_ssss_eeee.nc4'
    syb = syb.replace('ffff', config['f_name'])
    syb = syb.replace('tttt', 'daily')
    syb = syb.replace('ssss', year_s)
    syb = syb.replace('eeee', year_e)

    path_o = (path_o_ + '_' + rzsc_type + '_' + phe_type +
              '_mc_' + par_ch + '_' + mctimes)
    file_o_evap = os.path.join(path_o, syb.replace('vvvv', 'evap'))
    file_o_rzws = os.path.join(path_o, syb.replace('vvvv', 'rzws'))
    file_o_qtot = os.path.join(path_o, syb.replace('vvvv', 'qtot'))

    # match the name expected when reading the restart ('sinit_<year>.nc4')
    fname_o_init = 'sinit_' + str(int(year_e) + 1) + '.nc4'
    file_o_init = os.path.join(path_sinit, fname_o_init)

    if simax_cal:
        fname_o_igss = 'igs_' + str(int(year_e) + 1) + '.nc4'
        file_o_igss = os.path.join(path_igss, fname_o_igss)

    # path
    if not os.path.exists(path_sinit):
        os.makedirs(path_sinit)
    if not os.path.exists(path_o):
        os.makedirs(path_o)
    if simax_cal:
        if not os.path.exists(path_igss):
            os.makedirs(path_igss)

    # saving
    ds_evap.to_netcdf(file_o_evap,
                      format='netCDF4',
                      engine='netcdf4',
                      encoding={'evap': {
                          'zlib': True,
                          'complevel': 5
                      }})
    ds_rzws.to_netcdf(file_o_rzws,
                      format='netCDF4',
                      engine='netcdf4',
                      encoding={'rzws': {
                          'zlib': True,
                          'complevel': 5
                      }})
    ds_qtot.to_netcdf(file_o_qtot,
                      format='netCDF4',
                      engine='netcdf4',
                      encoding={'qtot': {
                          'zlib': True,
                          'complevel': 5
                      }})
    ds_init.to_netcdf(file_o_init,
                      format='netCDF4',
                      engine='netcdf4',
                      encoding={'s_init': {
                          'zlib': True,
                          'complevel': 5
                      }})
    if simax_cal:
        ds_igss.to_netcdf(file_o_igss,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'igs': {
                              'zlib': True,
                              'complevel': 5
                          }})

    t3 = time.time()  # time it
    print('Elapsed Time for Saving:', (t3 - t2) / 3600, 'Hours')

    print('Job Done!')
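
# --- usage sketch (not from the source) ---
# A hypothetical call for Example 3; the module name `ways_mc` and the
# rzsc label 'cru' are assumptions. `mctimes` sets the number of Monte
# Carlo draws and `par_ch` names the parameter indices (here 0 and 1)
# to perturb; the remaining indices end up in `num_keep`.
from ways_mc import run  # assumed module name

run('1971-01-01', '2001-12-31', 'cru',
    phenology='no', mctimes='100', par_ch='01')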
Example 4
def run(time_s, time_e, rzsc_type, pheix, scale='M'):
    from multiprocessing import Pool

    global montly_do, var, site, flux, time_ix

    # always assign the flag so it is defined for non-monthly runs too
    montly_do = scale == 'M'

    path_evap = os.path.join(path_simu, 'simu_' + rzsc_type + '_' + pheix)

    # time information
    time_ix = [time_s, time_e]

    # prepare the file name for reading
    year_start, year_end = toolkit.se_extract(time_ix[0], time_ix[1])
    ncfiles = toolkit.namestr_long(model, file_syb, forcing, variable,
                                   year_start, year_end)

    files = toolkit.fullfile(path_evap, ncfiles['ways'])

    # FLUXNET
    # site information
    files_fluxnet = glob.glob(os.path.join(path_fluxnet, '*.csv'))
    site = pd.read_csv(file_fluxnetsite)
    site = site.sort_values('SITE_ID')[[
        'SITE_ID', 'LOCATION_LAT', 'LOCATION_LONG'
    ]]
    site = site.set_index('SITE_ID')

    # extract flux values & change the unit to mm/day
    flux = dict()
    for file in files_fluxnet:
        key = os.path.basename(file).rsplit('_')[1]
        value = toolkit.read_fluxnet(file, time_ix)
        flux[key] = value

    # evap simulation
    ds = xr.open_mfdataset(files)
    ds = ds.sel(time=slice(time_ix[0], time_ix[1]))
    var = ds.evap.values
    # caution: assumes the period starts on Jan 1 and ends on Dec 31
    anave_evap = np.sum(var, axis=0) / (int(time_e[:4]) - int(time_s[:4]) + 1)

    # sites
    sites = site.index.tolist()
    # processing
    t1 = time.time()  # time it
    with Pool() as pool:
        results = pool.map(fluxcombine, sites)
    t2 = time.time()  # time it
    print('Elapsed Time for Calculation:', (t2 - t1) / 3600, 'Hours')

    # data in pandas dataframe (FLUXNET & WAYS)
    data = dict()
    for element in results:
        data[element[0]] = element[1]

    # statistics
    ops = site.copy()
    header = [
        'N', 'SLOP', 'INTERCEPT', 'MEAN_OBS', 'MEAN_SIM', 'R', 'P', 'STD_ERR',
        'RMSE', 'NRMSE1'
    ]
    ops = ops.reindex(columns=ops.columns.tolist() + header)
    for site_id in site.index:
        try:
            fv = data[site_id]
        except KeyError:
            print(site_id + ': no observation is found!')
            continue  # skip sites without observations
        fv = fv.dropna()
        N = len(fv)
        if fv.empty:
            # no valid pairs: fill all statistics with NaN
            (slope, intercept, m_o, m_s, r_value, p_value, std_err, rmse,
             nrmse) = [np.nan] * 9
        else:
            o = fv['LE_CORR'].tolist()
            s = fv['WAYS'].tolist()
            slope, intercept, r_value, p_value, std_err = errlib.linergress(
                s, o)
            m_o = np.nanmean(o)
            m_s = np.nanmean(s)
            rmse, nrmse = errlib.rmse(s, o)
        ops.loc[site_id] = [
            ops.loc[site_id].LOCATION_LAT, ops.loc[site_id].LOCATION_LONG
        ] + [
            N, slope, intercept, m_o, m_s, r_value, p_value, std_err, rmse,
            nrmse
        ]

    # return values
    return anave_evap, data, ops
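
# --- usage sketch (not from the source) ---
# A hypothetical call for Example 4; the module name `ways_flux` and
# the argument values are assumptions. The function returns the mean
# annual evaporation field, the per-site FLUXNET/WAYS pairs, and a
# per-site statistics table.
from ways_flux import run  # assumed module name

anave_evap, data, ops = run('2000-01-01', '2010-12-31', 'cru', 'phe',
                            scale='M')
print(ops[['N', 'R', 'RMSE']].head())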