def run(mod, time_s, time_e, rzsc_type='', pheix='', call='global_30min'):
    t1 = time.time()  # time it

    model = 'mod: ' + mod.lower()

    # time information
    time_ix = [time_s, time_e]
    year_start, year_end = toolkit.se_extract(time_ix[0], time_ix[1])
    ncfiles = toolkit.namestr_long(model, file_syb, forcing, variable,
                                   year_start, year_end)

    if mod.lower() == 'ways':
        path_simu = os.path.join(path_mod_simu,
                                 'simu_' + rzsc_type + '_' + pheix)
    else:
        path_simu = os.path.join(path_mod_simu, 'ISIMIP2a', mod)

    files = toolkit.fullfile(path_simu, ncfiles['mod'])
    ds = xr.open_mfdataset(files)

    years = np.arange(int(time_ix[0][:4]), int(time_ix[1][:4]) + 1, 1)

    # prepare the yearly runoff input for CaMa-Flood
    for year in years:
        ds1 = ds.sel(time=slice(str(year) + '-01-01', str(year) + '-12-31'))
        ds1 = ds1.rename({variable: 'runoff'})
        if mod.lower() != 'ways':
            if ds1.runoff.attrs['units'].lower() == 'kg m-2 s-1':
                prcp_ratio = 24 * 60 * 60
                ds1.runoff.attrs['units'] = 'mm'
            else:
                prcp_ratio = 1
            ds1.runoff.values *= prcp_ratio
        ds1.to_netcdf(os.path.join(path_CaMa_inp, 'runoff' + str(year) + '.nc'))

    t2 = time.time()  # time it
    print('Elapsed Time for Data Preparing:', (t2 - t1) / 3600, 'Hours')

    # run CaMa-Flood
    # os.chdir(os.path.join(path_CaMa_Flood, 'gosh'))
    subprocess.call(
        [os.path.join(path_CaMa_Flood, 'gosh', './' + call + '.sh')])

    t3 = time.time()  # time it
    print('Elapsed Time for Calculating:', (t3 - t2) / 3600, 'Hours')

    # move the data to the right folder
    # os.chdir(os.path.join('../out', call))
    if mod.lower() == 'ways':
        path_store = os.path.join(
            path_des_, 'discharge_' + rzsc_type + '_' + pheix + '_' + call)
    else:
        path_store = os.path.join(path_des_, mod.lower())
    if not os.path.exists(path_store):
        os.makedirs(path_store)
    for f in glob.glob(os.path.join('./CaMa_Flood/out/', call, '*.nc')):
        shutil.move(f, path_store)

    t4 = time.time()  # time it
    print('Elapsed Time for Moving Data:', (t4 - t3) / 3600, 'Hours')
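# Usage sketch (hypothetical arguments; assumes the module-level settings such
# as path_mod_simu, path_CaMa_inp, path_CaMa_Flood and path_des_ are already
# configured by the imported config):
#
#     # route WAYS runoff through CaMa-Flood with the 30-min global script
#     run('ways', '1971-01-01', '2010-12-31', rzsc_type='cru', pheix='phe')
#
#     # route runoff of another ISIMIP2a model (model name is illustrative)
#     run('lpjml', '1971-01-01', '2010-12-31')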
def run(time_s,
        time_e,
        module,
        rzsc_type,
        phenology='no',
        config_name='WAYS.txt',
        debug=[],
        coord=[]):
    # calibration: '1986-01-01' to '1995-12-31'
    # if coord is filled, a single run will be performed
    from multiprocessing import Pool

    global time_ix, rzsc, par_replace, par_replace_deep, dat_pr, dat_tas, \
        dat_pet, sinit, simax_cal, file_config, lon, lat

    # configuration file
    file_config = os.path.join('./configs', config_name)

    # update the time index if in cali mode (based on warm_t)
    if module == 'cali':
        warm_t = obj_setups['warm_t']
        time_s = (pd.to_datetime(time_s) -
                  pd.DateOffset(months=warm_t)).strftime('%Y-%m-%d')

    # time information
    time_ix = [time_s, time_e]

    if phenology == 'yes':
        simax_cal = True
        phe_type = 'phe'
    elif phenology == 'no':
        simax_cal = False
        phe_type = 'nophe'

    if module == 'simu':
        # par
        file_par = os.path.join(path_cf,
                                'par_' + rzsc_type + '_' + phe_type + '.nc4')
        ds_par = xr.open_dataset(file_par)
        var_list = [var_name for var_name in ds_par.data_vars]
        par_replace = np.empty(
            [len(var_list), len(ds_par.lat.values), len(ds_par.lon.values)])
        for k in range(len(var_list)):
            par_replace[k, :, :] = ds_par[var_list[k]].values

        # par_deep
        if len(par_rix_deep) > 0:
            file_par_deep = os.path.join(
                path_cf, 'par_deep_' + rzsc_type + '_' + phe_type + '.nc4')
            ds_par_deep = xr.open_dataset(file_par_deep)
            var_list = [var_name for var_name in ds_par_deep.data_vars]
            par_replace_deep = np.empty([
                len(var_list),
                len(ds_par_deep.lat.values),
                len(ds_par_deep.lon.values)
            ])
            for k in range(len(var_list)):
                par_replace_deep[k, :, :] = ds_par_deep[var_list[k]].values

    # rzsc
    file_rzsc = os.path.join(path_cf, 'rzsc_xx.nc4').replace('xx', rzsc_type)
    ds_rzsc = xr.open_dataset(file_rzsc)
    rzsc = ds_rzsc.rzsc.values

    # extract domain coordinates
    lon = ds_rzsc.lon.values
    lat = ds_rzsc.lat.values

    # read the forcing data
    year_s, year_e = toolkit.se_extract(time_s, time_e, step=step)
    ncfiles = toolkit.namestr(var_id, file_syb, f_name, year_s, year_e,
                              step=step)
    ds_pr = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['pr']))
    ds_tas = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['tas']))
    ds_pet = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['pet']))
    ds_pet['time'] = ds_pr['time'].copy()
    ds_pr = ds_pr.sel(time=slice(time_s, time_e))
    ds_tas = ds_tas.sel(time=slice(time_s, time_e))
    ds_pet = ds_pet.sel(time=slice(time_s, time_e))

    # check the units
    if ds_pr.pr.attrs['units'].lower() == 'kg m-2 s-1':
        prcp_ratio = 24 * 60 * 60
    else:
        prcp_ratio = 1
    if ds_tas.tas.attrs['units'].lower() == 'k':
        tas_ratio = -273.15
    else:
        tas_ratio = 0

    dat_pr = ds_pr.pr.values * prcp_ratio
    dat_tas = ds_tas.tas.values + tas_ratio
    dat_pet = ds_pet.pet.values

    # s_init
    path_sinit = path_sinit_ + '_' + rzsc_type + '_' + phe_type
    file_sinit = os.path.join(path_sinit, 'sinit_' + year_s + '.nc4')
    if os.path.isfile(file_sinit):
        ds = xr.open_dataset(file_sinit)
        sinit = ds[list(ds.data_vars)[0]].values
    else:
        sinit = np.empty([5, len(lat), len(lon)])

    if simax_cal:
        global dat_tmin, dat_lu, igs

        # tmin
        ds_tmin = xr.open_mfdataset(toolkit.fullfile(path_i,
                                                     ncfiles['tasmin']))
        ds_tmin = ds_tmin.sel(time=slice(time_s, time_e))
        if ds_tmin.tasmin.attrs['units'].lower() == 'k':
            tmin_ratio = -273.15
        else:
            tmin_ratio = 0
        dat_tmin = ds_tmin.tasmin.values + tmin_ratio

        # lu
        file_lu = os.path.join(path_cf, 'modis_lu2001.nc')
        ds_lu = xr.open_dataset(file_lu)
        dat_lu = ds_lu.lu.values

        # igs
        path_igss = path_igss_ + '_' + rzsc_type + '_' + phe_type
        file_igs = os.path.join(path_igss, 'igs_' + year_s + '.nc4')
        if os.path.isfile(file_igs):
            ds_igs = xr.open_dataset(file_igs)
            igs = ds_igs[list(ds_igs.data_vars)[0]].values
        else:
            igs = ''

    # calibration data (additional)
    if module == 'cali':
        global dat_obs
        ds_obs = xr.open_dataset(file_obs)
        ds_obs = ds_obs.sel(time=slice(time_s, time_e))
        dat_obs = ds_obs.qtot.values

    # time
    pdtime = pd.date_range(start=time_s, end=time_e, freq='d')
    n = len(pdtime)

    # methods
    M = {}
    M['cali'] = cali
    M['simu'] = simu

    # debug code
    if coord:
        return M[module](coord)

    if module == 'cali':
        # drop pixels that are outside the domain, based on rzsc
        ds = xr.open_dataset(file_rzsc)
        mask_var = list(ds.data_vars)[0]
        mask = ds[mask_var].where(~np.isnan(ds[mask_var]))
        coords = mask.to_dataframe().dropna(how='all').index.values.tolist()
        if debug:
            coords = coords[debug[0]:debug[1]]
    elif module == 'simu':
        # drop pixels that are outside the domain, based on the fitted parameters
        ds = xr.open_dataset(file_par)
        mask_var = list(ds.data_vars)[0]
        mask = ds[mask_var].where(~np.isnan(ds[mask_var]))
        coords = mask.to_dataframe().dropna(how='all').index.values.tolist()
        if debug:
            coords = coords[debug[0]:debug[1]]

    # processing
    t1 = time.time()  # time it
    pool = Pool()
    results = pool.map(M[module], coords)
    pool.close()
    t2 = time.time()  # time it
    print('Elapsed Time for Calculation:', (t2 - t1) / 3600, 'Hours')

    # debug code
    if debug:
        return

    # control the data extraction and write-out
    if module == 'cali':
        # output file name
        fname_o = 'par_' + rzsc_type + '_' + phe_type + '.nc4'
        # number of parameters
        n = len(gof_setups['fit_ix'])
        # initialize the matrix
        R = np.empty((n, len(lat), len(lon)))
        R[:] = np.nan
        # extract the results from the mp-pool
        for element in results:
            R[:, element[0], element[1]] = element[2]
        # construct the output netcdf file
        P = {}
        for k in range(n):
            P['par' + str(gof_setups['fit_ix'][k])] = (['lat', 'lon'],
                                                       R[k, :, :])
        # xarray dataset
        ds = xr.Dataset(P,
                        coords={
                            'lon': (['lon'], lon),
                            'lat': (['lat'], lat)
                        })
        # write out
        file_o = os.path.join(path_cf, fname_o)
        ds.to_netcdf(file_o, format='netCDF4', engine='netcdf4')
    elif module == 'simu':
        # initialize the matrices
        R = np.empty((5, len(lat), len(lon)))
        R[:] = np.nan
        G = np.empty((21, len(lat), len(lon)))
        G[:] = np.nan
        X0 = np.empty((n, len(lat), len(lon)))
        X0[:] = np.nan
        X1 = X0.copy()
        X2 = X0.copy()
        # extract the results from the mp-pool
        for element in results:
            R[:, element[0], element[1]] = element[2]
            if simax_cal:
                G[:, element[0], element[1]] = element[3]
            X0[:, element[0], element[1]] = element[4][0, :]
            X1[:, element[0], element[1]] = element[4][1, :]
            X2[:, element[0], element[1]] = element[4][2, :]
        # construct the output netcdf files
        ds_init = xr.Dataset({'s_init': (['time', 'lat', 'lon'], R)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pd.date_range('1901-01-01', periods=5)
                             })
        ds_evap = xr.Dataset({'evap': (['time', 'lat', 'lon'], X0)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pdtime
                             })
        ds_rzws = xr.Dataset({'rzws': (['time', 'lat', 'lon'], X1)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pdtime
                             })
        ds_qtot = xr.Dataset({'qtot': (['time', 'lat', 'lon'], X2)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pdtime
                             })
        if simax_cal:
            ds_igss = xr.Dataset({'igs': (['time', 'lat', 'lon'], G)},
                                 coords={
                                     'lon': (['lon'], lon),
                                     'lat': (['lat'], lat),
                                     'time': pd.date_range('1901-01-01',
                                                           periods=21)
                                 })

        # generate the standard filename
        syb = 'ways_ffff_hist_nosoc_co2_vvvv_global_tttt_ssss_eeee.nc4'
        syb = syb.replace('ffff', config['f_name'])
        syb = syb.replace('tttt', 'daily')
        syb = syb.replace('ssss', year_s)
        syb = syb.replace('eeee', year_e)

        path_o = path_o_ + '_' + rzsc_type + '_' + phe_type
        if len(par_rix_deep) > 0:
            path_o = path_o + '_' + 'deep'
        file_o_evap = os.path.join(path_o, syb.replace('vvvv', 'evap'))
        file_o_rzws = os.path.join(path_o, syb.replace('vvvv', 'rzws'))
        file_o_qtot = os.path.join(path_o, syb.replace('vvvv', 'qtot'))
        fname_o_init = 's_init_' + str(int(year_e) + 1) + '.nc4'
        file_o_init = os.path.join(path_sinit, fname_o_init)
        if simax_cal:
            fname_o_igss = 'igs_' + str(int(year_e) + 1) + '.nc4'
            file_o_igss = os.path.join(path_igss, fname_o_igss)

        # paths
        if not os.path.exists(path_sinit):
            os.makedirs(path_sinit)
        if not os.path.exists(path_o):
            os.makedirs(path_o)
        if simax_cal:
            if not os.path.exists(path_igss):
                os.makedirs(path_igss)

        # saving
        ds_evap.to_netcdf(file_o_evap,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'evap': {'zlib': True, 'complevel': 5}})
        ds_rzws.to_netcdf(file_o_rzws,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'rzws': {'zlib': True, 'complevel': 5}})
        ds_qtot.to_netcdf(file_o_qtot,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'qtot': {'zlib': True, 'complevel': 5}})
        ds_init.to_netcdf(file_o_init,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'s_init': {'zlib': True, 'complevel': 5}})
        if simax_cal:
            ds_igss.to_netcdf(file_o_igss,
                              format='netCDF4',
                              engine='netcdf4',
                              encoding={'igs': {'zlib': True, 'complevel': 5}})

    t3 = time.time()  # time it
    print('Elapsed Time for Saving:', (t3 - t2) / 3600, 'Hours')

    print('Job Done!')
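# Usage sketches (hypothetical arguments; 'cru' as rzsc_type is illustrative;
# the cali/simu worker functions and module-level config are assumed to be set
# up as above):
#
#     # calibration over the calibration period (warm-up is prepended inside)
#     run('1986-01-01', '1995-12-31', 'cali', 'cru', phenology='yes')
#
#     # global simulation with the fitted parameters
#     run('1971-01-01', '2010-12-31', 'simu', 'cru', phenology='yes')
#
#     # debug helpers: a slice of pixels, or a single (lat, lon) coordinate
#     run('1971-01-01', '1971-12-31', 'simu', 'cru', debug=[0, 10])
#     run('1971-01-01', '1971-12-31', 'simu', 'cru', coord=(45.25, 7.75))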
def run(time_s,
        time_e,
        rzsc_type,
        phenology='no',
        mctimes='100',
        par_ch='01',
        config_name='WAYS.txt',
        debug=[],
        coord=[]):
    from multiprocessing import Pool

    global time_ix, rzsc, par_replace, dat_pr, dat_tas, dat_pet, sinit, \
        simax_cal, file_config, n_mc, num_keep

    if len(mctimes) > 0:
        n_mc = int(mctimes)

    if len(par_ch) > 0:
        num_ch = []
        for i_ch in par_ch:
            num_ch.append(int(i_ch))
        num_full = set([0, 1])
        num_keep = list(num_full - set(num_ch))

    # configuration file
    file_config = os.path.join('./configs', config_name)

    # time information
    time_ix = [time_s, time_e]

    if phenology == 'yes':
        simax_cal = True
        phe_type = 'phe'
    elif phenology == 'no':
        simax_cal = False
        phe_type = 'nophe'

    # par
    file_par = os.path.join(path_cf,
                            'par_' + rzsc_type + '_' + phe_type + '.nc4')
    ds_par = xr.open_dataset(file_par)
    var_list = [var_name for var_name in ds_par.data_vars]
    par_replace = np.empty(
        [len(var_list), len(ds_par.lat.values), len(ds_par.lon.values)])
    for k in range(len(var_list)):
        par_replace[k, :, :] = ds_par[var_list[k]].values

    # rzsc
    file_rzsc = os.path.join(path_cf, 'rzsc_xx.nc4').replace('xx', rzsc_type)
    ds_rzsc = xr.open_dataset(file_rzsc)
    rzsc = ds_rzsc.rzsc.values

    # reading data (forcing)
    year_s, year_e = toolkit.se_extract(time_s, time_e)
    ncfiles = toolkit.namestr(var_id, file_syb, f_name, year_s, year_e)
    ds_pr = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['pr']))
    ds_tas = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['tas']))
    ds_pet = xr.open_mfdataset(toolkit.fullfile(path_i, ncfiles['pet']))
    ds_pet['time'] = ds_pr['time'].copy()
    ds_pr = ds_pr.sel(time=slice(time_s, time_e))
    ds_tas = ds_tas.sel(time=slice(time_s, time_e))
    ds_pet = ds_pet.sel(time=slice(time_s, time_e))

    # check the unit
    if ds_pr.pr.attrs['units'].lower() == 'kg m-2 s-1':
        prcp_ratio = 24 * 60 * 60
    else:
        prcp_ratio = 1
    if ds_tas.tas.attrs['units'].lower() == 'k':
        tas_ratio = -273.15
    else:
        tas_ratio = 0

    dat_pr = ds_pr.pr.values * prcp_ratio
    dat_tas = ds_tas.tas.values + tas_ratio
    dat_pet = ds_pet.pet.values

    # s_init
    path_sinit = path_sinit_ + '_' + rzsc_type + '_' + phe_type
    file_sinit = os.path.join(path_sinit, 'sinit_' + year_s + '.nc4')
    if os.path.isfile(file_sinit):
        ds = xr.open_dataset(file_sinit)
        sinit = ds[list(ds.data_vars)[0]].values
    else:
        sinit = np.empty([5, len(lat), len(lon)])

    if simax_cal:
        global dat_tmin, dat_lu, igs

        # tmin
        ds_tmin = xr.open_mfdataset(toolkit.fullfile(path_i,
                                                     ncfiles['tasmin']))
        ds_tmin = ds_tmin.sel(time=slice(time_s, time_e))
        if ds_tmin.tasmin.attrs['units'].lower() == 'k':
            tmin_ratio = -273.15
        else:
            tmin_ratio = 0
        dat_tmin = ds_tmin.tasmin.values + tmin_ratio

        # lu
        file_lu = os.path.join(path_cf, 'modis_lu2001.nc')
        ds_lu = xr.open_dataset(file_lu)
        dat_lu = ds_lu.lu.values

        # igs
        path_igss = path_igss_ + '_' + rzsc_type + '_' + phe_type
        file_igs = os.path.join(path_igss, 'igs_' + year_s + '.nc4')
        if os.path.isfile(file_igs):
            ds_igs = xr.open_dataset(file_igs)
            igs = ds_igs[list(ds_igs.data_vars)[0]].values
        else:
            igs = ''

    # debug code
    if coord:
        return simu(coord)

    # drop pixels that are outside the domain, based on the fitted parameters
    ds = xr.open_dataset(file_par)
    mask_var = list(ds.data_vars)[0]
    mask = ds[mask_var].where(~np.isnan(ds[mask_var]))
    coords = mask.to_dataframe().dropna(how='all').index.values.tolist()
    if debug:
        coords = coords[debug[0]:debug[1]]

    # processing
    t1 = time.time()  # time it
    pool = Pool()
    results = pool.map(simu, coords)
    pool.close()
    t2 = time.time()  # time it
    print('Elapsed Time for Calculation:', (t2 - t1) / 3600, 'Hours')

    # debug code
    if debug:
        return

    # initialize the matrices
    R = np.empty((5, 360, 720))
    R[:] = np.nan
    G = np.empty((21, 360, 720))
    G[:] = np.nan
    X0 = np.empty((360, 720))
    X0[:] = np.nan
    X1 = X0.copy()
    X2 = X0.copy()

    # extract the results from the mp-pool
    for element in results:
        R[:, element[0], element[1]] = element[2]
        if simax_cal:
            G[:, element[0], element[1]] = element[3]
        X0[element[0], element[1]] = element[4]
        X1[element[0], element[1]] = element[5]
        X2[element[0], element[1]] = element[6]

    # construct the output netcdf files
    ds_init = xr.Dataset({'s_init': (['time', 'lat', 'lon'], R)},
                         coords={
                             'lon': (['lon'], lon),
                             'lat': (['lat'], lat),
                             'time': pd.date_range('1901-01-01', periods=5)
                         })
    ds_evap = xr.Dataset({'evap': (['lat', 'lon'], X0)},
                         coords={
                             'lon': (['lon'], lon),
                             'lat': (['lat'], lat)
                         })
    ds_rzws = xr.Dataset({'rzws': (['lat', 'lon'], X1)},
                         coords={
                             'lon': (['lon'], lon),
                             'lat': (['lat'], lat)
                         })
    ds_qtot = xr.Dataset({'qtot': (['lat', 'lon'], X2)},
                         coords={
                             'lon': (['lon'], lon),
                             'lat': (['lat'], lat)
                         })
    if simax_cal:
        ds_igss = xr.Dataset({'igs': (['time', 'lat', 'lon'], G)},
                             coords={
                                 'lon': (['lon'], lon),
                                 'lat': (['lat'], lat),
                                 'time': pd.date_range('1901-01-01',
                                                       periods=21)
                             })

    # generate the standard filename
    syb = 'ways_ffff_hist_nosoc_co2_vvvv_global_tttt_ssss_eeee.nc4'
    syb = syb.replace('ffff', config['f_name'])
    syb = syb.replace('tttt', 'daily')
    syb = syb.replace('ssss', year_s)
    syb = syb.replace('eeee', year_e)

    path_o = (path_o_ + '_' + rzsc_type + '_' + phe_type + '_mc_' + par_ch +
              '_' + mctimes)
    file_o_evap = os.path.join(path_o, syb.replace('vvvv', 'evap'))
    file_o_rzws = os.path.join(path_o, syb.replace('vvvv', 'rzws'))
    file_o_qtot = os.path.join(path_o, syb.replace('vvvv', 'qtot'))
    fname_o_init = 's_init_' + str(int(year_e) + 1) + '.nc4'
    file_o_init = os.path.join(path_sinit, fname_o_init)
    if simax_cal:
        fname_o_igss = 'igs_' + str(int(year_e) + 1) + '.nc4'
        file_o_igss = os.path.join(path_igss, fname_o_igss)

    # path
    if not os.path.exists(path_sinit):
        os.makedirs(path_sinit)
    if not os.path.exists(path_o):
        os.makedirs(path_o)
    if simax_cal:
        if not os.path.exists(path_igss):
            os.makedirs(path_igss)

    # saving
    ds_evap.to_netcdf(file_o_evap,
                      format='netCDF4',
                      engine='netcdf4',
                      encoding={'evap': {'zlib': True, 'complevel': 5}})
    ds_rzws.to_netcdf(file_o_rzws,
                      format='netCDF4',
                      engine='netcdf4',
                      encoding={'rzws': {'zlib': True, 'complevel': 5}})
    ds_qtot.to_netcdf(file_o_qtot,
                      format='netCDF4',
                      engine='netcdf4',
                      encoding={'qtot': {'zlib': True, 'complevel': 5}})
    ds_init.to_netcdf(file_o_init,
                      format='netCDF4',
                      engine='netcdf4',
                      encoding={'s_init': {'zlib': True, 'complevel': 5}})
    if simax_cal:
        ds_igss.to_netcdf(file_o_igss,
                          format='netCDF4',
                          engine='netcdf4',
                          encoding={'igs': {'zlib': True, 'complevel': 5}})

    t3 = time.time()  # time it
    print('Elapsed Time for Saving:', (t3 - t2) / 3600, 'Hours')

    print('Job Done!')
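# Usage sketch (hypothetical arguments): a 100-member Monte-Carlo simulation
# that perturbs parameters 0 and 1 (par_ch='01'), writing to a
# '..._mc_01_100' output folder:
#
#     run('1971-01-01', '2010-12-31', 'cru', phenology='no',
#         mctimes='100', par_ch='01')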
def run(time_s, time_e, rzsc_type, pheix, scale='M'):
    from multiprocessing import Pool

    global montly_do, var, site, flux, time_ix

    if scale == 'M':
        montly_do = True

    path_evap = os.path.join(path_simu, 'simu_' + rzsc_type + '_' + pheix)

    # time information
    time_ix = [time_s, time_e]

    # prepare the file names for reading
    year_start, year_end = toolkit.se_extract(time_ix[0], time_ix[1])
    ncfiles = toolkit.namestr_long(model, file_syb, forcing, variable,
                                   year_start, year_end)
    files = toolkit.fullfile(path_evap, ncfiles['ways'])

    # FLUXNET
    # site information
    files_fluxnet = glob.glob(os.path.join(path_fluxnet, '*.csv'))
    site = pd.read_csv(file_fluxnetsite)
    site = site.sort_values('SITE_ID')[[
        'SITE_ID', 'LOCATION_LAT', 'LOCATION_LONG'
    ]]
    site = site.set_index('SITE_ID')

    # extract flux values & convert the unit to mm/day
    flux = dict()
    for file in files_fluxnet:
        key = os.path.basename(file).rsplit('_')[1]
        value = toolkit.read_fluxnet(file, time_ix)
        flux[key] = value

    # evap simulation
    ds = xr.open_mfdataset(files)
    ds = ds.sel(time=slice(time_ix[0], time_ix[1]))
    var = ds.evap.values

    # caution: assumes the period starts on the first day of a year and ends
    # on the last day of a year
    anave_evap = np.sum(var, axis=0) / (int(time_e[:4]) - int(time_s[:4]) + 1)

    # sites
    sites = site.index.tolist()

    # processing
    t1 = time.time()  # time it
    pool = Pool()
    results = pool.map(fluxcombine, sites)
    pool.close()
    t2 = time.time()  # time it
    print('Elapsed Time for Calculation:', (t2 - t1) / 3600, 'Hours')

    # data in pandas dataframes (FLUXNET & WAYS)
    data = dict()
    for element in results:
        data[element[0]] = element[1]

    # statistics
    ops = site.copy()
    header = [
        'N', 'SLOP', 'INTERCEPT', 'MEAN_OBS', 'MEAN_SIM', 'R', 'P', 'STD_ERR',
        'RMSE', 'NRMSE1'
    ]
    ops = ops.reindex(columns=ops.columns.tolist() + header)
    for site_id in site.index:
        try:
            fv = data[site_id]
        except KeyError:
            print(site_id + ': no observation is found!')
            continue
        fv = fv.dropna()
        N = len(fv)
        if fv.empty:
            slope, intercept, m_o, m_s, r_value, p_value, std_err, rmse, \
                nrmse = [np.NaN] * 9
        else:
            o = fv['LE_CORR'].tolist()
            s = fv['WAYS'].tolist()
            slope, intercept, r_value, p_value, std_err = errlib.linergress(
                s, o)
            m_o = np.nanmean(o)
            m_s = np.nanmean(s)
            rmse, nrmse = errlib.rmse(s, o)
        ops.loc[site_id] = [
            ops.loc[site_id].LOCATION_LAT, ops.loc[site_id].LOCATION_LONG
        ] + [
            N, slope, intercept, m_o, m_s, r_value, p_value, std_err, rmse,
            nrmse
        ]

    # return values
    return anave_evap, data, ops
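# Usage sketch (hypothetical arguments): evaluate monthly WAYS evaporation
# against the FLUXNET sites and collect the per-site statistics:
#
#     anave_evap, data, ops = run('1982-01-01', '2011-12-31', 'cru', 'phe',
#                                 scale='M')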