def run_scaling(args):
    '''Wrapper function for scaling tests.

    Runs VIC once per entry in the host's scaling profile, either by
    submitting a templated batch script to a cluster or by running
    locally via mpiexec, and records wall-clock timings in a table.

    Parameters
    ----------
    args : argparse.Namespace
        Expected attributes: host, vic_exe, global_param, timing,
        test (dry-run flag), clean (remove generated run scripts).

    Side effects
    ------------
    Writes the timing table to ``args.timing``; for cluster runs,
    writes one ``vic_{host}_{i}.sh`` script per profile entry and
    submits it with the host's submit command.
    '''
    config = hosts[args.host]
    vic_exe = VIC(args.vic_exe)

    # Write timing file header before any runs are launched.
    header = string.Template(table_header)
    header_kwargs = get_header_info(args.vic_exe, args.global_param)
    header = header.safe_substitute(**header_kwargs)
    with open(args.timing, 'w') as f:
        f.write(header)

    for i, kwargs in enumerate(config.profile):
        if config.template:
            # Run on a cluster of some kind.
            # Start by printing the template so the submitted script
            # contents are visible in the console log.
            print('-'.ljust(OUT_WIDTH, '-'))
            print('{host} template'.format(host=args.host).center(OUT_WIDTH))
            print('-'.ljust(OUT_WIDTH, '-'))
            print(config.template)
            print('-'.ljust(OUT_WIDTH, '-'))

            # Fill in the batch-script template; safe_substitute leaves
            # any unknown $placeholders untouched rather than raising.
            template = string.Template(config.template)
            run_string = template.safe_substitute(
                vic_exe=args.vic_exe,
                vic_global=args.global_param,
                timing_table_file=args.timing,
                i=i,
                mpiexec=config.mpiexec,
                **kwargs)
            run_file = 'vic_{host}_{i}.sh'.format(host=args.host, i=i)
            with open(run_file, 'w') as f:
                f.write(run_string)

            cmd = '{submit} {run_file}'.format(submit=config.submit,
                                               run_file=run_file)
            print(cmd)
            # args.test is a dry-run switch: print but do not submit.
            if not args.test:
                check_call(cmd, shell=True)

            if args.clean:
                os.remove(run_file)
        else:
            # Run locally and time the execution ourselves.
            n = kwargs['np']
            print('Running {} with {} processors'.format(args.vic_exe, n))
            if not args.test:
                start = time.time()
                vic_exe.run(args.global_param, mpi_proc=int(n))
                end = time.time()
                diff = end - start
                # Append one timing row per processor count.
                with open(args.timing, 'a') as f:
                    f.write('%5s | %.2f\n' % (n, diff))

    print('See %s for scaling table' % args.timing)
def run_scaling(args):
    '''wrapper function for scaling tests'''
    # NOTE(review): this function is defined twice in this file with
    # identical behavior — confirm and remove one of the two copies.
    host_config = hosts[args.host]
    executable = VIC(args.vic_exe)

    # Emit the timing-table header first.
    header_template = string.Template(table_header)
    header_text = header_template.safe_substitute(
        **get_header_info(args.vic_exe, args.global_param))
    with open(args.timing, 'w') as timing_file:
        timing_file.write(header_text)

    # Horizontal rule used to frame the template printout.
    rule = '-'.ljust(OUT_WIDTH, '-')

    for run_idx, profile_kwargs in enumerate(host_config.profile):
        if host_config.template:
            # Cluster run: show the submission template, then fill it
            # in and hand the resulting script to the submit command.
            print(rule)
            print('{} template'.format(args.host).center(OUT_WIDTH))
            print(rule)
            print(host_config.template)
            print(rule)

            run_string = string.Template(
                host_config.template).safe_substitute(
                    vic_exe=args.vic_exe,
                    vic_global=args.global_param,
                    timing_table_file=args.timing,
                    i=run_idx,
                    mpiexec=host_config.mpiexec,
                    **profile_kwargs)
            run_file = 'vic_{}_{}.sh'.format(args.host, run_idx)
            with open(run_file, 'w') as script_file:
                script_file.write(run_string)

            cmd = '{} {}'.format(host_config.submit, run_file)
            print(cmd)
            if not args.test:
                check_call(cmd, shell=True)

            if args.clean:
                os.remove(run_file)
        else:
            # Local run: time one VIC execution with this many procs.
            nprocs = profile_kwargs['np']
            print('Running {} with {} processors'.format(args.vic_exe,
                                                         nprocs))
            if not args.test:
                t0 = time.time()
                executable.run(args.global_param, mpi_proc=int(nprocs))
                elapsed = time.time() - t0
                with open(args.timing, 'a') as timing_file:
                    timing_file.write('%5s | %.2f\n' % (nprocs, elapsed))

    print('See %s for scaling table' % args.timing)
def main(cfg_file, nproc=1):
    '''
    Main function: run the full ensemble forcing-generation pipeline.

    Steps: (1) read config; (2) set up output dirs; (3) merge Maurer
    wind with Newman ensemble forcings; (4) convert orig. netCDF
    forcings to VIC ASCII; (5) run VIC 4 as a sub-daily disaggregator;
    (6) convert disaggregated ASCII forcings back to netCDF.

    Parameters
    ----------
    cfg_file: <str>
        Input config file
    nproc: <int>
        Number of processors to use
    '''
    # ====================================================== #
    # Load in config file
    # ====================================================== #
    cfg = read_configobj(cfg_file)

    # ====================================================== #
    # Process some cfg variables
    # ====================================================== #
    start_date = pd.to_datetime(cfg['FORCING']['start_date'])
    end_date = pd.to_datetime(cfg['FORCING']['end_date'])
    start_year = start_date.year
    end_year = end_date.year
    # Ensemble member indices, inclusive of ens_end.
    ens_list = range(cfg['FORCING']['ens_start'],
                     cfg['FORCING']['ens_end'] + 1)

    # ====================================================== #
    # Set up output directories
    # ====================================================== #
    dirs = setup_output_dirs(cfg['OUTPUT']['out_basedir'],
                             mkdirs=['forc_orig_nc', 'forc_orig_asc',
                                     'forc_disagg_asc', 'forc_disagg_nc',
                                     'config_files', 'logs_vic'])
    # Subdirs for config files for ensemble
    subdirs_config = setup_output_dirs(
        dirs['config_files'],
        mkdirs=['netcdf2vic', 'vic4', 'vic2nc'])

    # ====================================================== #
    # Load in domain file
    # ====================================================== #
    ds_domain = xr.open_dataset(cfg['DOMAIN']['domain_nc'])
    da_domain = ds_domain[cfg['DOMAIN']['mask_name']]
    lat_min = da_domain['lat'].min().values
    lat_max = da_domain['lat'].max().values
    lon_min = da_domain['lon'].min().values
    lon_max = da_domain['lon'].max().values

    # ====================================================== #
    # Load in and process Newman ensemble forcings (for prec, Tmax and
    # Tmin) and orig. Maurer forcing (for wind speed)
    # ====================================================== #
    # --- Load Maurer forcings --- #
    print('Processing Maurer forcings...')
    # Loop over each year
    list_da_wind = []
    for year in range(start_year, end_year + 1):
        print('Year {}'.format(year))
        # --- Load in netCDF file for this year --- #
        da_wind = xr.open_dataset(os.path.join(
            cfg['FORCING']['maurer_dir'],
            'nldas_met_update.obs.daily.wind.{}.nc'.format(year)))['wind']
        # --- Mask out the target area --- #
        da_wind = da_wind.sel(latitude=slice(lat_min, lat_max),
                              longitude=slice(lon_min, lon_max))
        da_wind = da_wind.where(da_domain.values)
        # --- Rename lat and lon --- #
        da_wind = da_wind.rename({'latitude': 'lat', 'longitude': 'lon'})
        list_da_wind.append(da_wind)
    # Concat all years together
    da_wind_allyears = xr.concat(list_da_wind, dim='time')

    # --- Load Newman forcings --- #
    print('Processing Newman forcings...')
    if nproc == 1:
        # Regular serial processing of each ensemble member
        for ens in ens_list:
            load_and_process_Newman(ens, cfg, da_domain, lat_min, lat_max,
                                    lon_min, lon_max, start_date, end_date,
                                    dirs, da_wind_allyears)
    elif nproc > 1:
        # Fan out ensemble members across a multiprocessing pool
        pool = mp.Pool(processes=nproc)
        for ens in ens_list:
            pool.apply_async(load_and_process_Newman,
                             (ens, cfg, da_domain, lat_min, lat_max,
                              lon_min, lon_max, start_date, end_date,
                              dirs, da_wind_allyears,))
        pool.close()
        pool.join()

    # ====================================================== #
    # Convert orig. forcings to ascii format
    # ====================================================== #
    print('Converting orig. netCDF forcings to VIC ascii...')
    # --- Setup subdirs for asc VIC orig. forcings for each ensemble
    # member --- #
    list_ens = ['ens_{}'.format(ens) for ens in ens_list]
    subdirs_output = setup_output_dirs(
        dirs['forc_orig_asc'], mkdirs=list_ens)

    # --- Prepare netcdf2vic config file --- #
    # NOTE: renamed from 'cfg_file' in a previous revision -- that name
    # shadowed the function parameter.
    dict_cfg_file = {}
    for ens in ens_list:
        cfg_file_ens = os.path.join(subdirs_config['netcdf2vic'],
                                    'ens_{}.cfg'.format(ens))
        dict_cfg_file[ens] = cfg_file_ens
        with open(cfg_file_ens, 'w') as f:
            f.write('[options]\n')
            # NOTE(review): the '{}' below is written literally;
            # presumably nc_to_vic substitutes the year -- confirm.
            f.write('files: forc_orig.{}.nc\n')
            f.write('verbose: True\n')
            f.write('output_format: ASCII\n')
            f.write('out_prefix: forc_orig_\n')
            f.write('coord_keys: lon,lat\n')
            f.write('var_keys: pr,tasmax,tasmin,wind\n')
            f.write('start_year: {}\n'.format(start_year))
            f.write('end_year: {}\n'.format(end_year))
            f.write('latlon_precision: {}\n'.format(
                cfg['OUTPUT']['latlon_precision']))
            f.write('\n[paths]\n')
            f.write('in_path: {}\n'.format(os.path.join(
                dirs['forc_orig_nc'], 'ens_{}'.format(ens))))
            f.write('mask_path: {}\n'.format(cfg['DOMAIN']['domain_nc']))
            f.write('mask_varname: {}\n'.format(cfg['DOMAIN']['mask_name']))
            f.write('ASCIIoutPath: {}\n'.format(
                subdirs_output['ens_{}'.format(ens)]))

    # --- Run nc_to_vic --- #
    if nproc == 1:
        for ens in ens_list:
            nc_to_vic(dict_cfg_file[ens])
    elif nproc > 1:
        pool = mp.Pool(processes=nproc)
        for ens in ens_list:
            pool.apply_async(nc_to_vic, (dict_cfg_file[ens],))
        pool.close()
        pool.join()

    # ====================================================== #
    # Run VIC forcing disaggregator
    # ====================================================== #
    print('Running VIC as a disaggregator...')
    # --- Setup subdirs for asc VIC disagg. forcings and VIC log files
    # for each ensemble member --- #
    list_ens = ['ens_{}'.format(ens) for ens in ens_list]
    subdirs_output = setup_output_dirs(
        dirs['forc_disagg_asc'], mkdirs=list_ens)
    subdirs_logs = setup_output_dirs(
        dirs['logs_vic'], mkdirs=list_ens)

    # --- Prepare VIC global file for the disaggregation run --- #
    # Load in global file template
    with open(cfg['VIC_DISAGG']['global_template'], 'r') as f:
        global_param = f.read()
    # Create string template
    s = string.Template(global_param)
    # Loop over each ensemble member
    dict_global_file = {}
    for ens in ens_list:
        # Fill in variables in the template
        global_param = s.safe_substitute(
            time_step=cfg['VIC_DISAGG']['time_step'],
            startyear=start_year,
            startmonth=start_date.month,
            startday=start_date.day,
            endyear=end_year,
            endmonth=end_date.month,
            endday=end_date.day,
            forcing1=os.path.join(dirs['forc_orig_asc'],
                                  'ens_{}'.format(ens), 'forc_orig_'),
            grid_decimal=cfg['OUTPUT']['latlon_precision'],
            prec='PREC',
            tmax='TMAX',
            tmin='TMIN',
            wind='WIND',
            forceyear=start_year,
            forcemonth=start_date.month,
            forceday=start_date.day,
            result_dir=subdirs_output['ens_{}'.format(ens)])
        # Write global param file (single write; the original iterated
        # the string and wrote it character by character)
        global_file = os.path.join(subdirs_config['vic4'],
                                   'vic.global.ens_{}.txt'.format(ens))
        dict_global_file[ens] = global_file
        with open(global_file, mode='w') as f:
            f.write(global_param)

    # --- Run VIC --- #
    vic_exe = VIC(cfg['VIC_DISAGG']['vic4_exe'])
    if nproc == 1:
        for ens in ens_list:
            vic_exe.run(dict_global_file[ens],
                        logdir=subdirs_logs['ens_{}'.format(ens)])
    elif nproc > 1:
        pool = mp.Pool(processes=nproc)
        for ens in ens_list:
            pool.apply_async(run_vic_for_multiprocess,
                             (vic_exe, dict_global_file[ens],
                              subdirs_logs['ens_{}'.format(ens)],))
        pool.close()
        pool.join()

    # ====================================================== #
    # Convert disaggregated forcings to netCDF format
    # ====================================================== #
    print('Converting disaggregated forcings to netCDF...')
    # --- Setup subdirs for VIC disagg. netCDF forcings for each
    # ensemble member --- #
    list_ens = ['ens_{}'.format(ens) for ens in ens_list]
    subdirs_output = setup_output_dirs(
        dirs['forc_disagg_nc'], mkdirs=list_ens)

    # --- Prepare vic2nc config file --- #
    # Extract disaggregated forcing variable names and order
    with open(cfg['VIC_DISAGG']['global_template'], 'r') as f:
        global_param = f.read()
    outvar_list = find_outvar_global_param(global_param)
    # BUG FIX: the original used var.strip('OUT_'), which strips the
    # character *set* {O, U, T, _} from both ends rather than the
    # 'OUT_' prefix (e.g. 'OUT_TMAX' -> 'MAX'). Strip the prefix only.
    outvar_list = [var[len('OUT_'):] if var.startswith('OUT_') else var
                   for var in outvar_list]

    # Extract end date and hour: last sub-daily timestamp of end_date
    end_date_with_hour = end_date + pd.DateOffset(days=1) -\
        pd.DateOffset(hours=cfg['VIC_DISAGG']['time_step'])

    # Per-variable netCDF metadata: var -> (units, standard_name,
    # description). Variables not listed here are skipped, matching the
    # original if/elif chain.
    # BUG FIX: WIND standard_name was 'surface_air_pressure' (copy-
    # paste from PRESSURE); corrected to 'wind_speed'.
    var_attrs = {
        'AIR_TEMP': ('C', 'air_temperature', 'air temperature'),
        'PREC': ('mm/step', 'precipitation', 'precipitation'),
        'PRESSURE': ('kPa', 'surface_air_pressure',
                     'near-surface atmospheric pressure'),
        'SHORTWAVE': ('W m-2', 'incoming_shortwave_radiation',
                      'incoming shortwave radiation'),
        'LONGWAVE': ('W m-2', 'incoming_longwave_radiation',
                     'incoming longwave radiation'),
        'VP': ('kPa', 'water_vapor_pressure',
               'near surface vapor pressure'),
        'WIND': ('m/s', 'wind_speed', 'near-surface wind speed'),
    }

    # Loop over each ensemble member
    dict_cfg_file = {}
    for ens in ens_list:
        cfg_file_ens = os.path.join(subdirs_config['vic2nc'],
                                    'ens_{}.cfg'.format(ens))
        dict_cfg_file[ens] = cfg_file_ens
        with open(cfg_file_ens, 'w') as f:
            f.write('[OPTIONS]\n')
            f.write('input_files: {}\n'.format(
                os.path.join(dirs['forc_disagg_asc'],
                             'ens_{}'.format(ens), 'force_*')))
            f.write('input_file_format: ascii\n')
            f.write('bin_dt_sec: {}\n'.format(
                cfg['VIC_DISAGG']['time_step'] * 3600))
            f.write('bin_start_date: {}\n'.format(
                start_date.strftime("%Y-%m-%d-%H")))
            f.write('bin_end_date: {}\n'.format(
                end_date_with_hour.strftime("%Y-%m-%d-%H")))
            f.write('regular_grid: False\n')
            f.write('out_directory: {}\n'.format(
                subdirs_output['ens_{}'.format(ens)]))
            f.write('memory_mode: big_memory\n')
            f.write('chunksize: 100\n')
            f.write('out_file_prefix: force\n')
            f.write('out_file_format: NETCDF4\n')
            f.write('precision: single\n')
            f.write('start_date: {}\n'.format(
                start_date.strftime("%Y-%m-%d-%H")))
            f.write('end_date: {}\n'.format(
                end_date_with_hour.strftime("%Y-%m-%d-%H")))
            f.write('calendar: proleptic_gregorian\n')
            f.write('time_segment: year\n')
            f.write('snow_bands: False\n')
            f.write('veg_tiles: False\n')
            f.write('soil_layers: False\n')
            f.write('\n[DOMAIN]\n')
            f.write('filename: {}\n'.format(cfg['DOMAIN']['domain_nc']))
            f.write('longitude_var: {}\n'.format(cfg['DOMAIN']['lon_name']))
            f.write('latitude_var: {}\n'.format(cfg['DOMAIN']['lat_name']))
            f.write('y_x_dims: {}, {}\n'.format(cfg['DOMAIN']['lat_name'],
                                                cfg['DOMAIN']['lon_name']))
            f.write('copy_vars: {}, {}, {}\n'.format(
                cfg['DOMAIN']['mask_name'], cfg['DOMAIN']['lat_name'],
                cfg['DOMAIN']['lon_name']))
            f.write('\n[GLOBAL_ATTRIBUTES]\n')
            f.write('title: VIC forcings\n')
            f.write('version: VIC4.2\n')
            f.write('grid: 1/8\n')
            # One section per output variable; column index is the
            # variable's position in the VIC output file.
            for i, var in enumerate(outvar_list):
                if var not in var_attrs:
                    continue
                units, std_name, desc = var_attrs[var]
                f.write('\n[{}]\n'.format(var))
                f.write('column: {}\n'.format(i))
                f.write('units: {}\n'.format(units))
                f.write('standard_name: {}\n'.format(std_name))
                f.write('description: {}\n'.format(desc))

    # --- Run vic2nc --- #
    if nproc == 1:
        for ens in ens_list:
            cfg_vic2nc = read_config(dict_cfg_file[ens])
            options = cfg_vic2nc.pop('OPTIONS')
            global_atts = cfg_vic2nc.pop('GLOBAL_ATTRIBUTES')
            if not options['regular_grid']:
                domain_dict = cfg_vic2nc.pop('DOMAIN')
            else:
                domain_dict = None
            # Remaining sections are the per-variable field specs
            fields = cfg_vic2nc
            vic2nc(options, global_atts, domain_dict, fields)
    elif nproc > 1:
        pool = mp.Pool(processes=nproc)
        for ens in ens_list:
            cfg_vic2nc = read_config(dict_cfg_file[ens])
            options = cfg_vic2nc.pop('OPTIONS')
            global_atts = cfg_vic2nc.pop('GLOBAL_ATTRIBUTES')
            if not options['regular_grid']:
                domain_dict = cfg_vic2nc.pop('DOMAIN')
            else:
                domain_dict = None
            fields = cfg_vic2nc
            pool.apply_async(vic2nc,
                             (options, global_atts, domain_dict, fields,))
        pool.close()
        pool.join()
# Load config file # =========================================================== # cfg = read_configobj(sys.argv[1]) # =========================================================== # # Set random generation seed # =========================================================== # np.random.seed(cfg['CONTROL']['seed']) # =========================================================== # # Run VIC # =========================================================== # # Create class VIC vic_exe = VIC(cfg['VIC']['exe']) # Run VIC vic_exe.run(cfg['VIC']['global'], logdir=cfg['OUTPUT']['vic_log_dir']) # =========================================================== # # Extract VIC output soil moisture (layer 1) at the end of # each day, and perturb # =========================================================== # # Load VIC output ds = xr.open_dataset(cfg['OUTPUT']['vic_output_hist_path']) # Resample surface sm to daily mean da_sm1_true = ds['OUT_SOIL_MOIST'].sel(nlayer=0) da_sm1_true_daily = da_sm1_true.resample(dim='time', freq='D', how='mean') # Reset time index to noon on each day da_sm1_true_daily['time'] = pd.date_range( '{}-12'.format(cfg['TIME_INDEX']['start_date']),