Example #1
0
def run_scaling(args):
    '''wrapper function for scaling tests'''
    config = hosts[args.host]
    vic_exe = VIC(args.vic_exe)

    # write timing file header
    header = string.Template(table_header)
    header_kwargs = get_header_info(args.vic_exe, args.global_param)
    header = header.safe_substitute(**header_kwargs)
    with open(args.timing, 'w') as f:
        f.write(header)

    for i, kwargs in enumerate(config.profile):
        if config.template:
            # run on a cluster of some kind
            # start by printing the template
            print('-'.ljust(OUT_WIDTH, '-'))
            print('{host} template'.format(host=args.host).center(OUT_WIDTH))
            print('-'.ljust(OUT_WIDTH, '-'))
            print(config.template)
            print('-'.ljust(OUT_WIDTH, '-'))
            template = string.Template(config.template)

            run_string = template.safe_substitute(
                vic_exe=args.vic_exe,
                vic_global=args.global_param,
                timing_table_file=args.timing,
                i=i,
                mpiexec=config.mpiexec,
                **kwargs)
            run_file = 'vic_{host}_{i}.sh'.format(host=args.host, i=i)
            with open(run_file, 'w') as f:
                f.write(run_string)

            cmd = '{submit} {run_file}'.format(submit=config.submit,
                                               run_file=run_file)
            print(cmd)
            if not args.test:
                check_call(cmd, shell=True)

            if args.clean:
                os.remove(run_file)
        else:
            # run locally
            n = kwargs['np']
            print('Running {} with {} processors'.format(args.vic_exe, n))
            if not args.test:
                start = time.time()
                vic_exe.run(args.global_param, mpi_proc=int(n))
                end = time.time()
                diff = end - start
                with open(args.timing, 'a') as f:
                    f.write('%5s | %.2f\n' % (n, diff))

    print('See %s for scaling table' % args.timing)
Example #2
0
def run_scaling(args):
    '''wrapper function for scaling tests'''
    config = hosts[args.host]
    vic_exe = VIC(args.vic_exe)

    # write timing file header
    header = string.Template(table_header)
    header_kwargs = get_header_info(args.vic_exe, args.global_param)
    header = header.safe_substitute(**header_kwargs)
    with open(args.timing, 'w') as f:
        f.write(header)

    for i, kwargs in enumerate(config.profile):
        if config.template:
            # run on a cluster of some kind
            # start by printing the template
            print('-'.ljust(OUT_WIDTH, '-'))
            print('{host} template'.format(
                host=args.host).center(OUT_WIDTH))
            print('-'.ljust(OUT_WIDTH, '-'))
            print(config.template)
            print('-'.ljust(OUT_WIDTH, '-'))
            template = string.Template(config.template)

            run_string = template.safe_substitute(
                vic_exe=args.vic_exe, vic_global=args.global_param,
                timing_table_file=args.timing, i=i,
                mpiexec=config.mpiexec, **kwargs)
            run_file = 'vic_{host}_{i}.sh'.format(host=args.host, i=i)
            with open(run_file, 'w') as f:
                f.write(run_string)

            cmd = '{submit} {run_file}'.format(submit=config.submit,
                                               run_file=run_file)
            print(cmd)
            if not args.test:
                check_call(cmd, shell=True)

            if args.clean:
                os.remove(run_file)
        else:
            # run locally
            n = kwargs['np']
            print('Running {} with {} processors'.format(args.vic_exe, n))
            if not args.test:
                start = time.time()
                vic_exe.run(args.global_param, mpi_proc=int(n))
                end = time.time()
                diff = end - start
                with open(args.timing, 'a') as f:
                    f.write('%5s | %.2f\n' % (n, diff))

    print('See %s for scaling table' % args.timing)
Example #3
0
def main(cfg_file, nproc=1):
    ''' Main function

    Parameters
    ----------
    cfg_file: <str>
        Input config file
    nproc: <int>
        Number of processors to use
    '''
    
    # ====================================================== #
    # Load in config file
    # ====================================================== #
    cfg = read_configobj(cfg_file)
   
 
    # ====================================================== #
    # Process some cfg variables
    # ====================================================== #
    start_date = pd.to_datetime(cfg['FORCING']['start_date'])
    end_date = pd.to_datetime(cfg['FORCING']['end_date'])
    
    start_year = start_date.year
    end_year = end_date.year

    ens_list = range(cfg['FORCING']['ens_start'],
                     cfg['FORCING']['ens_end'] + 1)
    
    
    # ====================================================== #
    # Set up output directories
    # ====================================================== #
    dirs = setup_output_dirs(cfg['OUTPUT']['out_basedir'],
                             mkdirs=['forc_orig_nc', 'forc_orig_asc',
                                     'forc_disagg_asc', 'forc_disagg_nc',
                                     'config_files', 'logs_vic'])
    # Subdirs for config files for ensemble
    subdirs_config = setup_output_dirs(
                            dirs['config_files'],
                            mkdirs=['netcdf2vic', 'vic4', 'vic2nc'])
    
    # ====================================================== #
    # Load in domain file
    # ====================================================== #
    ds_domain = xr.open_dataset(cfg['DOMAIN']['domain_nc'])
    da_domain = ds_domain[cfg['DOMAIN']['mask_name']]
    
    lat_min = da_domain['lat'].min().values
    lat_max = da_domain['lat'].max().values
    lon_min = da_domain['lon'].min().values
    lon_max = da_domain['lon'].max().values
    
    
    # ====================================================== #
    # Load in and process Newman ensemble forcings (for prec, Tmax and Tmin)
    # and orig. Maurer forcing (for wind speed)
    # ====================================================== #
    
    # --- Load Maurer forcings --- #
    print('Processing Maurer forcings...')
    
    # Loop over each year
    list_da_wind = []
    for year in range(start_year, end_year+1):
        print('Year {}'.format(year))
        # --- Load in netCDF file for this year --- #
        da_wind = xr.open_dataset(os.path.join(
                        cfg['FORCING']['maurer_dir'],
                        'nldas_met_update.obs.daily.wind.{}.nc'.format(year)))['wind']
        # --- Mask out the target area --- #
        da_wind = da_wind.sel(latitude=slice(lat_min, lat_max),
                              longitude=slice(lon_min, lon_max))
        da_wind = da_wind.where(da_domain.values)
        # --- Rename lat and lon --- #
        da_wind = da_wind.rename({'latitude': 'lat', 'longitude': 'lon'})
        # --- Put in list --- #
        list_da_wind.append(da_wind)
    
    # Concat all years together
    da_wind_allyears = xr.concat(list_da_wind, dim='time')
   
    # --- Load Newman forcings --- #
    print('Processing Newman forcings...')

    # If 1 processor, do a regular process
    if nproc == 1:
        # Loop over each ensemble member
        for ens in ens_list:
            load_and_process_Newman(ens, cfg, da_domain, lat_min, lat_max,
                                    lon_min, lon_max, start_date, end_date,
                                    dirs, da_wind_allyears)
    # If multiple processors, use mp
    elif nproc > 1:
        # Set up multiprocessing
        pool = mp.Pool(processes=nproc)
        # Loop over each ensemble member
        for ens in ens_list:
            pool.apply_async(load_and_process_Newman,
                             (ens, cfg, da_domain, lat_min, lat_max, lon_min,
                              lon_max, start_date, end_date, dirs,
                              da_wind_allyears,))
        # Finish multiprocessing
        pool.close()
        pool.join()
    
    # ====================================================== #
    # Convert orig. forcings to ascii format
    # ====================================================== #
    
    print('Converting orig. netCDF forcings to VIC ascii...')

    # --- Setup subdirs for asc VIC orig. forcings for each ensemble member
    # --- #
    list_ens = []
    for ens in ens_list:
        list_ens.append('ens_{}'.format(ens))
    subdirs_output = setup_output_dirs(
                        dirs['forc_orig_asc'],
                        mkdirs=list_ens)
    
    # --- Prepare netcdf2vic config file --- #
    dict_cfg_file = {}
    for ens in ens_list:
        cfg_file = os.path.join(subdirs_config['netcdf2vic'],
                                'ens_{}.cfg'.format(ens))
        dict_cfg_file[ens] = cfg_file

        with open(cfg_file, 'w') as f:
            f.write('[options]\n')
            f.write('files: forc_orig.{}.nc\n')
            f.write('verbose: True\n')
            f.write('output_format: ASCII\n')
            f.write('out_prefix: forc_orig_\n')
            f.write('coord_keys: lon,lat\n')
            f.write('var_keys: pr,tasmax,tasmin,wind\n')
            f.write('start_year: {}\n'.format(start_year))
            f.write('end_year: {}\n'.format(end_year))
            f.write('latlon_precision: {}\n'.format(
                            cfg['OUTPUT']['latlon_precision']))

            f.write('\n[paths]\n')
            f.write('in_path: {}\n'.format(os.path.join(
                                        dirs['forc_orig_nc'],
                                        'ens_{}'.format(ens))))
            f.write('mask_path: {}\n'.format(cfg['DOMAIN']['domain_nc']))
            f.write('mask_varname: {}\n'.format(cfg['DOMAIN']['mask_name']))
            f.write('ASCIIoutPath: {}\n'.format(
                        subdirs_output['ens_{}'.format(ens)]))
        
    # --- Run nc_to_vic --- #
    # If 1 processor, do a regular process
    if nproc == 1:
        for ens in ens_list:
            nc_to_vic(dict_cfg_file[ens])
    # If multiple processors, use mp
    elif nproc > 1:
        # Set up multiprocessing
        pool = mp.Pool(processes=nproc)
        # Loop over each ensemble member
        for ens in ens_list:
            pool.apply_async(nc_to_vic, (dict_cfg_file[ens],))
        # Finish multiprocessing
        pool.close()
        pool.join()
    
    # ====================================================== #
    # Run VIC forcing disaggregator
    # ====================================================== #
    
    print('Running VIC as a disaggregator...')
    
    # --- Setup subdirs for asc VIC disagg. forcings and VIC log files for
    # each ensemble member --- #
    list_ens = []
    for ens in ens_list:
        list_ens.append('ens_{}'.format(ens))
    subdirs_output = setup_output_dirs(
                        dirs['forc_disagg_asc'],
                        mkdirs=list_ens)
    subdirs_logs = setup_output_dirs(
                        dirs['logs_vic'],
                        mkdirs=list_ens)
 
    # --- Prepare VIC global file for the disaggregation run --- #
    # Load in global file template
    with open(cfg['VIC_DISAGG']['global_template'], 'r') as f:
         global_param = f.read()
    # Create string template
    s = string.Template(global_param)
    # Loop over each ensemble member
    dict_global_file = {}
    for ens in ens_list:
        # Fill in variables in the template
        global_param = s.safe_substitute(
                            time_step=cfg['VIC_DISAGG']['time_step'],
                            startyear=start_year,
                            startmonth=start_date.month,
                            startday=start_date.day,
                            endyear=end_year,
                            endmonth=end_date.month,
                            endday=end_date.day,
                            forcing1=os.path.join(dirs['forc_orig_asc'],
                                                  'ens_{}'.format(ens),
                                                  'forc_orig_'),
                            grid_decimal=cfg['OUTPUT']['latlon_precision'],
                            prec='PREC',
                            tmax='TMAX',
                            tmin='TMIN',
                            wind='WIND',
                            forceyear=start_year,
                            forcemonth=start_date.month,
                            forceday=start_date.day,
                            result_dir=subdirs_output['ens_{}'.format(ens)])
        # Write global param file
        global_file = os.path.join(subdirs_config['vic4'],
                                   'vic.global.ens_{}.txt'.format(ens))
        dict_global_file[ens] = global_file
        with open(global_file, mode='w') as f:
            for line in global_param:
                f.write(line)
            
    # --- Run VIC --- #
    # Prepare VIC exe
    vic_exe = VIC(cfg['VIC_DISAGG']['vic4_exe'])

    # If 1 processor, do a regular process
    if nproc == 1:
        for ens in ens_list:
            vic_exe.run(dict_global_file[ens],
                        logdir=subdirs_logs['ens_{}'.format(ens)])
    # If multiple processors, use mp
    elif nproc > 1:
        # Set up multiprocessing
        pool = mp.Pool(processes=nproc)
        # Loop over each ensemble member
        for ens in ens_list:
            pool.apply_async(run_vic_for_multiprocess,
                             (vic_exe, dict_global_file[ens],
                              subdirs_logs['ens_{}'.format(ens)],))
        # Finish multiprocessing
        pool.close()
        pool.join()
    
    # ====================================================== #
    # Convert disaggregated forcings to netCDF format
    # ====================================================== #
    
    # --- Prepare config file for vic2nc --- #
    print('Converting disaggregated forcings to netCDF...')
    
    # --- Setup subdirs for VIC disagg. netCDF forcings for each ensemble
    # member --- #
    list_ens = []
    for ens in ens_list:
        list_ens.append('ens_{}'.format(ens))
    subdirs_output = setup_output_dirs(
                        dirs['forc_disagg_nc'],
                        mkdirs=list_ens)

    # --- Prepare netcdf2vic config file --- #
    # Extract disaggregated forcing variable names and order
    with open(cfg['VIC_DISAGG']['global_template'], 'r') as f:
         global_param = f.read()
    outvar_list = find_outvar_global_param(global_param)
    for i, var in enumerate(outvar_list):
        outvar_list[i] = var.strip('OUT_')
   
    # Extract end date and hour 
    end_date_with_hour = end_date + pd.DateOffset(days=1) -\
                         pd.DateOffset(hours=cfg['VIC_DISAGG']['time_step'])

    # Loop over each ensemble member 
    dict_cfg_file = {}
    for ens in ens_list:
        cfg_file = os.path.join(subdirs_config['vic2nc'],
                                'ens_{}.cfg'.format(ens))
        dict_cfg_file[ens] = cfg_file
        
        with open(cfg_file, 'w') as f:
            f.write('[OPTIONS]\n')
            f.write('input_files: {}\n'.format(
                        os.path.join(dirs['forc_disagg_asc'],
                                     'ens_{}'.format(ens),
                                     'force_*')))
            f.write('input_file_format: ascii\n')
            f.write('bin_dt_sec: {}\n'.format(cfg['VIC_DISAGG']['time_step']*3600))
            f.write('bin_start_date: {}\n'.format(start_date.strftime("%Y-%m-%d-%H")))
            f.write('bin_end_date: {}\n'.format(end_date_with_hour.strftime("%Y-%m-%d-%H")))
            f.write('regular_grid: False\n')
            f.write('out_directory: {}\n'.format(subdirs_output['ens_{}'.format(ens)]))
            f.write('memory_mode: big_memory\n')
            f.write('chunksize: 100\n')
            f.write('out_file_prefix: force\n')
            f.write('out_file_format: NETCDF4\n')
            f.write('precision: single\n')
            f.write('start_date: {}\n'.format(start_date.strftime("%Y-%m-%d-%H")))
            f.write('end_date: {}\n'.format(end_date_with_hour.strftime("%Y-%m-%d-%H")))
            f.write('calendar: proleptic_gregorian\n')
            f.write('time_segment: year\n')
            f.write('snow_bands: False\n')
            f.write('veg_tiles: False\n')
            f.write('soil_layers: False\n')
            
            f.write('\n[DOMAIN]\n')
            f.write('filename: {}\n'.format(cfg['DOMAIN']['domain_nc']))
            f.write('longitude_var: {}\n'.format(cfg['DOMAIN']['lon_name']))
            f.write('latitude_var: {}\n'.format(cfg['DOMAIN']['lat_name']))
            f.write('y_x_dims: {}, {}\n'.format(cfg['DOMAIN']['lat_name'],
                                                cfg['DOMAIN']['lon_name']))
            f.write('copy_vars: {}, {}, {}\n'.format(cfg['DOMAIN']['mask_name'],
                                                     cfg['DOMAIN']['lat_name'],
                                                     cfg['DOMAIN']['lon_name']))
            
            f.write('\n[GLOBAL_ATTRIBUTES]\n')
            f.write('title: VIC forcings\n')
            f.write('version: VIC4.2\n')
            f.write('grid: 1/8\n')
            
            for i, var in enumerate(outvar_list):
                if var == 'AIR_TEMP':
                    f.write('\n[AIR_TEMP]\n')
                    f.write('column: {}\n'.format(i))
                    f.write('units: C\n')
                    f.write('standard_name: air_temperature\n')
                    f.write('description: air temperature\n')
                elif var == 'PREC':
                    f.write('\n[PREC]\n')
                    f.write('column: {}\n'.format(i))
                    f.write('units: mm/step\n')
                    f.write('standard_name: precipitation\n')
                    f.write('description: precipitation\n')
                elif var == 'PRESSURE':
                    f.write('\n[PRESSURE]\n')
                    f.write('column: {}\n'.format(i))
                    f.write('units: kPa\n')
                    f.write('standard_name: surface_air_pressure\n')
                    f.write('description: near-surface atmospheric pressure\n')
                elif var == 'SHORTWAVE':
                    f.write('\n[SHORTWAVE]\n')
                    f.write('column: {}\n'.format(i))
                    f.write('units: W m-2\n')
                    f.write('standard_name: incoming_shortwave_radiation\n')
                    f.write('description: incoming shortwave radiation\n')
                elif var == 'LONGWAVE':
                    f.write('\n[LONGWAVE]\n')
                    f.write('column: {}\n'.format(i))
                    f.write('units: W m-2\n')
                    f.write('standard_name: incoming_longwave_radiation\n')
                    f.write('description: incoming longwave radiation\n')
                elif var == 'VP':
                    f.write('\n[VP]\n')
                    f.write('column: {}\n'.format(i))
                    f.write('units: kPa\n')
                    f.write('standard_name: water_vapor_pressure\n')
                    f.write('description: near surface vapor pressure\n')
                elif var == 'WIND':
                    f.write('\n[WIND]\n')
                    f.write('column: {}\n'.format(i))
                    f.write('units: m/s\n')
                    f.write('standard_name: surface_air_pressure\n')
                    f.write('description: near-surface wind speed\n')
   
    # --- Run vic2nc --- #
    # If 1 processor, do a regular process
    if nproc == 1:
        for ens in ens_list:
            cfg_vic2nc = read_config(dict_cfg_file[ens])
            options = cfg_vic2nc.pop('OPTIONS')
            global_atts = cfg_vic2nc.pop('GLOBAL_ATTRIBUTES')
            if not options['regular_grid']:
                domain_dict = cfg_vic2nc.pop('DOMAIN')
            else:
                domain_dict = None
            # Set aside fields dict
            fields = cfg_vic2nc
            # Run vic2nc 
            vic2nc(options, global_atts, domain_dict, fields)

    # If multiple processors, use mp
    elif nproc > 1:
        # Set up multiprocessing
        pool = mp.Pool(processes=nproc)
        # Loop over each ensemble member
        for ens in ens_list:
            cfg_vic2nc = read_config(dict_cfg_file[ens])
            options = cfg_vic2nc.pop('OPTIONS')
            global_atts = cfg_vic2nc.pop('GLOBAL_ATTRIBUTES')
            if not options['regular_grid']:
                domain_dict = cfg_vic2nc.pop('DOMAIN')
            else:
                domain_dict = None
            # set aside fields dict
            fields = cfg_vic2nc
            pool.apply_async(vic2nc,
                             (options, global_atts, domain_dict, fields,))
        # Finish multiprocessing
        pool.close()
        pool.join()
Example #4
0
# Load config file
# =========================================================== #
cfg = read_configobj(sys.argv[1])

# =========================================================== #
# Set random generation seed
# =========================================================== #
np.random.seed(cfg['CONTROL']['seed'])

# =========================================================== #
# Run VIC
# =========================================================== #
# Create class VIC
vic_exe = VIC(cfg['VIC']['exe'])
# Run VIC
vic_exe.run(cfg['VIC']['global'],
            logdir=cfg['OUTPUT']['vic_log_dir'])

# =========================================================== #
# Extract VIC output soil moisture (layer 1) at the end of
# each day, and perturb
# =========================================================== #
# Load VIC output
ds = xr.open_dataset(cfg['OUTPUT']['vic_output_hist_path'])

# Resample surface sm to daily mean
da_sm1_true = ds['OUT_SOIL_MOIST'].sel(nlayer=0)
da_sm1_true_daily = da_sm1_true.resample(dim='time', freq='D', how='mean')

# Reset time index to noon on each day
da_sm1_true_daily['time'] = pd.date_range(
        '{}-12'.format(cfg['TIME_INDEX']['start_date']),