Example #1
0
def check_overlap(wrf_path, ts_now):
    """
    Tell whether the timestamps of the WRF file <wrf_path> include <ts_now>.

    The file is opened through WRFModelData and its 'GMT' entry is searched
    for <ts_now>.  Returns True when it is found.  Otherwise an INFO line
    showing the first and last stored timestamps is printed and False is
    returned.
    """
    gmt_times = WRFModelData(wrf_path)['GMT']

    # guard clause: the requested time is not covered by this forecast
    if ts_now not in gmt_times:
        print("INFO: previous forecast [%s - %s] exists, running DA till %s" %
              (str(gmt_times[0]), str(gmt_times[-1]), str(ts_now)))
        return False

    return True
          }

    # cfg = { 'station_info_dir' : '../real_data/witch_creek',
    #         'station_obs_dir' : '../real_data/witch_creek',
    #         'station_list_file' : '../real_data/witch_creek/station_list',
    #         'wrf_data_file' : '../real_data/witch_creek/wrf20071021_witchcreek_all.nc',
    #         'assimilation_window' : 3600,
    #         'output_dir' : 'results_wc',
    #         'max_dist' : 200.0,
    #         'bin_width' : 20,
    #         'standardize' : False
    #       }


    # load the smallest domain
    wrf_data = WRFModelData(cfg['wrf_data_file'], ['T2', 'PSFC', 'Q2', 'HGT'])

    # read in vars
    lon, lat = wrf_data.get_lons(), wrf_data.get_lats() 
    hgt = wrf_data['HGT']
    tm = wrf_data.get_gmt_times()
    Nt = len(tm)

    print("Loaded %d timestamps from WRF." % Nt)
        
    # load station data from files
    with open(cfg['station_list_file'], 'r') as f:
        si_list = f.read().split('\n')

    si_list = filter(lambda x: len(x) > 0 and x[0] != '#', map(string.strip, si_list))
def _render_panel(position, m, lon, lat, field, title, color_limits):
    """Draw one cell of the 3x3 snapshot figure: field, color range, no axes, colorbar."""
    plt.subplot(3, 3, position)
    render_spatial_field_fast(m, lon, lat, field, title)
    plt.clim(color_limits)
    plt.axis('off')
    plt.colorbar()


def _plot_diagnostic_series(tag, title, path):
    """Plot item [0] against item [1] of every record stored under a diagnostics tag and save it."""
    series = diagnostics().pull(tag)
    plt.figure()
    plt.plot([d[0] for d in series], [d[1] for d in series], 'ro-')
    plt.title(title)
    plt.savefig(path)


def run_module():
    """
    Run the fuel moisture model over the WRF time steps with data assimilation.

    The configuration is read from the file named by sys.argv[1].  The keys
    used here are 'output_dir', 'input_file', 'Nt', 'station_data_dir',
    'station_list_file', 'fm10_meas_var', 'Q', 'P0' and 'lock_gamma'.

    For every time step two grids of CellMoistureModel objects are advanced:
    one that receives Kalman updates from kriged station observations and one
    that never assimilates anything.  A 3x3 snapshot figure is saved per time
    step, and after the run the diagnostics store and a set of summary plots
    are written to cfg['output_dir'].

    Returns nothing; all results are written to files.
    """
    # read in configuration file to execute run
    print("Reading configuration from [%s]" % sys.argv[1])

    # NOTE(review): the configuration file is evaluated as a Python expression,
    # so only trusted configuration files must be passed to this script.
    with open(sys.argv[1]) as f:
        cfg = eval(f.read())

    # ensure output path exists
    if not os.path.isdir(cfg['output_dir']):
        os.mkdir(cfg['output_dir'])

    # configure diagnostics
    init_diagnostics(os.path.join(cfg['output_dir'], 'moisture_model_v1_diagnostics.txt'))

    # Error covariance matrix condition number in kriging
    diagnostics().configure_tag("skdm_cov_cond", False, True, True)

    # Assimilation parameters
    diagnostics().configure_tag("assim_K0", False, True, True)
    diagnostics().configure_tag("assim_K1", True, True, True)
    diagnostics().configure_tag("assim_data", False, False, True)

    diagnostics().configure_tag("obs_residual_var", True, True, True)

    diagnostics().configure_tag("fm10_model_residual_var", True, True, True)
    diagnostics().configure_tag("fm10_model_var", False, True, True)
    diagnostics().configure_tag("fm10_kriging_var", False, True, True)

    ### Load and preprocess WRF model data

    # load WRF data
    wrf_data = WRFModelData(cfg['input_file'], tz_name='US/Mountain')

    # read in spatial and temporal extent of WRF variables
    lat, lon = wrf_data.get_lats(), wrf_data.get_lons()
    tm = wrf_data.get_gmt_times()
    Nt = cfg['Nt'] if cfg['Nt'] is not None else len(tm)
    dom_shape = lat.shape

    # retrieve the rain variable
    rain = wrf_data['RAIN']

    # moisture equilibria are now computed from averaged Q,P,T at beginning and end of period
    Ed, Ew = wrf_data.get_moisture_equilibria()

    ### Load observation data from the stations

    # load station data from files
    with open(os.path.join(cfg['station_data_dir'], cfg['station_list_file']), 'r') as f:
        si_list = [line.strip() for line in f.read().split('\n')]

    # drop empty lines; a real list is built because it is iterated below
    si_list = [line for line in si_list if len(line) > 0]

    # for each station id, load the station
    stations = []
    for sinfo in si_list:
        code = sinfo.split(',')[0]
        mws = MesoWestStation(sinfo, wrf_data)
        for suffix in ['_1', '_2', '_3', '_4', '_5', '_6', '_7']:
            mws.load_station_data(os.path.join(cfg['station_data_dir'], '%s%s.xls' % (code, suffix)))
        stations.append(mws)

    print('Loaded %d stations.' % len(stations))

    # check stations for nans (a list is required: len() and repeated iteration follow)
    stations = [s for s in stations if MesoWestStation.data_ok(s)]
    print('Have %d stations with complete data.' % len(stations))

    # set the measurement variance of the stations
    for s in stations:
        s.set_measurement_variance('fm10', cfg['fm10_meas_var'])

    # build the observation data
    obs_data_fm10 = build_observation_data(stations, 'fm10', wrf_data, tm)

    ### Initialize model and visualization

    # find maximum moisture overall to set up visualization
    maxE = 0.5

    # construct initial conditions from timestep 1 (because Ed/Ew at zero are zero)
    E = 0.5 * (Ed[1, :, :] + Ew[1, :, :])

    # set up parameters
    Q = np.eye(9) * cfg['Q']
    P0 = np.eye(9) * cfg['P0']
    dt = (tm[1] - tm[0]).seconds
    print("INFO: Computed timestep from WRF is is %g seconds." % dt)

    # fields shown in the snapshot figure; they keep their last value (initially
    # zeros) on time steps without observations
    mV = np.zeros_like(E)
    predicted_field = np.zeros_like(E)
    mresV = np.zeros_like(E)
    Kf_fn = np.zeros_like(E)
    Vf_fn = np.zeros_like(E)
    mid = np.zeros_like(E)
    Kg = np.zeros((dom_shape[0], dom_shape[1], 9))
    cV12 = np.zeros_like(E)

    # moisture state and observation residual variance estimators
    mod_re = OnlineVarianceEstimator(np.zeros_like(E), np.ones_like(E) * 0.05, 1)
    obs_re = OnlineVarianceEstimator(np.zeros((len(stations),)), np.ones(len(stations),) * 0.05, 1)

    # initialize the mean field model (default fit is 1.0 of equilibrium before new information comes in)
    mfm = MeanFieldModel(cfg['lock_gamma'])

    # construct model grid using standard fuel parameters
    Tk = np.array([1.0, 10.0, 100.0]) * 3600
    # the builtin object is used as dtype; the np.object alias is gone from current NumPy
    models = np.zeros(dom_shape, dtype=object)
    models_na = np.zeros_like(models)
    for p in np.ndindex(dom_shape):
        models[p] = CellMoistureModel((lat[p], lon[p]), 3, E[p], Tk, P0=P0)
        models_na[p] = CellMoistureModel((lat[p], lon[p]), 3, E[p], Tk, P0=P0)

    m = None
    plt.figure(figsize=(12, 8))

    ###  Run model for each WRF timestep and assimilate data when available
    for t in range(1, Nt):
        model_time = tm[t]
        print("INFO: time: %s, step: %d" % (str(model_time), t))

        # run the model update (both the assimilating and the non-assimilating grid)
        for p in np.ndindex(dom_shape):
            i, j = p
            models[p].advance_model(Ed[t-1, i, j], Ew[t-1, i, j], rain[t-1, i, j], dt, Q)
            models_na[p].advance_model(Ed[t-1, i, j], Ew[t-1, i, j], rain[t-1, i, j], dt, Q)

        # prepare visualization data
        f = np.zeros((dom_shape[0], dom_shape[1], 3))
        f_na = np.zeros((dom_shape[0], dom_shape[1], 3))
        for p in np.ndindex(dom_shape):
            f[p[0], p[1], :] = models[p].get_state()[:3]
            f_na[p[0], p[1], :] = models_na[p].get_state()[:3]
            P = models[p].get_state_covar()
            cV12[p] = P[0, 1]
            mV[p] = P[1, 1]
            mid[p] = models[p].get_model_ids()[1]

        diagnostics().push("fm10_model_var", (t, np.mean(mV)))

        # run Kriging on each observed fuel type
        Kf = []
        Vf = []
        fn = []
        for obs_data, fuel_ndx in [(obs_data_fm10, 1)]:

            # run the kriging subsystem and the Kalman update only if we have observations
            if model_time in obs_data:

                # retrieve observations for current time
                obs_t = obs_data[model_time]

                # fit the current estimation of the moisture field to the data
                base_field = f[:, :, fuel_ndx]
                mfm.fit_to_data(base_field, obs_t)

                # find differences (residuals) between observed measurements and nearest grid points
                # use this to update observation residual standard deviation
                obs_vals = np.array([o.get_value() for o in obs_t])
                mod_vals = np.array([base_field[o.get_nearest_grid_point()] for o in obs_t])
                mod_na_vals = np.array([f_na[:, :, fuel_ndx][o.get_nearest_grid_point()] for o in obs_t])
                obs_re.update_with(obs_vals - mod_vals)
                diagnostics().push("obs_residual_var", (t, np.mean(obs_re.get_variance())))

                # predict the moisture field using observed fuel type
                predicted_field = mfm.predict_field(base_field)

                # update the model residual estimator and get current best estimate of variance
                mod_re.update_with(base_field - predicted_field)
                mresV = mod_re.get_variance()
                diagnostics().push("fm10_model_residual_var", (t, np.mean(mresV)))

                # krige observations to grid points
                Kf_fn, Vf_fn = trend_surface_model_kriging(obs_t, wrf_data, predicted_field)

                krig_vals = np.array([Kf_fn[o.get_nearest_grid_point()] for o in obs_t])
                diagnostics().push("assim_data", (t, fuel_ndx, obs_vals, krig_vals, mod_vals, mod_na_vals))
                plot_model_snapshot(cfg, tm, t, fuel_ndx, obs_vals, krig_vals, mod_vals, mod_na_vals)

                diagnostics().push("fm10_kriging_var", (t, np.mean(Vf_fn)))

                # append to storage for kriged fields in this time instant
                Kf.append(Kf_fn)
                Vf.append(Vf_fn)
                fn.append(fuel_ndx)

        # if there were any observations, run the kalman update step
        if len(fn) > 0:
            Nobs = len(fn)
            # run the kalman update in each model independently
            # gather the standard deviations of the moisture fuel after the Kalman update
            for p in np.ndindex(dom_shape):
                O = np.zeros((Nobs,))
                V = np.zeros((Nobs, Nobs))

                # construct observations for this position
                for i in range(Nobs):
                    O[i] = Kf[i][p]
                    V[i, i] = Vf[i][p]

                # execute the Kalman update
                Kp = models[p].kalman_update(O, V, fn)
                Kg[p[0], p[1], :] = Kp[:, 0]

            # push new diagnostic outputs
            diagnostics().push("assim_K0", (t, np.mean(Kg[:, :, 0])))
            diagnostics().push("assim_K1", (t, np.mean(Kg[:, :, 1])))

        # prepare visualization data (state after the Kalman update, if any)
        f = np.zeros((dom_shape[0], dom_shape[1], 3))
        for p in np.ndindex(dom_shape):
            f[p[0], p[1], :] = models[p].get_state()[:3]

        # one (field, title, color limits) entry per cell of the 3x3 figure, in subplot order
        panels = [
            (f[:, :, 0], '1-hr fuel', [0.0, maxE]),
            (f[:, :, 1], '10-hr fuel', [0.0, maxE]),
            (f_na[:, :, 1], '10hr fuel - no assim', [0.0, maxE]),
            (Kg[:, :, 0], 'Kalman gain for 1-hr fuel', [0.0, 3.0]),
            (Kg[:, :, 1], 'Kalman gain for 10-hr fuel', [0.0, 1.0]),
            (Kf_fn, 'Kriging field', [0.0, maxE]),
            (mid, 'Model ids', [0.0, 5.0]),
            (Vf_fn, 'Kriging variance', [0.0, np.max(Vf_fn)]),
            (mresV, 'Model res. variance', [0.0, np.max(mresV)]),
        ]
        plt.clf()
        for position, (field, title, color_limits) in enumerate(panels, 1):
            _render_panel(position, m, lon, lat, field, title, color_limits)

        plt.savefig(os.path.join(cfg['output_dir'], 'moisture_model_t%03d.png' % t))

    # store the diagnostics in a binary file
    diagnostics().dump_store(os.path.join(cfg['output_dir'], 'diagnostics.bin'))

    # make a plot of gammas
    plt.figure()
    plt.plot(diagnostics().pull('mfm_gamma'), 'bo-')
    plt.title('Mean field model - gamma')
    plt.savefig(os.path.join(cfg['output_dir'], 'plot_gamma.png'))

    plt.figure()
    plt.plot(diagnostics().pull('skdm_cov_cond'))
    plt.title('Condition number of covariance matrix')
    plt.savefig(os.path.join(cfg['output_dir'], 'plot_sigma_cond.png'))

    # make a plot for each substation
    plt.figure()
    D = diagnostics().pull("assim_data")
    for i in range(len(stations)):
        plt.clf()
        # get data for the i-th station
        t_i = [o[0] for o in D]
        obs_i = [o[2][i] for o in D]
        krig_i = [o[3][i] for o in D]
        mod_i = [o[4][i] for o in D]
        mod_na_i = [o[5][i] for o in D]
        # the y-range must cover all four plotted series, including the no-assimilation run
        mx = max(max(obs_i), max(mod_i), max(krig_i), max(mod_na_i))
        plt.plot(t_i, obs_i, 'ro')
        plt.plot(t_i, krig_i, 'bo-')
        plt.plot(t_i, mod_i, 'kx-', linewidth=1.5)
        plt.plot(t_i, mod_na_i, 'mx-')
        plt.ylim([0.0, 1.1 * mx])
        plt.legend(['Obs.', 'Kriged', 'Model', 'NoAssim'])
        plt.title('Station observations fit to model and kriging field')
        plt.savefig(os.path.join(cfg['output_dir'], 'station%02d.png' % (i+1)))

    # time series of the scalar diagnostics collected during the run
    _plot_diagnostic_series("assim_K1", 'Average Kalman gain',
                            os.path.join(cfg['output_dir'], 'plot_kalman_gain_10hr.png'))
    _plot_diagnostic_series("assim_K0", 'Average Kalman gain',
                            os.path.join(cfg['output_dir'], 'plot_kalman_gain_1hr.png'))
    _plot_diagnostic_series("fm10_model_var", 'Average fm10 model variance',
                            os.path.join(cfg['output_dir'], 'plot_fm10_model_variance.png'))
    _plot_diagnostic_series("fm10_model_residual_var", 'Average fm10 model residual variance',
                            os.path.join(cfg['output_dir'], 'plot_fm10_model_residual_variance.png'))
    _plot_diagnostic_series("fm10_kriging_var", 'Kriging field variance',
                            os.path.join(cfg['output_dir'], 'plot_kriging_variance.png'))
    _plot_diagnostic_series("obs_residual_var", 'Observation residual variance',
                            os.path.join(cfg['output_dir'], 'plot_observation_residual_variance.png'))

    plt.figure()
    plt.plot(diagnostics().pull("mfm_mape"), 'ro-', linewidth=2)
    plt.title('Mean absolute prediction error of station data')
    plt.savefig(os.path.join(cfg['output_dir'], 'plot_station_mape.png'))
def run_module():
    """
    Krige station fuel moisture observations onto the WRF grid, one plot per step.

    WRF data is loaded from a fixed Witch Creek file.  For every time step
    from 1 on that has an entry in the observation data, the mean field model
    is fitted to the average of the drying and wetting equilibria, the
    observations are kriged to the grid with simple_kriging_data_to_model and
    a 2x2 figure (fitted equilibrium, kriging field, kriging variance and
    their difference) is saved as 'model_outputs/kriging_test_tNNN.png'.

    Returns nothing; diagnostics and figures are written to files.
    """
    # configure diagnostics
    init_diagnostics("results/kriging_test_diagnostics.txt")
    diagnostics().configure_tag("skdm_obs_res", True, True, True)
    diagnostics().configure_tag("skdm_obs_res_mean", True, True, True)

    wrf_data = WRFModelData(
        '../real_data/witch_creek/realfire03_d04_20071022.nc')

    # read in vars
    lat, lon = wrf_data.get_lats(), wrf_data.get_lons()
    tm = wrf_data.get_times()
    rain = wrf_data['RAINNC']
    Ed, Ew = wrf_data.get_moisture_equilibria()

    # number of time steps is taken from the leading axis of the rain variable
    Nt = rain.shape[0]

    # load station data, match to grid points and build observation records
    # NOTE(review): station_data_dir and station_list are not defined in this
    # function; presumably module-level names - confirm.
    tz = pytz.timezone('US/Pacific')
    stations = [
        Station(os.path.join(station_data_dir, s), tz, wrf_data)
        for s in station_list
    ]
    obs_data = build_observation_data(stations, 'fuel_moisture', wrf_data)

    # construct initial vector
    mfm = MeanFieldModel()

    # set up parameters
    mod_res_std = np.ones_like(Ed[0, :, :]) * 0.05
    obs_res_std = np.ones((len(stations), )) * 0.1

    # construct a basemap representation of the area
    lat_rng = (np.min(lat), np.max(lat))
    lon_rng = (np.min(lon), np.max(lon))
    m = Basemap(llcrnrlon=lon_rng[0],
                llcrnrlat=lat_rng[0],
                urcrnrlon=lon_rng[1],
                urcrnrlat=lat_rng[1],
                projection='mill')

    plt.figure(figsize=(10, 6))

    # run model; ndx numbers the saved figures consecutively
    ndx = 1
    for t in range(1, Nt):
        # use the time axis fetched once above
        model_time = tm[t]

        # kriging only runs for time steps that have observations
        if model_time not in obs_data:
            continue

        print("Time: %s, step: %d" % (str(model_time), t))

        E = 0.5 * (Ed[t, :, :] + Ew[t, :, :])
        mfm.fit_to_data(E, obs_data[model_time])
        Efit = mfm.predict_field(E)

        # krige data to observations
        K, V = simple_kriging_data_to_model(obs_data[model_time],
                                            obs_res_std, Efit, wrf_data,
                                            mod_res_std, t)

        # one (field, title, color limits) entry per cell of the 2x2 figure;
        # None leaves the color range of that cell automatic
        panels = [
            (Efit, 'Equilibrium', [0.0, 0.2]),
            (K, 'Kriging field', [0.0, 0.2]),
            (V, 'Kriging variance', [0.0, np.max(V)]),
            (K - Efit, 'Kriging vs. mean field residuals', None),
        ]
        plt.clf()
        for position, (field, title, color_limits) in enumerate(panels, 1):
            plt.subplot(2, 2, position)
            render_spatial_field(m, lon, lat, field, title)
            if color_limits is not None:
                plt.clim(color_limits)
            plt.colorbar()

        plt.savefig('model_outputs/kriging_test_t%03d.png' % (ndx))
        ndx += 1
Example #5
0
def run_module():

    # read in configuration file to execute run
    print("Reading configuration from [%s]" % sys.argv[1])

    with open(sys.argv[1]) as f:
        cfg = eval(f.read())

    # ensure output path exists
    if not os.path.isdir(cfg['output_dir']):
        os.mkdir(cfg['output_dir'])

    # configure diagnostics
    init_diagnostics(
        os.path.join(cfg['output_dir'], 'moisture_model_v1_diagnostics.txt'))

    # Trend surface model diagnostics
    diagnostics().configure_tag("kriging_cov_cond", True, True, True)
    diagnostics().configure_tag("s2_eta_hat", True, True, True)
    diagnostics().configure_tag("kriging_rmse", True, True, True)
    diagnostics().configure_tag("kriging_beta", True, True, True)
    diagnostics().configure_tag("kriging_iters", False, True, True)
    diagnostics().configure_tag("kriging_subzero_s2_estimates", False, True,
                                True)
    diagnostics().configure_tag("fm10_kriging_var", True, True, True)

    diagnostics().configure_tag("f0_summary", True, True, True)
    diagnostics().configure_tag("f1_summary", True, True, True)
    diagnostics().configure_tag("f2_summary", True, True, True)
    diagnostics().configure_tag("f3_summary", True, True, True)

    # Assimilation parameters
    diagnostics().configure_tag("K0_summary", True, True, True)
    diagnostics().configure_tag("K1_summary", True, True, True)
    diagnostics().configure_tag("K2_summary", True, True, True)
    diagnostics().configure_tag("K3_summary", True, True, True)
    diagnostics().configure_tag("assim_info", False, False, True)

    # Model forecast, analysis and non-assimilated model: state, covariance, errors
    diagnostics().configure_tag("fm10f_rmse", True, True, True)
    diagnostics().configure_tag("fm10na_rmse", True, True, True)

    # all simulation times and all assimilation times (subset)
    diagnostics().configure_tag("mta", False, True, True)
    diagnostics().configure_tag("mt", False, True, True)

    # observation values and their nearest grid points
    diagnostics().configure_tag("obs_vals", False, True, True)
    diagnostics().configure_tag("obs_ngp", False, True, True)

    # in test mode, we will emit observations at the target station
    # our predictions, the nearest grid point and the test station id
    diagnostics().configure_tag("test_obs", True, True, True)
    diagnostics().configure_tag("test_pred", True, True, True)
    diagnostics().configure_tag("test_ngp", True, True, True)
    diagnostics().configure_tag("test_station_id", True, True, True)

    ### Load and preprocess WRF model data

    # load WRF data
    wrf_data = WRFModelData(cfg['wrf_output'],
                            ['T2', 'Q2', 'PSFC', 'RAINNC', 'RAINC', 'HGT'])
    wrf_data.slice_field('HGT')

    # read in spatial and temporal extent of WRF variables
    lat, lon = wrf_data.get_lats(), wrf_data.get_lons()
    hgt = wrf_data['HGT']
    tm = wrf_data.get_gmt_times()
    Nt = cfg['Nt'] if cfg.has_key('Nt') and cfg['Nt'] is not None else len(tm)
    dom_shape = lat.shape
    print('INFO: domain size is %d x %d grid points.' % dom_shape)
    print('INFO: domain extent is lats (%g to %g) lons (%g to %g).' %
          (np.amin(lat), np.amax(lat), np.amin(lon), np.amax(lon)))

    # if writing is requested, open output file and set up dimensions
    if cfg['write_fields'] not in ['all', 'fmc_gc', 'none']:
        error('FATAL: write_fields must be one of all, fmc_gc or none.')
    if cfg['write_fields'] == 'none':
        cfg['write_fields'] = False
    out_file = None
    ncfmc_gc, ncfm10a, ncfm10aV, ncfm10f, cnfm10fV, ncfm10na = None, None, None, None, None, None
    nctsmV, ncKg = None, None
    if cfg['write_fields']:
        out_file = netCDF4.Dataset(cfg['output_dir'] + '/fields.nc', 'w')
        out_file.createDimension('Time', None)
        out_file.createDimension('fuel_moisture_classes_stag', 5)
        out_file.createDimension('south_north', dom_shape[0])
        out_file.createDimension('west_east', dom_shape[1])
        ncfmc_gc = out_file.createVariable(
            'FMC_GC', 'f4',
            ('Time', 'fuel_moisture_classes_stag', 'south_north', 'west_east'))
        if cfg['write_fields'] == 'all':
            ncfm10a = out_file.createVariable(
                'fm10a', 'f4', ('Time', 'south_north', 'west_east'))
            ncfm10aV = out_file.createVariable(
                'fm10a_var', 'f4', ('Time', 'south_north', 'west_east'))
            ncfm10na = out_file.createVariable(
                'fm10na', 'f4', ('Time', 'south_north', 'west_east'))
            ncfm10f = out_file.createVariable(
                'fm10f', 'f4', ('Time', 'south_north', 'west_east'))
            ncfm10fV = out_file.createVariable(
                'fm10f_var', 'f4', ('Time', 'south_north', 'west_east'))
            nctsmV = out_file.createVariable(
                'tsm_var', 'f4', ('Time', 'south_north', 'west_east'))
            ncKg = out_file.createVariable(
                'kalman_gain', 'f4', ('Time', 'south_north', 'west_east'))
            print('INFO: opened fields.nc for writing ALL output fields.')
        else:
            print('INFO: opened field.nc for writing FMC_GC only.')

    test_mode = (cfg['run_mode'] == 'test')
    tgt_station = None
    if cfg['run_mode'] == 'test':
        print('INFO: running in TEST mode! Will perform leave-one-out tesing.')
        tgt_station_id = cfg['target_station_id']
        diagnostics().push('test_station_id', tgt_station_id)
    elif cfg['run_mode'] == 'production':
        print(
            'INFO: running in PRODUCTION mode! Using all observation stations.'
        )
    else:
        error('FATAL: invalid run mode! Must be "test" or "production".')

    # determine simulation times
    tm_start = parse_datetime(
        cfg['start_time']) if cfg['start_time'] is not None else tm[0]
    tm_end = parse_datetime(
        cfg['end_time']) if cfg['end_time'] is not None else tm[-1]

    # if the required start time or end time are outside the simulation domain, exit with an error
    if tm_start < tm[0] or tm_end > tm[-1]:
        print('FATAL: invalid time range, required [%s-%s], availble [%s-%s]' %
              (str(tm_start), str(tm_end), str(tm[0]), str(tm[-1])))
        sys.exit(2)

    print('INFO: time limits are %s to %s\nINFO: simulation is from %s to %s' %
          (str(tm_start), str(tm_end), str(tm[0]), str(tm[-1])))

    # retrieve dynamic covariates and remove mean at each time point for T2 and PSFC
    T2 = wrf_data['T2']
    #T2 -= np.mean(np.mean(T2,axis=0),axis=0)[np.newaxis,np.newaxis,:]

    PSFC = wrf_data['PSFC']
    #PSFC -= np.mean(np.mean(PSFC,axis=0),axis=0)[np.newaxis,np.newaxis,:]

    # numerical fix - if it rains at an intensity of less than 0.001 per hour, set rain to zero
    # also, use log(rain + 1) to prevent wild trend surface model predictions when stations see little rain
    # but elsewhere there is too much rain
    # without this, numerical errors in trend surface model may pop up
    rain = wrf_data['RAIN']
    #rain[rain < 0.01] = 0.0
    rain = np.log(rain + 1.0)

    # moisture equilibria are now computed from averaged Q,P,T at beginning and end of period
    Ed, Ew = wrf_data.get_moisture_equilibria()

    ### Load observation data from the stations

    # compute the diagonal distance between grid points
    grid_dist_km = great_circle_distance(lon[0, 0], lat[0, 0], lon[1, 1],
                                         lat[1, 1])
    print('INFO: diagonal distance in grid is %g' % grid_dist_km)

    # load station data from files
    with open(cfg['station_list_file'], 'r') as f:
        si_list = f.read().split('\n')

    si_list = filter(lambda x: len(x) > 0 and x[0] != '#',
                     map(string.strip, si_list))

    # for each station id, load the station
    stations = []
    for code in si_list:
        mws = MesoWestStation(code)
        mws.load_station_info(
            os.path.join(cfg["station_info_dir"], "%s.info" % code))
        mws.register_to_grid(wrf_data)
        if mws.get_dist_to_grid() < grid_dist_km / 2.0:
            print(
                'Station %s: lat %g lon %g nearest grid pt %s lat %g lon %g dist_to_grid %g'
                % (code, mws.lat, mws.lon, str(mws.grid_pt), lat[mws.grid_pt],
                   lon[mws.grid_pt], mws.dist_grid_pt))
            mws.load_station_data(
                os.path.join(cfg["station_data_dir"], "%s.obs" % code))
            if test_mode and mws.get_id() == tgt_station_id:
                tgt_station = mws
                print(
                    'INFO: in test mode, targeting station %s (removed from data pool).'
                    % tgt_station_id)
                diagnostics().push("test_ngp", mws.get_nearest_grid_point())
            else:
                stations.append(mws)

    print('Loaded %d stations (discarded %d stations, too far from grid).' %
          (len(stations), len(si_list) - len(stations)))

    if test_mode and tgt_station is None:
        error(
            'FATAL: in test mode, a station was removed that was not among accepted stations.'
        )

    # build the observation data
    obs_data_fm10 = build_observation_data(stations, 'FM')

    # build target data if in test mode
    tgt_obs_fm10 = None
    test_ngp = None
    if test_mode:
        test_ngp = tgt_station.get_nearest_grid_point()
        tgt_obs_fm10 = build_observation_data([tgt_station], 'FM')

    ### Initialize model and visualization

    # construct initial conditions from timestep 0
    E = 0.5 * (Ed[0, :, :] + Ew[0, :, :])

    # set up parameters
    Nk = 4  # we simulate 4 types of fuel
    Q = np.diag(cfg['Q'])
    P0 = np.diag(cfg['P0'])
    Tk = np.array([1.0, 10.0, 100.0, 1000.0]) * 3600
    dt = (tm[1] - tm[0]).seconds
    print("INFO: Computed timestep from WRF is is %g seconds." % dt)
    mresV = np.zeros_like(E)
    mid = np.zeros_like(E)
    Kg = np.zeros((dom_shape[0], dom_shape[1], len(Tk) + 2))

    # preprocess all static covariates
    cov_ids = cfg['covariates']
    Xd3 = len(cov_ids) + 1
    X = np.zeros((dom_shape[0], dom_shape[1], Xd3))
    Xr = np.zeros((dom_shape[0], dom_shape[1], Xd3))
    static_covar_map = {
        "lon": lon - np.mean(lon),
        "lat": lat - np.mean(lat),
        "elevation": hgt - np.mean(hgt),
        "constant": np.ones(dom_shape)
    }
    dynamic_covar_map = {"temperature": T2, "pressure": PSFC, "rain": rain}

    for i in range(1, Xd3):
        cov_id = cov_ids[i - 1]
        if cov_id in static_covar_map:
            print('INFO: found static covariate %s' % cov_id)
            Xr[:, :, i] = static_covar_map[cov_id]
        elif cov_id in dynamic_covar_map:
            print('INFO: found dynamic covariate %s' % cov_id)
        else:
            print('FATAL: unknown covariate %s encountered' % cov_id)
            sys.exit(2)

    print("INFO: there are %d covariates (including model state)" % Xd3)

    # retrieve assimilation time window
    assim_time_win = cfg['assimilation_time_window']

    print('GMM init: equilibrium (%g,%g,%g) and at 86,205 %g' %
          (np.amin(E), np.mean(E), np.amax(E), E[86, 205]))

    models = GridMoistureModel(
        E[:, :, np.newaxis][:, :, np.zeros((4, ), dtype=np.int)], Tk, P0)
    models_na = GridMoistureModel(
        E[:, :, np.newaxis][:, :, np.zeros((4, ), dtype=np.int)], Tk, P0)

    ###  Run model for each WRF timestep and assimilate data when available
    t_start, t_end = 1, len(tm) - 1
    while tm_start > tm[t_start]:
        t_start += 1
    while tm_end < tm[t_end]:
        t_end -= 1

    # the first FMC_GC value gets filled out with equilibria
    if cfg['write_fields']:
        for i in range(Nk):
            ncfmc_gc[0, i, :, :] = E

    print('INFO: running simulation from %s (%d) to %s (%d).' %
          (str(tm[t_start]), t_start, str(tm[t_end]), t_end))
    for t in range(t_start, t_end + 1):
        model_time = tm[t]
        print("INFO: time: %s, step: %d" % (str(model_time), t))

        diagnostics().push("mt", model_time)

        models_na.advance_model(Ed[t - 1, :, :], Ew[t - 1, :, :],
                                rain[t - 1, :, :], dt, Q)
        models.advance_model(Ed[t - 1, :, :], Ew[t - 1, :, :],
                             rain[t - 1, :, :], dt, Q)

        # extract fuel moisture contents [make a fresh copy every iteration!]
        f = models.get_state().copy()
        f_na = models_na.get_state().copy()

        # push 10-hr fuel state & variance of forecast
        if cfg['write_fields'] == 'all':
            ncfm10f[t, :, :] = models.get_state()[:, :, 1]
            ncfm10fV[t, :, :] = models.P[:, :, 1, 1]
            ncfm10na[t, :, :] = models_na.get_state()[:, :, 1]

        # examine the assimilated fields (if assimilation is activated)
        for i in range(4):
            diagnostics().push("f%d_summary" % i, (t, np.amin(
                f[:, :, i]), np.mean(f[:, :, i]), np.amax(f[:, :, i])))
            if np.any(f[:, :, i] < 0.0):
                print(
                    "WARN: in field %d there were %d negative moisture values !"
                    % (i, np.count_nonzero(f[:, :, i] < 0.0)))
                ind = np.unravel_index(np.argmin(f[:, :, i]), f.shape[:2])
                print(models.P[ind[0], ind[1], :, :])
                print("Full model state at position %d,%d:" % (ind[0], ind[1]))
                print(models.m_ext[ind[0], ind[1], :])
            if np.any(f[:, :, i] > 2.5):
                print(
                    "WARN: in field %d there were %d moisture values above 2.5!"
                    % (i, np.count_nonzero(f[:, :, i] > 2.5)))
                ind = np.unravel_index(np.argmax(f[:, :, i]), f.shape[:2])
                print(models.P[ind[0], ind[1], :, :])
                print("Full model state at position %d,%d:" % (ind[0], ind[1]))
                print(models.m_ext[ind[0], ind[1], :])

        if cfg['assimilate']:

            # run Kriging on each observed fuel type
            Kfs, Vfs, fns = [], [], []
            for obs_data, fuel_ndx in [(obs_data_fm10, 1)]:

                # run the kriging subsystem and the Kalman update only if have valid observations
                valid_times = [
                    z for z in obs_data.keys()
                    if abs(total_seconds(z - model_time)) < assim_time_win /
                    2.0
                ]
                print(
                    'INFO: there are %d valid times at model time %s for fuel index %d'
                    % (len(valid_times), str(model_time), fuel_ndx))
                if len(valid_times) > 0:

                    # add model time as time when assimilation occurred
                    diagnostics().push("mta", model_time)

                    # retrieve observations for current time
                    obs_valid_now = []
                    for z in valid_times:
                        obs_valid_now.extend(obs_data[z])

                    print(
                        'INFO: model time %s, assimilating %d observations.' %
                        (str(model_time), len(obs_valid_now)))

                    # construct covariates for this time instant
                    X[:, :, 0] = f[:, :, fuel_ndx]
                    for i in range(1, Xd3):
                        cov_id = cov_ids[i - 1]
                        if cov_id in static_covar_map:
                            X[:, :, i] = Xr[:, :, i]
                        elif cov_id in dynamic_covar_map:
                            F = dynamic_covar_map[cov_id]
                            X[:, :, i] = F[t, :, :]
                        else:
                            error('FATAL: found unknown covariate %s' % cov_id)

                    # find differences (residuals) between observed measurements and nearest grid points
                    obs_vals = [o.get_value() for o in obs_valid_now]
                    obs_ngp = [
                        o.get_nearest_grid_point() for o in obs_valid_now
                    ]
                    diagnostics().push("obs_vals", obs_vals)
                    diagnostics().push("obs_ngp", obs_ngp)

                    mod_vals = np.array(
                        [f[i, j, fuel_ndx] for i, j in obs_ngp])
                    mod_na_vals = np.array(
                        [f_na[i, j, fuel_ndx] for i, j in obs_ngp])
                    diagnostics().push("fm10f_rmse",
                                       np.mean((obs_vals - mod_vals)**2)**0.5)
                    diagnostics().push(
                        "fm10na_rmse",
                        np.mean((obs_vals - mod_na_vals)**2)**0.5)

                    # krige observations to grid points
                    Kf_fn, Vf_fn = fit_tsm(obs_valid_now, X)
                    if np.count_nonzero(Kf_fn > 2.5) > 0:
                        rain_t = dynamic_covar_map['rain'][t, :, :]
                        print(
                            'WARN: in TSM found %d values over 2.5, %d of those had rain, clamped to 2.5'
                            % (np.count_nonzero(Kf_fn > 2.5),
                               np.count_nonzero(
                                   np.logical_and(Kf_fn > 2.5, rain_t > 0.0))))
                        Kf_fn[Kf_fn > 2.5] = 2.5
                    if np.count_nonzero(Kf_fn < 0.0) > 0:
                        print(
                            'WARN: in TSM found %d values under 0.0, clamped to 0.0'
                            % np.count_nonzero(Kf_fn < 0.0))
                        Kf_fn[Kf_fn < 0.0] = 0.0

                    krig_vals = np.array([Kf_fn[ngp] for ngp in obs_ngp])
                    diagnostics().push("assim_info",
                                       (t, fuel_ndx, obs_vals, krig_vals,
                                        mod_vals, mod_na_vals))
                    diagnostics().push("fm10_kriging_var", (t, np.mean(Vf_fn)))

                    if cfg['write_fields'] == 'all':
                        nctsmV[t, :, :] = Vf_fn

                    # append to storage for kriged fields in this time instant
                    Kfs.append(Kf_fn)
                    Vfs.append(Vf_fn)
                    fns.append(fuel_ndx)

            # if there were any observations, run the kalman update step
            if len(fns) > 0:
                NobsClasses = len(fns)

                O = np.zeros((dom_shape[0], dom_shape[1], NobsClasses))
                V = np.zeros(
                    (dom_shape[0], dom_shape[1], NobsClasses, NobsClasses))

                for i in range(NobsClasses):
                    O[:, :, i] = Kfs[i]
                    V[:, :, i, i] = Vfs[i]

                # execute the Kalman update
                if len(fns) == 1:
                    models.kalman_update_single2(O, V, fns[0], Kg)
                else:
                    models.kalman_update(O, V, fns, Kg)

                # push new diagnostic outputs
                if cfg['write_fields'] == 'all':
                    ncKg[t, :, :] = Kg[:, :, 1]

                for i in range(4):
                    diagnostics().push(
                        "K%d_summary" % i,
                        (t, np.amin(Kg[:, :, i]), np.mean(
                            Kg[:, :, i]), np.amax(Kg[:, :, i])))
                    if np.any(models.get_state()[:, :, i] < 0.0):
                        print(
                            "WARN: in field %d there were %d negative moisture values !"
                            % (i,
                               np.count_nonzero(
                                   models.get_state()[:, :, i] < 0.0)))
                        ind = np.unravel_index(
                            np.argmin(models.get_state()[:, :, i]),
                            models.get_state().shape[:2])
                        print(models.P[ind[0], ind[1], :, :])
                        print(
                            "TSM input at given position: value %g variance %g"
                            % (O[ind[0], ind[1]], V[ind[0], ind[1]]))
                        print("Model state at given position:")
                        print(models.m_ext[ind[0], ind[1], :])

            # store post-assimilation (or forecast depending on whether observations were available) FM-10 state and variance
            if cfg['write_fields'] == 'all':
                ncfm10a[t, :, :] = models.get_state()[:, :, 1]
                ncfm10aV[t, :, :] = models.P[:, :, 1, 1]

            # we don't care if we assimilated or not, we always check our error on target station if in test mode
            if test_mode:
                valid_times = [
                    z for z in tgt_obs_fm10.keys()
                    if abs(total_seconds(z - model_time)) < assim_time_win /
                    2.0
                ]
                tgt_i, tgt_j = test_ngp
                diagnostics().push("test_pred", f[tgt_i, tgt_j, 1])
                if len(valid_times) > 0:
                    # this is our target observation [FIXME: this disregards multiple observations if multiple happen to be valid]
                    tgt_obs = tgt_obs_fm10[valid_times[0]][0]
                    obs = tgt_obs.get_value()
                    diagnostics().push("test_obs", obs)
                else:
                    diagnostics().push("test_obs", np.nan)

            # store data in wrf_file variable FMC_G
            if cfg['write_fields']:
                ncfmc_gc[t, :Nk, :, :] = np.transpose(
                    models.get_state()[:, :, :Nk], axes=[2, 0, 1])

        # store the diagnostics in a binary file when done
    diagnostics().dump_store(os.path.join(cfg['output_dir'],
                                          'diagnostics.bin'))

    # close the netCDF file (relevant if we did write into FMC_GC)
    if out_file is not None:
        out_file.close()
Example #6
0
def run_module():
    """Run the grid fuel moisture model over a WRF simulation with optional data assimilation.

    The configuration is read from the file named by ``sys.argv[1]``; its
    contents are evaluated as a Python expression that must yield a dictionary.
    Keys read by this function: ``output_dir``, ``wrf_output``,
    ``write_fields`` ('all', 'fmc_gc' or 'none'), ``run_mode`` ('test' or
    'production'), ``target_station_id`` (test mode only), ``start_time``,
    ``end_time``, ``station_list_file``, ``station_info_dir``,
    ``station_data_dir``, ``Q``, ``P0``, ``covariates``,
    ``assimilation_time_window`` and ``assimilate``.

    For every WRF time step inside the requested time range, two models are
    advanced: ``models`` (which receives Kalman updates when observations are
    available and ``assimilate`` is set) and ``models_na`` (never updated).
    Summary values are pushed to the diagnostics store along the way.

    Side effects: creates ``output_dir`` if it does not exist, opens
    ``fields.nc`` in it when field output is requested, and dumps the
    diagnostics store to ``diagnostics.bin`` at the end.  Calls
    ``sys.exit(2)`` on an invalid time range or an unknown covariate.
    """

    # read in configuration file to execute run
    print("Reading configuration from [%s]" % sys.argv[1])

    # NOTE(review): eval() executes whatever code the configuration file
    # contains; only run this with configuration files from a trusted source.
    with open(sys.argv[1]) as f:
        cfg = eval(f.read())

    # ensure output path exists
    if not os.path.isdir(cfg['output_dir']):
        os.mkdir(cfg['output_dir'])

    # configure diagnostics
    init_diagnostics(os.path.join(cfg['output_dir'], 'moisture_model_v1_diagnostics.txt'))

    # Trend surface model diagnostics
    diagnostics().configure_tag("kriging_cov_cond", True, True, True)
    diagnostics().configure_tag("s2_eta_hat", True, True, True)
    diagnostics().configure_tag("kriging_rmse", True, True, True)
    diagnostics().configure_tag("kriging_beta", True, True, True)
    diagnostics().configure_tag("kriging_iters", False, True, True)
    diagnostics().configure_tag("kriging_subzero_s2_estimates", False, True, True)
    diagnostics().configure_tag("fm10_kriging_var", True, True, True)

    diagnostics().configure_tag("f0_summary", True, True, True)
    diagnostics().configure_tag("f1_summary", True, True, True)
    diagnostics().configure_tag("f2_summary", True, True, True)
    diagnostics().configure_tag("f3_summary", True, True, True)

    # Assimilation parameters
    diagnostics().configure_tag("K0_summary", True, True, True)
    diagnostics().configure_tag("K1_summary", True, True, True)
    diagnostics().configure_tag("K2_summary", True, True, True)
    diagnostics().configure_tag("K3_summary", True, True, True)
    diagnostics().configure_tag("assim_info", False, False, True)

    # Model forecast, analysis and non-assimilated model: state, covariance, errors
    diagnostics().configure_tag("fm10f_rmse", True, True, True)
    diagnostics().configure_tag("fm10na_rmse", True, True, True)

    # all simulation times and all assimilation times (subset)
    diagnostics().configure_tag("mta", False, True, True)
    diagnostics().configure_tag("mt", False, True, True)

    # observation values and their nearest grid points
    diagnostics().configure_tag("obs_vals", False, True, True)
    diagnostics().configure_tag("obs_ngp", False, True, True)

    # in test mode, we will emit observations at the target station
    # our predictions, the nearest grid point and the test station id
    diagnostics().configure_tag("test_obs", True, True, True)
    diagnostics().configure_tag("test_pred", True, True, True)
    diagnostics().configure_tag("test_ngp", True, True, True)
    diagnostics().configure_tag("test_station_id", True, True, True)

    ### Load and preprocess WRF model data

    # load WRF data
    wrf_data = WRFModelData(cfg['wrf_output'], ['T2', 'Q2', 'PSFC', 'RAINNC', 'RAINC', 'HGT'])
    wrf_data.slice_field('HGT')

    # read in spatial and temporal extent of WRF variables
    lat, lon = wrf_data.get_lats(), wrf_data.get_lons()
    hgt = wrf_data['HGT']
    tm = wrf_data.get_gmt_times()
    dom_shape = lat.shape
    print('INFO: domain size is %d x %d grid points.' % dom_shape)
    print('INFO: domain extent is lats (%g to %g) lons (%g to %g).' %
          (np.amin(lat), np.amax(lat), np.amin(lon), np.amax(lon)))

    # if writing is requested, open output file and set up dimensions
    if cfg['write_fields'] not in ['all', 'fmc_gc', 'none']:
        error('FATAL: write_fields must be one of all, fmc_gc or none.')
    if cfg['write_fields'] == 'none':
        # from here on write_fields is either falsy, 'fmc_gc' or 'all'
        cfg['write_fields'] = False
    out_file = None
    # every output variable starts out as None so that each name is always bound
    ncfmc_gc, ncfm10a, ncfm10aV, ncfm10f, ncfm10fV, ncfm10na = None, None, None, None, None, None
    nctsmV, ncKg = None, None
    if cfg['write_fields']:
        out_file = netCDF4.Dataset(os.path.join(cfg['output_dir'], 'fields.nc'), 'w')
        out_file.createDimension('Time', None)
        out_file.createDimension('fuel_moisture_classes_stag', 5)
        out_file.createDimension('south_north', dom_shape[0])
        out_file.createDimension('west_east', dom_shape[1])
        ncfmc_gc = out_file.createVariable(
            'FMC_GC', 'f4', ('Time', 'fuel_moisture_classes_stag', 'south_north', 'west_east'))
        if cfg['write_fields'] == 'all':
            dims_2d = ('Time', 'south_north', 'west_east')
            ncfm10a = out_file.createVariable('fm10a', 'f4', dims_2d)
            ncfm10aV = out_file.createVariable('fm10a_var', 'f4', dims_2d)
            ncfm10na = out_file.createVariable('fm10na', 'f4', dims_2d)
            ncfm10f = out_file.createVariable('fm10f', 'f4', dims_2d)
            ncfm10fV = out_file.createVariable('fm10f_var', 'f4', dims_2d)
            nctsmV = out_file.createVariable('tsm_var', 'f4', dims_2d)
            ncKg = out_file.createVariable('kalman_gain', 'f4', dims_2d)
            print('INFO: opened fields.nc for writing ALL output fields.')
        else:
            print('INFO: opened fields.nc for writing FMC_GC only.')

    test_mode = (cfg['run_mode'] == 'test')
    tgt_station = None
    if cfg['run_mode'] == 'test':
        print('INFO: running in TEST mode! Will perform leave-one-out tesing.')
        tgt_station_id = cfg['target_station_id']
        diagnostics().push('test_station_id', tgt_station_id)
    elif cfg['run_mode'] == 'production':
        print('INFO: running in PRODUCTION mode! Using all observation stations.')
    else:
        error('FATAL: invalid run mode! Must be "test" or "production".')

    # determine simulation times
    tm_start = parse_datetime(cfg['start_time']) if cfg['start_time'] is not None else tm[0]
    tm_end = parse_datetime(cfg['end_time']) if cfg['end_time'] is not None else tm[-1]

    # if the required start time or end time are outside the simulation domain, exit with an error
    if tm_start < tm[0] or tm_end > tm[-1]:
        print('FATAL: invalid time range, required [%s-%s], availble [%s-%s]' %
              (str(tm_start), str(tm_end), str(tm[0]), str(tm[-1])))
        sys.exit(2)

    print('INFO: time limits are %s to %s\nINFO: simulation is from %s to %s' %
          (str(tm_start), str(tm_end), str(tm[0]), str(tm[-1])))

    # retrieve dynamic covariates (the per-time mean removal below is disabled)
    T2 = wrf_data['T2']
    #T2 -= np.mean(np.mean(T2,axis=0),axis=0)[np.newaxis,np.newaxis,:]

    PSFC = wrf_data['PSFC']
    #PSFC -= np.mean(np.mean(PSFC,axis=0),axis=0)[np.newaxis,np.newaxis,:]

    # use log(rain + 1) to prevent wild trend surface model predictions when stations see little rain
    # but elsewhere there is too much rain
    # without this, numerical errors in trend surface model may pop up
    # (the thresholding of very light rain below is disabled)
    rain = wrf_data['RAIN']
    #rain[rain < 0.01] = 0.0
    rain = np.log(rain + 1.0)

    # moisture equilibria are now computed from averaged Q,P,T at beginning and end of period
    Ed, Ew = wrf_data.get_moisture_equilibria()

    ### Load observation data from the stations

    # compute the diagonal distance between grid points
    grid_dist_km = great_circle_distance(lon[0, 0], lat[0, 0], lon[1, 1], lat[1, 1])
    print('INFO: diagonal distance in grid is %g' % grid_dist_km)

    # load station data from files
    with open(cfg['station_list_file'], 'r') as f:
        si_list = f.read().split('\n')

    # drop blank lines and '#' comments; build a real list because len(si_list) is needed below
    si_list = [code for code in (line.strip() for line in si_list)
               if len(code) > 0 and code[0] != '#']

    # for each station id, load the station
    stations = []
    for code in si_list:
        mws = MesoWestStation(code)
        mws.load_station_info(os.path.join(cfg["station_info_dir"], "%s.info" % code))
        mws.register_to_grid(wrf_data)
        # only stations closer to a grid point than half the grid diagonal are accepted
        if mws.get_dist_to_grid() < grid_dist_km / 2.0:
            print('Station %s: lat %g lon %g nearest grid pt %s lat %g lon %g dist_to_grid %g' %
                  (code, mws.lat, mws.lon, str(mws.grid_pt), lat[mws.grid_pt], lon[mws.grid_pt], mws.dist_grid_pt))
            mws.load_station_data(os.path.join(cfg["station_data_dir"], "%s.obs" % code))
            if test_mode and mws.get_id() == tgt_station_id:
                # the target station is held out of the assimilated data pool
                tgt_station = mws
                print('INFO: in test mode, targeting station %s (removed from data pool).' % tgt_station_id)
                diagnostics().push("test_ngp", mws.get_nearest_grid_point())
            else:
                stations.append(mws)

    print('Loaded %d stations (discarded %d stations, too far from grid).' %
          (len(stations), len(si_list) - len(stations)))

    if test_mode and tgt_station is None:
        error('FATAL: in test mode, a station was removed that was not among accepted stations.')

    # build the observation data
    obs_data_fm10 = build_observation_data(stations, 'FM')

    # build target data if in test mode
    tgt_obs_fm10 = None
    test_ngp = None
    if test_mode:
        test_ngp = tgt_station.get_nearest_grid_point()
        tgt_obs_fm10 = build_observation_data([tgt_station], 'FM')

    ### Initialize model and visualization

    # construct initial conditions from timestep 0
    E = 0.5 * (Ed[0, :, :] + Ew[0, :, :])

    # set up parameters
    Nk = 4  # we simulate 4 types of fuel
    Q = np.diag(cfg['Q'])
    P0 = np.diag(cfg['P0'])
    Tk = np.array([1.0, 10.0, 100.0, 1000.0]) * 3600
    # NOTE(review): timedelta.seconds ignores whole days; this presumably
    # assumes WRF output steps shorter than one day - confirm.
    dt = (tm[1] - tm[0]).seconds
    print("INFO: Computed timestep from WRF is is %g seconds." % dt)
    Kg = np.zeros((dom_shape[0], dom_shape[1], len(Tk) + 2))

    # preprocess all static covariates
    cov_ids = cfg['covariates']
    Xd3 = len(cov_ids) + 1  # slot 0 is reserved for the model state itself
    X = np.zeros((dom_shape[0], dom_shape[1], Xd3))
    Xr = np.zeros((dom_shape[0], dom_shape[1], Xd3))
    static_covar_map = {"lon": lon - np.mean(lon),
                        "lat": lat - np.mean(lat),
                        "elevation": hgt - np.mean(hgt),
                        "constant": np.ones(dom_shape)}
    dynamic_covar_map = {"temperature": T2, "pressure": PSFC, "rain": rain}

    for i in range(1, Xd3):
        cov_id = cov_ids[i - 1]
        if cov_id in static_covar_map:
            print('INFO: found static covariate %s' % cov_id)
            Xr[:, :, i] = static_covar_map[cov_id]
        elif cov_id in dynamic_covar_map:
            print('INFO: found dynamic covariate %s' % cov_id)
        else:
            print('FATAL: unknown covariate %s encountered' % cov_id)
            sys.exit(2)

    print("INFO: there are %d covariates (including model state)" % Xd3)

    # retrieve assimilation time window
    assim_time_win = cfg['assimilation_time_window']

    # the probe point 86,205 is only reported when it lies inside the domain,
    # so that smaller domains do not fail with an IndexError here
    if E.shape[0] > 86 and E.shape[1] > 205:
        print('GMM init: equilibrium (%g,%g,%g) and at 86,205 %g' %
              (np.amin(E), np.mean(E), np.amax(E), E[86, 205]))
    else:
        print('GMM init: equilibrium (%g,%g,%g)' % (np.amin(E), np.mean(E), np.amax(E)))

    # replicate the equilibrium field into 4 fuel classes; the index array is
    # rebuilt for each model so that each gets its own freshly allocated array
    models = GridMoistureModel(E[:, :, np.newaxis][:, :, np.zeros((4,), dtype=int)], Tk, P0)
    models_na = GridMoistureModel(E[:, :, np.newaxis][:, :, np.zeros((4,), dtype=int)], Tk, P0)

    ###  Run model for each WRF timestep and assimilate data when available
    # narrow [t_start, t_end] to the requested time limits; t_start is at
    # least 1 because every step reads the equilibria at index t-1
    t_start, t_end = 1, len(tm) - 1
    while tm_start > tm[t_start]:
        t_start += 1
    while tm_end < tm[t_end]:
        t_end -= 1

    # the first FMC_GC value gets filled out with equilibria
    if cfg['write_fields']:
        for i in range(Nk):
            ncfmc_gc[0, i, :, :] = E

    print('INFO: running simulation from %s (%d) to %s (%d).' %
          (str(tm[t_start]), t_start, str(tm[t_end]), t_end))
    for t in range(t_start, t_end + 1):
        model_time = tm[t]
        print("INFO: time: %s, step: %d" % (str(model_time), t))

        diagnostics().push("mt", model_time)

        models_na.advance_model(Ed[t - 1, :, :], Ew[t - 1, :, :], rain[t - 1, :, :], dt, Q)
        models.advance_model(Ed[t - 1, :, :], Ew[t - 1, :, :], rain[t - 1, :, :], dt, Q)

        # extract fuel moisture contents [make a fresh copy every iteration!]
        f = models.get_state().copy()
        f_na = models_na.get_state().copy()

        # push 10-hr fuel state & variance of forecast
        if cfg['write_fields'] == 'all':
            ncfm10f[t, :, :] = models.get_state()[:, :, 1]
            ncfm10fV[t, :, :] = models.P[:, :, 1, 1]
            ncfm10na[t, :, :] = models_na.get_state()[:, :, 1]

        # examine the forecast fields and warn about values outside [0, 2.5]
        for i in range(4):
            diagnostics().push("f%d_summary" % i,
                               (t, np.amin(f[:, :, i]), np.mean(f[:, :, i]), np.amax(f[:, :, i])))
            if np.any(f[:, :, i] < 0.0):
                print("WARN: in field %d there were %d negative moisture values !" %
                      (i, np.count_nonzero(f[:, :, i] < 0.0)))
                ind = np.unravel_index(np.argmin(f[:, :, i]), f.shape[:2])
                print(models.P[ind[0], ind[1], :, :])
                print("Full model state at position %d,%d:" % (ind[0], ind[1]))
                print(models.m_ext[ind[0], ind[1], :])
            if np.any(f[:, :, i] > 2.5):
                print("WARN: in field %d there were %d moisture values above 2.5!" %
                      (i, np.count_nonzero(f[:, :, i] > 2.5)))
                ind = np.unravel_index(np.argmax(f[:, :, i]), f.shape[:2])
                print(models.P[ind[0], ind[1], :, :])
                print("Full model state at position %d,%d:" % (ind[0], ind[1]))
                print(models.m_ext[ind[0], ind[1], :])

        # NOTE(review): the test-mode check and the FMC_GC output below are
        # nested under this condition, so they are skipped when assimilation
        # is switched off - confirm this is intended.
        if cfg['assimilate']:

            # run Kriging on each observed fuel type
            Kfs, Vfs, fns = [], [], []
            for obs_data, fuel_ndx in [(obs_data_fm10, 1)]:

                # run the kriging subsystem and the Kalman update only if have valid observations
                valid_times = [z for z in obs_data.keys()
                               if abs(total_seconds(z - model_time)) < assim_time_win / 2.0]
                print('INFO: there are %d valid times at model time %s for fuel index %d' %
                      (len(valid_times), str(model_time), fuel_ndx))
                if len(valid_times) > 0:

                    # add model time as time when assimilation occurred
                    diagnostics().push("mta", model_time)

                    # retrieve observations for current time
                    obs_valid_now = []
                    for z in valid_times:
                        obs_valid_now.extend(obs_data[z])

                    print('INFO: model time %s, assimilating %d observations.' %
                          (str(model_time), len(obs_valid_now)))

                    # construct covariates for this time instant
                    X[:, :, 0] = f[:, :, fuel_ndx]
                    for i in range(1, Xd3):
                        cov_id = cov_ids[i - 1]
                        if cov_id in static_covar_map:
                            X[:, :, i] = Xr[:, :, i]
                        elif cov_id in dynamic_covar_map:
                            F = dynamic_covar_map[cov_id]
                            X[:, :, i] = F[t, :, :]
                        else:
                            error('FATAL: found unknown covariate %s' % cov_id)

                    # find differences (residuals) between observed measurements and nearest grid points
                    obs_vals = [o.get_value() for o in obs_valid_now]
                    obs_ngp = [o.get_nearest_grid_point() for o in obs_valid_now]
                    diagnostics().push("obs_vals", obs_vals)
                    diagnostics().push("obs_ngp", obs_ngp)

                    mod_vals = np.array([f[i, j, fuel_ndx] for i, j in obs_ngp])
                    mod_na_vals = np.array([f_na[i, j, fuel_ndx] for i, j in obs_ngp])
                    diagnostics().push("fm10f_rmse", np.mean((obs_vals - mod_vals)**2)**0.5)
                    diagnostics().push("fm10na_rmse", np.mean((obs_vals - mod_na_vals)**2)**0.5)

                    # krige observations to grid points
                    Kf_fn, Vf_fn = fit_tsm(obs_valid_now, X)
                    if np.count_nonzero(Kf_fn > 2.5) > 0:
                        rain_t = dynamic_covar_map['rain'][t, :, :]
                        print('WARN: in TSM found %d values over 2.5, %d of those had rain, clamped to 2.5' %
                              (np.count_nonzero(Kf_fn > 2.5),
                               np.count_nonzero(np.logical_and(Kf_fn > 2.5, rain_t > 0.0))))
                        Kf_fn[Kf_fn > 2.5] = 2.5
                    if np.count_nonzero(Kf_fn < 0.0) > 0:
                        print('WARN: in TSM found %d values under 0.0, clamped to 0.0' %
                              np.count_nonzero(Kf_fn < 0.0))
                        Kf_fn[Kf_fn < 0.0] = 0.0

                    krig_vals = np.array([Kf_fn[ngp] for ngp in obs_ngp])
                    diagnostics().push("assim_info",
                                       (t, fuel_ndx, obs_vals, krig_vals, mod_vals, mod_na_vals))
                    diagnostics().push("fm10_kriging_var", (t, np.mean(Vf_fn)))

                    if cfg['write_fields'] == 'all':
                        nctsmV[t, :, :] = Vf_fn

                    # append to storage for kriged fields in this time instant
                    Kfs.append(Kf_fn)
                    Vfs.append(Vf_fn)
                    fns.append(fuel_ndx)

            # if there were any observations, run the kalman update step
            if len(fns) > 0:
                NobsClasses = len(fns)

                # O holds the kriged fields, V their variances on the diagonal
                O = np.zeros((dom_shape[0], dom_shape[1], NobsClasses))
                V = np.zeros((dom_shape[0], dom_shape[1], NobsClasses, NobsClasses))

                for i in range(NobsClasses):
                    O[:, :, i] = Kfs[i]
                    V[:, :, i, i] = Vfs[i]

                # execute the Kalman update
                if len(fns) == 1:
                    models.kalman_update_single2(O, V, fns[0], Kg)
                else:
                    models.kalman_update(O, V, fns, Kg)

                # push new diagnostic outputs
                if cfg['write_fields'] == 'all':
                    ncKg[t, :, :] = Kg[:, :, 1]

                for i in range(4):
                    diagnostics().push("K%d_summary" % i,
                                       (t, np.amin(Kg[:, :, i]), np.mean(Kg[:, :, i]), np.amax(Kg[:, :, i])))
                    if np.any(models.get_state()[:, :, i] < 0.0):
                        print("WARN: in field %d there were %d negative moisture values !" %
                              (i, np.count_nonzero(models.get_state()[:, :, i] < 0.0)))
                        ind = np.unravel_index(np.argmin(models.get_state()[:, :, i]),
                                               models.get_state().shape[:2])
                        print(models.P[ind[0], ind[1], :, :])
                        # report the first observation class as plain scalars;
                        # %g cannot format whole array slices
                        print("TSM input at given position: value %g variance %g" %
                              (O[ind[0], ind[1], 0], V[ind[0], ind[1], 0, 0]))
                        print("Model state at given position:")
                        print(models.m_ext[ind[0], ind[1], :])

            # store post-assimilation (or forecast depending on whether observations were available) FM-10 state and variance
            if cfg['write_fields'] == 'all':
                ncfm10a[t, :, :] = models.get_state()[:, :, 1]
                ncfm10aV[t, :, :] = models.P[:, :, 1, 1]

            # we don't care if we assimilated or not, we always check our error on target station if in test mode
            if test_mode:
                valid_times = [z for z in tgt_obs_fm10.keys()
                               if abs(total_seconds(z - model_time)) < assim_time_win / 2.0]
                tgt_i, tgt_j = test_ngp
                # the prediction is taken from f, the forecast copied before any Kalman update
                diagnostics().push("test_pred", f[tgt_i, tgt_j, 1])
                if len(valid_times) > 0:
                    # this is our target observation [FIXME: this disregards multiple observations if multiple happen to be valid]
                    tgt_obs = tgt_obs_fm10[valid_times[0]][0]
                    obs = tgt_obs.get_value()
                    diagnostics().push("test_obs", obs)
                else:
                    diagnostics().push("test_obs", np.nan)

            # store data in wrf_file variable FMC_G
            if cfg['write_fields']:
                ncfmc_gc[t, :Nk, :, :] = np.transpose(models.get_state()[:, :, :Nk], axes=[2, 0, 1])

    # store the diagnostics in a binary file when done
    diagnostics().dump_store(os.path.join(cfg['output_dir'], 'diagnostics.bin'))

    # close the netCDF file (relevant if we did write into FMC_GC)
    if out_file is not None:
        out_file.close()
def run_module():
    """
    Run the fuel moisture model on a WRF file and write diagnostic plots.

    The configuration is read from the file named by ``sys.argv[1]``; the file
    content is evaluated as a Python expression and must yield a dictionary.
    Keys read by this function: 'output_dir', 'input_file', 'Nt' (optional),
    'station_data_dir', 'station_list_file', 'Q', 'P0' and 'lock_gamma'.

    For every WRF time step from 1 to Nt-1 the function advances two grids of
    CellMoistureModel objects (one that receives Kalman updates and one that
    never does), kriges the observations available for that time (if any),
    applies the Kalman update to the first grid and saves a multi-panel
    snapshot 'moisture_model_tNNN.png'.  After the loop the diagnostics are
    stored in 'diagnostics.bin' and summary plots are saved.  All output is
    written into cfg['output_dir'].

    Takes no arguments and returns None.
    """
    # read in configuration file to execute run
    print("Reading configuration from [%s]" % sys.argv[1])

    # SECURITY NOTE: eval() executes whatever code the configuration file
    # contains, so only configuration files from a trusted source may be used.
    with open(sys.argv[1]) as f:
        cfg = eval(f.read())

    out_dir = cfg['output_dir']

    def _save_figure(file_name):
        # every figure produced by this run lands in the output directory
        plt.savefig(os.path.join(out_dir, file_name))

    def _plot_tag_series(tag, title, file_name):
        # plot the (step, value) pairs stored under one diagnostics tag
        series = diagnostics().pull(tag)
        plt.figure()
        plt.plot([d[0] for d in series], [d[1] for d in series], 'ro-')
        plt.title(title)
        _save_figure(file_name)

    # ensure output path exists
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    # configure diagnostics
    init_diagnostics(
        os.path.join(out_dir, 'moisture_model_v1_diagnostics.txt'))

    # Error covariance matrix condition number in kriging
    diagnostics().configure_tag("skdm_cov_cond", False, True, True)

    # Assimilation parameters
    diagnostics().configure_tag("assim_K0", False, True, True)
    diagnostics().configure_tag("assim_K1", True, True, True)
    diagnostics().configure_tag("assim_data", False, False, True)

    diagnostics().configure_tag("fm10_model_var", False, True, True)
    diagnostics().configure_tag("fm10_kriging_var", False, True, True)

    ### Load and preprocess WRF model data

    # load WRF data
    wrf_data = WRFModelData(cfg['input_file'], tz_name='US/Mountain')

    # read in spatial and temporal extent of WRF variables
    lat, lon = wrf_data.get_lats(), wrf_data.get_lons()
    tm = wrf_data.get_gmt_times()

    # an absent or None 'Nt' means "process every available time step"
    # (dict.get replaces dict.has_key, which only exists in Python 2)
    Nt = cfg.get('Nt')
    if Nt is None:
        Nt = len(tm)
    dom_shape = lat.shape

    # retrieve the rain variable
    rain = wrf_data['RAIN']

    # moisture equilibria are now computed from averaged Q,P,T at beginning and end of period
    Ed, Ew = wrf_data.get_moisture_equilibria()

    ### Load observation data from the stations

    # load station codes from the list file, skipping blank lines and
    # lines that start with '#'
    list_path = os.path.join(cfg['station_data_dir'], cfg['station_list_file'])
    with open(list_path, 'r') as f:
        si_list = [line.strip() for line in f.read().split('\n')]
    si_list = [code for code in si_list if len(code) > 0 and code[0] != '#']

    # for each station id, load the station
    stations = []
    for code in si_list:
        mws = MesoWestStation(code)
        mws.load_station_info(
            os.path.join(cfg["station_data_dir"], "%s.info" % code))
        mws.register_to_grid(wrf_data)
        mws.load_station_data(
            os.path.join(cfg["station_data_dir"], "%s.obs" % code))
        stations.append(mws)

    print('Loaded %d stations.' % len(stations))

    # check stations for nans; a real list is built because len() is needed
    stations = [s for s in stations if MesoWestStation.data_ok(s)]
    print('Have %d stations with complete data.' % len(stations))

    # build the observation data
    # NOTE: the call to build_observation_data(stations, 'FM') is disabled in
    # this version, so the dictionary stays empty and the kriging/Kalman
    # branch below is only entered once it is populated again.
    obs_data_fm10 = {}

    ### Initialize model and visualization

    # fixed upper colour limit for the moisture panels
    maxE = 0.5

    # construct initial conditions from timestep 1 (because Ed/Ew at zero are zero)
    E = 0.5 * (Ed[1, :, :] + Ew[1, :, :])

    # set up parameters
    Q = np.eye(9) * cfg['Q']
    P0 = np.eye(9) * cfg['P0']

    # timedelta.seconds alone drops whole days, so add them back explicitly
    step = tm[1] - tm[0]
    dt = step.days * 86400 + step.seconds
    print("INFO: Computed timestep from WRF is is %g seconds." % dt)

    # fields that are plotted at every step; they keep their previous content
    # on steps without observations
    mV = np.zeros_like(E)
    Kf_fn = np.zeros_like(E)
    Vf_fn = np.zeros_like(E)
    mid = np.zeros_like(E)
    Kg = np.zeros((dom_shape[0], dom_shape[1], 9))

    # initialize the mean field model (default fit is 1.0 of equilibrium before new information comes in)
    mfm = MeanFieldModel(cfg['lock_gamma'])

    # construct model grid using standard fuel parameters
    # (dtype=object replaces the np.object alias removed from recent NumPy)
    Tk = np.array([1.0, 10.0, 100.0]) * 3600
    models = np.zeros(dom_shape, dtype=object)
    models_na = np.zeros_like(models)
    for p in np.ndindex(dom_shape):
        models[p] = CellMoistureModel((lat[p], lon[p]), 3, E[p], Tk, P0=P0)
        models_na[p] = CellMoistureModel((lat[p], lon[p]), 3, E[p], Tk, P0=P0)

    m = None
    plt.figure(figsize=(12, 8))

    ###  Run model for each WRF timestep and assimilate data when available
    for t in range(1, Nt):
        model_time = tm[t]
        print("INFO: time: %s, step: %d" % (str(model_time), t))

        # run the model update with the forcing of the previous time step
        for p in np.ndindex(dom_shape):
            i, j = p
            models[p].advance_model(Ed[t - 1, i, j], Ew[t - 1, i, j],
                                    rain[t - 1, i, j], dt, Q)
            models_na[p].advance_model(Ed[t - 1, i, j], Ew[t - 1, i, j],
                                       rain[t - 1, i, j], dt, Q)

        # gather the forecast state of both grids and the variance of the
        # second state component (the one assimilated as fuel_ndx 1 below)
        f = np.zeros((dom_shape[0], dom_shape[1], 3))
        f_na = np.zeros((dom_shape[0], dom_shape[1], 3))
        for p in np.ndindex(dom_shape):
            f[p[0], p[1], :] = models[p].get_state()[:3]
            f_na[p[0], p[1], :] = models_na[p].get_state()[:3]
            mV[p] = models[p].get_state_covar()[1, 1]
            mid[p] = models[p].get_model_ids()[1]

        diagnostics().push("fm10_model_var", (t, np.mean(mV)))

        # run Kriging on each observed fuel type
        Kf = []
        Vf = []
        fn = []
        for obs_data, fuel_ndx in [(obs_data_fm10, 1)]:

            # run the kriging subsystem and the Kalman update only if we have observations
            if model_time not in obs_data:
                continue

            # retrieve observations for current time
            obs_t = obs_data[model_time]

            # fit the current estimation of the moisture field to the data
            base_field = f[:, :, fuel_ndx]
            mfm.fit_to_data(base_field, obs_t)

            # observed values and the model values (with and without
            # assimilation) at the grid points nearest to the observations
            obs_vals = np.array([o.get_value() for o in obs_t])
            mod_vals = np.array(
                [base_field[o.get_nearest_grid_point()] for o in obs_t])
            mod_na_vals = np.array([
                f_na[:, :, fuel_ndx][o.get_nearest_grid_point()]
                for o in obs_t
            ])

            # predict the moisture field using observed fuel type
            predicted_field = mfm.predict_field(base_field)

            # krige observations to grid points
            Kf_fn, Vf_fn = trend_surface_model_kriging(
                obs_t, wrf_data, predicted_field)

            krig_vals = np.array(
                [Kf_fn[o.get_nearest_grid_point()] for o in obs_t])
            diagnostics().push(
                "assim_data",
                (t, fuel_ndx, obs_vals, krig_vals, mod_vals, mod_na_vals))
            plot_model_snapshot(cfg, tm, t, fuel_ndx, obs_vals, krig_vals,
                                mod_vals, mod_na_vals)

            diagnostics().push("fm10_kriging_var", (t, np.mean(Vf_fn)))

            # append to storage for kriged fields in this time instant
            Kf.append(Kf_fn)
            Vf.append(Vf_fn)
            fn.append(fuel_ndx)

        # if there were any observations, run the kalman update step
        if len(fn) > 0:
            Nobs = len(fn)
            # run the kalman update in each model independently
            for p in np.ndindex(dom_shape):
                O = np.zeros((Nobs, ))
                V = np.zeros((Nobs, Nobs))

                # kriged value and kriging variance at this position, one
                # entry per observed fuel type (diagonal covariance)
                for i in range(Nobs):
                    O[i] = Kf[i][p]
                    V[i, i] = Vf[i][p]

                # execute the Kalman update and keep the first gain column
                Kp = models[p].kalman_update(O, V, fn)
                Kg[p[0], p[1], :] = Kp[:, 0]

            # push new diagnostic outputs
            diagnostics().push("assim_K0", (t, np.mean(Kg[:, :, 0])))
            diagnostics().push("assim_K1", (t, np.mean(Kg[:, :, 1])))

        # re-read the (possibly updated) state for visualization
        f = np.zeros((dom_shape[0], dom_shape[1], 3))
        for p in np.ndindex(dom_shape):
            f[p[0], p[1], :] = models[p].get_state()[:3]

        # (subplot slot, field, title, upper colour limit) for each panel
        panels = [
            (1, f[:, :, 0], '1-hr fuel', maxE),
            (2, f[:, :, 1], '10-hr fuel', maxE),
            (3, f_na[:, :, 1], '10hr fuel - no assim', maxE),
            (4, Kg[:, :, 0], 'Kalman gain for 1-hr fuel', 3.0),
            (5, Kg[:, :, 1], 'Kalman gain for 10-hr fuel', 1.0),
            (6, Kf_fn, 'Kriging field', maxE),
            (7, mid, 'Model ids', 5.0),
            (8, Vf_fn, 'Kriging variance', np.max(Vf_fn)),
        ]

        plt.clf()
        for slot, field, title, upper in panels:
            plt.subplot(3, 3, slot)
            render_spatial_field_fast(m, lon, lat, field, title)
            plt.clim([0.0, upper])
            plt.axis('off')
            plt.colorbar()

        _save_figure('moisture_model_t%03d.png' % t)

    # store the diagnostics in a binary file
    diagnostics().dump_store(os.path.join(out_dir, 'diagnostics.bin'))

    # make a plot of gammas
    plt.figure()
    plt.plot(diagnostics().pull('mfm_gamma'), 'bo-')
    plt.title('Mean field model - gamma')
    _save_figure('plot_gamma.png')

    plt.figure()
    plt.plot(diagnostics().pull('skdm_cov_cond'))
    plt.title('Condition number of covariance matrix')
    _save_figure('plot_sigma_cond.png')

    # make a plot for each substation
    plt.figure()
    D = diagnostics().pull("assim_data")
    if len(D) > 0:
        # the time axis is shared by all stations
        t_i = [o[0] for o in D]
        for i in range(len(stations)):
            plt.clf()
            # get data for the i-th station
            obs_i = [o[2][i] for o in D]
            krig_i = [o[3][i] for o in D]
            mod_i = [o[4][i] for o in D]
            mod_na_i = [o[5][i] for o in D]
            # the y-limit must cover all four plotted series
            mx = max(max(obs_i), max(mod_i), max(krig_i), max(mod_na_i))
            plt.plot(t_i, obs_i, 'ro')
            plt.plot(t_i, krig_i, 'bo-')
            plt.plot(t_i, mod_i, 'kx-', linewidth=1.5)
            plt.plot(t_i, mod_na_i, 'mx-')
            plt.ylim([0.0, 1.1 * mx])
            plt.legend(['Obs.', 'Kriged', 'Model', 'NoAssim'])
            plt.title('Station observations fit to model and kriging field')
            _save_figure('station%02d.png' % (i + 1))
    else:
        # max() of an empty series would raise and abort the remaining plots
        print("INFO: no assimilation data recorded, skipping station plots.")

    # NOTE(review): the tags 'fm10_model_residual_var', 'obs_residual_var',
    # 'mfm_gamma' and 'mfm_mape' are neither configured nor pushed in this
    # function; presumably other modules fill them - confirm.
    _plot_tag_series("assim_K1", 'Average Kalman gain',
                     'plot_kalman_gain_10hr.png')
    _plot_tag_series("assim_K0", 'Average Kalman gain',
                     'plot_kalman_gain_1hr.png')
    _plot_tag_series("fm10_model_var", 'Average fm10 model variance',
                     'plot_fm10_model_variance.png')
    _plot_tag_series("fm10_model_residual_var",
                     'Average fm10 model residual variance',
                     'plot_fm10_model_residual_variance.png')
    _plot_tag_series("fm10_kriging_var", 'Kriging field variance',
                     'plot_kriging_variance.png')
    _plot_tag_series("obs_residual_var", 'Observation residual variance',
                     'plot_observation_residual_variance.png')

    plt.figure()
    plt.plot(diagnostics().pull("mfm_mape"), 'ro-', linewidth=2)
    plt.title('Mean absolute prediction error of station data')
    _save_figure('plot_station_mape.png')
Example #8
0
        'standardize': True
    }

    # cfg = { 'station_info_dir' : '../real_data/witch_creek',
    #         'station_obs_dir' : '../real_data/witch_creek',
    #         'station_list_file' : '../real_data/witch_creek/station_list',
    #         'wrf_data_file' : '../real_data/witch_creek/wrf20071021_witchcreek_all.nc',
    #         'assimilation_window' : 3600,
    #         'output_dir' : 'results_wc',
    #         'max_dist' : 200.0,
    #         'bin_width' : 20,
    #         'standardize' : False
    #       }

    # load the smallest domain
    wrf_data = WRFModelData(cfg['wrf_data_file'], ['T2', 'PSFC', 'Q2', 'HGT'])

    # read in vars
    lon, lat = wrf_data.get_lons(), wrf_data.get_lats()
    hgt = wrf_data['HGT']
    tm = wrf_data.get_gmt_times()
    Nt = len(tm)

    print("Loaded %d timestamps from WRF." % Nt)

    # load station data from files
    with open(cfg['station_list_file'], 'r') as f:
        si_list = f.read().split('\n')

    si_list = filter(lambda x: len(x) > 0 and x[0] != '#',
                     map(string.strip, si_list))
def run_module():
    """
    Run the fuel moisture model with station data assimilation and plot results.

    The configuration is read from the file named by ``sys.argv[1]``; the file
    content is evaluated as a Python expression and must yield a dictionary.
    Keys read by this function: 'output_dir', 'input_file', 'lock_gamma' and
    'kriging_strategy' (either 'uk' or 'tsm').

    For every time step from 1 to Nt-1 the function advances two grids of
    CellMoistureModel objects (one that receives Kalman updates and one that
    never does), kriges the fm10 observations available for that time using
    the configured strategy, applies the Kalman update to the first grid and
    saves a nine-panel snapshot 'moisture_model_tNNN.png'.  After the loop it
    writes 'gamma.txt', summary plots and 'diagnostics.bin', and finally calls
    avconv to encode the snapshots as 'video.mp4'.  All output is written into
    cfg['output_dir'].

    Takes no arguments and returns None.

    Raises:
        ValueError: if cfg['kriging_strategy'] is neither 'uk' nor 'tsm'
            (raised at the first time step that has observations).
    """
    # read in configuration file to execute run
    print("Reading configuration from [%s]" % sys.argv[1])

    # SECURITY NOTE: eval() executes whatever code the configuration file
    # contains, so only configuration files from a trusted source may be used.
    with open(sys.argv[1]) as f:
        cfg = eval(f.read())

    out_dir = cfg['output_dir']

    def _save_figure(file_name):
        # every figure produced by this run lands in the output directory
        plt.savefig(os.path.join(out_dir, file_name))

    def _plot_tag_series(tag, title, file_name):
        # plot the (step, value) pairs stored under one diagnostics tag
        series = diagnostics().pull(tag)
        plt.figure()
        plt.plot([d[0] for d in series], [d[1] for d in series], 'ro-')
        plt.title(title)
        _save_figure(file_name)

    # ensure output path exists
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    # configure diagnostics
    init_diagnostics(os.path.join(out_dir, 'moisture_model_v1_diagnostics.txt'))
    diagnostics().configure_tag("skdm_obs_res", False, True, True)
    diagnostics().configure_tag("skdm_cov_cond", False, True, True)

    diagnostics().configure_tag("assim_mV", False, False, True)
    diagnostics().configure_tag("assim_K0", False, False, True)
    diagnostics().configure_tag("assim_K1", False, False, True)
    diagnostics().configure_tag("assim_data", False, False, True)
    diagnostics().configure_tag("assim_mresV", False, False, True)

    diagnostics().configure_tag("kriging_variance", False, False, True)
    diagnostics().configure_tag("kriging_obs_res_var", False, False, True)

    print("INFO: input file is [%s]." % cfg['input_file'])
    wrf_data = WRFModelData(cfg['input_file'], tz_name='US/Pacific')

    # read in vars
    lat, lon = wrf_data.get_lats(), wrf_data.get_lons()
    tm = wrf_data.get_local_times()
    rain = wrf_data['RAIN']
    Ed, Ew = wrf_data.get_moisture_equilibria()

    # fixed upper colour limit for the moisture panels
    maxE = 0.3

    # obtain sizes
    Nt = rain.shape[0]
    dom_shape = lat.shape

    # load station data from files
    # NOTE(review): station_list and station_data_dir are not defined in this
    # function; presumably they are module-level settings - confirm.
    tz = pytz.timezone('US/Pacific')
    stations = [StationAdam() for s in station_list]
    for (s, sname) in zip(stations, station_list):
        s.load_station_data(os.path.join(station_data_dir, sname), tz)
        s.register_to_grid(wrf_data)
        s.set_measurement_variance('fm10', 0.05)

    # build the observation data structure indexed by time
    obs_data_fm10 = build_observation_data(stations, 'fm10', wrf_data, tm)

    # construct initial conditions
    E = 0.5 * (Ed[1, :, :] + Ew[1, :, :])

    # set up parameters
    Q = np.eye(9) * 0.001
    P0 = np.eye(9) * 0.01
    dt = 10.0 * 60

    # fields that are plotted or averaged at every step; they keep their
    # previous content on steps without observations
    mV = np.zeros_like(E)
    mresV = np.zeros_like(E)
    Kf_fn = np.zeros_like(E)
    Vf_fn = np.zeros_like(E)
    mid = np.zeros_like(E)
    Kg = np.zeros((dom_shape[0], dom_shape[1], 9))

    # initialize the mean field model (default fit is 1.0 of equilibrium before new information comes in)
    mfm = MeanFieldModel(cfg['lock_gamma'])

    # construct model grid using standard fuel parameters
    # (dtype=object replaces the np.object alias removed from recent NumPy)
    Tk = np.array([1.0, 10.0, 100.0]) * 3600
    models = np.zeros(dom_shape, dtype=object)
    models_na = np.zeros_like(models)
    for pos in np.ndindex(dom_shape):
        models[pos] = CellMoistureModel((lat[pos], lon[pos]), 3, E[pos], Tk, P0=P0)
        models_na[pos] = CellMoistureModel((lat[pos], lon[pos]), 3, E[pos], Tk, P0=P0)

    m = None

    plt.figure(figsize=(12, 8))

    # run model
    for t in range(1, Nt):
        model_time = tm[t]
        print("Time: %s, step: %d" % (str(model_time), t))

        # run the model update
        for pos in np.ndindex(dom_shape):
            i, j = pos
            models[pos].advance_model(Ed[t, i, j], Ew[t, i, j], rain[t, i, j], dt, Q)
            models_na[pos].advance_model(Ed[t, i, j], Ew[t, i, j], rain[t, i, j], dt, Q)

        # gather the forecast state of both grids and the variance of the
        # second state component; every array is indexed with the loop
        # variable p (indexing with the stale 'pos' of the loop above only
        # ever touched the last grid cell)
        f = np.zeros((dom_shape[0], dom_shape[1], 3))
        f_na = np.zeros((dom_shape[0], dom_shape[1], 3))
        for p in np.ndindex(dom_shape):
            f[p[0], p[1], :] = models[p].get_state()[:3]
            f_na[p[0], p[1], :] = models_na[p].get_state()[:3]
            mV[p] = models[p].get_state_covar()[1, 1]
            mid[p] = models[p].get_model_ids()[1]

        # run Kriging on each observed fuel type
        Kf = []
        Vf = []
        fn = []
        for obs_data, fuel_ndx in [(obs_data_fm10, 1)]:

            if model_time not in obs_data:
                continue

            obs_t = obs_data[model_time]

            # fit the current estimation of the moisture field to the data
            base_field = f[:, :, fuel_ndx]
            mfm.fit_to_data(base_field, obs_t)

            # find differences (residuals) between observed measurements and nearest grid points
            # use this to update observation residual standard deviation
            # NOTE(review): obs_re and mod_re are not defined in this
            # function; presumably module-level residual estimators - confirm.
            obs_vals = np.array([o.get_value() for o in obs_t])
            mod_vals = np.array(
                [base_field[o.get_nearest_grid_point()] for o in obs_t])
            mod_na_vals = np.array(
                [f_na[:, :, fuel_ndx][o.get_nearest_grid_point()] for o in obs_t])
            obs_re.update_with(obs_vals - mod_vals)
            diagnostics().push("kriging_obs_res_var",
                               (t, np.mean(obs_re.get_variance())))

            # retrieve the variance of the model field
            mresV = mod_re.get_variance()

            # krige data to observations
            strategy = cfg['kriging_strategy']
            if strategy == 'uk':
                Kf_fn, Vf_fn, gamma, mape = universal_kriging_data_to_model(
                    obs_t, obs_re.get_variance() ** 0.5, base_field,
                    wrf_data, mresV ** 0.5, t)

                # replace the stored gamma with the uk computed gamma
                diagnostics().pull("mfm_gamma")[-1] = gamma
                diagnostics().pull("mfm_mape")[-1] = mape
                print("uk: replaced mfm_gamma %g, mfm_mape %g" % (gamma, mape))

                # update the residuals estimator with the current
                mod_re.update_with(gamma * f[:, :, fuel_ndx] - Kf_fn)

            elif strategy == 'tsm':
                # predict the moisture field using observed fuel type
                predicted_field = mfm.predict_field(base_field)

                # run the tsm kriging estimator
                Kf_fn, Vf_fn = trend_surface_model_kriging(
                    obs_t, wrf_data, predicted_field)

                # update the model residual estimator and get current best estimate of variance
                mod_re.update_with(f[:, :, fuel_ndx] - predicted_field)

            else:
                # report the offending value (the misspelled key used here
                # before raised KeyError and hid this message)
                raise ValueError('Invalid kriging strategy [%s] in configuration.' % strategy)

            krig_vals = np.array(
                [Kf_fn[o.get_nearest_grid_point()] for o in obs_t])
            diagnostics().push(
                "assim_data",
                (t, fuel_ndx, obs_vals, krig_vals, mod_vals, mod_na_vals))
            plot_model_snapshot(cfg, tm, t, fuel_ndx, obs_vals, krig_vals,
                                mod_vals, mod_na_vals)

            # append to storage for kriged fields in this time instant
            Kf.append(Kf_fn)
            Vf.append(Vf_fn)
            fn.append(fuel_ndx)

        # if there were any observations, run the kalman update step
        if len(fn) > 0:
            Nobs = len(fn)
            # run the kalman update in each model independently
            for pos in np.ndindex(dom_shape):
                O = np.zeros((Nobs,))
                V = np.zeros((Nobs, Nobs))

                # kriged value and kriging variance at this position, one
                # entry per observed fuel type (diagonal covariance)
                for i in range(Nobs):
                    O[i] = Kf[i][pos]
                    V[i, i] = Vf[i][pos]

                # execute the Kalman update and keep the first gain column
                Kij = models[pos].kalman_update(O, V, fn)
                Kg[pos[0], pos[1], :] = Kij[:, 0]

        # re-read the (possibly updated) state for visualization
        f = np.zeros((dom_shape[0], dom_shape[1], 3))
        for p in np.ndindex(dom_shape):
            f[p[0], p[1], :] = models[p].get_state()[:3]

        # (subplot slot, field, title, upper colour limit) for each panel
        panels = [
            (1, f[:, :, 0], '1-hr fuel', maxE),
            (2, f[:, :, 1], '10-hr fuel', maxE),
            (3, f_na[:, :, 1], '10hr fuel - no assim', maxE),
            (4, Kg[:, :, 0], 'Kalman gain, fm1', 3.0),
            (5, Kg[:, :, 1], 'Kalman gain, fm10', 1.0),
            (6, Kf_fn, 'Kriging field', maxE),
            (7, mid, 'Model ids', 5.0),
            (8, Vf_fn, 'Kriging var', np.max(Vf_fn)),
            (9, mresV, 'fm10 model var', np.max(mresV)),
        ]

        plt.clf()
        for slot, field, title, upper in panels:
            plt.subplot(3, 3, slot)
            render_spatial_field_fast(m, lon, lat, field, title)
            plt.clim([0.0, upper])
            plt.axis('off')
            plt.colorbar()

        _save_figure('moisture_model_t%03d.png' % t)

        # push new diagnostic outputs
        diagnostics().push("assim_K0", (t, np.mean(Kg[:, :, 0])))
        diagnostics().push("assim_K1", (t, np.mean(Kg[:, :, 1])))
        diagnostics().push("assim_mV", (t, np.mean(mV)))
        diagnostics().push("assim_mresV", (t, np.mean(mresV)))
        diagnostics().push("kriging_variance", (t, np.mean(Vf_fn)))

    # store the gamma coefficients
    with open(os.path.join(out_dir, 'gamma.txt'), 'w') as gamma_file:
        gamma_file.write(str(diagnostics().pull('mfm_gamma')))

    # make a plot of gammas
    plt.figure()
    plt.plot(diagnostics().pull('mfm_gamma'))
    plt.title('Mean field model - gamma')
    _save_figure('plot_gamma.png')

    plt.figure()
    plt.plot(diagnostics().pull('skdm_cov_cond'))
    plt.title('Condition number of covariance matrix')
    _save_figure('plot_sigma_cond.png')

    # make a plot for each substation
    plt.figure()
    D = diagnostics().pull("assim_data")
    if len(D) > 0:
        # the time axis is shared by all stations
        t_i = [o[0] for o in D]
        for i in range(len(stations)):
            plt.clf()
            # get data for the i-th station
            obs_i = [o[2][i] for o in D]
            krig_i = [o[3][i] for o in D]
            mod_i = [o[4][i] for o in D]
            mod_na_i = [o[5][i] for o in D]
            # the y-limit must cover all four plotted series
            mx = max(max(obs_i), max(mod_i), max(krig_i), max(mod_na_i))
            plt.plot(t_i, obs_i, 'ro')
            plt.plot(t_i, krig_i, 'bo-')
            plt.plot(t_i, mod_i, 'kx-', linewidth=1.5)
            plt.plot(t_i, mod_na_i, 'mx-')
            plt.ylim([0.0, 1.1 * mx])
            plt.legend(['Obs.', 'Kriged', 'Model', 'NoAssim'])
            plt.title('Station observations fit to model and kriging field')
            _save_figure('station%02d.png' % (i + 1))
    else:
        # max() of an empty series would raise and abort the remaining plots
        print("INFO: no assimilation data recorded, skipping station plots.")

    _plot_tag_series("assim_K1", 'Average Kalman gain',
                     'plot_kalman_gain_10hr.png')
    _plot_tag_series("assim_K0", 'Average Kalman gain',
                     'plot_kalman_gain_1hr.png')
    _plot_tag_series("assim_mV", 'Average model variance',
                     'plot_fm10_model_variance.png')
    _plot_tag_series("assim_mresV", 'Average fm10 residual variance',
                     'plot_fm10_model_residual_variance.png')
    _plot_tag_series("kriging_variance", 'Kriging field variance',
                     'plot_kriging_variance.png')
    _plot_tag_series("kriging_obs_res_var", 'Observation residual variance',
                     'plot_observation_residual_variance.png')

    plt.figure()
    plt.plot(diagnostics().pull("mfm_mape"), 'ro-', linewidth=2)
    plt.title('Mean absolute prediction error of station data')
    _save_figure('plot_station_mape.png')

    diagnostics().dump_store(os.path.join(out_dir, 'diagnostics.bin'))

    # as a last step encode all the frames as video
    os.system("cd %s; avconv -qscale 1 -r 20 -b 9600 -i moisture_model_t%%03d.png video.mp4" % cfg['output_dir'])
Example #10
0

if __name__ == '__main__':
    # Script entry point: load the WRF model output and the MesoWest station
    # files found in station_data_dir.
    # NOTE(review): station_data_dir, wrf_data_file and fm_var_name are not
    # defined in this block; presumably module-level settings - confirm.

    init_diagnostics('results/examine_mesowest_station_data.log')

    # read the station list, one record per line, dropping blank lines
    # (str.strip in a comprehension replaces string.strip, which only exists
    # in Python 2)
    with open(os.path.join(station_data_dir, 'clean_stations'), 'r') as f:
        si_list = [line.strip() for line in f.read().split('\n')]
    si_list = [line for line in si_list if len(line) > 0]

    # load wrf model data
    wrf_data = WRFModelData(os.path.join(station_data_dir, wrf_data_file),
                            fields=['T2', 'Q2', 'PSFC', fm_var_name, 'FMC_EQUI'],
                            tz_name='GMT')
    lon, lat, tm = wrf_data.get_lons(), wrf_data.get_lats(), wrf_data.get_times()
    efms, wrf_equi = wrf_data[fm_var_name], wrf_data['FMC_EQUI']
    T2 = wrf_data['T2']

    print('Loaded %d times from WRF output (%s - %s).' % (len(tm), str(tm[0]), str(tm[-1])))

    # for each station record, build the station and load its data files;
    # the station code is the first comma-separated field of the record and
    # the data is split over the files <code>_1.xls ... <code>_7.xls
    stations = []
    for sinfo in si_list:
        code = sinfo.split(',')[0]
        mws = MesoWestStation(sinfo, wrf_data)
        for part in range(1, 8):
            mws.load_station_data(
                os.path.join(station_data_dir, '%s_%d.xls' % (code, part)))
        stations.append(mws)
Example #11
0
    # load required variables
    load_lst = list(varlst)
    if 'RAIN' in load_lst:
        load_lst.remove('RAIN')
        load_lst.extend(['RAINC', 'RAINNC' ])

    if 'RH' in load_lst:
        load_lst.remove('RH')
        load_lst.extend(['T2', 'PSFC', 'Q2'])

    load_lst = list(set(load_lst))
    print("INFO: loading the following variables %s" % str(load_lst))

    print("INFO: loading wrf model output data ...")
    w = WRFModelData(args.wrfout_file, load_lst)
    ts = w['GMT']

    N = len(ts)

    if 'RH' in varlst:
        w.compute_relative_humidity()

    print("INFO: processing variables %s" % str(varlst))

    print("INFO: loading county shapes ...")
    lats, lons = w.get_lats(), w.get_lons()
    m = setup_basemap_proj(lats, lons)
    counties = load_colorado_shapes(m)

    print("INFO: found %d times in wrfout." % len(ts))
Example #12
0
def run_module():
    """
    Run one data assimilation cycle and store the result next to the wrfinput.

    The configuration is read from the file named by ``sys.argv[1]`` and must
    provide the keys ``output_dir``, ``wrf_input``, ``observations`` and
    ``wrf_output_prev``.  If the previous forecast file exists and covers the
    wrfinput time stamp (see ``check_overlap``), data assimilation is run over
    that forecast; otherwise the model is initialized from equilibrium.  In
    both cases the resulting model state is written into the ``FMC_GC`` and
    ``FMEP`` variables of the wrfinput file and the state covariance is stored
    as ``P.nc`` in the same directory.

    Returns:
        0 after the state and the covariance have been stored.
    """
    # read in configuration file to execute run
    print("Reading configuration from [%s]" % sys.argv[1])

    # SECURITY NOTE: the configuration file is evaluated as a Python
    # expression, so it must come from a trusted source only.
    with open(sys.argv[1]) as f:
        cfg = eval(f.read())

    # init diagnostics
    init_diagnostics(
        os.path.join(cfg['output_dir'], 'moisture_model_v1_diagnostics.txt'))
    diagnostics().configure_tag("s2_eta_hat", True, True, True)
    diagnostics().configure_tag("kriging_rmse", True, True, True)
    diagnostics().configure_tag("kriging_beta", True, True, True)
    diagnostics().configure_tag("kriging_iters", False, True, True)
    diagnostics().configure_tag("kriging_subzero_s2_estimates", False, True,
                                True)

    # load the wrfinput file
    wrfin = WRFModelData(cfg['wrf_input'],
                         ['T2', 'Q2', 'PSFC', 'HGT', 'FMC_GC', 'FMEP'])
    lat, lon = wrfin.get_lats(), wrfin.get_lons()
    ts_now = wrfin['GMT'][0]
    dom_shape = lat.shape
    print('INFO: domain size is %d x %d grid points, wrfinput timestamp %s' %
          (dom_shape[0], dom_shape[1], str(ts_now)))
    print('INFO: domain extent is lats (%g to %g) lons (%g to %g).' %
          (np.amin(lat), np.amax(lat), np.amin(lon), np.amax(lon)))

    # compute the diagonal distance between grid points
    grid_dist_km = great_circle_distance(lon[0, 0], lat[0, 0], lon[1, 1],
                                         lat[1, 1])
    print('INFO: diagonal distance in grid is %g' % grid_dist_km)

    # load observations but discard those too far away from the grid nodes
    obss = load_raws_observations(cfg['observations'], lat, lon, grid_dist_km)
    fm10 = build_observation_data(obss)
    print('INFO: %d different time instances found in observations' %
          len(fm10))

    # if a previous cycle is available (i.e. the wrfoutput is a valid file)
    if os.path.exists(cfg['wrf_output_prev']) and check_overlap(
            cfg['wrf_output_prev'], ts_now):

        # load the model as a wrfout with all default variables
        wrfout = WRFModelData(cfg['wrf_output_prev'])
        outts = wrfout['GMT']
        print("INFO: previous forecast [%s - %s] exists, running DA till %s" %
              (str(outts[0]), str(outts[-1]), str(ts_now)))

        # run from the start until now
        # NOTE(review): assumes run_data_assimilation returns the same kind of
        # model object as init_from_equilibrium (get_state/get_state_covar).
        model = run_data_assimilation(wrfout, fm10, ts_now, cfg)

    else:

        print(
            "INFO: no previous forecast found, running DA from equilibrium at %s"
            % (str(ts_now)))
        # initialize from weather equilibrium and perform one DA step
        model = init_from_equilibrium(wrfin, fm10, ts_now, cfg)

    # Store the result for the current time instance in the wrfinput dataset.
    # Both branches share this code: the first three state fields go to FMC_GC
    # and fields 3:5 go to FMEP, each moved to the leading axis.
    fmcep = model.get_state()
    d = netCDF4.Dataset(cfg['wrf_input'], 'r+')
    try:
        d.variables['FMC_GC'][0, :3, :, :] = fmcep[:, :, :3].transpose(
            (2, 0, 1))
        d.variables['FMEP'][0, :, :, :] = fmcep[:, :, 3:5].transpose((2, 0, 1))
    finally:
        # always release the file, even if one of the writes fails
        d.close()

    # store the covariance matrix alongside the wrfinput file
    store_covariance_matrix(
        model.get_state_covar(),
        os.path.join(os.path.dirname(cfg['wrf_input']), 'P.nc'))

    return 0
Example #13
0
def run_module():
  """
  Run one data assimilation cycle and store the result next to the wrfinput.

  The configuration is read from the file named by ``sys.argv[1]`` and must
  provide the keys ``output_dir``, ``wrf_input``, ``observations`` and
  ``wrf_output_prev``.  If the previous forecast file exists and covers the
  wrfinput time stamp (see ``check_overlap``), data assimilation is run over
  that forecast; otherwise the model is initialized from equilibrium.  Either
  way the model state ends up in the ``FMC_GC`` and ``FMEP`` variables of the
  wrfinput file and the state covariance in ``P.nc`` in the same directory.

  Returns:
    0 after the state and the covariance have been stored.
  """
  # read in configuration file to execute run
  print("Reading configuration from [%s]" % sys.argv[1])

  # SECURITY NOTE: the configuration file is evaluated as a Python expression,
  # so it must come from a trusted source only.
  with open(sys.argv[1]) as f:
    cfg = eval(f.read())

  # init diagnostics
  init_diagnostics(os.path.join(cfg['output_dir'], 'moisture_model_v1_diagnostics.txt'))
  diagnostics().configure_tag("s2_eta_hat", True, True, True)
  diagnostics().configure_tag("kriging_rmse", True, True, True)
  diagnostics().configure_tag("kriging_beta", True, True, True)
  diagnostics().configure_tag("kriging_iters", False, True, True)
  diagnostics().configure_tag("kriging_subzero_s2_estimates", False, True, True)

  # load the wrfinput file
  wrfin = WRFModelData(cfg['wrf_input'], ['T2', 'Q2', 'PSFC', 'HGT', 'FMC_GC', 'FMEP'])
  lat, lon = wrfin.get_lats(), wrfin.get_lons()
  ts_now = wrfin['GMT'][0]
  dom_shape = lat.shape
  print('INFO: domain size is %d x %d grid points, wrfinput timestamp %s' % (dom_shape[0], dom_shape[1], str(ts_now)))
  print('INFO: domain extent is lats (%g to %g) lons (%g to %g).' % (np.amin(lat),np.amax(lat),np.amin(lon),np.amax(lon)))

  # compute the diagonal distance between grid points
  grid_dist_km = great_circle_distance(lon[0,0], lat[0,0], lon[1,1], lat[1,1])
  print('INFO: diagonal distance in grid is %g' % grid_dist_km)

  # load observations but discard those too far away from the grid nodes
  obss = load_raws_observations(cfg['observations'], lat, lon, grid_dist_km)
  fm10 = build_observation_data(obss)
  print('INFO: %d different time instances found in observations' % len(fm10))

  # if a previous cycle is available (i.e. the wrfoutput is a valid file)
  if os.path.exists(cfg['wrf_output_prev']) and check_overlap(cfg['wrf_output_prev'],ts_now):

    # load the model as a wrfout with all default variables
    wrfout = WRFModelData(cfg['wrf_output_prev'])
    outts = wrfout['GMT']
    print("INFO: previous forecast [%s - %s] exists, running DA till %s" % (str(outts[0]),str(outts[-1]),str(ts_now)))

    # run from the start until now
    # NOTE(review): assumes run_data_assimilation returns the same kind of
    # model object as init_from_equilibrium (get_state/get_state_covar).
    model = run_data_assimilation(wrfout, fm10, ts_now, cfg)

  else:

    print("INFO: no previous forecast found, running DA from equilibrium at %s" % (str(ts_now)))
    # initialize from weather equilibrium and perform one DA step
    model = init_from_equilibrium(wrfin, fm10, ts_now, cfg)

  # Store the result for the current time instance in the wrfinput dataset.
  # Both branches share this code: the first three state fields go to FMC_GC
  # and fields 3:5 go to FMEP, each moved to the leading axis.
  fmcep = model.get_state()
  d = netCDF4.Dataset(cfg['wrf_input'], 'r+')
  try:
    d.variables['FMC_GC'][0,:3,:,:] = fmcep[:,:,:3].transpose((2,0,1))
    d.variables['FMEP'][0,:,:,:] = fmcep[:,:,3:5].transpose((2,0,1))
  finally:
    # always release the file, even if one of the writes fails
    d.close()

  # store the covariance matrix alongside the wrfinput file
  store_covariance_matrix(model.get_state_covar(), os.path.join(os.path.dirname(cfg['wrf_input']), 'P.nc'))

  return 0
def run_module():
    """
    Kriging test driver.

    Loads a fixed WRF output file, builds fuel moisture observations from the
    module-level station list and, for every model time (from step 1 on) that
    has observations, fits the mean field model to the equilibrium field,
    kriges the observations and saves a 2x2 figure with the fitted
    equilibrium, the kriging field, the kriging variance and the residuals.
    """
    # diagnostics sink and the tags recorded by this test
    init_diagnostics("results/kriging_test_diagnostics.txt")
    for tag in ("skdm_obs_res", "skdm_obs_res_mean"):
        diagnostics().configure_tag(tag, True, True, True)

    wrf_data = WRFModelData('../real_data/witch_creek/realfire03_d04_20071022.nc')

    # grid coordinates, times and fields taken from the model data
    lat = wrf_data.get_lats()
    lon = wrf_data.get_lons()
    tm = wrf_data.get_times()
    rain = wrf_data['RAINNC']
    Ed, Ew = wrf_data.get_moisture_equilibria()

    # sizes: number of time steps, grid shape, number of grid points
    Nt = rain.shape[0]
    dom_shape = lat.shape
    locs = np.prod(dom_shape)

    # stations are read from files, matched to the grid and turned into
    # observation records indexed by time
    tz = pytz.timezone('US/Pacific')
    stations = []
    for s in station_list:
        stations.append(Station(os.path.join(station_data_dir, s), tz, wrf_data))
    obs_data = build_observation_data(stations, 'fuel_moisture', wrf_data)

    # mean field model fitted anew at every observation time
    mfm = MeanFieldModel()

    # residual standard deviations: one per grid point, one per station
    mod_res_std = 0.05 * np.ones_like(Ed[0, :, :])
    obs_res_std = 0.1 * np.ones((len(stations),))

    # basemap covering the bounding box of the grid
    lat_lo, lat_hi = np.min(lat), np.max(lat)
    lon_lo, lon_hi = np.min(lon), np.max(lon)
    m = Basemap(llcrnrlon=lon_lo, llcrnrlat=lat_lo,
                urcrnrlon=lon_hi, urcrnrlat=lat_hi,
                projection='mill')

    plt.figure(figsize=(10, 6))

    def show_panel(slot, field, title):
        # select a cell of the 2x2 layout and draw the field into it
        plt.subplot(2, 2, slot)
        render_spatial_field(m, lon, lat, field, title)

    # walk through the model times; frames are numbered from 1
    frame_no = 1
    for t in range(1, Nt):
        model_time = wrf_data.get_times()[t]
        E = 0.5 * (Ed[t, :, :] + Ew[t, :, :])

        # nothing to krige unless there is an observation at this time
        if model_time not in obs_data:
            continue

        print("Time: %s, step: %d" % (str(model_time), t))

        mfm.fit_to_data(E, obs_data[model_time])
        Efit = mfm.predict_field(E)

        # krige data to observations
        K, V = simple_kriging_data_to_model(obs_data[model_time], obs_res_std,
                                            Efit, wrf_data, mod_res_std, t)

        plt.clf()

        show_panel(1, Efit, 'Equilibrium')
        plt.clim([0.0, 0.2])
        plt.colorbar()

        show_panel(2, K, 'Kriging field')
        plt.clim([0.0, 0.2])
        plt.colorbar()

        show_panel(3, V, 'Kriging variance')
        plt.clim([0.0, np.max(V)])
        plt.colorbar()

        # the residual panel keeps the default color limits
        show_panel(4, K - Efit, 'Kriging vs. mean field residuals')
        plt.colorbar()

        plt.savefig('model_outputs/kriging_test_t%03d.png' % (frame_no))
        frame_no += 1
def run_module():
    """
    Run the cell moisture models over a WRF simulation with data assimilation.

    The configuration is read from the file named by ``sys.argv[1]``; the keys
    used here are ``output_dir``, ``input_file``, ``lock_gamma`` and
    ``kriging_strategy`` (``'uk'`` or ``'tsm'``).  For every time step from 1
    on, two grids of cell models are advanced (one receives Kalman updates,
    the other is a no-assimilation reference).  Whenever fm10 observations
    exist for the step they are kriged onto the grid and fed to the Kalman
    update.  A 3x3 summary figure is saved per step, followed by summary
    plots, a dump of the diagnostics and an encoded video.

    The names ``station_list``, ``station_data_dir``, ``obs_re`` and
    ``mod_re`` are not defined in this function and are expected to exist at
    module level.

    Raises:
        ValueError: if observations are available for a time step and
            ``cfg['kriging_strategy']`` is neither ``'uk'`` nor ``'tsm'``.
    """
    # read in configuration file to execute run
    print("Reading configuration from [%s]" % sys.argv[1])

    # SECURITY NOTE: the configuration file is evaluated as a Python
    # expression, so it must come from a trusted source only.
    with open(sys.argv[1]) as f:
        cfg = eval(f.read())

    # ensure output path exists
    if not os.path.isdir(cfg['output_dir']):
        os.mkdir(cfg['output_dir'])

    # configure diagnostics
    init_diagnostics(
        os.path.join(cfg['output_dir'], 'moisture_model_v1_diagnostics.txt'))
    diagnostics().configure_tag("skdm_obs_res", False, True, True)
    diagnostics().configure_tag("skdm_cov_cond", False, True, True)

    for tag in ("assim_mV", "assim_K0", "assim_K1", "assim_data",
                "assim_mresV", "kriging_variance", "kriging_obs_res_var"):
        diagnostics().configure_tag(tag, False, False, True)

    print("INFO: input file is [%s]." % cfg['input_file'])
    wrf_data = WRFModelData(cfg['input_file'], tz_name='US/Pacific')

    # read in vars
    lat, lon = wrf_data.get_lats(), wrf_data.get_lons()
    tm = wrf_data.get_local_times()
    rain = wrf_data['RAIN']
    Ed, Ew = wrf_data.get_moisture_equilibria()

    # fixed upper color limit for the moisture panels
    maxE = 0.3

    # obtain sizes
    Nt = rain.shape[0]
    dom_shape = lat.shape

    # load station data from files
    tz = pytz.timezone('US/Pacific')
    stations = [StationAdam() for _ in station_list]
    for (s, sname) in zip(stations, station_list):
        s.load_station_data(os.path.join(station_data_dir, sname), tz)
        s.register_to_grid(wrf_data)
        s.set_measurement_variance('fm10', 0.05)

    # build the observation data structure indexed by time
    obs_data_fm10 = build_observation_data(stations, 'fm10', wrf_data, tm)

    # construct initial conditions
    E = 0.5 * (Ed[1, :, :] + Ew[1, :, :])

    # set up parameters
    Q = np.eye(9) * 0.001
    P0 = np.eye(9) * 0.01
    dt = 10.0 * 60
    mV = np.zeros_like(E)
    predicted_field = np.zeros_like(E)
    mresV = np.zeros_like(E)
    # Kf_fn/Vf_fn start as zero fields so the per-step figure can be drawn
    # before the first observation has been kriged
    Kf_fn = np.zeros_like(E)
    Vf_fn = np.zeros_like(E)
    mid = np.zeros_like(E)
    Kg = np.zeros((dom_shape[0], dom_shape[1], 9))
    cV12 = np.zeros_like(E)

    # initialize the mean field model (default fit is 1.0 of equilibrium before new information comes in)
    mfm = MeanFieldModel(cfg['lock_gamma'])

    # construct model grid using standard fuel parameters
    Tk = np.array([1.0, 10.0, 100.0]) * 3600
    # builtin `object` is used as dtype: the `np.object` alias no longer
    # exists in current NumPy releases
    models = np.zeros(dom_shape, dtype=object)
    models_na = np.zeros_like(models)
    for pos in np.ndindex(dom_shape):
        models[pos] = CellMoistureModel((lat[pos], lon[pos]),
                                        3,
                                        E[pos],
                                        Tk,
                                        P0=P0)
        models_na[pos] = CellMoistureModel((lat[pos], lon[pos]),
                                           3,
                                           E[pos],
                                           Tk,
                                           P0=P0)

    m = None

    plt.figure(figsize=(12, 8))

    # run model
    for t in range(1, Nt):
        model_time = tm[t]
        print("Time: %s, step: %d" % (str(model_time), t))

        # pre-compute equilibrium moisture to save a lot of time
        E = 0.5 * (Ed[t, :, :] + Ew[t, :, :])

        # run the model update
        for pos in np.ndindex(dom_shape):
            i, j = pos
            models[pos].advance_model(Ed[t, i, j], Ew[t, i, j], rain[t, i, j],
                                      dt, Q)
            models_na[pos].advance_model(Ed[t, i, j], Ew[t, i, j],
                                         rain[t, i, j], dt, Q)

        # prepare visualization data
        f = np.zeros((dom_shape[0], dom_shape[1], 3))
        f_na = np.zeros((dom_shape[0], dom_shape[1], 3))
        for p in np.ndindex(dom_shape):
            state_covar = models[p].get_state_covar()
            f[p[0], p[1], :] = models[p].get_state()[:3]
            f_na[p[0], p[1], :] = models_na[p].get_state()[:3]
            # index with the current cell `p`; the loop variable of the model
            # update above must not be reused here
            mV[p] = state_covar[1, 1]
            cV12[p] = state_covar[0, 1]
            mid[p] = models[p].get_model_ids()[1]

        # run Kriging on each observed fuel type
        Kf = []
        Vf = []
        fn = []
        for obs_data, fuel_ndx in [(obs_data_fm10, 1)]:

            if model_time in obs_data:

                # fit the current estimation of the moisture field to the data
                base_field = f[:, :, fuel_ndx]
                mfm.fit_to_data(base_field, obs_data[model_time])

                # find differences (residuals) between observed measurements and nearest grid points
                # use this to update observation residual standard deviation
                obs_vals = np.array(
                    [o.get_value() for o in obs_data[model_time]])
                mod_vals = np.array([
                    f[:, :, fuel_ndx][o.get_nearest_grid_point()]
                    for o in obs_data[model_time]
                ])
                mod_na_vals = np.array([
                    f_na[:, :, fuel_ndx][o.get_nearest_grid_point()]
                    for o in obs_data[model_time]
                ])
                # NOTE(review): obs_re and mod_re are not created in this
                # function - confirm they are defined at module level.
                obs_re.update_with(obs_vals - mod_vals)
                diagnostics().push("kriging_obs_res_var",
                                   (t, np.mean(obs_re.get_variance())))

                # retrieve the variance of the model field
                mresV = mod_re.get_variance()

                # krige data to observations
                if cfg['kriging_strategy'] == 'uk':
                    Kf_fn, Vf_fn, gamma, mape = universal_kriging_data_to_model(
                        obs_data[model_time],
                        obs_re.get_variance()**0.5, base_field, wrf_data,
                        mresV**0.5, t)
                    # replace the stored gamma with the uk computed gamma
                    diagnostics().pull("mfm_gamma")[-1] = gamma
                    diagnostics().pull("mfm_mape")[-1] = mape
                    print("uk: replaced mfm_gamma %g, mfm_mape %g" %
                          (gamma, mape))

                    # update the residuals estimator with the current
                    mod_re.update_with(gamma * f[:, :, fuel_ndx] - Kf_fn)

                elif cfg['kriging_strategy'] == 'tsm':
                    # predict the moisture field using observed fuel type
                    predicted_field = mfm.predict_field(base_field)

                    # run the tsm kriging estimator
                    Kf_fn, Vf_fn = trend_surface_model_kriging(
                        obs_data[model_time], wrf_data, predicted_field)

                    # update the model residual estimator and get current best estimate of variance
                    mod_re.update_with(f[:, :, fuel_ndx] - predicted_field)

                else:
                    raise ValueError(
                        'Invalid kriging strategy [%s] in configuration.' %
                        cfg['kriging_strategy'])

                krig_vals = np.array([
                    Kf_fn[o.get_nearest_grid_point()]
                    for o in obs_data[model_time]
                ])
                diagnostics().push(
                    "assim_data",
                    (t, fuel_ndx, obs_vals, krig_vals, mod_vals, mod_na_vals))
                plot_model_snapshot(cfg, tm, t, fuel_ndx, obs_vals, krig_vals,
                                    mod_vals, mod_na_vals)

                # append to storage for kriged fields in this time instant
                Kf.append(Kf_fn)
                Vf.append(Vf_fn)
                fn.append(fuel_ndx)

        # if there were any observations, run the kalman update step
        if len(fn) > 0:
            Nobs = len(fn)
            # run the kalman update in each model independently
            # gather the standard deviations of the moisture fuel after the Kalman update
            for pos in np.ndindex(dom_shape):
                O = np.zeros((Nobs, ))
                V = np.zeros((Nobs, Nobs))

                # construct observations for this position
                for i in range(Nobs):
                    O[i] = Kf[i][pos]
                    V[i, i] = Vf[i][pos]

                # execute the Kalman update
                Kij = models[pos].kalman_update(O, V, fn)
                Kg[pos[0], pos[1], :] = Kij[:, 0]

        # prepare visualization data (state after the Kalman update)
        f = np.zeros((dom_shape[0], dom_shape[1], 3))
        for p in np.ndindex(dom_shape):
            f[p[0], p[1], :] = models[p].get_state()[:3]

        # every panel is drawn the same way: field, title and the upper color
        # limit (the lower limit is always 0.0)
        panels = [
            (f[:, :, 0], '1-hr fuel', maxE),
            (f[:, :, 1], '10-hr fuel', maxE),
            (f_na[:, :, 1], '10hr fuel - no assim', maxE),
            (Kg[:, :, 0], 'Kalman gain, fm1', 3.0),
            (Kg[:, :, 1], 'Kalman gain, fm10', 1.0),
            (Kf_fn, 'Kriging field', maxE),
            (mid, 'Model ids', 5.0),
            (Vf_fn, 'Kriging var', np.max(Vf_fn)),
            (mresV, 'fm10 model var', np.max(mresV)),
        ]
        plt.clf()
        for slot, (field, title, clim_hi) in enumerate(panels, 1):
            plt.subplot(3, 3, slot)
            render_spatial_field_fast(m, lon, lat, field, title)
            plt.clim([0.0, clim_hi])
            plt.axis('off')
            plt.colorbar()

        plt.savefig(
            os.path.join(cfg['output_dir'], 'moisture_model_t%03d.png' % t))

        # push new diagnostic outputs
        diagnostics().push("assim_K0", (t, np.mean(Kg[:, :, 0])))
        diagnostics().push("assim_K1", (t, np.mean(Kg[:, :, 1])))
        diagnostics().push("assim_mV", (t, np.mean(mV)))
        diagnostics().push("assim_mresV", (t, np.mean(mresV)))
        diagnostics().push("kriging_variance", (t, np.mean(Vf_fn)))

    # store the gamma coefficients
    with open(os.path.join(cfg['output_dir'], 'gamma.txt'), 'w') as f:
        f.write(str(diagnostics().pull('mfm_gamma')))

    # make a plot of gammas
    plt.figure()
    plt.plot(diagnostics().pull('mfm_gamma'))
    plt.title('Mean field model - gamma')
    plt.savefig(os.path.join(cfg['output_dir'], 'plot_gamma.png'))

    plt.figure()
    plt.plot(diagnostics().pull('skdm_cov_cond'))
    plt.title('Condition number of covariance matrix')
    plt.savefig(os.path.join(cfg['output_dir'], 'plot_sigma_cond.png'))

    # make a plot for each substation
    # NOTE(review): indexing the stored value arrays by station number assumes
    # each record holds one value per station, in station order - confirm.
    plt.figure()
    D = diagnostics().pull("assim_data")
    for i in range(len(stations)):
        plt.clf()
        # get data for the i-th station
        t_i = [o[0] for o in D]
        obs_i = [o[2][i] for o in D]
        krig_i = [o[3][i] for o in D]
        mod_i = [o[4][i] for o in D]
        mod_na_i = [o[5][i] for o in D]
        # the y-range has to cover all four plotted series
        mx = max(max(obs_i), max(krig_i), max(mod_i), max(mod_na_i))
        plt.plot(t_i, obs_i, 'ro')
        plt.plot(t_i, krig_i, 'bo-')
        plt.plot(t_i, mod_i, 'kx-', linewidth=1.5)
        plt.plot(t_i, mod_na_i, 'mx-')
        plt.ylim([0.0, 1.1 * mx])
        plt.legend(['Obs.', 'Kriged', 'Model', 'NoAssim'])
        plt.title('Station observations fit to model and kriging field')
        plt.savefig(
            os.path.join(cfg['output_dir'], 'station%02d.png' % (i + 1)))

    # time series of the (step, value) pairs pushed during the run:
    # diagnostics tag, plot title, output file name
    series_plots = [
        ("assim_K1", 'Average Kalman gain', 'plot_kalman_gain_10hr.png'),
        ("assim_K0", 'Average Kalman gain', 'plot_kalman_gain_1hr.png'),
        ("assim_mV", 'Average model variance',
         'plot_fm10_model_variance.png'),
        ("assim_mresV", 'Average fm10 residual variance',
         'plot_fm10_model_residual_variance.png'),
        ("kriging_variance", 'Kriging field variance',
         'plot_kriging_variance.png'),
        ("kriging_obs_res_var", 'Observation residual variance',
         'plot_observation_residual_variance.png'),
    ]
    for tag, title, fname in series_plots:
        series = diagnostics().pull(tag)
        plt.figure()
        plt.plot([d[0] for d in series], [d[1] for d in series], 'ro-')
        plt.title(title)
        plt.savefig(os.path.join(cfg['output_dir'], fname))

    plt.figure()
    plt.plot(diagnostics().pull("mfm_mape"), 'ro-', linewidth=2)
    plt.title('Mean absolute prediction error of station data')
    plt.savefig(os.path.join(cfg['output_dir'], 'plot_station_mape.png'))

    diagnostics().dump_store(os.path.join(cfg['output_dir'],
                                          'diagnostics.bin'))

    # as a last step encode all the frames as video
    # NOTE: the command is a shell string containing cfg['output_dir'], so the
    # directory name must not contain shell metacharacters
    os.system(
        "cd %s; avconv -qscale 1 -r 20 -b 9600 -i moisture_model_t%%03d.png video.mp4"
        % cfg['output_dir'])