def initialize(self, num_reals):
    """(Re)initialize the process by drawing new ensembles.

    Draws a parameter ensemble from ``self.parcov`` and an observation
    ensemble from ``self.obscov``, stores the inverse square root of the
    diagonal of the parameter covariance, and computes the prior
    parameter deviations via ``self._calc_delta_par()``.

    Parameters
    ----------
    num_reals : int
        number of realizations to draw.  Stored on the instance as
        ``int(num_reals)``.
    """
    self.num_reals = int(num_reals)

    # parameter realizations
    par_en = ParameterEnsemble(self.pst)
    self.parensemble = par_en
    par_en.draw(cov=self.parcov, num_reals=num_reals)

    # observation realizations; the untouched draw is kept as obsensemble_0
    base_obs = ObservationEnsemble(self.pst)
    self.obsensemble_0 = base_obs
    base_obs.draw(cov=self.obscov, num_reals=num_reals)
    self.obsensemble = base_obs.copy()

    # only the diagonal of the parameter covariance is used for scaling
    diag_cov = self.parcov
    if not diag_cov.isdiagonal:
        diag_cov = Cov(
            x=np.diag(diag_cov.x),
            names=diag_cov.col_names,
            isdiagonal=True,
        )
    self.half_parcov_diag = diag_cov.inv.sqrt

    self.delta_par_prior = self._calc_delta_par()
    self.__initialized = True
def __init__(self, **kwargs):
    """Construct the (deprecated) MonteCarlo object.

    Issues a ``PyemuWarning`` deprecation notice, forwards ``kwargs`` to
    the parent constructor, and then builds a ``ParameterEnsemble`` and an
    ``ObservationEnsemble`` from ``self.pst``.

    Raises
    ------
    AssertionError
        if ``self.pst`` is None after the parent constructor has run.
    """
    deprecation_msg = (
        "pyemu.MonteCarlo class is deprecated. "
        "Please use the ensemble classes directly"
    )
    warnings.warn(deprecation_msg, PyemuWarning)

    super(MonteCarlo, self).__init__(**kwargs)

    control_file = self.pst
    assert control_file is not None, "monte carlo requires a pest control file"

    self.parensemble = ParameterEnsemble(pst=control_file)
    self.obsensemble = ObservationEnsemble(pst=control_file)
def draw(self, num_reals=1, par_file=None, obs=False,
         enforce_bounds=False, cov=None, how="gaussian"):
    """Draw stochastic realizations of parameters and optionally observations.

    Replaces ``self.parensemble`` and ``self.obsensemble`` with new
    ensembles built from ``self.pst``, then draws into them.

    Parameters
    ----------
    num_reals : int
        number of realizations to generate
    par_file : str
        parameter file to use as mean values.  When given, it is applied
        with ``self.pst.parrep`` before drawing
    obs : bool
        also draw observation realizations using ``self.obscov``
    enforce_bounds : bool
        passed through to the parameter ensemble draw
    cov : Cov
        covariance to draw parameters from.  Defaults to ``self.parcov``.
        Cannot be combined with ``how="uniform"``
    how : str
        type of distribution.  Must be in ["gaussian", "uniform"]
        (case and surrounding whitespace are ignored)

    Raises
    ------
    AssertionError
        if ``how`` is not recognized or ``cov`` is not a ``Cov``
    Exception
        if ``cov`` is passed together with ``how="uniform"``
    """
    if par_file is not None:
        self.pst.parrep(par_file)

    how = how.lower().strip()
    assert how in ["gaussian", "uniform"]

    if cov is None:
        cov = self.parcov
    else:
        assert isinstance(cov, Cov)
        if how == "uniform":
            raise Exception(
                "MonteCarlo.draw() error: 'how'='uniform',"
                " 'cov' arg cannot be passed"
            )

    self.parensemble = ParameterEnsemble(pst=self.pst)
    self.obsensemble = ObservationEnsemble(pst=self.pst)

    # the same phrase is logged before and after each draw
    par_phrase = "generating {0:d} parameter realizations".format(num_reals)
    self.log(par_phrase)
    self.parensemble.draw(
        cov, num_reals=num_reals, how=how, enforce_bounds=enforce_bounds
    )
    self.log(par_phrase)

    if not obs:
        return

    obs_phrase = "generating {0:d} observation realizations".format(num_reals)
    self.log(obs_phrase)
    self.obsensemble.draw(self.obscov, num_reals=num_reals)
    self.log(obs_phrase)
def initialize(self, num_reals, init_lambda=None):
    """(Re)initialize the process: draw, save and evaluate the prior ensembles.

    Opens the phi report csv, draws the initial parameter and observation
    ensembles, writes them to csv, evaluates the initial parameter
    ensemble through ``self._calc_obs`` and sets the starting lambda.

    Parameters
    ----------
    num_reals : int
        number of realizations to draw.  Must be greater than 1
    init_lambda : float
        the initial lambda.  If None, lambda is derived from the initial
        mean phi and the number of observation ensemble columns

    Raises
    ------
    AssertionError
        if ``num_reals`` is not greater than 1
    NotImplementedError
        if ``self.restart`` is set; restarting is not supported
    """
    assert num_reals > 1

    # Restarting is not implemented.  Bail out before opening any file so
    # an existing phi report is not truncated and no handle is leaked.
    if self.restart:
        print("restarting...ignoring num_reals")
        raise NotImplementedError()

    # initialize the phi report csv
    self.phi_csv = open(self.pst.filename + ".iobj.csv", 'w')
    self.phi_csv.write("iter_num,total_runs,lambda,min,max,mean,median,std,")
    self.phi_csv.write(
        ','.join("{0:010d}".format(i + 1) for i in range(num_reals)))
    self.phi_csv.write('\n')
    self.total_runs = 0

    # this matrix gets used a lot, so only calc once and store
    self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

    self.num_reals = int(num_reals)

    # draw, bound-enforce and save the initial parameter ensemble
    self.parensemble_0 = ParameterEnsemble(self.pst)
    self.parensemble_0.draw(cov=self.parcov, num_reals=num_reals)
    self.parensemble_0.enforce()
    self.parensemble = self.parensemble_0.copy()
    self.parensemble_0.to_csv(self.pst.filename + self.paren_prefix.format(0))

    # draw and save the base observation ensemble
    self.obsensemble_0 = ObservationEnsemble(self.pst)
    self.obsensemble_0.draw(cov=self.obscov, num_reals=num_reals)
    self.obsensemble_0.to_csv(self.pst.filename + self.obsen_prefix.format(-1))
    # this matrix gets used a lot, so only calc once
    self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

    # run the initial parameter ensemble
    self.obsensemble = self._calc_obs(self.parensemble)
    self.obsensemble.to_csv(self.pst.filename + self.obsen_prefix.format(0))

    self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
    self._phi_report(self.current_phi_vec, 0.0)
    self.last_best_mean = self.current_phi_vec.mean()
    self.last_best_std = self.current_phi_vec.std()

    if init_lambda is not None:
        self.current_lambda = float(init_lambda)
    else:
        # following chen and oliver
        x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
        self.current_lambda = 10.0 ** (np.floor(np.log10(x)))

    # The parameter scaling matrix is the identity for both the approximate
    # and the full form of the algorithm: using the actual parcov inverse
    # for upgrades seemed to push parameters around too much
    # (jwhite - dec 5 2016), so it is not used for now.
    self.half_parcov_diag = 1.0

    self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
    u, s, _ = self.delta_par_prior.pseudo_inv_components()
    self.Am = u * s.inv

    self.__initialized = True
def initialize(
    self,
    num_reals=1,
    init_lambda=None,
    enforce_bounds="reset",
    parensemble=None,
    obsensemble=None,
    restart_obsensemble=None,
):
    """Initialize the iES process.

    Depending on arguments, draws or loads the initial parameter and
    observation ensembles, then evaluates (or loads) the initial parameter
    ensemble, drops failed/bad realizations, writes the initial phi
    reports and sets the starting lambda.

    Parameters
    ----------
    num_reals : int
        the number of realizations to draw.  Ignored if parensemble and
        obsensemble are both passed; the row count of the supplied
        parameter ensemble is used instead
    init_lambda : float
        the initial lambda to use.  If None, it is derived from the
        initial mean phi.  During subsequent updates, the lambda is
        updated according to upgrade success
    enforce_bounds : str
        how to enforce parameter bound transgression.  options are
        reset, drop, or None
    parensemble : pyemu.ParameterEnsemble or str
        a parameter ensemble or filename to use as the initial
        parameter ensemble.  If not None, then obsensemble must not be
        None
    obsensemble : pyemu.ObservationEnsemble or str
        an observation ensemble or filename to use as the initial
        observation ensemble.  If not None, then parensemble must not be
        None
    restart_obsensemble : pyemu.ObservationEnsemble or str
        an observation ensemble or filename to use as an evaluated
        observation ensemble.  If not None, this will skip the initial
        parameter ensemble evaluation - user beware!

    Note
    ----
    If only one of parensemble/obsensemble is passed, it is ignored (a
    warning is logged) and new ensembles are drawn.

    Example
    -------
    ``>>>import pyemu``

    ``>>>es = pyemu.EnsembleSmoother(pst="pest.pst")``

    ``>>>es.initialize(num_reals=100)``

    """

    def _open_phi_csv(suffix, nreal):
        """Open a phi report csv next to the control file and write its header."""
        handle = open(self.pst.filename + suffix, 'w')
        handle.write("iter_num,total_runs,lambda,min,max,mean,median,std,")
        handle.write(','.join("{0:010d}".format(i + 1) for i in range(nreal)))
        handle.write('\n')
        return handle

    self.enforce_bounds = enforce_bounds
    self.total_runs = 0

    # this matrix gets used a lot, so only calc once and store
    self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

    if parensemble is not None and obsensemble is not None:
        self.logger.log("initializing with existing ensembles")

        if isinstance(parensemble, str):
            self.logger.log("loading parensemble from file")
            # check the parameter file itself (this used to test the
            # obsensemble argument by mistake)
            if not os.path.exists(parensemble):
                self.logger.lraise(
                    "can not find parensemble file: {0}".format(parensemble))
            df = pd.read_csv(parensemble, index_col=0)
            self.parensemble_0 = ParameterEnsemble.from_dataframe(
                df=df, pst=self.pst)
            self.logger.log("loading parensemble from file")
        elif isinstance(parensemble, ParameterEnsemble):
            self.parensemble_0 = parensemble.copy()
        else:
            raise Exception(
                "unrecognized arg type for parensemble, "
                "should be filename or ParameterEnsemble"
                ", not {0}".format(type(parensemble)))
        self.parensemble = self.parensemble_0.copy()

        if isinstance(obsensemble, str):
            self.logger.log("loading obsensemble from file")
            if not os.path.exists(obsensemble):
                self.logger.lraise(
                    "can not find obsensemble file: {0}".format(obsensemble))
            # only the non-zero-weighted observations are retained
            df = pd.read_csv(obsensemble, index_col=0).loc[
                :, self.pst.nnz_obs_names]
            self.obsensemble_0 = ObservationEnsemble.from_dataframe(
                df=df, pst=self.pst)
            self.logger.log("loading obsensemble from file")
        elif isinstance(obsensemble, ObservationEnsemble):
            self.obsensemble_0 = obsensemble.copy()
        else:
            raise Exception(
                "unrecognized arg type for obsensemble, "
                "should be filename or ObservationEnsemble"
                ", not {0}".format(type(obsensemble)))

        assert self.parensemble_0.shape[0] == self.obsensemble_0.shape[0]
        # the supplied ensembles define the realization count
        num_reals = self.parensemble.shape[0]
        self.logger.log("initializing with existing ensembles")

    else:
        if parensemble is not None or obsensemble is not None:
            # the two must be passed together; a lone one was silently
            # ignored before, so at least tell the user
            self.logger.warn(
                "parensemble and obsensemble must both be passed to be "
                "used; drawing new ensembles instead")

        self.logger.log(
            "initializing smoother with {0} realizations".format(num_reals))

        # one start/finish pair brackets draw, bound enforcement and save
        self.logger.log("initializing parensemble")
        self.parensemble_0 = pyemu.ParameterEnsemble.from_gaussian_draw(
            ParameterEnsemble(self.pst), self.parcov, num_reals=num_reals)
        self.parensemble_0.enforce(enforce_bounds=enforce_bounds)
        self.parensemble = self.parensemble_0.copy()
        self.parensemble_0.to_csv(
            self.pst.filename + self.paren_prefix.format(0))
        self.logger.log("initializing parensemble")

        self.logger.log("initializing obsensemble")
        self.obsensemble_0 = pyemu.ObservationEnsemble.from_id_gaussian_draw(
            ObservationEnsemble(self.pst), num_reals=num_reals)
        # save the base obsensemble
        self.obsensemble_0.to_csv(
            self.pst.filename + self.obsen_prefix.format(-1))
        self.logger.log("initializing obsensemble")

        self.logger.log(
            "initializing smoother with {0} realizations".format(num_reals))

    # this matrix gets used a lot, so only calc once
    self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

    # initialize the phi report csvs
    self.phi_csv = _open_phi_csv(".iobj.csv", num_reals)
    self.phi_act_csv = _open_phi_csv(".iobj.actual.csv", num_reals)

    if restart_obsensemble is not None:
        self.logger.log(
            "loading restart_obsensemble {0}".format(restart_obsensemble))
        failed_runs, self.obsensemble = self._load_obs_ensemble(
            restart_obsensemble)
        assert self.obsensemble.shape[0] == self.obsensemble_0.shape[0]
        assert list(self.obsensemble.columns) == list(
            self.obsensemble_0.columns)
        self.logger.log(
            "loading restart_obsensemble {0}".format(restart_obsensemble))
    else:
        # run the initial parameter ensemble
        self.logger.log("evaluating initial ensembles")
        failed_runs, self.obsensemble = self._calc_obs(self.parensemble)
        self.obsensemble.to_csv(
            self.pst.filename + self.obsen_prefix.format(0))
        self.logger.log("evaluating initial ensembles")

    if failed_runs is not None:
        self.logger.warn("dropping failed realizations")
        # rows are blanked and then removed with dropna()
        self.parensemble.loc[failed_runs, :] = np.nan
        self.parensemble = self.parensemble.dropna()
        self.obsensemble.loc[failed_runs, :] = np.nan
        self.obsensemble = self.obsensemble.dropna()

    self.current_phi_vec = self._calc_phi_vec(self.obsensemble)

    if self.drop_bad_reals is not None:
        # positions of realizations whose phi exceeds the threshold ...
        drop_idx = np.argwhere(
            self.current_phi_vec > self.drop_bad_reals).flatten()
        # ... mapped to the ensemble index labels
        run_ids = self.obsensemble.index.values
        drop_idx = run_ids[drop_idx]
        if len(drop_idx) == self.obsensemble.shape[0]:
            raise Exception("dropped all realizations as 'bad'")
        if len(drop_idx) > 0:
            self.logger.warn(
                "{0} realizations dropped as 'bad' (indices :{1})".format(
                    len(drop_idx), ','.join([str(d) for d in drop_idx])))
            self.parensemble.loc[drop_idx, :] = np.nan
            self.parensemble = self.parensemble.dropna()
            self.obsensemble.loc[drop_idx, :] = np.nan
            self.obsensemble = self.obsensemble.dropna()

            self.current_phi_vec = self._calc_phi_vec(self.obsensemble)

    self._phi_report(self.phi_csv, self.current_phi_vec, 0.0)
    self._phi_report(
        self.phi_act_csv, self.obsensemble.phi_vector.values, 0.0)

    self.last_best_mean = self.current_phi_vec.mean()
    self.last_best_std = self.current_phi_vec.std()
    self.logger.statement(
        "initial phi (mean, std): {0:15.6G},{1:15.6G}".format(
            self.last_best_mean, self.last_best_std))

    if init_lambda is not None:
        self.current_lambda = float(init_lambda)
    else:
        # following chen and oliver
        x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
        self.current_lambda = 10.0 ** (np.floor(np.log10(x)))
    self.logger.statement(
        "current lambda:{0:15.6g}".format(self.current_lambda))

    # The parameter scaling matrix is the identity whether or not the
    # approximate prior is requested: using the actual parcov inverse for
    # upgrades seemed to push parameters around too much
    # (jwhite - dec 5 2016), so for now it is not used.
    if self.use_approx_prior:
        self.logger.statement("using approximate parcov in solution")
    self.half_parcov_diag = 1.0

    self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
    u, s, _ = self.delta_par_prior.pseudo_inv_components()
    self.Am = u * s.inv

    self.__initialized = True
def __init__(self, **kwargs):
    """Construct the MonteCarlo object.

    Forwards ``kwargs`` to the parent constructor, then builds a
    ``ParameterEnsemble`` and an ``ObservationEnsemble`` from ``self.pst``.

    Raises
    ------
    AssertionError
        if ``self.pst`` is None after the parent constructor has run.
    """
    super(MonteCarlo, self).__init__(**kwargs)

    control_file = self.pst
    assert control_file is not None, "monte carlo requires a pest control file"

    self.parensemble = ParameterEnsemble(pst=control_file)
    self.obsensemble = ObservationEnsemble(pst=control_file)
def draw(self, num_reals=1, par_file=None, obs=False,
         enforce_bounds=None, cov=None, how="gaussian"):
    """Draw stochastic realizations of parameters and optionally
    observations, filling MonteCarlo.parensemble and optionally
    MonteCarlo.obsensemble.

    Parameters
    ----------
    num_reals : int
        number of realizations to generate
    par_file : str
        parameter file to use as mean values; applied with
        ``MonteCarlo.pst.parrep`` before drawing.  Default is None
    obs : bool
        also draw observation realizations using ``MonteCarlo.obscov``,
        forming MonteCarlo.obsensemble.  Default is False
    enforce_bounds : str
        enforce parameter bounds based on control file information.
        options are 'reset', 'drop' or None.  Default is None
    cov : Cov
        covariance to draw parameters from.  Defaults to
        ``MonteCarlo.parcov``.  Cannot be combined with ``how="uniform"``
    how : str
        type of distribution to draw from.  Must be in
        ["gaussian", "uniform"] (case and surrounding whitespace are
        ignored).  Default is "gaussian"

    Raises
    ------
    AssertionError
        if ``how`` is not recognized or ``cov`` is not a ``Cov``
    Exception
        if ``cov`` is passed together with ``how="uniform"``

    Example
    -------
    ``>>>import pyemu``

    ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

    ``>>>mc.draw(1000)``

    """
    if par_file is not None:
        self.pst.parrep(par_file)

    how = how.lower().strip()
    assert how in ["gaussian", "uniform"]

    if cov is None:
        cov = self.parcov
    else:
        assert isinstance(cov, Cov)
        if how == "uniform":
            raise Exception(
                "MonteCarlo.draw() error: 'how'='uniform',"
                " 'cov' arg cannot be passed"
            )

    self.parensemble = ParameterEnsemble(pst=self.pst)
    self.obsensemble = ObservationEnsemble(pst=self.pst)

    # the same phrase is logged before and after each draw
    par_phrase = "generating {0:d} parameter realizations".format(num_reals)
    self.log(par_phrase)
    self.parensemble.draw(
        cov, num_reals=num_reals, how=how, enforce_bounds=enforce_bounds
    )
    self.log(par_phrase)

    if not obs:
        return

    obs_phrase = "generating {0:d} observation realizations".format(num_reals)
    self.log(obs_phrase)
    self.obsensemble.draw(self.obscov, num_reals=num_reals)
    self.log(obs_phrase)
def initialize(self, num_reals=1, init_lambda=None, enforce_bounds="reset",
               parensemble=None, obsensemble=None, restart_obsensemble=None):
    """(Re)initialize the process.

    Draws or loads the initial parameter and observation ensembles, then
    evaluates (or loads) the initial parameter ensemble, drops failed
    realizations, writes the initial phi report and sets the starting
    lambda.

    Parameters
    ----------
    num_reals : int
        the number of realizations to draw.  When parensemble and
        obsensemble are both passed, the row count of the supplied
        parameter ensemble is used instead
    init_lambda : float
        the initial lambda to use.  If None, it is derived from the
        initial mean phi
    enforce_bounds : str
        stored on the instance and passed to the parameter ensemble's
        ``enforce`` when new ensembles are drawn
    parensemble : ParameterEnsemble or str
        a parameter ensemble or csv filename to use as the initial
        parameter ensemble.  Only used if obsensemble is also passed
    obsensemble : ObservationEnsemble or str
        an observation ensemble or csv filename to use as the initial
        observation ensemble.  Only used if parensemble is also passed
    restart_obsensemble : ObservationEnsemble or str
        passed to ``self._load_obs_ensemble``; when given, the initial
        parameter ensemble is not evaluated
    """
    self.enforce_bounds = enforce_bounds
    self.total_runs = 0

    # this matrix gets used a lot, so only calc once and store
    self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

    if parensemble is not None and obsensemble is not None:
        self.logger.log("initializing with existing ensembles")

        if isinstance(parensemble, str):
            self.logger.log("loading parensemble from file")
            # check the parameter file itself (this used to test the
            # obsensemble argument by mistake)
            if not os.path.exists(parensemble):
                self.logger.lraise(
                    "can not find parensemble file: {0}".format(parensemble))
            df = pd.read_csv(parensemble, index_col=0)
            self.parensemble_0 = ParameterEnsemble.from_dataframe(
                df=df, pst=self.pst)
            self.logger.log("loading parensemble from file")
        elif isinstance(parensemble, ParameterEnsemble):
            self.parensemble_0 = parensemble.copy()
        else:
            raise Exception(
                "unrecognized arg type for parensemble, "
                "should be filename or ParameterEnsemble"
                ", not {0}".format(type(parensemble)))
        self.parensemble = self.parensemble_0.copy()

        if isinstance(obsensemble, str):
            self.logger.log("loading obsensemble from file")
            if not os.path.exists(obsensemble):
                self.logger.lraise(
                    "can not find obsensemble file: {0}".format(obsensemble))
            # only the non-zero-weighted observations are retained
            df = pd.read_csv(obsensemble, index_col=0).loc[
                :, self.pst.nnz_obs_names]
            self.obsensemble_0 = ObservationEnsemble.from_dataframe(
                df=df, pst=self.pst)
            self.logger.log("loading obsensemble from file")
        elif isinstance(obsensemble, ObservationEnsemble):
            self.obsensemble_0 = obsensemble.copy()
        else:
            raise Exception(
                "unrecognized arg type for obsensemble, "
                "should be filename or ObservationEnsemble"
                ", not {0}".format(type(obsensemble)))

        assert self.parensemble_0.shape[0] == self.obsensemble_0.shape[0]
        # the supplied ensembles define the realization count used for the
        # phi report header below
        num_reals = self.parensemble.shape[0]
        self.logger.log("initializing with existing ensembles")

    else:
        self.logger.log(
            "initializing smoother with {0} realizations".format(num_reals))

        # one start/finish pair brackets draw, bound enforcement and save
        self.logger.log("initializing parensemble")
        self.parensemble_0 = ParameterEnsemble(self.pst)
        self.parensemble_0.draw(cov=self.parcov, num_reals=num_reals)
        self.parensemble_0.enforce(enforce_bounds=enforce_bounds)
        self.parensemble = self.parensemble_0.copy()
        self.parensemble_0.to_csv(
            self.pst.filename + self.paren_prefix.format(0))
        self.logger.log("initializing parensemble")

        self.logger.log("initializing obsensemble")
        self.obsensemble_0 = ObservationEnsemble(self.pst)
        self.obsensemble_0.draw(cov=self.obscov, num_reals=num_reals)
        # save the base obsensemble
        self.obsensemble_0.to_csv(
            self.pst.filename + self.obsen_prefix.format(-1))
        self.logger.log("initializing obsensemble")

        self.logger.log(
            "initializing smoother with {0} realizations".format(num_reals))

    # this matrix gets used a lot, so only calc once
    self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

    # initialize the phi report csv.  Done after the ensembles exist so the
    # header has one column per actual realization and the file is not
    # truncated when the inputs turn out to be invalid.
    self.phi_csv = open(self.pst.filename + ".iobj.csv", 'w')
    self.phi_csv.write(
        "iter_num,total_runs,lambda,min,max,mean,median,std,")
    self.phi_csv.write(
        ','.join("{0:010d}".format(i + 1) for i in range(num_reals)))
    self.phi_csv.write('\n')

    if restart_obsensemble is not None:
        self.logger.log(
            "loading restart_obsensemble {0}".format(restart_obsensemble))
        failed_runs, self.obsensemble = self._load_obs_ensemble(
            restart_obsensemble)
        assert self.obsensemble.shape[0] == self.obsensemble_0.shape[0]
        assert list(self.obsensemble.columns) == list(
            self.obsensemble_0.columns)
        self.logger.log(
            "loading restart_obsensemble {0}".format(restart_obsensemble))
    else:
        # run the initial parameter ensemble
        self.logger.log("evaluating initial ensembles")
        failed_runs, self.obsensemble = self._calc_obs(self.parensemble)
        self.obsensemble.to_csv(
            self.pst.filename + self.obsen_prefix.format(0))
        self.logger.log("evaluating initial ensembles")

    if failed_runs is not None:
        self.logger.warn("dropping failed realizations")
        self.parensemble = self.parensemble.drop(failed_runs)
        self.obsensemble = self.obsensemble.drop(failed_runs)

    self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
    self._phi_report(self.current_phi_vec, 0.0)

    self.last_best_mean = self.current_phi_vec.mean()
    self.last_best_std = self.current_phi_vec.std()
    self.logger.statement(
        "initial phi (mean, std): {0:15.6G},{1:15.6G}".format(
            self.last_best_mean, self.last_best_std))

    if init_lambda is not None:
        self.current_lambda = float(init_lambda)
    else:
        # following chen and oliver
        x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
        self.current_lambda = 10.0 ** (np.floor(np.log10(x)))
    self.logger.statement(
        "current lambda:{0:15.6g}".format(self.current_lambda))

    # The parameter scaling matrix is the identity whether or not the
    # approximate prior is requested: using the actual parcov inverse for
    # upgrades seemed to push parameters around too much
    # (jwhite - dec 5 2016), so for now it is not used.
    if self.use_approx_prior:
        self.logger.statement("using approximate parcov in solution")
    self.half_parcov_diag = 1.0

    self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
    u, s, _ = self.delta_par_prior.pseudo_inv_components()
    self.Am = u * s.inv

    self.__initialized = True