コード例 #1
0
ファイル: smoother.py プロジェクト: wk1984/pyemu
 def _calc_obs(self):
     '''
     Run the current parameter ensemble through ``sweep`` and store the
     result on ``self.obsensemble``.

     The parameter ensemble is written to ``sweep_in.csv`` in the current
     working directory, ``sweep`` is launched through the shell on
     ``self.pst.filename`` and ``sweep_out.csv`` is read back.  Column
     labels are lower-cased and only the columns listed in
     ``self.obscov.row_names`` are kept.  Returns None.
     '''
     in_csv = os.path.join("sweep_in.csv")
     self.parensemble.to_csv(in_csv)

     # echo the contents of the working directory before the run
     print(os.listdir('.'))
     sweep_cmd = "sweep {0}".format(self.pst.filename)
     os.system(sweep_cmd)

     out_csv = os.path.join('sweep_out.csv')
     raw = ObservationEnsemble.from_csv(out_csv)
     raw.columns = [label.lower() for label in raw.columns]
     wanted = self.obscov.row_names
     self.obsensemble = ObservationEnsemble.from_dataframe(
         df=raw.loc[:, wanted], pst=self.pst)
     # TODO: modify sweep to be interactive
コード例 #2
0
ファイル: smoother.py プロジェクト: jroth-usgs/pyemu
    def initialize(self,num_reals):
        '''
        (Re)initialize the smoother by drawing fresh ensembles.

        Parameters
        ----------
        num_reals : int
            number of realizations.  ``int(num_reals)`` is stored on
            ``self.num_reals``; the value as passed is handed to both
            ensemble ``draw`` calls.

        Sets ``parensemble``, ``obsensemble_0``, ``obsensemble`` (a copy of
        ``obsensemble_0``), ``half_parcov_diag`` and ``delta_par_prior``,
        then marks the instance as initialized.
        '''
        self.num_reals = int(num_reals)

        # parameter realizations drawn with the parameter covariance
        self.parensemble = ParameterEnsemble(self.pst)
        self.parensemble.draw(cov=self.parcov,num_reals=num_reals)

        # observation realizations; the first draw is kept as the reference
        self.obsensemble_0 = ObservationEnsemble(self.pst)
        self.obsensemble_0.draw(cov=self.obscov,num_reals=num_reals)
        self.obsensemble = self.obsensemble_0.copy()

        # the scaling matrix only ever uses the diagonal of parcov, so a
        # non-diagonal parcov is first reduced to its diagonal
        diag_cov = self.parcov
        if not diag_cov.isdiagonal:
            diag_cov = Cov(x=np.diag(self.parcov.x),
                           names=self.parcov.col_names,
                           isdiagonal=True)
        self.half_parcov_diag = diag_cov.inv.sqrt
        # NOTE: the analogous observation-covariance term is not computed
        # here (that code was disabled in the original)

        self.delta_par_prior = self._calc_delta_par()

        self.__initialized = True
コード例 #3
0
ファイル: smoother.py プロジェクト: zandy19/pyemu
    def _calc_obs_local(self, parensemble):
        '''
        Evaluate a parameter ensemble with ``sweep`` on this machine.

        ``parensemble`` is written to ``self.sweep_in_csv``.  With
        ``self.num_slaves > 0`` a ``sweep`` master is launched in a
        background thread on port 4004 and slaves are started through
        ``pyemu.utils.start_slaves``; otherwise ``sweep`` is run directly.
        The rows read from ``self.sweep_out_csv`` are added to
        ``self.total_runs``.

        Returns an ObservationEnsemble built from the columns named in
        ``self.obscov.row_names``.
        '''
        parensemble.to_csv(self.sweep_in_csv)

        if not self.num_slaves > 0:
            # serial run: no master/slave setup needed
            os.system("sweep {0}".format(self.pst.filename))
        else:
            port = 4004
            master_cmd = "sweep {0} /h :{1} >nul".format(
                self.pst.filename, port)

            master_thread = threading.Thread(target=os.system,
                                             args=(master_cmd,))
            master_thread.start()
            # just some time for the master to get up and running
            # to take slaves
            time.sleep(1.5)
            pyemu.utils.start_slaves("template",
                                     "sweep",
                                     self.pst.filename,
                                     self.num_slaves,
                                     slave_root='.',
                                     port=port)
            # block until the master process has finished
            master_thread.join()

        results = pd.read_csv(self.sweep_out_csv)
        results.columns = [name.lower() for name in results.columns]
        self.total_runs += results.shape[0]
        selected = results.loc[:, self.obscov.row_names]
        return ObservationEnsemble.from_dataframe(df=selected, pst=self.pst)
コード例 #4
0
 def __init__(self, **kwargs):
     """Construct the (deprecated) MonteCarlo analysis.

     Emits a PyemuWarning, forwards ``**kwargs`` to the parent
     constructor, requires that a control file ended up on ``self.pst``
     and attaches a parameter and an observation ensemble built from it.
     """
     deprecation_msg = ("pyemu.MonteCarlo class is deprecated.  "
                        "Please use the ensemble classes directly")
     warnings.warn(deprecation_msg, PyemuWarning)
     super(MonteCarlo, self).__init__(**kwargs)
     assert self.pst is not None, "monte carlo requires a pest control file"
     control_file = self.pst
     self.parensemble = ParameterEnsemble(pst=control_file)
     self.obsensemble = ObservationEnsemble(pst=control_file)
コード例 #5
0
ファイル: mc.py プロジェクト: jroth-usgs/pyemu
    def draw(self, num_reals=1, par_file = None, obs=False,
             enforce_bounds=False,cov=None, how="gaussian"):
        """Draw stochastic realizations of parameters and, optionally,
        of observations.

        Parameters
        ----------
        num_reals : int
            number of realizations to generate
        par_file : str
            parameter file passed to ``self.pst.parrep`` before drawing.
            Skipped when None.
        obs : bool
            also draw observation realizations using ``self.obscov``
        enforce_bounds : bool
            call ``self.parensemble.enforce()`` after the parameter draw
        cov : Cov
            covariance used for the parameter draw.  ``self.parcov`` is
            used when None.  Must not be passed with ``how="uniform"``.
        how : str
            type of distribution.  Must be in ["gaussian","uniform"]
            (case and surrounding whitespace are ignored)

        Returns
        -------
        None

        Raises
        ------
        AssertionError
            if ``how`` is not recognized or ``cov`` is not a Cov
        Exception
            if ``cov`` is passed together with ``how="uniform"``
        """
        if par_file is not None:
            self.pst.parrep(par_file)

        how = how.lower().strip()
        assert how in ["gaussian","uniform"]

        if cov is None:
            cov = self.parcov
        else:
            assert isinstance(cov,Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform'," +\
                                " 'cov' arg cannot be passed")

        # both ensembles are rebuilt from the control file on every call
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

        # the same message is logged before and after each draw
        par_msg = "generating {0:d} parameter realizations".format(num_reals)
        self.log(par_msg)
        self.parensemble.draw(cov,num_reals=num_reals, how=how)
        if enforce_bounds:
            self.parensemble.enforce()
        self.log(par_msg)

        if not obs:
            return
        obs_msg = "generating {0:d} observation realizations".format(num_reals)
        self.log(obs_msg)
        self.obsensemble.draw(self.obscov,num_reals=num_reals)
        self.log(obs_msg)
コード例 #6
0
 def _load_obs_ensemble(self,filename):
     """Read a sweep output csv and turn it into an observation ensemble.

     Parameters
     ----------
     filename : str
         path of the csv file to read

     Returns
     -------
     failed_runs : index values of rows whose ``failed_flag`` is 1, or
         None when no row is flagged
     obs : ObservationEnsemble restricted to ``self.obscov.row_names``

     The lower-cased raw table is also kept on ``self.raw_sweep_out``.
     A missing file or NaNs in the resulting ensemble are reported
     through ``self.logger.lraise``.
     """
     if not os.path.exists(filename):
         self.logger.lraise("obsensemble file {0} does not exists".format(filename))

     raw = pd.read_csv(filename)
     raw.columns = [name.lower() for name in raw.columns]
     # save this for later to support restart
     self.raw_sweep_out = raw.copy()

     assert "input_run_id" in raw.columns,\
         "'input_run_id' col missing...need newer version of sweep"
     raw.index = raw.input_run_id

     failed_runs = None
     if 1 in raw.failed_flag.values:
         failed_runs = raw.loc[raw.failed_flag == 1].index.values
         failed_txt = ','.join([str(run) for run in failed_runs])
         self.logger.warn("{0} runs failed (indices: {1})".\
                          format(len(failed_runs),failed_txt))

     selected = raw.loc[:,self.obscov.row_names]
     ensemble = ObservationEnsemble.from_dataframe(df=selected,
                                                   pst=self.pst)
     if ensemble.isnull().values.any():
         self.logger.lraise("_calc_obs() error: NaNs in obsensemble")
     return failed_runs, ensemble
コード例 #7
0
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis

    Parameters
    ----------
    **kwargs : dict
        dictionary of keyword arguments.  See pyemu.LinearAnalysis for
        complete definitions

    Attributes
    ----------
    parensemble : pyemu.ParameterEnsemble
    obsensemble : pyemu.ObservationEnsemble

    Returns
    -------
    MonteCarlo : MonteCarlo

    Example
    -------
    ``>>>import pyemu``

    ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

    """
    def __init__(self, **kwargs):
        super(MonteCarlo, self).__init__(**kwargs)
        assert self.pst is not None, \
            "monte carlo requires a pest control file"
        # ensembles tied to the control file; draw() replaces both
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

    @property
    def num_reals(self):
        """ get the number of realizations in the parameter ensemble

        Returns
        -------
        num_real : int
            number of rows of MonteCarlo.parensemble

        """
        return self.parensemble.shape[0]

    def get_nsing(self, epsilon=1.0e-4):
        """ get the number of solution space dimensions given
            a ratio between the largest and smallest singular
            values

        Parameters
        ----------
        epsilon: float
            singular value ratio

        Returns
        -------
        nsing : int
            number of singular components above the epsilon ratio threshold

        Note
        -----
            If nsing equals the dimension of XTQX (every singular value
            is above the threshold), a warning is logged and None is
            returned

        """
        mx = self.xtqx.shape[0]
        # singular values are scaled by the largest one and sorted
        # ascending, so searchsorted gives the count below epsilon
        nsing = mx - np.searchsorted(
            np.sort((self.xtqx.s.x / self.xtqx.s.x.max())[:, 0]), epsilon)
        if nsing == mx:
            self.logger.warn("optimal nsing=npar")
            nsing = None
        return nsing

    def get_null_proj(self, nsing=None):
        """ get a null-space projection matrix of XTQX

        Parameters
        ----------
        nsing: int
            optional number of singular components to use
            If None, then nsing is determined from
            call to MonteCarlo.get_nsing()

        Returns
        -------
        v2_proj : pyemu.Matrix
            the null-space projection matrix (V2V2^T)

        Raises
        ------
        Exception
            if nsing is None and MonteCarlo.get_nsing() also returns None

        """
        if nsing is None:
            nsing = self.get_nsing()
        if nsing is None:
            raise Exception("nsing is None")
        print("using {0} singular components".format(nsing))
        # the identical message is logged before and after the operation
        self.log("forming null space projection matrix with " +\
                 "{0} of {1} singular components".format(nsing,self.jco.shape[1]))

        # V2 holds the right singular vectors beyond the first nsing
        v2_proj = (self.xtqx.v[:, nsing:] * self.xtqx.v[:, nsing:].T)
        self.log("forming null space projection matrix with " +\
                 "{0} of {1} singular components".format(nsing,self.jco.shape[1]))

        return v2_proj

    def draw(self,
             num_reals=1,
             par_file=None,
             obs=False,
             enforce_bounds=None,
             cov=None,
             how="gaussian"):
        """draw stochastic realizations of parameters and
           optionally observations, filling MonteCarlo.parensemble and
           optionally MonteCarlo.obsensemble.

        Parameters
        ----------
        num_reals : int
            number of realization to generate
        par_file : str
            parameter file to use as mean values. If None,
            use MonteCarlo.pst.parameter_data.parval1.
            Default is None
        obs : bool
            add a realization of measurement noise to observation values,
            forming MonteCarlo.obsensemble.  Default is False
        enforce_bounds : str
            enforce parameter bounds based on control file information.
            options are 'reset', 'drop' or None.  Default is None
        cov : pyemu.Cov
            covariance matrix used for the parameter draw.  If None,
            MonteCarlo.parcov is used.  Cannot be passed together with
            how="uniform".  Default is None
        how : str
            type of distribution to draw from. Must be in ["gaussian","uniform"]
            default is "gaussian".

        Raises
        ------
        AssertionError
            if how is not recognized or cov is not a Cov instance
        Exception
            if cov is passed together with how="uniform"

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

        ``>>>mc.draw(1000)``

        """
        if par_file is not None:
            self.pst.parrep(par_file)
        how = how.lower().strip()
        assert how in ["gaussian", "uniform"]

        if cov is not None:
            assert isinstance(cov, Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform'," +\
                                " 'cov' arg cannot be passed")
        else:
            cov = self.parcov

        # both ensembles are rebuilt on every call
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)
        self.log("generating {0:d} parameter realizations".format(num_reals))
        self.parensemble.draw(cov,
                              num_reals=num_reals,
                              how=how,
                              enforce_bounds=enforce_bounds)
        self.log("generating {0:d} parameter realizations".format(num_reals))
        if obs:
            self.log(
                "generating {0:d} observation realizations".format(num_reals))
            self.obsensemble.draw(self.obscov, num_reals=num_reals)
            self.log(
                "generating {0:d} observation realizations".format(num_reals))

    def project_parensemble(self,
                            par_file=None,
                            nsing=None,
                            inplace=True,
                            enforce_bounds='reset'):
        """ perform the null-space projection operations for null-space monte carlo

        Parameters
        ----------
        par_file: str
            an optional file of parameter values to use
        nsing: int
            number of singular values to use in forming the null subspace
            matrix.  If None, MonteCarlo.get_nsing() is used
        inplace: bool
            overwrite the existing parameter ensemble with the
            projected values
        enforce_bounds: str
            how to enforce parameter bounds.  can be None, 'reset', or 'drop'.
            Default is 'reset'.  NOTE(review): this argument is accepted
            but is not used by this method as written.

        Returns
        -------
        par_en : pyemu.ParameterEnsemble
            if inplace is False, otherwise None

        Note
        ----
        to use this method, the MonteCarlo instance must have been constructed
        with the ``jco`` argument.

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(jco="pest.jcb")``

        ``>>>mc.draw(1000)``

        ``>>>mc.project_parensemble(par_file="final.par",nsing=100)``

        """
        assert self.jco is not None,"MonteCarlo.project_parensemble() " +\
                                    "requires a jacobian attribute"
        if par_file is not None:
            assert os.path.exists(par_file),\
                "MonteCarlo.project_parensemble() error: par_file not found: " +\
                par_file
            self.parensemble.pst.parrep(par_file)

        # project the ensemble
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),
                                      inplace=inplace,
                                      log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self, prefix, existing_jco=None, noptmax=None):
        """ write parameter and optionally observation realizations
            to a series of pest control files

        Parameters
        ----------
        prefix: str
            pest control file prefix

        existing_jco: str
            filename of an existing jacobian matrix to add to the
            pest++ options in the control file.  This is useful for
            NSMC since this jco can be used to get the first set of
            parameter upgrades for free!  Needs to be the path the jco
            file as seen from the location where pest++ will be run

        noptmax: int
            value of NOPTMAX to set in new pest control files

        Note
        ----
        one file named ``<prefix><i>.pst`` is written per realization.
        observation values are only replaced when MonteCarlo.obsensemble
        has the same number of rows as MonteCarlo.parensemble.

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(jco="pest.jcb")``

        ``>>>mc.draw(1000, obs=True)``

        ``>>>mc.write_psts("mc_", existing_jco="pest.jcb", noptmax=1)``

        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,
                           obs_names=self.pst.obs_names)

        if noptmax is not None:
            pst.control_data.noptmax = noptmax

        if existing_jco is not None:
            pst.pestpp_options["BASE_JACOBIAN"] = existing_jco

        # set the indices
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        # parameter values are written untransformed
        if self.parensemble.istransformed:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        for i in range(self.num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,
                                   "parval1"] = par_en.iloc[i, :].T

            # add the obs noise realization if needed
            if self.obsensemble.shape[0] == self.num_reals:
                pst.observation_data.loc[self.obsensemble.columns,"obsval"] = \
                    self.obsensemble.iloc[i, :].T

            # write
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")
コード例 #8
0
class EnsembleSmoother():
    '''
    Prototype ensemble smoother built around a pest control file.

    ``initialize`` draws a parameter ensemble and an observation
    ensemble; ``update`` runs the parameter ensemble through ``sweep``
    and adds an upgrade to the parameter ensemble.
    '''

    def __init__(self, pst, parcov=None, obscov=None):
        '''
        Parameters
        ----------
        pst : Pst
            control file instance
        parcov : Cov
            parameter covariance.  If None it is built with
            ``Cov.from_parameter_data``
        obscov : Cov
            observation covariance.  If None it is built with
            ``Cov.from_observation_data``
        '''
        assert isinstance(pst, Pst)
        self.pst = pst
        if parcov is not None:
            assert isinstance(parcov, Cov)
        else:
            parcov = Cov.from_parameter_data(self.pst)
        if obscov is not None:
            assert isinstance(obscov, Cov)
        else:
            obscov = Cov.from_observation_data(pst)

        self.parcov = parcov
        self.obscov = obscov

        # update() refuses to run until initialize() has set this flag
        self.__initialized = False
        self.num_reals = 0
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0

    def initialize(self, num_reals):
        '''
        (re)initialize the process

        Draws ``num_reals`` parameter and observation realizations, keeps
        the first observation draw on ``obsensemble_0``, computes the
        parameter scaling matrix and the prior parameter differences and
        marks the instance as initialized.
        '''
        self.num_reals = int(num_reals)
        self.parensemble = ParameterEnsemble(self.pst)
        self.parensemble.draw(cov=self.parcov, num_reals=num_reals)

        self.obsensemble_0 = ObservationEnsemble(self.pst)
        self.obsensemble_0.draw(cov=self.obscov, num_reals=num_reals)
        self.obsensemble = self.obsensemble_0.copy()

        # only the diagonal of parcov is used for scaling
        if self.parcov.isdiagonal:
            self.half_parcov_diag = self.parcov.inv.sqrt
        else:
            self.half_parcov_diag = Cov(x=np.diag(self.parcov.x),
                                        names=self.parcov.col_names,
                                        isdiagonal=True).inv.sqrt

        self.delta_par_prior = self._calc_delta_par()

        self.__initialized = True

    def _calc_delta_par(self):
        '''
        calc the scaled parameter ensemble differences from the mean

        Returns ``half_parcov_diag * (ensemble - mean).T`` scaled by
        ``1 / sqrt(num_reals - 1)``.
        '''
        mean = np.array(self.parensemble.mean(axis=0))
        delta = self.parensemble.as_pyemu_matrix()
        # remove the ensemble mean from every realization (row)
        for i in range(self.num_reals):
            delta.x[i, :] -= mean
        delta = self.half_parcov_diag * delta.T
        return delta * (1.0 / np.sqrt(float(self.num_reals - 1.0)))

    def _calc_delta_obs(self):
        '''
        calc the scaled observation ensemble differences from the mean

        Returns ``obscov.inv.sqrt * (ensemble - mean).T`` scaled by
        ``1 / sqrt(num_reals - 1)``.
        '''
        mean = np.array(self.obsensemble.mean(axis=0))
        delta = self.obsensemble.as_pyemu_matrix()
        # remove the ensemble mean from every realization (row)
        for i in range(self.num_reals):
            delta.x[i, :] -= mean
        delta = self.obscov.inv.sqrt * delta.T
        return delta * (1.0 / np.sqrt(float(self.num_reals - 1.0)))

    def _calc_obs(self):
        '''
        propagate the ensemble forward by running ``sweep`` inside the
        ``smoother`` directory and loading ``sweep_out.csv`` into
        ``self.obsensemble``.

        The working directory that was current on entry is always
        restored, even if listing the directory or launching the run
        raises.
        '''
        self.parensemble.to_csv(os.path.join("smoother", "sweep_in.csv"))
        start_dir = os.getcwd()
        os.chdir("smoother")
        try:
            print(os.listdir('.'))
            os.system("sweep freyberg.pst")
        finally:
            # go back to where we started rather than to '..', which
            # could be a different place if "smoother" is a link
            os.chdir(start_dir)
        obs = ObservationEnsemble.from_csv(os.path.join(\
                "smoother",'sweep_out.csv'))
        obs.columns = [item.lower() for item in obs.columns]
        self.obsensemble = ObservationEnsemble.from_dataframe(
            df=obs.loc[:, self.obscov.row_names], pst=self.pst)
        # TODO: modify sweep to be interactive
        return

    @property
    def current_lambda(self):
        '''the lambda value used by the smoother; fixed at 10.0'''
        return 10.0

    def update(self):
        '''
        Run the ensemble forward and add one upgrade to the parameter
        ensemble.

        Raises
        ------
        Exception
            if ``initialize()`` has not been called
        NotImplementedError
            if ``self.iter_num`` is greater than zero (raised after the
            parameter ensemble has been modified)
        '''
        if not self.__initialized:
            raise Exception("must call initialize() before update()")
        self._calc_obs()
        delta_obs = self._calc_delta_obs()
        u, s, v = delta_obs.pseudo_inv_components()
        # difference between the simulated and the initially drawn
        # observation ensembles
        diff = self.obsensemble.as_pyemu_matrix(
        ) - self.obsensemble_0.as_pyemu_matrix()
        x1 = u.T * self.obscov.inv.sqrt * diff.T
        x1.autoalign = False
        x2 = (Cov.identity_like(s) + s**2).inv * x1
        x3 = v * s * x2
        upgrade_1 = (self.half_parcov_diag * self._calc_delta_par() *
                     x3).to_dataframe()
        upgrade_1.index.name = "parnme"
        print(upgrade_1)
        self.parensemble += upgrade_1.T
        print(self.parensemble)
        if self.iter_num > 0:
            raise NotImplementedError()

        print(upgrade_1.shape)
コード例 #9
0
ファイル: mc.py プロジェクト: aleaf/pyemu
    def draw(self, num_reals=1, par_file = None, obs=False,
             enforce_bounds=None, cov=None, how="gaussian"):
        """Fill MonteCarlo.parensemble, and optionally
        MonteCarlo.obsensemble, with stochastic realizations.

        Parameters
        ----------
        num_reals : int
            number of realizations to generate
        par_file : str
            parameter file passed to ``self.pst.parrep`` before drawing.
            Skipped when None.  Default is None
        obs : bool
            also build MonteCarlo.obsensemble from
            ``ObservationEnsemble.from_id_gaussian_draw``.
            Default is False
        enforce_bounds : str
            when not None, passed to ``self.parensemble.enforce`` after
            the parameter draw.  Default is None
        cov : Cov
            covariance for the gaussian parameter draw.  ``self.parcov``
            is used when None.  Must not be passed with how="uniform".
        how : str
            type of distribution to draw from. Must be in
            ["gaussian","uniform"]; case and surrounding whitespace are
            ignored.  Default is "gaussian".

        Raises
        ------
        AssertionError
            if ``how`` is not recognized or ``cov`` is not a Cov
        Exception
            if ``cov`` is passed together with how="uniform"

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

        ``>>>mc.draw(1000)``

        """
        if par_file is not None:
            self.pst.parrep(par_file)

        how = how.lower().strip()
        assert how in ["gaussian","uniform"]

        if cov is None:
            cov = self.parcov
        else:
            assert isinstance(cov,Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform'," +\
                                " 'cov' arg cannot be passed")

        # the same message is logged before and after each draw
        par_msg = "generating {0:d} parameter realizations".format(num_reals)
        self.log(par_msg)

        if how == "uniform":
            self.parensemble = ParameterEnsemble.from_uniform_draw(
                pst=self.pst,num_reals=num_reals)
        elif how == "gaussian":
            self.parensemble = ParameterEnsemble.from_gaussian_draw(
                pst=self.pst,cov=cov,
                num_reals=num_reals,
                use_homegrown=True)
        else:
            raise Exception("MonteCarlo.draw(): unrecognized 'how' arg: {0}".format(how))

        if enforce_bounds is not None:
            self.parensemble.enforce(enforce_bounds)
        self.log(par_msg)

        if not obs:
            return
        obs_msg = "generating {0:d} observation realizations".format(num_reals)
        self.log(obs_msg)
        self.obsensemble = ObservationEnsemble.from_id_gaussian_draw(
            pst=self.pst,num_reals=num_reals)
        self.log(obs_msg)
コード例 #10
0
ファイル: smoother.py プロジェクト: xuexianwu/pyemu
class EnsembleSmoother():
    def __init__(self,
                 pst,
                 parcov=None,
                 obscov=None,
                 num_slaves=0,
                 use_approx_prior=True,
                 submit_file=None,
                 verbose=False,
                 port=4004,
                 slave_dir="template"):
        '''
        Parameters
        ----------
        pst : Pst or str
            control file instance, or a filename that is loaded with
            ``Pst``
        parcov : Cov
            parameter covariance.  If None it is built with
            ``Cov.from_parameter_data``
        obscov : Cov
            observation covariance.  If None it is built with
            ``Cov.from_observation_data``
        num_slaves : int
            number of slaves; cast to int and stored on ``num_slaves``
        use_approx_prior : bool
            stored on ``use_approx_prior`` after a bool cast
        submit_file : str
            if not None, the file must exist
        verbose : passed to ``Logger``; any value other than False also
            turns on logger echo
        port : int
            stored on ``port`` after an int cast
        slave_dir : str
            must exist when no submit_file is given and num_slaves > 0
        '''
        self.logger = Logger(verbose)
        if verbose is not False:
            self.logger.echo = True
        self.num_slaves = int(num_slaves)
        if submit_file is not None:
            if not os.path.exists(submit_file):
                self.logger.lraise(
                    "submit_file {0} not found".format(submit_file))
        # compare the int-cast value so the check agrees with the number
        # of slaves that is actually stored on the instance
        elif self.num_slaves > 0:
            if not os.path.exists(slave_dir):
                self.logger.lraise(
                    "template dir {0} not found".format(slave_dir))

        self.slave_dir = slave_dir
        self.submit_file = submit_file
        self.port = int(port)
        self.use_approx_prior = bool(use_approx_prior)
        # filename suffixes; the placeholder is filled with an iteration number
        self.paren_prefix = ".parensemble.{0:04d}.csv"
        self.obsen_prefix = ".obsensemble.{0:04d}.csv"

        if isinstance(pst, str):
            pst = Pst(pst)
        assert isinstance(pst, Pst)
        self.pst = pst
        # sweep file names come from the pest++ options when present
        self.sweep_in_csv = pst.pestpp_options.get("sweep_parameter_csv_file",
                                                   "sweep_in.csv")
        self.sweep_out_csv = pst.pestpp_options.get("sweep_output_csv_file",
                                                    "sweep_out.csv")
        if parcov is not None:
            assert isinstance(parcov, Cov)
        else:
            parcov = Cov.from_parameter_data(self.pst)
        if obscov is not None:
            assert isinstance(obscov, Cov)
        else:
            obscov = Cov.from_observation_data(pst)

        self.parcov = parcov
        self.obscov = obscov

        self.__initialized = False
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0
        self.raw_sweep_out = None

    @property
    def current_phi(self):
        return pd.DataFrame(data={"phi":self._calc_phi_vec(self.obsensemble)},\
                            index=self.obsensemble.index)

    def initialize(self,
                   num_reals=1,
                   init_lambda=None,
                   enforce_bounds="reset",
                   parensemble=None,
                   obsensemble=None,
                   restart_obsensemble=None):
        '''
        (re)initialize the process

        Parameters
        ----------
        num_reals : int
            number of realizations to draw when no existing ensembles are
            given.  Also sets the number of per-realization columns in the
            phi report header.
        init_lambda : float
            initial lambda.  If None, it is derived from the initial mean
            phi and the number of observation ensemble columns.
        enforce_bounds : str
            stored on ``enforce_bounds`` and passed to the ``enforce`` call
            on a freshly drawn parameter ensemble
        parensemble : str or ParameterEnsemble
            existing parameter ensemble, or a csv filename to load one
            from.  Only used when ``obsensemble`` is also given.
        obsensemble : str or ObservationEnsemble
            existing observation ensemble, or a csv filename to load one
            from.  Only used when ``parensemble`` is also given.
        restart_obsensemble : str
            if not None, the simulated observation ensemble is loaded
            from this file instead of running the parameter ensemble

        Raises
        ------
        Exception
            if ``parensemble`` or ``obsensemble`` is neither a filename
            nor an ensemble instance
        '''
        # initialize the phi report csv
        self.enforce_bounds = enforce_bounds
        self.phi_csv = open(self.pst.filename + ".iobj.csv", 'w')
        self.phi_csv.write(
            "iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_csv.write(','.join(["{0:010d}".\
                                    format(i+1) for i in range(num_reals)]))
        self.phi_csv.write('\n')
        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

        if parensemble is not None and obsensemble is not None:
            self.logger.log("initializing with existing ensembles")
            if isinstance(parensemble, str):
                self.logger.log("loading parensemble from file")
                # check the parameter ensemble file itself (the original
                # tested the obsensemble path here)
                if not os.path.exists(parensemble):
                    self.logger.lraise("can not find parensemble file: {0}".\
                                       format(parensemble))
                df = pd.read_csv(parensemble, index_col=0)
                self.parensemble_0 = ParameterEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading parensemble from file")

            elif isinstance(parensemble, ParameterEnsemble):
                self.parensemble_0 = parensemble.copy()
            else:
                raise Exception("unrecognized arg type for parensemble, " +\
                                "should be filename or ParameterEnsemble" +\
                                ", not {0}".format(type(parensemble)))
            self.parensemble = self.parensemble_0.copy()
            if isinstance(obsensemble, str):
                self.logger.log("loading obsensemble from file")
                if not os.path.exists(obsensemble):
                    self.logger.lraise("can not find obsensemble file: {0}".\
                                       format(obsensemble))
                # only the columns in pst.nnz_obs_names are kept
                df = pd.read_csv(obsensemble,
                                 index_col=0).loc[:, self.pst.nnz_obs_names]
                self.obsensemble_0 = ObservationEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading obsensemble from file")

            elif isinstance(obsensemble, ObservationEnsemble):
                self.obsensemble_0 = obsensemble.copy()
            else:
                raise Exception("unrecognized arg type for obsensemble, " +\
                                "should be filename or ObservationEnsemble" +\
                                ", not {0}".format(type(obsensemble)))

            # both ensembles must hold the same number of realizations
            assert self.parensemble_0.shape[0] == self.obsensemble_0.shape[0]
            self.logger.log("initializing with existing ensembles")

        else:
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))
            self.logger.log("initializing parensemble")
            self.parensemble_0 = ParameterEnsemble(self.pst)
            self.parensemble_0.draw(cov=self.parcov, num_reals=num_reals)
            self.parensemble_0.enforce(enforce_bounds=enforce_bounds)
            self.logger.log("initializing parensemble")
            self.parensemble = self.parensemble_0.copy()
            self.parensemble_0.to_csv(self.pst.filename +\
                                      self.paren_prefix.format(0))
            # NOTE(review): third log call with this message in this
            # branch -- confirm it is intended
            self.logger.log("initializing parensemble")
            self.logger.log("initializing obsensemble")
            self.obsensemble_0 = ObservationEnsemble(self.pst)
            self.obsensemble_0.draw(cov=self.obscov, num_reals=num_reals)

            # save the base obsensemble
            self.obsensemble_0.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(-1))
            self.logger.log("initializing obsensemble")
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))

        self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

        if restart_obsensemble is not None:
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))
            failed_runs, self.obsensemble = self._load_obs_ensemble(
                restart_obsensemble)
            # the restart ensemble must line up with the base ensemble
            assert self.obsensemble.shape[0] == self.obsensemble_0.shape[0]
            assert list(self.obsensemble.columns) == list(
                self.obsensemble_0.columns)
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))

        else:
            # run the initial parameter ensemble
            self.logger.log("evaluating initial ensembles")
            failed_runs, self.obsensemble = self._calc_obs(self.parensemble)
            self.obsensemble.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(0))
            self.logger.log("evaluating initial ensembles")

        if failed_runs is not None:
            self.logger.warn("dropping failed realizations")
            self.parensemble = self.parensemble.drop(failed_runs)
            self.obsensemble = self.obsensemble.drop(failed_runs)
        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
        self._phi_report(self.current_phi_vec, 0.0)

        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        self.logger.statement("initial phi (mean, std): {0:15.6G},{1:15.6G}".\
                              format(self.last_best_mean,self.last_best_std))
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            # round down to the nearest power of ten
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # if using the approximate form of the algorithm, let
        # the parameter scaling matrix be the identity matrix
        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now, just not using it, maybe
        # better choices of lambda will tame it
        self.logger.statement("current lambda:{0:15.6g}".format(
            self.current_lambda))

        if self.use_approx_prior:
            self.logger.statement("using approximate parcov in solution")
            self.half_parcov_diag = 1.0
        else:
            #self.logger.statement("using full parcov in solution")
            # if self.parcov.isdiagonal:
            #     self.half_parcov_diag = self.parcov.sqrt.inv
            # else:
            #     self.half_parcov_diag = Cov(x=np.diag(self.parcov.x),
            #                                 names=self.parcov.col_names,
            #                                 isdiagonal=True).inv.sqrt
            self.half_parcov_diag = 1.0
        self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
        u, s, v = self.delta_par_prior.pseudo_inv_components()
        self.Am = u * s.inv

        self.__initialized = True

    def get_localizer(self):
        '''
        build an all-ones localizer matrix.

        Returns
        -------
            Matrix : rows are self.pst.nnz_obs_names, columns are
            self.pst.adj_par_names, every entry is 1.0
        '''
        row_labels = self.pst.nnz_obs_names
        col_labels = self.pst.adj_par_names
        ones = np.ones((len(row_labels), len(col_labels)))
        return Matrix(x=ones, row_names=row_labels, col_names=col_labels)

    def _calc_delta_par(self, parensemble):
        '''
        scaled deviations of a parameter ensemble from its mean.

        thin wrapper around _calc_delta() that supplies
        self.half_parcov_diag as the scaling matrix.
        '''
        par_scaling = self.half_parcov_diag
        return self._calc_delta(parensemble, par_scaling)

    def _calc_delta_obs(self, obsensemble):
        '''
        scaled deviations of an observation ensemble from its mean.

        only the ``nonzero`` part of the ensemble is used, and the scaling
        matrix is self.obscov.inv.sqrt; the work is done by _calc_delta().
        '''
        active_obs = obsensemble.nonzero
        obs_scaling = self.obscov.inv.sqrt
        return self._calc_delta(active_obs, obs_scaling)

    def _calc_delta(self, ensemble, scaling_matrix):
        '''
        scaled ensemble deviations from the ensemble mean.

        the column means are removed from every row (realization) of the
        ensemble, the result is transposed and pre-multiplied by
        scaling_matrix, then multiplied by 1/sqrt(N - 1) where N is the
        number of rows in the ensemble.
        '''
        num_reals = ensemble.shape[0]
        column_means = np.array(ensemble.mean(axis=0))
        deviations = ensemble.as_pyemu_matrix()
        # subtract the mean from all rows at once; the slice assignment
        # updates the underlying array in place, exactly like a row loop
        deviations.x[:, :] -= column_means
        scaled = scaling_matrix * deviations.T
        scaled *= (1.0 / np.sqrt(float(num_reals - 1.0)))
        return scaled

    def _calc_obs(self, parensemble):
        '''
        evaluate a parameter ensemble and load the resulting observations.

        stale sweep in/out files are removed (failures only produce a
        warning), the ensemble is checked for NaNs, the runs are dispatched
        to _calc_obs_local() or _calc_obs_condor() depending on
        self.submit_file, and the sweep output csv is read back with
        _load_obs_ensemble().  self.total_runs is increased by the number
        of rows read.

        Returns
        -------
            (failed_runs, obs) : exactly what _load_obs_ensemble() returns
        '''
        cleanup_tag = "removing existing sweep in/out files"
        self.logger.log(cleanup_tag)
        stale_files = (
            ("sweep_in_csv", "error removing existing sweep in file:{0}"),
            ("sweep_out_csv", "error removing existing sweep out file:{0}"),
        )
        for attr_name, warning in stale_files:
            # best effort: a missing file is reported but never fatal
            try:
                os.remove(getattr(self, attr_name))
            except Exception as err:
                self.logger.warn(warning.format(str(err)))
        self.logger.log(cleanup_tag)

        has_nans = parensemble.isnull().values.any()
        if has_nans:
            # dump the offending ensemble before raising
            parensemble.to_csv("_nan.csv")
            self.logger.lraise(
                "_calc_obs() error: NaNs in parensemble (written to '_nan.csv')"
            )

        # no submit file means a local sweep run, otherwise use htcondor
        run_ensemble = (self._calc_obs_local if self.submit_file is None
                        else self._calc_obs_condor)
        run_ensemble(parensemble)

        read_tag = "reading sweep out csv {0}".format(self.sweep_out_csv)
        self.logger.log(read_tag)
        failed_runs, obs = self._load_obs_ensemble(self.sweep_out_csv)
        self.logger.log(read_tag)

        self.total_runs += obs.shape[0]
        self.logger.statement("total runs:{0}".format(self.total_runs))
        return failed_runs, obs

    def _load_obs_ensemble(self, filename):
        '''
        read a sweep output csv into an ObservationEnsemble.

        column names are lower-cased, an unmodified copy of the table is
        kept in self.raw_sweep_out, and the table is indexed by its
        'input_run_id' column.  only the columns named in
        self.obscov.row_names are passed on to the ensemble.

        Returns
        -------
            (failed_runs, obs) : failed_runs is None when no row has
            failed_flag == 1, otherwise the index values of those rows;
            obs is the ObservationEnsemble

        Raises
        ------
            AssertionError if the 'input_run_id' column is missing;
            self.logger.lraise() is called if the file does not exist or
            the ensemble contains NaNs
        '''
        if not os.path.exists(filename):
            self.logger.lraise(
                "obsensemble file {0} does not exists".format(filename))
        sweep_df = pd.read_csv(filename)
        sweep_df.columns = [name.lower() for name in sweep_df.columns]
        # keep the untouched table around to support restart
        self.raw_sweep_out = sweep_df.copy()
        assert "input_run_id" in sweep_df.columns,\
            "'input_run_id' col missing...need newer version of sweep"
        sweep_df.index = sweep_df.input_run_id

        if 1 in sweep_df.failed_flag.values:
            failed_runs = sweep_df.loc[sweep_df.failed_flag == 1].index.values
            failed_str = ','.join([str(run_id) for run_id in failed_runs])
            self.logger.warn("{0} runs failed (indices: {1})".format(
                len(failed_runs), failed_str))
        else:
            failed_runs = None

        obs_columns = self.obscov.row_names
        obsensemble = ObservationEnsemble.from_dataframe(
            df=sweep_df.loc[:, obs_columns], pst=self.pst)
        if obsensemble.isnull().values.any():
            self.logger.lraise("_calc_obs() error: NaNs in obsensemble")
        return failed_runs, obsensemble

    def _get_master_thread(self):
        '''
        start the sweep master in a background thread.

        the thread runs "sweep <pst> /h :<port>" through pyemu.helpers.run(),
        redirecting stdout/stderr to files in the current directory, and
        afterwards reports any stderr content as a warning.  the calling
        thread sleeps two seconds after starting it.

        Returns
        -------
            threading.Thread : the already started thread
        '''
        stdout_file = "_master_stdout.dat"
        stderr_file = "_master_stderr.dat"

        def master():
            try:
                pyemu.helpers.run(
                    "sweep {0} /h :{1} 1>{2} 2>{3}".format(
                        self.pst.filename, self.port,
                        stdout_file, stderr_file))
            except Exception as err:
                self.logger.lraise(
                    "error starting condor master: {0}".format(str(err)))
            with open(stderr_file, 'r') as f:
                stderr_lines = f.readlines()
            if len(stderr_lines) > 0:
                stripped = [text.strip() for text in stderr_lines]
                self.logger.warn(
                    "master stderr lines: {0}".format(','.join(stripped)))

        worker = threading.Thread(target=master)
        worker.start()
        # pause before handing the thread back to the caller
        time.sleep(2.0)
        return worker

    def _calc_obs_condor(self, parensemble):
        '''
        propagate the ensemble forward using sweep with htcondor.

        the ensemble is written to self.sweep_in_csv, the sweep master is
        started in a background thread, self.submit_file is handed to
        condor_submit, and the cluster number is parsed from the
        condor_submit stdout.  once the master thread exits, condor_rm is
        called on that cluster.

        Parameters
        ----------
            parensemble : the parameter ensemble to evaluate; it must
            provide ``shape`` and ``to_csv()``

        Note
        ----
            self.logger.lraise() is called if no line of the condor_submit
            output contains "submitted to cluster"
        '''
        self.logger.log("evaluating ensemble of size {0} with htcondor".\
                        format(parensemble.shape[0]))

        parensemble.to_csv(self.sweep_in_csv)
        master_thread = self._get_master_thread()
        condor_temp_file = "_condor_submit_stdout.dat"
        condor_err_file = "_condor_submit_stderr.dat"
        self.logger.log("calling condor_submit with submit file {0}".format(
            self.submit_file))
        # note: os.system() reports command failure through its return
        # value (not checked here), so this handler only sees errors in
        # launching the call itself
        try:
            os.system("condor_submit {0} 1>{1} 2>{2}".\
                      format(self.submit_file,condor_temp_file,condor_err_file))
        except Exception as e:
            self.logger.lraise("error in condor_submit: {0}".format(str(e)))
        self.logger.log("calling condor_submit with submit file {0}".format(
            self.submit_file))
        time.sleep(
            2.0)  #some time for condor to submit the job and echo to stdout
        condor_submit_string = "submitted to cluster"
        with open(condor_temp_file, 'r') as f:
            lines = f.readlines()
        self.logger.statement("condor_submit stdout: {0}".\
                              format(','.join([l.strip() for l in lines])))
        with open(condor_err_file, 'r') as f:
            err_lines = f.readlines()
        if len(err_lines) > 0:
            self.logger.warn("stderr from condor_submit:{0}".\
                             format([l.strip() for l in err_lines]))
        cluster_number = None
        for line in lines:
            lowered = line.lower()
            if condor_submit_string in lowered:
                # split the same lower-cased text that was matched; splitting
                # the raw line would not split at all when the case differs
                # and float() would then choke on the whole line
                cluster_number = int(
                    float(lowered.split(condor_submit_string)[-1]))
        if cluster_number is None:
            self.logger.lraise("couldn't find cluster number...")
        self.logger.statement("condor cluster: {0}".format(cluster_number))
        master_thread.join()
        self.logger.statement("condor master thread exited")
        self.logger.log(
            "calling condor_rm on cluster {0}".format(cluster_number))
        os.system("condor_rm cluster {0}".format(cluster_number))
        self.logger.log(
            "calling condor_rm on cluster {0}".format(cluster_number))
        self.logger.log("evaluating ensemble of size {0} with htcondor".\
                        format(parensemble.shape[0]))

    def _calc_obs_local(self, parensemble):
        '''
        propagate the ensemble forward on this machine using sweep.

        the ensemble is written to self.sweep_in_csv.  with
        self.num_slaves > 0 a sweep master thread is started and
        pyemu.utils.start_slaves() launches the workers; otherwise sweep
        is run directly through os.system().
        '''
        run_tag = "evaluating ensemble of size {0} locally with sweep".format(
            parensemble.shape[0])
        self.logger.log(run_tag)
        parensemble.to_csv(self.sweep_in_csv)

        if self.num_slaves > 0:
            # master first, then the workers that connect to it
            master_thread = self._get_master_thread()
            pyemu.utils.start_slaves(
                self.slave_dir, "sweep", self.pst.filename, self.num_slaves,
                slave_root='..', port=self.port)
            master_thread.join()
        else:
            sweep_cmd = "sweep {0}".format(self.pst.filename)
            os.system(sweep_cmd)

        self.logger.log(run_tag)

    def _calc_phi_vec(self, obsensemble):
        '''
        phi (sum of squared weighted residuals) for every realization.

        residuals come from _get_residual_matrix(); the weights are the
        diagonal of self.obscov_inv_sqrt restricted to the residual
        columns.

        Returns
        -------
            numpy.ndarray : one phi value per row of the residual matrix
        '''
        residuals = self._get_residual_matrix(obsensemble)
        weight_matrix = self.obscov_inv_sqrt.get(
            row_names=residuals.col_names, col_names=residuals.col_names)
        weights = np.diagonal(weight_matrix.x)
        phi_values = [((residuals.x[irow, :] * weights)**2).sum()
                      for irow in range(residuals.shape[0])]
        return np.array(phi_values)

    def _phi_report(self, phi_vec, cur_lam):
        '''
        append one comma-separated record for phi_vec to self.phi_csv.

        the record holds, in order: self.iter_num, self.total_runs,
        cur_lam, then the min, max, mean, median and std of phi_vec,
        followed by every individual phi value.  the file is flushed
        after the record is written.

        Parameters
        ----------
            phi_vec : numpy.ndarray
                phi value of each realization
            cur_lam : float
                the lambda value the phi values belong to

        Note
        ----
            previously the std was passed to format() without a matching
            placeholder and no comma separated the summary from the
            per-realization values.
            NOTE(review): assumes the header written when phi_csv is
            opened has a std column - confirm.
        '''
        summary = (self.iter_num, self.total_runs, cur_lam, phi_vec.min(),
                   phi_vec.max(), phi_vec.mean(), np.median(phi_vec),
                   phi_vec.std())
        fields = ["{0}".format(item) for item in summary]
        fields.extend("{0:20.8}".format(phi) for phi in phi_vec)
        # a single join keeps every value, summary or not, in its own field
        self.phi_csv.write(",".join(fields))
        self.phi_csv.write("\n")
        self.phi_csv.flush()

    def _get_residual_matrix(self, obsensemble):
        '''
        difference between an observation ensemble and self.obs0_matrix.

        the ``nonzero`` part of obsensemble is converted to a matrix and
        the entries of self.obs0_matrix with the same row and column names
        are subtracted from it.
        '''
        current = obsensemble.nonzero.as_pyemu_matrix()
        reference = self.obs0_matrix.get(col_names=current.col_names,
                                         row_names=current.row_names)
        return current - reference

    def update(self,
               lambda_mults=[1.0],
               localizer=None,
               run_subset=None,
               use_approx=True):
        '''
        advance the smoother by one iteration.

        for every multiplier in lambda_mults a candidate parameter ensemble
        is built from the current one, all candidates are evaluated in a
        single call to _calc_obs(), and the candidate with the lowest mean
        phi is accepted if that mean is below 1.1 times the last accepted
        mean.  if no candidate is accepted, self.current_lambda is
        increased instead.  the current parameter and observation ensembles
        are written to csv at the end of the iteration.

        Parameters
        ----------
            lambda_mults : list
                multipliers applied to self.current_lambda; one candidate
                ensemble is tested per entry.  the list is only read,
                never modified
            localizer : pyemu.Matrix
                optional localizing matrix handed to
                upgrade_1.hadamard_product().  None skips localization
            run_subset : int
                number of realizations (the first run_subset rows of the
                parameter ensemble) evaluated per multiplier.  when a
                candidate is accepted, the complete ensemble of the best
                multiplier is then evaluated.  ignored (with a warning) if
                it is not smaller than the number of active realizations
            use_approx : bool
                if False, the parameter-based upgrade_2 term is added from
                the second iteration on.  if True only upgrade_1 is used

        Note
        ----
            self.logger.lraise() is called if initialize() has not been
            called, if fewer than 2 realizations are active, if an upgrade
            matrix contains NaNs, if every run fails, or if the best mean
            or std is NaN
        '''
        # check this before anything touches the ensembles or bumps
        # iter_num, so a missing initialize() gives the intended message
        if not self.__initialized:
            self.logger.lraise("must call initialize() before update()")

        if run_subset is not None:
            if run_subset >= self.obsensemble.shape[0]:
                self.logger.warn("run_subset ({0}) >= num of active reals ({1})...ignoring ".\
                                 format(run_subset,self.obsensemble.shape[0]))
                run_subset = None

        self.iter_num += 1
        self.logger.log("iteration {0}".format(self.iter_num))
        self.logger.statement("{0} active realizations".format(
            self.obsensemble.shape[0]))
        if self.obsensemble.shape[0] < 2:
            self.logger.lraise(
                "at least active 2 realizations (really like 300) are needed to update"
            )

        self.logger.log("calculate scaled delta obs")
        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        self.logger.log("calculate scaled delta obs")
        self.logger.log("calculate scaled delta par")
        scaled_delta_par = self._calc_delta_par(self.parensemble)
        self.logger.log("calculate scaled delta par")

        self.logger.log("calculate pseudo inv comps")
        u, s, v = scaled_delta_obs.pseudo_inv_components()
        self.logger.log("calculate pseudo inv comps")

        self.logger.log("calculate obs diff matrix")
        obs_diff = self.obscov_inv_sqrt * self._get_residual_matrix(
            self.obsensemble).T
        self.logger.log("calculate obs diff matrix")

        # here is the math part...calculate upgrade matrices
        paren_lam = []
        lam_vals = []
        for cur_lam_mult in lambda_mults:

            parensemble_cur_lam = self.parensemble.copy()

            cur_lam = self.current_lambda * cur_lam_mult
            lam_vals.append(cur_lam)
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))
            # inverse of ((lambda + 1) * I + s^2)
            scaled_ident = Cov.identity_like(s) * (cur_lam + 1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            self.logger.log("building upgrade_1 matrix")
            upgrade_1 = -1.0 * (self.half_parcov_diag * scaled_delta_par) *\
                        v * s * scaled_ident * u.T
            self.logger.log("building upgrade_1 matrix")

            # apply localization
            if localizer is not None:
                self.logger.log("applying localization")
                # NOTE(review): the return value is discarded; if
                # hadamard_product() returns a new matrix instead of
                # modifying upgrade_1 in place, the localizer has no
                # effect - confirm against the Matrix implementation
                upgrade_1.hadamard_product(localizer)
                self.logger.log("applying localization")

            # apply residual information
            self.logger.log("applying residuals")
            upgrade_1 *= obs_diff
            self.logger.log("applying residuals")

            self.logger.log("processing upgrade_1")
            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            upgrade_1 = upgrade_1.T
            upgrade_1.index = [int(i) for i in upgrade_1.index]
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            if upgrade_1.isnull().values.any():
                self.logger.lraise("NaNs in upgrade_1")
            self.logger.log("processing upgrade_1")

            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not use_approx and self.iter_num > 1:
                self.logger.log("building upgrade_2 matrix")
                par_diff = (self.parensemble - self.parensemble_0.loc[self.parensemble.index,:]).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.half_parcov_diag * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                upgrade_2 = -1.0 * (self.half_parcov_diag * scaled_delta_par *
                                    x7).to_dataframe()
                upgrade_2.index.name = "parnme"
                upgrade_2 = upgrade_2.T
                upgrade_2.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                upgrade_2.index = [int(i) for i in upgrade_2.index]

                if upgrade_2.isnull().values.any():
                    self.logger.lraise("NaNs in upgrade_2")

                parensemble_cur_lam += upgrade_2
                self.logger.log("building upgrade_2 matrix")
            parensemble_cur_lam.enforce(self.enforce_bounds)

            # this is for testing failed runs on upgrade testing
            # works with the 10par_xsec smoother test
            #parensemble_cur_lam.iloc[:,:] = -1000000.0

            paren_lam.append(pd.DataFrame(parensemble_cur_lam.loc[:, :]))
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))

        # subset if needed
        # and combine lambda par ensembles into one par ensemble for evaluation
        if run_subset is not None and run_subset < self.parensemble.shape[0]:
            # the subset is simply the first run_subset realizations
            subset_idx = self.parensemble.iloc[:run_subset, :].index.values
            self.logger.statement("subset idxs: " +
                                  ','.join([str(s) for s in subset_idx]))
            paren_lam_subset = [pe.loc[subset_idx, :] for pe in paren_lam]
            paren_combine = pd.concat(paren_lam_subset, ignore_index=True)
            paren_lam_subset = None
        else:
            subset_idx = self.parensemble.index.values
            paren_combine = pd.concat(paren_lam, ignore_index=True)


        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        failed_runs, obsen_combine = self._calc_obs(paren_combine)
        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        paren_combine = None

        if failed_runs is not None and len(
                failed_runs) == obsen_combine.shape[0]:
            self.logger.lraise("all runs failed - cannot continue")

        # unpack lambda obs ensembles from combined obs ensemble
        nrun_per_lam = self.obsensemble.shape[0]
        if run_subset is not None:
            nrun_per_lam = run_subset
        obsen_lam = []
        for i in range(len(lam_vals)):
            sidx = i * nrun_per_lam
            eidx = sidx + nrun_per_lam
            oe = ObservationEnsemble.from_dataframe(
                df=obsen_combine.iloc[sidx:eidx, :].copy(), pst=self.pst)
            oe.index = subset_idx
            # check for failed runs in this set - drop failed runs from obs ensembles
            if failed_runs is not None:
                # positions of the failed runs relative to this lambda's block
                failed_runs_this = np.array(
                    [f for f in failed_runs if f >= sidx and f < eidx]) - sidx
                if len(failed_runs_this) > 0:
                    if len(failed_runs_this) == oe.shape[0]:
                        self.logger.warn(
                            "all runs failed for lambda {0}".format(
                                lam_vals[i]))
                    else:
                        self.logger.warn("{0} run failed for lambda {1}".\
                                         format(len(failed_runs_this),lam_vals[i]))
                    # np.nan rather than the np.NaN alias, which no longer
                    # exists in NumPy 2.0
                    oe.iloc[failed_runs_this, :] = np.nan
                    oe = oe.dropna()
            obsen_lam.append(oe)
        obsen_combine = None

        # here is where we need to select out the "best" lambda par and obs
        # ensembles
        self.logger.statement("\n**************************")
        self.logger.statement(str(datetime.now()))
        self.logger.statement("total runs:{0}".format(self.total_runs))
        self.logger.statement("iteration: {0}".format(self.iter_num))
        self.logger.statement("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                              format(self.current_lambda,
                         self.last_best_mean,self.last_best_std))
        phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        mean_std = [(pv.mean(), pv.std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i, (phi_mean, phi_std) in enumerate(mean_std):
            self.logger.statement(" tested lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                                 format(self.current_lambda * lambda_mults[i],phi_mean,phi_std))
            if phi_mean < best_mean:
                update_pars = True
                best_mean = phi_mean
                best_i = i
                if phi_std < best_std:
                    update_lambda = True
                    best_std = phi_std
        if np.isnan(best_mean):
            self.logger.lraise("best mean = NaN")
        if np.isnan(best_std):
            self.logger.lraise("best std = NaN")

        if not update_pars:
            self.current_lambda *= max(lambda_mults) * 10.0
            self.current_lambda = min(self.current_lambda, 100000)
            self.logger.statement("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))
        else:
            self.parensemble = ParameterEnsemble.from_dataframe(
                df=paren_lam[best_i], pst=self.pst)
            if run_subset is not None:
                # only a subset was tested, so evaluate the complete
                # ensemble of the accepted lambda
                failed_runs, self.obsensemble = self._calc_obs(
                    self.parensemble)
                if failed_runs is not None:
                    self.logger.warn("dropping failed realizations")
                    self.parensemble = self.parensemble.drop(failed_runs)
                    self.obsensemble = self.obsensemble.drop(failed_runs)
                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
                self._phi_report(self.current_phi_vec,
                                 self.current_lambda * lambda_mults[best_i])
                best_mean = self.current_phi_vec.mean()
                best_std = self.current_phi_vec.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                # reindex parensemble in case failed runs
                self.parensemble = ParameterEnsemble.from_dataframe(
                    df=self.parensemble.loc[self.obsensemble.index],
                    pst=self.pst)
                self._phi_report(phi_vecs[best_i],
                                 self.current_lambda * lambda_mults[best_i])
                self.current_phi_vec = phi_vecs[best_i]

            self.logger.statement("   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda, 0.001)
            self.logger.statement("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))

        self.logger.statement("**************************\n")
        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))
        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
        if self.raw_sweep_out is not None:
            self.raw_sweep_out.to_csv(self.pst.filename+"_raw{0}".\
                                        format(self.iter_num))
        self.logger.log("iteration {0}".format(self.iter_num))
コード例 #11
0
ファイル: smoother.py プロジェクト: jroth-usgs/pyemu
class EnsembleSmoother():
    '''
    prototype ensemble smoother built around a control file and prior
    parameter / observation covariance matrices.

    initialize() draws the ensembles, update() runs the parameter ensemble
    through sweep and applies one upgrade to it.
    '''

    def __init__(self,pst,parcov=None,obscov=None):
        '''
        Parameters
        ----------
            pst : Pst
                the control file instance
            parcov : Cov
                parameter covariance matrix.  if None, it is built with
                Cov.from_parameter_data(pst)
            obscov : Cov
                observation covariance matrix.  if None, it is built with
                Cov.from_observation_data(pst)
        '''
        assert isinstance(pst,Pst)
        self.pst = pst
        if parcov is not None:
            assert isinstance(parcov,Cov)
        else:
            parcov = Cov.from_parameter_data(self.pst)
        if obscov is not None:
            assert isinstance(obscov,Cov)
        else:
            obscov = Cov.from_observation_data(pst)

        self.parcov = parcov
        self.obscov = obscov

        self.__initialized = False
        self.num_reals = 0
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0


    def initialize(self,num_reals):
        '''
        (re)initialize the process

        draws num_reals realizations for the parameter and observation
        ensembles, sets up the diagonal parameter scaling matrix and
        stores the scaled prior parameter deviations.
        '''
        self.num_reals = int(num_reals)
        self.parensemble = ParameterEnsemble(self.pst)
        self.parensemble.draw(cov=self.parcov,num_reals=num_reals)

        self.obsensemble_0 = ObservationEnsemble(self.pst)
        self.obsensemble_0.draw(cov=self.obscov,num_reals=num_reals)
        self.obsensemble = self.obsensemble_0.copy()

        # the scaling matrix only uses the diagonal of parcov
        if self.parcov.isdiagonal:
            self.half_parcov_diag = self.parcov.inv.sqrt
        else:
            self.half_parcov_diag = Cov(x=np.diag(self.parcov.x),
                                        names=self.parcov.col_names,
                                        isdiagonal=True).inv.sqrt

        self.delta_par_prior = self._calc_delta_par()

        self.__initialized = True

    def _calc_delta_par(self):
        '''
        calc the scaled parameter ensemble differences from the mean
        '''
        mean = np.array(self.parensemble.mean(axis=0))
        delta = self.parensemble.as_pyemu_matrix()
        for i in range(self.num_reals):
            delta.x[i,:] -= mean
        delta = self.half_parcov_diag * delta.T
        return delta * (1.0 / np.sqrt(float(self.num_reals - 1.0)))

    def _calc_delta_obs(self):
        '''
        calc the scaled observation ensemble differences from the mean
        '''

        mean = np.array(self.obsensemble.mean(axis=0))
        delta = self.obsensemble.as_pyemu_matrix()
        for i in range(self.num_reals):
            delta.x[i,:] -= mean
        delta = self.obscov.inv.sqrt * delta.T
        return delta * (1.0 / np.sqrt(float(self.num_reals - 1.0)))


    def _calc_obs(self):
        '''
        propagate the ensemble forward...

        writes the parameter ensemble to smoother/sweep_in.csv, runs
        "sweep freyberg.pst" inside the "smoother" directory and loads
        smoother/sweep_out.csv into self.obsensemble.  the directory and
        control file name are hard-coded.
        '''
        self.parensemble.to_csv(os.path.join("smoother","sweep_in.csv"))
        start_dir = os.getcwd()
        os.chdir("smoother")
        try:
            print(os.listdir('.'))
            os.system("sweep freyberg.pst")
        finally:
            # always return to the starting directory, even if the run is
            # interrupted, so the relative paths used below stay valid
            os.chdir(start_dir)
        obs = ObservationEnsemble.from_csv(os.path.join(\
                "smoother",'sweep_out.csv'))
        obs.columns = [item.lower() for item in obs.columns]
        self.obsensemble = ObservationEnsemble.from_dataframe(df=obs.loc[:,self.obscov.row_names],pst=self.pst)
        #todo: modifiy sweep to be interactive...
        return

    @property
    def current_lambda(self):
        '''placeholder lambda value; update() does not read it'''
        return 10.0

    def update(self):
        '''
        evaluate the current parameter ensemble and apply one upgrade to it.

        Raises
        ------
            Exception if initialize() has not been called
            NotImplementedError if self.iter_num > 0
        '''
        if not self.__initialized:
            raise Exception("must call initialize() before update()")
        self._calc_obs()
        delta_obs = self._calc_delta_obs()
        u,s,v = delta_obs.pseudo_inv_components()
        diff = self.obsensemble.as_pyemu_matrix() - self.obsensemble_0.as_pyemu_matrix()
        x1 = u.T * self.obscov.inv.sqrt * diff.T
        x1.autoalign = False
        x2 = (Cov.identity_like(s) + s**2).inv * x1
        x3 = v * s * x2
        upgrade_1 = (self.half_parcov_diag * self._calc_delta_par() * x3).to_dataframe()
        upgrade_1.index.name = "parnme"
        print(upgrade_1)
        self.parensemble += upgrade_1.T
        print(self.parensemble)
        # NOTE(review): nothing in this class increments iter_num, so this
        # guard cannot trigger as written - confirm the intent
        if self.iter_num > 0:
            raise NotImplementedError()

        print(upgrade_1.shape)
コード例 #12
0
ファイル: smoother.py プロジェクト: wkitlasten/pyemu
    def update(self,
               lambda_mults=[1.0],
               localizer=None,
               run_subset=None,
               use_approx=True,
               calc_only=False):
        """update the iES one GLM cycle

        Parameters
        ----------
            lambda_mults : list
                a list of lambda multipliers to test.  Each lambda mult value will require
                evaluating (a subset of) the parameter ensemble.
            localizer : pyemu.Matrix
                a jacobian localizing matrix
            run_subset : int
                the number of realizations to test for each lambda_mult value.  For example,
                if run_subset = 30 and num_reals=100, the first 30 realizations will be run (in
                parallel) for each lambda_mult value.  Then the best lambda_mult is selected and the
                remaining 70 realizations for that lambda_mult value are run (in parallel).
            use_approx : bool
                 a flag to use the MLE or MAP upgrade solution.  True indicates use MLE solution
            calc_only : bool
                a flag to calculate the upgrade matrix only (not run the ensemble). This is mostly for
                debugging and testing on travis. Default is False

        Example
        -------

        ``>>>import pyemu``

        ``>>>es = pyemu.EnsembleSmoother(pst="pest.pst")``

        ``>>>es.initialize(num_reals=100)``

        ``>>>es.update(lambda_mults=[0.1,1.0,10.0],run_subset=30)``

         """

        if run_subset is not None:
            if run_subset >= self.obsensemble.shape[0]:
                self.logger.warn("run_subset ({0}) >= num of active reals ({1})...ignoring ".\
                                 format(run_subset,self.obsensemble.shape[0]))
                run_subset = None

        self.iter_num += 1
        self.logger.log("iteration {0}".format(self.iter_num))
        self.logger.statement("{0} active realizations".format(
            self.obsensemble.shape[0]))
        if self.obsensemble.shape[0] < 2:
            self.logger.lraise(
                "at least active 2 realizations (really like 300) are needed to update"
            )
        if not self.__initialized:
            #raise Exception("must call initialize() before update()")
            self.logger.lraise("must call initialize() before update()")

        self.logger.log("calculate scaled delta obs")
        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        self.logger.log("calculate scaled delta obs")
        self.logger.log("calculate scaled delta par")
        scaled_delta_par = self._calc_delta_par(self.parensemble)
        self.logger.log("calculate scaled delta par")

        self.logger.log("calculate pseudo inv comps")
        u, s, v = scaled_delta_obs.pseudo_inv_components()
        self.logger.log("calculate pseudo inv comps")

        self.logger.log("calculate obs diff matrix")
        obs_diff = self.obscov_inv_sqrt * self._get_residual_matrix(
            self.obsensemble).T
        self.logger.log("calculate obs diff matrix")

        # here is the math part...calculate upgrade matrices
        mean_lam, std_lam, paren_lam, obsen_lam = [], [], [], []
        lam_vals = []
        for ilam, cur_lam_mult in enumerate(lambda_mults):

            parensemble_cur_lam = self.parensemble.copy()
            #print(parensemble_cur_lam.isnull().values.any())

            cur_lam = self.current_lambda * cur_lam_mult
            lam_vals.append(cur_lam)
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))
            scaled_ident = Cov.identity_like(s) * (cur_lam + 1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            self.logger.log("building upgrade_1 matrix")
            upgrade_1 = -1.0 * (self.half_parcov_diag * scaled_delta_par) *\
                        v * s * scaled_ident * u.T
            self.logger.log("building upgrade_1 matrix")

            # apply localization
            if localizer is not None:
                self.logger.log("applying localization")
                upgrade_1.hadamard_product(localizer)
                self.logger.log("applying localization")

            # apply residual information
            self.logger.log("applying residuals")
            upgrade_1 *= obs_diff
            self.logger.log("applying residuals")

            self.logger.log("processing upgrade_1")
            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            upgrade_1 = upgrade_1.T
            upgrade_1.index = [int(i) for i in upgrade_1.index]
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            if upgrade_1.isnull().values.any():
                self.logger.lraise("NaNs in upgrade_1")
            self.logger.log("processing upgrade_1")

            #print(upgrade_1.isnull().values.any())
            #print(parensemble_cur_lam.index)
            #print(upgrade_1.index)
            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not use_approx and self.iter_num > 1:
                self.logger.log("building upgrade_2 matrix")
                par_diff = (self.parensemble - self.parensemble_0.loc[self.parensemble.index,:]).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.half_parcov_diag * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                upgrade_2 = -1.0 * (self.half_parcov_diag * scaled_delta_par *
                                    x7).to_dataframe()
                upgrade_2.index.name = "parnme"
                upgrade_2 = upgrade_2.T
                upgrade_2.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                upgrade_2.index = [int(i) for i in upgrade_2.index]

                if upgrade_2.isnull().values.any():
                    self.logger.lraise("NaNs in upgrade_2")

                parensemble_cur_lam += upgrade_2
                self.logger.log("building upgrade_2 matrix")
            parensemble_cur_lam.enforce(self.enforce_bounds)

            # this is for testing failed runs on upgrade testing
            # works with the 10par_xsec smoother test
            #parensemble_cur_lam.iloc[:,:] = -1000000.0

            paren_lam.append(pd.DataFrame(parensemble_cur_lam.loc[:, :]))
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))

        if calc_only:
            return

        # subset if needed
        # and combine lambda par ensembles into one par ensemble for evaluation
        if run_subset is not None and run_subset < self.parensemble.shape[0]:
            #subset_idx = ["{0:d}".format(i) for i in np.random.randint(0,self.parensemble.shape[0]-1,run_subset)]
            subset_idx = self.parensemble.iloc[:run_subset, :].index.values
            self.logger.statement("subset idxs: " +
                                  ','.join([str(s) for s in subset_idx]))
            paren_lam_subset = [pe.loc[subset_idx, :] for pe in paren_lam]
            paren_combine = pd.concat(paren_lam_subset, ignore_index=True)
            paren_lam_subset = None
        else:
            subset_idx = self.parensemble.index.values
            paren_combine = pd.concat(paren_lam, ignore_index=True)


        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        failed_runs, obsen_combine = self._calc_obs(paren_combine)
        #if failed_runs is not None:
        #    obsen_combine.loc[failed_runs,:] = np.NaN
        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        paren_combine = None

        if failed_runs is not None and len(
                failed_runs) == obsen_combine.shape[0]:
            self.logger.lraise("all runs failed - cannot continue")

        # unpack lambda obs ensembles from combined obs ensemble
        nrun_per_lam = self.obsensemble.shape[0]
        if run_subset is not None:
            nrun_per_lam = run_subset
        obsen_lam = []
        for i in range(len(lam_vals)):
            sidx = i * nrun_per_lam
            eidx = sidx + nrun_per_lam
            oe = ObservationEnsemble.from_dataframe(
                df=obsen_combine.iloc[sidx:eidx, :].copy(), pst=self.pst)
            oe.index = subset_idx
            # check for failed runs in this set - drop failed runs from obs ensembles
            if failed_runs is not None:
                failed_runs_this = np.array(
                    [f for f in failed_runs if f >= sidx and f < eidx]) - sidx
                if len(failed_runs_this) > 0:
                    if len(failed_runs_this) == oe.shape[0]:
                        self.logger.warn(
                            "all runs failed for lambda {0}".format(
                                lam_vals[i]))
                    else:
                        self.logger.warn("{0} run failed for lambda {1}".\
                                         format(len(failed_runs_this),lam_vals[i]))
                    oe.iloc[failed_runs_this, :] = np.NaN
                    oe = oe.dropna()

            # don't drop bad reals here, instead, mask bad reals in the lambda
            # selection and drop later
            # if self.drop_bad_reals is not None:
            #     assert isinstance(drop_bad_reals, float)
            #     drop_idx = np.argwhere(self.current_phi_vec > self.drop_bad_reals).flatten()
            #     run_ids = self.obsensemble.index.values
            #     drop_idx = run_ids[drop_idx]
            #     if len(drop_idx) == self.obsensemble.shape[0]:
            #         raise Exception("dropped all realizations as 'bad'")
            #     if len(drop_idx) > 0:
            #         self.logger.warn("{0} realizations dropped as 'bad' (indices :{1})". \
            #                          format(len(drop_idx), ','.join([str(d) for d in drop_idx])))
            #         self.parensemble.loc[drop_idx, :] = np.NaN
            #         self.parensemble = self.parensemble.dropna()
            #         self.obsensemble.loc[drop_idx, :] = np.NaN
            #         self.obsensemble = self.obsensemble.dropna()
            #
            #         self.current_phi_vec = self._calc_phi_vec(self.obsensemble)

            obsen_lam.append(oe)
        obsen_combine = None

        # here is where we need to select out the "best" lambda par and obs
        # ensembles
        self.logger.statement("\n**************************")
        self.logger.statement(str(datetime.now()))
        self.logger.statement("total runs:{0}".format(self.total_runs))
        self.logger.statement("iteration: {0}".format(self.iter_num))
        self.logger.statement("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                              format(self.current_lambda,
                         self.last_best_mean,self.last_best_std))
        phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        if self.drop_bad_reals is not None:
            for i, pv in enumerate(phi_vecs):
                #for testing the drop_bad_reals functionality
                #pv[[0,3,7]] = self.drop_bad_reals + 1.0
                pv[pv > self.drop_bad_reals] = np.NaN
                pv = pv[~np.isnan(pv)]
                if len(pv) == 0:
                    raise Exception("all realization for lambda {0} dropped as 'bad'".\
                                    format(lam_vals[i]))
                phi_vecs[i] = pv
        mean_std = [(pv.mean(), pv.std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i, (m, s) in enumerate(mean_std):
            self.logger.statement(" tested lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                                 format(self.current_lambda * lambda_mults[i],m,s))
            if m < best_mean:
                update_pars = True
                best_mean = m
                best_i = i
                if s < best_std:
                    update_lambda = True
                    best_std = s
        if np.isnan(best_mean):
            self.logger.lraise("best mean = NaN")
        if np.isnan(best_std):
            self.logger.lraise("best std = NaN")

        if not update_pars:
            self.current_lambda *= max(lambda_mults) * 10.0
            self.current_lambda = min(self.current_lambda, 100000)
            self.logger.statement("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))
        else:
            self.parensemble = ParameterEnsemble.from_dataframe(
                df=paren_lam[best_i], pst=self.pst)
            if run_subset is not None:
                failed_runs, self.obsensemble = self._calc_obs(
                    self.parensemble)
                if failed_runs is not None:
                    self.logger.warn("dropping failed realizations")
                    self.parensemble.loc[failed_runs, :] = np.NaN
                    self.parensemble = self.parensemble.dropna()
                    self.obsensemble.loc[failed_runs, :] = np.NaN
                    self.obsensemble = self.obsensemble.dropna()

                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)

                #self._phi_report(self.current_phi_vec,self.current_lambda * lambda_mults[best_i])
                best_mean = self.current_phi_vec.mean()
                best_std = self.current_phi_vec.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                # reindex parensemble in case failed runs
                self.parensemble = ParameterEnsemble.from_dataframe(
                    df=self.parensemble.loc[self.obsensemble.index],
                    pst=self.pst)
                self.current_phi_vec = phi_vecs[best_i]

            if self.drop_bad_reals is not None:
                # for testing drop_bad_reals functionality
                # self.current_phi_vec[::2] = self.drop_bad_reals + 1.0
                drop_idx = np.argwhere(
                    self.current_phi_vec > self.drop_bad_reals).flatten()
                run_ids = self.obsensemble.index.values
                drop_idx = run_ids[drop_idx]
                if len(drop_idx) > self.obsensemble.shape[0] - 3:
                    raise Exception("dropped too many realizations as 'bad'")
                if len(drop_idx) > 0:
                    self.logger.warn("{0} realizations dropped as 'bad' (indices :{1})". \
                                     format(len(drop_idx), ','.join([str(d) for d in drop_idx])))
                    self.parensemble.loc[drop_idx, :] = np.NaN
                    self.parensemble = self.parensemble.dropna()
                    self.obsensemble.loc[drop_idx, :] = np.NaN
                    self.obsensemble = self.obsensemble.dropna()

                    self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
                    best_mean = self.current_phi_vec.mean()
                    best_std = self.current_phi_vec.std()

            self._phi_report(self.phi_csv, self.current_phi_vec,
                             self.current_lambda * lambda_mults[best_i])
            self._phi_report(self.phi_act_csv,
                             self.obsensemble.phi_vector.values,
                             self.current_lambda * lambda_mults[best_i])


            self.logger.statement("   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            self.logger.statement("   actual mean phi: {0:15.6G}".format(
                float(self.current_actual_phi.mean())))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda, 0.00001)
            self.logger.statement("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))

        self.logger.statement("**************************\n")
        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))
        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
        if self.raw_sweep_out is not None:
            self.raw_sweep_out.to_csv(self.pst.filename+"_raw{0}".\
                                        format(self.iter_num))
        self.logger.log("iteration {0}".format(self.iter_num))
コード例 #13
0
ファイル: mc.py プロジェクト: jroth-usgs/pyemu
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis.

    The instance carries a ``ParameterEnsemble`` (``self.parensemble``) and
    an ``ObservationEnsemble`` (``self.obsensemble``), both constructed from
    the pest control file held by the base class.

    Note
    ----
        requires a pest control file, which can be derived from a jco
        argument.  ``MonteCarlo.project_parensemble()`` also requires a
        jacobian.

    """

    def __init__(self, **kwargs):
        """Initialize the base LinearAnalysis and create the two ensembles.

        Parameters
        ----------
            **kwargs : dict
                passed unchanged to ``LinearAnalysis.__init__``

        Raises
        ------
            AssertionError
                if no pest control file is available after the base class
                has been initialized
        """
        super(MonteCarlo, self).__init__(**kwargs)
        assert self.pst is not None, \
            "monte carlo requires a pest control file"
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

    @property
    def num_reals(self):
        """int : number of rows (realizations) in the parameter ensemble"""
        return self.parensemble.shape[0]

    def get_nsing(self, epsilon=1.0e-4):
        """Get the number of solution space dimensions given a ratio
        between the largest and smallest singular values of XTQX.

        Parameters
        ----------
            epsilon : float
                threshold applied to the singular values after they have
                been divided by the largest singular value

        Returns
        -------
            int or None
                number of normalized singular values that are >= epsilon.
                If that count equals ``self.xtqx.shape[0]`` a warning is
                logged and None is returned.
        """
        mx = self.xtqx.shape[0]
        sing_vals = self.xtqx.s.x
        # NOTE(review): the [:, 0] index assumes s.x is a 2-D column of
        # singular values - confirm against the Matrix implementation
        normalized = np.sort((sing_vals / sing_vals.max())[:, 0])
        # searchsorted on the ascending array counts the values < epsilon
        nsing = mx - np.searchsorted(normalized, epsilon)
        if nsing == mx:
            self.logger.warn("optimal nsing=npar")
            nsing = None
        return nsing

    def get_null_proj(self, nsing=None):
        """Get a null-space projection matrix of XTQX.

        Parameters
        ----------
            nsing : int
                optional number of singular components to use.  If None,
                ``self.get_nsing()`` is called.

        Returns
        -------
            Matrix instance : V2V2^T, formed from the columns of
            ``self.xtqx.v`` from index ``nsing`` onward

        Raises
        ------
            Exception
                if nsing is None and ``self.get_nsing()`` also returns None
        """
        if nsing is None:
            nsing = self.get_nsing()
        if nsing is None:
            raise Exception("nsing is None")
        print("using {0} singular components".format(nsing))
        # the same message is logged before and after the work, as is done
        # throughout this class (presumably start/finish of a log entry)
        msg = "forming null space projection matrix with " +\
              "{0} of {1} singular components".format(nsing, self.jco.shape[1])
        self.log(msg)
        v2 = self.xtqx.v[:, nsing:]
        v2_proj = (v2 * v2.T)
        self.log(msg)

        return v2_proj

    def draw(self, num_reals=1, par_file = None, obs=False,
             enforce_bounds=False,cov=None, how="gaussian"):
        """Draw stochastic realizations of parameters and optionally
        observations.  Both ensembles on the instance are replaced.

        Parameters
        ----------
            num_reals : int
                number of realizations to generate
            par_file : str
                parameter file to use as mean values.  If not None, it is
                passed to ``self.pst.parrep()`` before drawing.
            obs : bool
                add a realization of measurement noise to obs by drawing
                the observation ensemble from ``self.obscov``
            enforce_bounds : bool
                enforce parameter bounds in control file
            cov : Cov
                optional covariance matrix to draw the parameter ensemble
                from.  If None, ``self.parcov`` is used.  Cannot be combined
                with how="uniform".
            how : str
                type of distribution.  Must be in ["gaussian","uniform"]
                (case and surrounding whitespace are ignored)

        Returns
        -------
            None

        Raises
        ------
            AssertionError
                if ``how`` is not a supported distribution or ``cov`` is not
                a Cov instance
            Exception
                if ``cov`` is passed together with how="uniform"
        """
        if par_file is not None:
            self.pst.parrep(par_file)
        how = how.lower().strip()
        assert how in ["gaussian","uniform"]

        if cov is not None:
            assert isinstance(cov,Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform'," +\
                                " 'cov' arg cannot be passed")
        else:
            cov = self.parcov

        # start from fresh ensembles so earlier draws are discarded
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

        par_msg = "generating {0:d} parameter realizations".format(num_reals)
        self.log(par_msg)
        self.parensemble.draw(cov, num_reals=num_reals, how=how)
        if enforce_bounds:
            self.parensemble.enforce()
        self.log(par_msg)

        if obs:
            obs_msg = "generating {0:d} observation realizations".format(num_reals)
            self.log(obs_msg)
            self.obsensemble.draw(self.obscov, num_reals=num_reals)
            self.log(obs_msg)

    def project_parensemble(self,par_file=None,nsing=None,
                            inplace=True):
        """Perform the null-space projection operations for null-space
        monte carlo.

        Parameters
        ----------
            par_file : str
                an optional file of parameter values to use.  If not None,
                it is passed to ``self.parensemble.pst.parrep()``.
            nsing : int
                number of singular values to use in forming the null
                subspace matrix (passed to ``self.get_null_proj()``)
            inplace : bool
                overwrite the existing parameter ensemble with the
                projected values

        Returns
        -------
            the value returned by ``self.parensemble.project()``: per the
            original documentation, a ParameterEnsemble instance if inplace
            is False, otherwise None

        Raises
        ------
            AssertionError
                if ``self.jco`` is None or ``par_file`` does not exist
        """
        # messages fixed: a space was missing between the concatenated
        # pieces and the par_file message named the wrong method
        assert self.jco is not None,"MonteCarlo.project_parensemble() " +\
                                    "requires a jacobian attribute"
        if par_file is not None:
            assert os.path.exists(par_file),\
                "MonteCarlo.project_parensemble() error: par_file not found: " +\
                par_file
            self.parensemble.pst.parrep(par_file)

        # project the ensemble
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),inplace=inplace,log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self,prefix,existing_jco=None,noptmax=None):
        """Write parameter and optionally observation realizations to pest
        control files, one file per realization named
        ``prefix + "<i>.pst"``.

        Parameters
        ----------
            prefix : str
                pest control file prefix
            existing_jco : str
                filename of an existing jacobian matrix to add to the
                pest++ options in the control file.  This is useful for
                NSMC since this jco can be used to get the first set of
                parameter upgrades for free!  Needs to be the path to the
                jco file as seen from the location where pest++ will be run
            noptmax : int
                value of NOPTMAX to set in new pest control files

        Returns
        -------
            None
        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,obs_names=self.pst.obs_names)

        if noptmax is not None:
            pst.control_data.noptmax = noptmax

        if existing_jco is not None:
            pst.pestpp_options["BASE_JACOBIAN"] = existing_jco

        # set the indices
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        # parameter values are written from the back-transformed ensemble
        if self.parensemble.istransformed:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        # neither ensemble is modified inside the loop, so these are
        # evaluated once
        num_reals = self.num_reals
        # obs values are only written when there is one obs realization
        # per parameter realization
        write_obs = self.obsensemble.shape[0] == num_reals

        for i in range(num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,"parval1"] = par_en.iloc[i, :].T

            # reset the regularization
            if pst.control_data.pestmode == "regularization":
                pst.zero_order_tikhonov(parbounds=True)

            # add the obs noise realization if needed
            if write_obs:
                pst.observation_data.loc[self.obsensemble.columns,"obsval"] = \
                    self.obsensemble.iloc[i, :].T

            # write
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")
コード例 #14
0
    def update(self,
               lambda_mults=[1.0],
               localizer=None,
               run_subset=None,
               use_approx=True,
               calc_only=False):
        """update the iES one GLM cycle

        Parameters
        ----------
            lambda_mults : list
                a list of lambda multipliers to test.  Each lambda mult value will require
                evaluating (a subset of) the parameter ensemble.
            localizer : pyemu.Matrix
                a jacobian localizing matrix
            run_subset : int
                the number of realizations to test for each lambda_mult value.  For example,
                if run_subset = 30 and num_reals=100, the first 30 realizations will be run (in
                parallel) for each lambda_mult value.  Then the best lambda_mult is selected and the
                remaining 70 realizations for that lambda_mult value are run (in parallel).
            use_approx : bool
                 a flag to use the MLE or MAP upgrade solution.  True indicates use MLE solution
            calc_only : bool
                a flag to calculate the upgrade matrix only (not run the ensemble). This is mostly for
                debugging and testing on travis. Default is False

        Example
        -------

        ``>>>import pyemu``

        ``>>>es = pyemu.EnsembleSmoother(pst="pest.pst")``

        ``>>>es.initialize(num_reals=100)``

        ``>>>es.update(lambda_mults=[0.1,1.0,10.0],run_subset=30)``

         """

        #if not self.parensemble.istransformed:
        #    self.parensemble._transform(inplace=False)

        if run_subset is not None:
            if run_subset >= self.obsensemble.shape[0]:
                self.logger.warn("run_subset ({0}) >= num of active reals ({1})...ignoring ".\
                                 format(run_subset,self.obsensemble.shape[0]))
                run_subset = None

        self.iter_num += 1
        mat_prefix = self.pst.filename.replace('.pst', '') + ".{0}".format(
            self.iter_num)
        self.logger.log("iteration {0}".format(self.iter_num))
        self.logger.statement("{0} active realizations".format(
            self.obsensemble.shape[0]))
        if self.obsensemble.shape[0] < 2:
            self.logger.lraise(
                "at least active 2 realizations (really like 300) are needed to update"
            )
        if not self._initialized:
            #raise Exception("must call initialize() before update()")
            self.logger.lraise("must call initialize() before update()")

        self.logger.log("calculate scaled delta obs")
        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        self.logger.log("calculate scaled delta obs")
        self.logger.log("calculate scaled delta par")
        scaled_delta_par = self._calc_delta_par(self.parensemble)
        self.logger.log("calculate scaled delta par")

        self.logger.log("calculate pseudo inv comps")
        u, s, v = scaled_delta_obs.pseudo_inv_components(
            eigthresh=self.pst.svd_data.eigthresh)
        s.col_names = s.row_names
        self.logger.log("calculate pseudo inv comps")

        self.logger.log("calculate obs diff matrix")
        #obs_diff = self.obscov_inv_sqrt * self._get_residual_obs_matrix(self.obsensemble).T
        obs_diff = self.obscov_inv_sqrt * self.phi.get_residual_obs_matrix(
            self.obsensemble).T
        self.logger.log("calculate obs diff matrix")

        if self.save_mats:
            np.savetxt(mat_prefix + ".obs_diff.dat",
                       scaled_delta_obs.x,
                       fmt="%15.6e")
            np.savetxt(mat_prefix + ".par_diff.dat",
                       scaled_delta_par.x,
                       fmt="%15.6e")
            np.savetxt(mat_prefix + ".u.dat", u.x, fmt="%15.6e")
            np.savetxt(mat_prefix + ".s.dat", s.x, fmt="%15.6e")
            np.savetxt(mat_prefix + ".v.dat", v.x, fmt="%15.6e")
        # here is the math part...calculate upgrade matrices
        mean_lam, std_lam, paren_lam, obsen_lam = [], [], [], []
        lam_vals = []
        for ilam, cur_lam_mult in enumerate(lambda_mults):

            parensemble_cur_lam = self.parensemble.copy()
            #print(parensemble_cur_lam.isnull().values.any())

            cur_lam = self.current_lambda * cur_lam_mult
            lam_vals.append(cur_lam)
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))
            scaled_ident = Cov.identity_like(s) * (cur_lam + 1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            self.logger.log("building upgrade_1 matrix")
            upgrade_1 = -1.0 * (self.parcov_inv_sqrt * scaled_delta_par) *\
                        v * s * scaled_ident * u.T
            if self.save_mats:
                np.savetxt(mat_prefix + ".ivec.dat".format(self.iter_num),
                           scaled_ident.x,
                           fmt="%15.6e")
            self.logger.log("building upgrade_1 matrix")

            # apply localization
            if localizer is not None:
                self.logger.log("applying localization")
                upgrade_1.hadamard_product(localizer)
                self.logger.log("applying localization")

            # apply residual information
            self.logger.log("applying residuals")
            upgrade_1 *= obs_diff
            self.logger.log("applying residuals")

            self.logger.log("processing upgrade_1")
            if self.save_mats:
                np.savetxt(mat_prefix + ".upgrade_1.dat",
                           upgrade_1.T.x,
                           fmt="%15.6e")
            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            upgrade_1 = upgrade_1.T
            upgrade_1.index = [int(i) for i in upgrade_1.index]
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            if upgrade_1.isnull().values.any():
                self.logger.lraise("NaNs in upgrade_1")
            self.logger.log("processing upgrade_1")

            #print(upgrade_1.isnull().values.any())
            #print(parensemble_cur_lam.index)
            #print(upgrade_1.index)
            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not use_approx and self.iter_num > 1:
                #if True:
                self.logger.log("building upgrade_2 matrix")
                par_diff = (self.parensemble - self.parensemble_0.loc[self.parensemble.index,:]).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.parcov_inv_sqrt * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                ug2_mat = -1.0 * (self.parcov_inv_sqrt * scaled_delta_par * x7)
                upgrade_2 = ug2_mat.to_dataframe()
                upgrade_2.index.name = "parnme"
                upgrade_2 = upgrade_2.T
                upgrade_2.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                upgrade_2.index = [int(i) for i in upgrade_2.index]

                if self.save_mats:
                    np.savetxt(mat_prefix + ".scaled_par_resid.dat",
                               par_diff.x,
                               fmt="%15.6e")
                    np.savetxt(mat_prefix + ".x4.dat", x4.x, fmt="%15.6e")
                    np.savetxt(mat_prefix + ".x5.dat", x5.x, fmt="%15.6e")
                    np.savetxt(mat_prefix + ".x6.dat", x6.x, fmt="%15.6e")
                    np.savetxt(mat_prefix + ".x7.dat", x7.x, fmt="%15.6e")
                    np.savetxt(mat_prefix + ".upgrade_2.dat",
                               ug2_mat.T.x,
                               fmt="%15.6e")

                if upgrade_2.isnull().values.any():
                    self.logger.lraise("NaNs in upgrade_2")

                parensemble_cur_lam += upgrade_2
                self.logger.log("building upgrade_2 matrix")
            self.logger.log("enforcing bounds")
            parensemble_cur_lam.enforce(self.enforce_bounds)
            self.logger.log("enforcing bounds")

            self.logger.log("filling fixed parameters")
            #fill in fixed pars with initial values
            fi = parensemble_cur_lam.fixed_indexer
            li = parensemble_cur_lam.log_indexer
            log_values = self.pst.parameter_data.loc[:, "parval1"].copy()
            log_values.loc[li] = log_values.loc[li].apply(np.log10)
            fixed_vals = log_values.loc[fi]

            for fname, fval in zip(fixed_vals.index, fixed_vals.values):
                # if fname not in df.columns:
                #    continue
                # print(fname)
                parensemble_cur_lam.loc[:, fname] = fval
            self.logger.log("filling fixed parameters")
            # this is for testing failed runs on upgrade testing
            # works with the 10par_xsec smoother test
            #parensemble_cur_lam.iloc[:,:] = -1000000.0

            # some hackery - we lose track of the transform flag here, but just
            # know it is transformed.  Need to create dataframe here because
            # pd.concat doesn't like par ensembles later
            paren_lam.append(pd.DataFrame(parensemble_cur_lam.loc[:, :]))
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))

        if calc_only:
            return

        # subset if needed
        # and combine lambda par ensembles into one par ensemble for evaluation
        if run_subset is not None and run_subset < self.parensemble.shape[0]:
            #subset_idx = ["{0:d}".format(i) for i in np.random.randint(0,self.parensemble.shape[0]-1,run_subset)]
            subset_idx = self.parensemble.iloc[:run_subset, :].index.values
            self.logger.statement("subset idxs: " +
                                  ','.join([str(s) for s in subset_idx]))

            # more tracking of transformed - just know it! Creating dataframes...
            paren_lam_subset = [pe.loc[subset_idx, :] for pe in paren_lam]
            paren_combine = pd.concat(paren_lam_subset, ignore_index=True)
            paren_lam_subset = None
        else:
            subset_idx = self.parensemble.index.values
            paren_combine = pd.concat(paren_lam, ignore_index=True)


        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        # back to par ensemble and know it is transformed
        paren_combine = ParameterEnsemble.from_dataframe(df=paren_combine,
                                                         pst=self.pst,
                                                         istransformed=True)
        failed_runs, obsen_combine = self._calc_obs(paren_combine)
        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        paren_combine = None

        if failed_runs is not None and len(
                failed_runs) == obsen_combine.shape[0]:
            self.logger.lraise("all runs failed - cannot continue")

        # unpack lambda obs ensembles from combined obs ensemble
        nrun_per_lam = self.obsensemble.shape[0]
        if run_subset is not None:
            nrun_per_lam = run_subset
        obsen_lam = []

        for i in range(len(lam_vals)):
            sidx = i * nrun_per_lam
            eidx = sidx + nrun_per_lam
            oe = ObservationEnsemble.from_dataframe(
                df=obsen_combine.iloc[sidx:eidx, :].copy(), pst=self.pst)
            oe.index = subset_idx
            # check for failed runs in this set - drop failed runs from obs ensembles
            if failed_runs is not None:
                failed_runs_this = np.array(
                    [f for f in failed_runs if f >= sidx and f < eidx]) - sidx
                if len(failed_runs_this) > 0:
                    if len(failed_runs_this) == oe.shape[0]:
                        self.logger.warn(
                            "all runs failed for lambda {0}".format(
                                lam_vals[i]))
                    else:
                        self.logger.warn("{0} run failed for lambda {1}".\
                                         format(len(failed_runs_this),lam_vals[i]))
                    oe.iloc[failed_runs_this, :] = np.NaN
                    oe = oe.dropna()
                    paren_lam[i].iloc[failed_runs_this, :] = np.NaN
                    paren_lam[i] = ParameterEnsemble.from_dataframe(
                        df=paren_lam[i].dropna(), pst=self.pst)
                    paren_lam[i].__instransformed = True

            # don't drop bad reals here, instead, mask bad reals in the lambda
            # selection and drop later
            # if self.drop_bad_reals is not None:
            #     assert isinstance(drop_bad_reals, float)
            #     drop_idx = np.argwhere(self.current_phi_vec > self.drop_bad_reals).flatten()
            #     run_ids = self.obsensemble.index.values
            #     drop_idx = run_ids[drop_idx]
            #     if len(drop_idx) == self.obsensemble.shape[0]:
            #         raise Exception("dropped all realizations as 'bad'")
            #     if len(drop_idx) > 0:
            #         self.logger.warn("{0} realizations dropped as 'bad' (indices :{1})". \
            #                          format(len(drop_idx), ','.join([str(d) for d in drop_idx])))
            #         self.parensemble.loc[drop_idx, :] = np.NaN
            #         self.parensemble = self.parensemble.dropna()
            #         self.obsensemble.loc[drop_idx, :] = np.NaN
            #         self.obsensemble = self.obsensemble.dropna()
            #
            #         self.current_phi_vec = self._calc_phi_vec(self.obsensemble)

            obsen_lam.append(oe)
        obsen_combine = None

        # here is where we need to select out the "best" lambda par and obs
        # ensembles

        #phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        #phi_vecs_reg = [self._calc_regul_phi_vec(paren) for paren in paren_lam]
        #if self.regul_factor > 0.0:
        #    for i,(pv,prv) in enumerate(zip(phi_vecs,phi_vecs_reg)):
        #        phi_vecs[i] = pv + (prv * self.regul_factor)
        self.logger.log("calc lambda phi vectors")
        phi_vecs = [
            self.phi.get_meas_and_regul_phi(oe, pe.loc[oe.index, :])
            for oe, pe in zip(obsen_lam, paren_lam)
        ]
        self.logger.log("calc lambda phi vectors")
        if self.drop_bad_reals is not None:
            for i, (meas_pv, regul_pv) in enumerate(phi_vecs):
                #for testing the drop_bad_reals functionality
                #pv[[0,3,7]] = self.drop_bad_reals + 1.0
                regul_pv = regul_pv.copy()
                regul_pv[meas_pv > self.drop_bad_reals] = np.NaN
                regul_pv = regul_pv[~np.isnan(regul_pv)]
                meas_pv[meas_pv > self.drop_bad_reals] = np.NaN
                meas_pv = meas_pv[~np.isnan(meas_pv)]
                if len(meas_pv) == 0:
                    #raise Exception("all realization for lambda {0} dropped as 'bad'".\
                    #                format(lam_vals[i]))
                    self.logger.warn(
                        "all realizations for lambda {0} marked as 'bad'")
                    meas_pv = np.zeros_like(obsen_lam[0].shape[0]) + 1.0e+30
                    regul_pv = np.zeros_like(obsen_lam[0].shape[0]) + 1.0e+30
                phi_vecs[i] = (meas_pv, regul_pv)
        mean_std_meas = [(pv[0].mean(), pv[0].std()) for pv in phi_vecs]
        mean_std_regul = [(pv[1].mean(), pv[1].std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        self.logger.statement("**************************")
        # self.logger.statement(str(datetime.now()))
        self.logger.statement("lambda testing summary")
        self.logger.statement("total runs:{0}".format(self.total_runs))
        self.logger.statement("iteration: {0}".format(self.iter_num))
        self.logger.statement("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}". \
                              format(self.current_lambda,
                                     self.last_best_mean, self.last_best_std))

        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i, ((mm, ms),
                (rm, rs)) in enumerate(zip(mean_std_meas, mean_std_regul)):
            self.logger.statement(
                " tested lambda:{0:15.6G}, meas mean:{1:15.6G}, meas std:{2:15.6G}"
                .format(self.current_lambda * lambda_mults[i], mm, ms))
            self.logger.statement("{0:30s}regul mean:{1:15.6G}, regul std:{2:15.6G}".\
                                  format(' ',rm,rs))
            m = mm + (self.regul_factor * rm)
            s = ms + (self.regul_factor * rs)
            if m < best_mean:
                update_pars = True
                best_mean = m
                best_i = i
                if s < best_std:
                    update_lambda = True
                    best_std = s
        if np.isnan(best_mean):
            self.logger.lraise("best mean = NaN")
        if np.isnan(best_std):
            self.logger.lraise("best std = NaN")

        if not update_pars:
            self.current_lambda *= max(lambda_mults) * 10.0
            self.current_lambda = min(self.current_lambda, 100000)
            self.logger.statement("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))
        else:
            #more transformation status hard coding - ugly
            self.parensemble = ParameterEnsemble.from_dataframe(
                df=paren_lam[best_i], pst=self.pst, istransformed=True)
            if run_subset is not None:
                failed_runs, self.obsensemble = self._calc_obs(
                    self.parensemble)
                if failed_runs is not None:
                    self.logger.warn("dropping failed realizations")
                    self.parensemble.loc[failed_runs, :] = np.NaN
                    self.parensemble = self.parensemble.dropna()
                    self.obsensemble.loc[failed_runs, :] = np.NaN
                    self.obsensemble = self.obsensemble.dropna()

                self.phi.update()
                best_mean = self.phi.comp_phi.mean()
                best_std = self.phi.comp_phi.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                # reindex parensemble in case failed runs
                self.parensemble = ParameterEnsemble.from_dataframe(
                    df=self.parensemble.loc[self.obsensemble.index],
                    pst=self.pst,
                    istransformed=self.parensemble.istransformed)
                self.phi.update()
            if self.drop_bad_reals is not None:
                # for testing drop_bad_reals functionality
                # self.current_phi_vec[::2] = self.drop_bad_reals + 1.0
                #drop_idx = np.argwhere(self.current_phi_vec > self.drop_bad_reals).flatten()
                drop_idx = np.argwhere(
                    self.phi.comp_phi > self.drop_bad_reals).flatten()
                run_ids = self.obsensemble.index.values
                drop_idx = run_ids[drop_idx]
                if len(drop_idx) > self.obsensemble.shape[0] - 3:
                    raise Exception("dropped too many realizations as 'bad'")
                if len(drop_idx) > 0:
                    self.logger.warn("{0} realizations dropped as 'bad' (indices :{1})". \
                                     format(len(drop_idx), ','.join([str(d) for d in drop_idx])))
                    self.parensemble.loc[drop_idx, :] = np.NaN
                    self.parensemble = self.parensemble.dropna()
                    self.obsensemble.loc[drop_idx, :] = np.NaN
                    self.obsensemble = self.obsensemble.dropna()

                    self.phi.update()
                    best_mean = self.phi.comp_phi.mean()
                    best_std = self.phi.comp_phi.std()

            self.phi.report(cur_lam=self.current_lambda * lambda_mults[best_i])

            self.logger.statement("   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            #self.logger.statement("   actual mean phi: {0:15.6G}".format(float(self.current_actual_phi.mean())))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda, 0.00001)
            self.logger.statement("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))

        self.logger.statement("**************************\n")
        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))
        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
        if self.raw_sweep_out is not None:
            self.raw_sweep_out.to_csv(self.pst.filename+"_sweepraw{0}.csv".\
                                        format(self.iter_num))
        self.logger.log("iteration {0}".format(self.iter_num))
コード例 #15
0
    def initialize(self,
                   num_reals=1,
                   init_lambda=None,
                   enforce_bounds="reset",
                   parensemble=None,
                   obsensemble=None,
                   restart_obsensemble=None,
                   regul_factor=0.0,
                   use_approx_prior=True,
                   build_empirical_prior=False):
        """Initialize the iES process.  Depending on arguments, draws or loads
        the initial parameter and observation ensembles and evaluates (or
        loads) the initial observation ensemble.

        Parameters
        ----------
            num_reals : int
                the number of realizations to draw.  Ignored if both
                parensemble and obsensemble are not None (the number of rows
                in the supplied parensemble is used instead)
            init_lambda : float
                the initial lambda to use.  If None, lambda is derived from
                the initial mean composite phi.  During subsequent updates,
                the lambda is updated according to upgrade success
            enforce_bounds : str
                how to enforce parameter bound transgression.  options are
                reset, drop, or None
            parensemble : pyemu.ParameterEnsemble or str
                a parameter ensemble or filename to use as the initial
                parameter ensemble.  If not None, then obsensemble must not be
                None
            obsensemble : pyemu.ObservationEnsemble or str
                an observation ensemble or filename to use as the initial
                observation ensemble.  If not None, then parensemble must
                not be None
            restart_obsensemble : pyemu.ObservationEnsemble or str
                an observation ensemble or filename to use as an
                evaluated observation ensemble.  If not None, this will skip the initial
                parameter ensemble evaluation - user beware!
            regul_factor : float
                the regularization penalty fraction of the composite objective.  The
                purist, MAP solution would be regul_factor = 1.0, yielding equal
                parts measurement and regularization to the composite objective function.
                Default is 0.0, which means only seek to minimize the measurement objective
                function
            use_approx_prior : bool
                a flag controlling use of the inverse, square root of the
                prior covariance matrix for scaling the upgrade calculation.
                If True, this matrix is not used (a scalar 1.0 is stored in
                its place).  Default is True
            build_empirical_prior : bool
                flag to build the prior parameter covariance matrix from an existing parensemble.
                If True and parensemble is None, an exception is raised

        Raises
        ------
            Exception
                if parensemble/obsensemble are neither a filename nor the
                matching ensemble type, or if every realization is dropped
                as 'bad'.  Missing files and invalid argument combinations
                are reported through ``self.logger.lraise``.
            AssertionError
                if the loaded ensembles do not have consistent shapes/columns

        Example
        -------
        ``>>>import pyemu``

        ``>>>es = pyemu.EnsembleSmoother(pst="pest.pst")``

        ``>>>es.initialize(num_reals=100)``

        """
        self.enforce_bounds = enforce_bounds

        self.regul_factor = float(regul_factor)

        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

        # NOTE(review): this block is repeated verbatim further down, after
        # the ensembles are set up (and after a possible reset_parcov()).
        # The later assignment is the one that survives; this one is kept
        # only to preserve the existing call/log sequence.
        if use_approx_prior:
            self.logger.statement("using approximate parcov in solution")
            self.parcov_inv_sqrt = 1.0
        else:
            self.logger.statement("using full parcov in solution")
            # Chen and Oliver use a low rank approx here, but so far,
            # I haven't needed it - not using enough parameters yet
            self.logger.log("forming inverse sqrt parcov matrix")
            self.parcov_inv_sqrt = self.parcov.inv.sqrt
            self.logger.log("forming inverse sqrt parcov matrix")

        if parensemble is not None and obsensemble is not None:
            self.logger.log("initializing with existing ensembles")
            if isinstance(parensemble, str):
                self.logger.log("loading parensemble from file")
                # check the parameter ensemble file itself (the original
                # code tested the obsensemble path here by mistake)
                if not os.path.exists(parensemble):
                    self.logger.lraise("can not find parensemble file: {0}".\
                                       format(parensemble))
                df = pd.read_csv(parensemble, index_col=0)
                #df.index = [str(i) for i in df.index]
                self.parensemble_0 = ParameterEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading parensemble from file")

            elif isinstance(parensemble, ParameterEnsemble):
                self.parensemble_0 = parensemble.copy()
            else:
                raise Exception("unrecognized arg type for parensemble, " +\
                                "should be filename or ParameterEnsemble" +\
                                ", not {0}".format(type(parensemble)))
            self.parensemble = self.parensemble_0.copy()
            if isinstance(obsensemble, str):
                self.logger.log("loading obsensemble from file")
                if not os.path.exists(obsensemble):
                    self.logger.lraise("can not find obsensemble file: {0}".\
                                       format(obsensemble))
                # only the non-zero weighted observation columns are kept
                df = pd.read_csv(obsensemble,
                                 index_col=0).loc[:, self.pst.nnz_obs_names]
                #df.index = [str(i) for i in df.index]
                self.obsensemble_0 = ObservationEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading obsensemble from file")

            elif isinstance(obsensemble, ObservationEnsemble):
                self.obsensemble_0 = obsensemble.copy()
            else:
                raise Exception("unrecognized arg type for obsensemble, " +\
                                "should be filename or ObservationEnsemble" +\
                                ", not {0}".format(type(obsensemble)))

            assert self.parensemble_0.shape[0] == self.obsensemble_0.shape[0]
            #self.num_reals = self.parensemble_0.shape[0]
            # the supplied ensemble dictates the realization count
            num_reals = self.parensemble.shape[0]
            self.logger.log("initializing with existing ensembles")

            if build_empirical_prior:

                self.reset_parcov(self.parensemble.covariance_matrix())
                if self.save_mats:
                    self.parcov.to_binary(self.pst.filename + ".empcov.jcb")

        else:
            if build_empirical_prior:
                self.logger.lraise(
                    "can't use build_emprirical_prior without parensemble...")
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))
            self.logger.log("initializing parensemble")
            self.parensemble_0 = pyemu.ParameterEnsemble.from_gaussian_draw(
                self.pst, self.parcov, num_reals=num_reals)
            self.parensemble_0.enforce(enforce_bounds=enforce_bounds)
            self.logger.log("initializing parensemble")
            self.parensemble = self.parensemble_0.copy()
            self.parensemble_0.to_csv(self.pst.filename +\
                                      self.paren_prefix.format(0))
            # NOTE(review): third log call with this phrase; every other
            # phrase in this method is logged in start/end pairs - confirm
            # whether this one is intentional.
            self.logger.log("initializing parensemble")
            self.logger.log("initializing obsensemble")
            self.obsensemble_0 = pyemu.ObservationEnsemble.from_id_gaussian_draw(
                self.pst, num_reals=num_reals)
            #self.obsensemble = self.obsensemble_0.copy()

            # save the base obsensemble
            self.obsensemble_0.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(-1))
            self.logger.log("initializing obsensemble")
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))

        if use_approx_prior:
            self.logger.statement("using approximate parcov in solution")
            self.parcov_inv_sqrt = 1.0
        else:
            self.logger.statement("using full parcov in solution")
            # Chen and Oliver use a low rank approx here, but so far,
            # I haven't needed it - not using enough parameters yet
            self.logger.log("forming inverse sqrt parcov matrix")
            self.parcov_inv_sqrt = self.parcov.inv.sqrt
            self.logger.log("forming inverse sqrt parcov matrix")

        # self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()
        # self.par0_matrix = self.parensemble_0.as_pyemu_matrix()
        self.enforce_bounds = enforce_bounds

        if restart_obsensemble is not None:
            # skip the model runs and load previously evaluated outputs
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))
            failed_runs, self.obsensemble = self._load_obs_ensemble(
                restart_obsensemble)
            assert self.obsensemble.shape[0] == self.obsensemble_0.shape[0]
            assert list(self.obsensemble.columns) == list(
                self.obsensemble_0.columns)
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))

        else:
            # run the initial parameter ensemble
            self.logger.log("evaluating initial ensembles")
            failed_runs, self.obsensemble = self._calc_obs(self.parensemble)
            self.obsensemble.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(0))
            if self.raw_sweep_out is not None:
                self.raw_sweep_out.to_csv(self.pst.filename + "_sweepraw0.csv")
            self.logger.log("evaluating initial ensembles")

        if failed_runs is not None:
            self.logger.warn("dropping failed realizations")
            #failed_runs_str = [str(f) for f in failed_runs]
            #self.parensemble = self.parensemble.drop(failed_runs)
            #self.obsensemble = self.obsensemble.drop(failed_runs)
            # mark failed rows as NaN and drop them from both ensembles
            # (np.nan rather than np.NaN: the alias was removed in NumPy 2.0)
            self.parensemble.loc[failed_runs, :] = np.nan
            self.parensemble = self.parensemble.dropna()
            self.obsensemble.loc[failed_runs, :] = np.nan
            self.obsensemble = self.obsensemble.dropna()

        if not self.parensemble.istransformed:
            self.parensemble._transform(inplace=True)
        if not self.parensemble_0.istransformed:
            self.parensemble_0._transform(inplace=True)

        self.phi = Phi(self)

        if self.drop_bad_reals is not None:
            #drop_idx = np.argwhere(self.current_phi_vec > self.drop_bad_reals).flatten()
            #comp_phi = self.phi.comp_phi
            #drop_idx = np.argwhere(self.phi.comp_phi > self.drop_bad_reals).flatten()
            #meas_phi = self.phi.meas_phi
            # positional indices of realizations whose measurement phi
            # exceeds the threshold, mapped to ensemble index labels
            drop_idx = np.argwhere(
                self.phi.meas_phi > self.drop_bad_reals).flatten()
            run_ids = self.obsensemble.index.values
            drop_idx = run_ids[drop_idx]
            if len(drop_idx) == self.obsensemble.shape[0]:
                raise Exception("dropped all realizations as 'bad'")
            if len(drop_idx) > 0:
                self.logger.warn("{0} realizations dropped as 'bad' (indices :{1})".\
                                 format(len(drop_idx),','.join([str(d) for d in drop_idx])))
                self.parensemble.loc[drop_idx, :] = np.nan
                self.parensemble = self.parensemble.dropna()
                self.obsensemble.loc[drop_idx, :] = np.nan
                self.obsensemble = self.obsensemble.dropna()

                self.phi.update()

        self.phi.report(cur_lam=0.0)

        self.last_best_mean = self.phi.comp_phi.mean()
        self.last_best_std = self.phi.comp_phi.std()

        #self.logger.statement("initial phi (mean, std): {0:15.6G},{1:15.6G}".\
        #                      format(self.last_best_mean,self.last_best_std))
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            # lambda = 10 ** floor(log10(mean phi / (2 * number of obs columns)))
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        self.logger.statement("current lambda:{0:15.6g}".format(
            self.current_lambda))

        # prior parameter differences and the pseudo-inverse components
        # derived from them; Am = U * S^-1 is stored for later use
        self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
        u, s, v = self.delta_par_prior.pseudo_inv_components(
            eigthresh=self.pst.svd_data.eigthresh)
        self.Am = u * s.inv
        if self.save_mats:
            np.savetxt(self.pst.filename.replace(".pst", '.') +
                       "0.prior_par_diff.dat",
                       self.delta_par_prior.x,
                       fmt="%15.6e")
            np.savetxt(self.pst.filename.replace(".pst", '.') + "0.am_u.dat",
                       u.x,
                       fmt="%15.6e")
            np.savetxt(self.pst.filename.replace(".pst", '.') + "0.am_v.dat",
                       v.x,
                       fmt="%15.6e")
            np.savetxt(self.pst.filename.replace(".pst", '.') +
                       "0.am_s_inv.dat",
                       s.inv.as_2d,
                       fmt="%15.6e")
            np.savetxt(self.pst.filename.replace(".pst", '.') + "0.am.dat",
                       self.Am.x,
                       fmt="%15.6e")

        self._initialized = True
コード例 #16
0
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis

       Note: requires a pest control file, which can be
             derived from a jco argument
             MonteCarlo.project_parensemble also
             requires a jacobian

    """
    def __init__(self, **kwargs):
        """Pass all keyword args to the base class, then create empty
        parameter and observation ensembles bound to the control file.

        Raises:
            AssertionError if no pest control file is available
        """
        super(MonteCarlo, self).__init__(**kwargs)
        assert self.pst is not None, \
            "monte carlo requires a pest control file"
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

    @property
    def num_reals(self):
        """the number of rows (realizations) in the parameter ensemble"""
        return self.parensemble.shape[0]

    def get_nsing(self, epsilon=1.0e-4):
        """ get the number of solution space dimensions given
            a ratio between the largest and smallest singular
            values

        Parameters:
            epsilon: ratio
        Returns : integer (or None)
            number of singular components above the epsilon ratio threshold
            If nsing == nadj_par, then None is returned
        """
        mx = self.xtqx.shape[0]
        # normalize the singular values by the largest one, sort ascending,
        # and locate epsilon's insertion point; mx minus that position is
        # what gets reported as nsing
        nsing = mx - np.searchsorted(
            np.sort((self.xtqx.s.x / self.xtqx.s.x.max())[:, 0]), epsilon)
        if nsing == mx:
            self.logger.warn("optimal nsing=npar")
            nsing = None
        return nsing

    def get_null_proj(self, nsing=None):
        """ get a null-space projection matrix of XTQX

        Parameters:
        ----------
            nsing: optional number of singular components to use
                      if none, call self.get_nsing()
        Returns:
        -------
            Matrix instance : V2V2^T
        Raises:
        ------
            Exception if nsing is None and self.get_nsing() also
            returns None
        """
        if nsing is None:
            nsing = self.get_nsing()
        if nsing is None:
            raise Exception("nsing is None")
        print("using {0} singular components".format(nsing))
        self.log("forming null space projection matrix with " +\
                 "{0} of {1} singular components".format(nsing,self.jco.shape[1]))

        # columns of V from nsing onward, times their transpose
        v2_proj = (self.xtqx.v[:, nsing:] * self.xtqx.v[:, nsing:].T)
        self.log("forming null space projection matrix with " +\
                 "{0} of {1} singular components".format(nsing,self.jco.shape[1]))

        return v2_proj

    def draw(self,
             num_reals=1,
             par_file=None,
             obs=False,
             enforce_bounds=False,
             cov=None,
             how="gaussian"):
        """draw stochastic realizations of parameters and
           optionally observations

        Parameters:
        ----------
            num_reals (int): number of realization to generate

            par_file (str): parameter file to use as mean values

            obs (bool): add a realization of measurement noise to obs

            enforce_bounds (bool): enforce parameter bounds in control file

            cov (Cov): optional covariance matrix to draw parameters from.
                If None, self.parcov is used.  Cannot be combined with
                how="uniform"

            how (str): type of distribution.  Must be in ["gaussian","uniform"]
        Returns:
            None
        Raises:
            AssertionError if how is not recognized or cov is not a Cov
            Exception if cov is passed together with how="uniform"
        """
        if par_file is not None:
            self.pst.parrep(par_file)
        how = how.lower().strip()
        assert how in ["gaussian", "uniform"]

        if cov is not None:
            assert isinstance(cov, Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform'," +\
                                " 'cov' arg cannot be passed")
        else:
            cov = self.parcov

        # start from fresh, empty ensembles on every draw
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)
        self.log("generating {0:d} parameter realizations".format(num_reals))
        self.parensemble.draw(cov, num_reals=num_reals, how=how)
        if enforce_bounds:
            self.parensemble.enforce()
        self.log("generating {0:d} parameter realizations".format(num_reals))
        if obs:
            self.log(
                "generating {0:d} observation realizations".format(num_reals))
            self.obsensemble.draw(self.obscov, num_reals=num_reals)
            self.log(
                "generating {0:d} observation realizations".format(num_reals))

    def project_parensemble(self, par_file=None, nsing=None, inplace=True):
        """ perform the null-space projection operations for null-space monte carlo

        Parameters:
            par_file: str
                an optional file of parameter values to use
            nsing: int
                number of singular values to in forming null subspace matrix
            inplace: bool
                overwrite the existing parameter ensemble with the
                projected values
        Returns:
        -------
            if inplace is False, ParameterEnsemble instance, otherwise None
        Raises:
        ------
            AssertionError if there is no jacobian or par_file is not found
        """
        assert self.jco is not None,"MonteCarlo.project_parensemble() " +\
                                    "requires a jacobian attribute"
        if par_file is not None:
            assert os.path.exists(par_file),"MonteCarlo.project_parensemble() error: par_file not found:" +\
                par_file
            self.parensemble.pst.parrep(par_file)

        # project the ensemble
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),
                                      inplace=inplace,
                                      log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self, prefix, existing_jco=None, noptmax=None):
        """ write parameter and optionally observation realizations
            to pest control files
        Parameters:
        ----------
            prefix: str
                pest control file prefix
            existing_jco: str
                filename of an existing jacobian matrix to add to the
                pest++ options in the control file.  This is useful for
                NSMC since this jco can be used to get the first set of
                parameter upgrades for free!  Needs to be the path the jco
                file as seen from the location where pest++ will be run
            noptmax: int
                value of NOPTMAX to set in new pest control files
        Returns:
        -------
            None
        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,
                           obs_names=self.pst.obs_names)

        if noptmax is not None:
            pst.control_data.noptmax = noptmax

        if existing_jco is not None:
            pst.pestpp_options["BASE_JACOBIAN"] = existing_jco

        # set the indices
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        # work from a back-transformed copy when the stored ensemble is
        # flagged as transformed
        if self.parensemble.istransformed:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        for i in range(self.num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,
                                   "parval1"] = par_en.iloc[i, :].T

            # reset the regularization
            if pst.control_data.pestmode == "regularization":
                pst.zero_order_tikhonov(parbounds=True)

            # add the obs noise realization if needed
            if self.obsensemble.shape[0] == self.num_reals:
                pst.observation_data.loc[self.obsensemble.columns,"obsval"] = \
                    self.obsensemble.iloc[i, :].T

            # write
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")
コード例 #17
0
ファイル: smoother.py プロジェクト: xuexianwu/pyemu
    def update(self,
               lambda_mults=[1.0],
               localizer=None,
               run_subset=None,
               use_approx=True):
        """Run one smoother iteration: build, evaluate and select upgrades.

        For every multiplier in ``lambda_mults`` a candidate parameter
        ensemble is built from the current ensembles using
        ``self.current_lambda * multiplier``.  All candidates are evaluated
        in a single call to ``self._calc_obs``, the phi vector of each
        candidate is computed, and the candidate with the lowest mean phi is
        accepted if that mean is below 1.1 times the last accepted mean.
        ``self.current_lambda`` is decreased when the accepted candidate's
        phi standard deviation was also within 1.1 times the last accepted
        standard deviation, and increased when no candidate is accepted.
        The current ensembles are written to csv at the end of every call.

        Parameters
        ----------
            lambda_mults : list of float
                multipliers applied to ``self.current_lambda``; one candidate
                ensemble is tested per entry.  The default list is only
                read (iterated, indexed, ``max``), never mutated here
            localizer : matrix-like or None
                if not None, passed to ``hadamard_product`` of the first
                upgrade matrix before residuals are applied
            run_subset : int or None
                if not None and smaller than the number of active
                realizations, only the first ``run_subset`` realizations of
                each candidate are evaluated; the accepted candidate is then
                re-evaluated in full.  Values >= the number of active
                realizations are ignored with a warning
            use_approx : bool
                when False and this is not the first iteration, the
                parameter-based ``upgrade_2`` term is added as well

        Returns
        -------
            None

        Notes
        -----
        Error conditions (fewer than two active realizations, ``initialize``
        not called, NaNs in an upgrade, all runs failed, NaN best mean/std)
        are reported through ``self.logger.lraise`` (presumably raising --
        confirm against the logger implementation).  ``self.logger.log`` is
        called with the same phrase before and after each step.
        """

        if run_subset is not None:
            if run_subset >= self.obsensemble.shape[0]:
                self.logger.warn("run_subset ({0}) >= num of active reals ({1})...ignoring ".\
                                 format(run_subset,self.obsensemble.shape[0]))
                run_subset = None

        self.iter_num += 1
        self.logger.log("iteration {0}".format(self.iter_num))
        self.logger.statement("{0} active realizations".format(
            self.obsensemble.shape[0]))
        if self.obsensemble.shape[0] < 2:
            self.logger.lraise(
                "at least active 2 realizations (really like 300) are needed to update"
            )
        if not self.__initialized:
            self.logger.lraise("must call initialize() before update()")

        self.logger.log("calculate scaled delta obs")
        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        self.logger.log("calculate scaled delta obs")
        self.logger.log("calculate scaled delta par")
        scaled_delta_par = self._calc_delta_par(self.parensemble)
        self.logger.log("calculate scaled delta par")

        self.logger.log("calculate pseudo inv comps")
        u, s, v = scaled_delta_obs.pseudo_inv_components()
        self.logger.log("calculate pseudo inv comps")

        self.logger.log("calculate obs diff matrix")
        obs_diff = self.obscov_inv_sqrt * self._get_residual_matrix(
            self.obsensemble).T
        self.logger.log("calculate obs diff matrix")

        # here is the math part...calculate upgrade matrices
        paren_lam = []
        lam_vals = []
        for cur_lam_mult in lambda_mults:

            parensemble_cur_lam = self.parensemble.copy()

            cur_lam = self.current_lambda * cur_lam_mult
            lam_vals.append(cur_lam)
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))
            # ((lambda + 1) * I + s**2)^-1
            scaled_ident = Cov.identity_like(s) * (cur_lam + 1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            self.logger.log("building upgrade_1 matrix")
            upgrade_1 = -1.0 * (self.half_parcov_diag * scaled_delta_par) *\
                        v * s * scaled_ident * u.T
            self.logger.log("building upgrade_1 matrix")

            # apply localization
            if localizer is not None:
                self.logger.log("applying localization")
                upgrade_1.hadamard_product(localizer)
                self.logger.log("applying localization")

            # apply residual information
            self.logger.log("applying residuals")
            upgrade_1 *= obs_diff
            self.logger.log("applying residuals")

            self.logger.log("processing upgrade_1")
            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            # transpose so rows are realizations, then use integer row labels
            upgrade_1 = upgrade_1.T
            upgrade_1.index = [int(i) for i in upgrade_1.index]
            # NOTE: the file name only carries the iteration number, so each
            # lambda multiplier overwrites the file of the previous one
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            if upgrade_1.isnull().values.any():
                self.logger.lraise("NaNs in upgrade_1")
            self.logger.log("processing upgrade_1")

            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not use_approx and self.iter_num > 1:
                self.logger.log("building upgrade_2 matrix")
                par_diff = (self.parensemble - self.parensemble_0.loc[self.parensemble.index,:]).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.half_parcov_diag * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                upgrade_2 = -1.0 * (self.half_parcov_diag * scaled_delta_par *
                                    x7).to_dataframe()
                upgrade_2.index.name = "parnme"
                upgrade_2 = upgrade_2.T
                upgrade_2.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                upgrade_2.index = [int(i) for i in upgrade_2.index]

                if upgrade_2.isnull().values.any():
                    self.logger.lraise("NaNs in upgrade_2")

                parensemble_cur_lam += upgrade_2
                self.logger.log("building upgrade_2 matrix")
            parensemble_cur_lam.enforce(self.enforce_bounds)

            # this is for testing failed runs on upgrade testing
            # works with the 10par_xsec smoother test
            #parensemble_cur_lam.iloc[:,:] = -1000000.0

            paren_lam.append(pd.DataFrame(parensemble_cur_lam.loc[:, :]))
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))

        # subset if needed
        # and combine lambda par ensembles into one par ensemble for evaluation
        if run_subset is not None and run_subset < self.parensemble.shape[0]:
            # the subset is simply the first run_subset realizations
            subset_idx = self.parensemble.iloc[:run_subset, :].index.values
            self.logger.statement("subset idxs: " +
                                  ','.join([str(s) for s in subset_idx]))
            paren_lam_subset = [pe.loc[subset_idx, :] for pe in paren_lam]
            paren_combine = pd.concat(paren_lam_subset, ignore_index=True)
            paren_lam_subset = None
        else:
            subset_idx = self.parensemble.index.values
            paren_combine = pd.concat(paren_lam, ignore_index=True)


        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        failed_runs, obsen_combine = self._calc_obs(paren_combine)
        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        paren_combine = None

        if failed_runs is not None and len(
                failed_runs) == obsen_combine.shape[0]:
            self.logger.lraise("all runs failed - cannot continue")

        # unpack lambda obs ensembles from combined obs ensemble
        nrun_per_lam = self.obsensemble.shape[0]
        if run_subset is not None:
            nrun_per_lam = run_subset
        obsen_lam = []
        for i in range(len(lam_vals)):
            # rows [sidx, eidx) of the combined ensemble belong to lambda i
            sidx = i * nrun_per_lam
            eidx = sidx + nrun_per_lam
            oe = ObservationEnsemble.from_dataframe(
                df=obsen_combine.iloc[sidx:eidx, :].copy(), pst=self.pst)
            oe.index = subset_idx
            # check for failed runs in this set - drop failed runs from obs ensembles
            if failed_runs is not None:
                # positions of the failed runs relative to this lambda's slice
                failed_runs_this = np.array(
                    [f for f in failed_runs if f >= sidx and f < eidx]) - sidx
                if len(failed_runs_this) > 0:
                    if len(failed_runs_this) == oe.shape[0]:
                        self.logger.warn(
                            "all runs failed for lambda {0}".format(
                                lam_vals[i]))
                    else:
                        self.logger.warn("{0} run failed for lambda {1}".\
                                         format(len(failed_runs_this),lam_vals[i]))
                    # np.nan rather than np.NaN: the latter alias was
                    # removed in NumPy 2.0
                    oe.iloc[failed_runs_this, :] = np.nan
                    oe = oe.dropna()
            obsen_lam.append(oe)
        obsen_combine = None

        # here is where we need to select out the "best" lambda par and obs
        # ensembles
        self.logger.statement("\n**************************")
        self.logger.statement(str(datetime.now()))
        self.logger.statement("total runs:{0}".format(self.total_runs))
        self.logger.statement("iteration: {0}".format(self.iter_num))
        self.logger.statement("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                              format(self.current_lambda,
                         self.last_best_mean,self.last_best_std))
        phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        mean_std = [(pv.mean(), pv.std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i, (m, s) in enumerate(mean_std):
            self.logger.statement(" tested lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                                 format(self.current_lambda * lambda_mults[i],m,s))
            if m < best_mean:
                update_pars = True
                best_mean = m
                best_i = i
                # the std is only considered for candidates that improved
                # the mean
                if s < best_std:
                    update_lambda = True
                    best_std = s
        if np.isnan(best_mean):
            self.logger.lraise("best mean = NaN")
        if np.isnan(best_std):
            self.logger.lraise("best std = NaN")

        if not update_pars:
            # nothing accepted: increase lambda, capped at 100000
            self.current_lambda *= max(lambda_mults) * 10.0
            self.current_lambda = min(self.current_lambda, 100000)
            self.logger.statement("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))
        else:
            self.parensemble = ParameterEnsemble.from_dataframe(
                df=paren_lam[best_i], pst=self.pst)
            if run_subset is not None:
                # only a subset was evaluated above, so run the full
                # accepted ensemble now
                failed_runs, self.obsensemble = self._calc_obs(
                    self.parensemble)
                if failed_runs is not None:
                    self.logger.warn("dropping failed realizations")
                    self.parensemble = self.parensemble.drop(failed_runs)
                    self.obsensemble = self.obsensemble.drop(failed_runs)
                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
                self._phi_report(self.current_phi_vec,
                                 self.current_lambda * lambda_mults[best_i])
                best_mean = self.current_phi_vec.mean()
                best_std = self.current_phi_vec.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                # reindex parensemble in case failed runs
                self.parensemble = ParameterEnsemble.from_dataframe(
                    df=self.parensemble.loc[self.obsensemble.index],
                    pst=self.pst)
                self._phi_report(phi_vecs[best_i],
                                 self.current_lambda * lambda_mults[best_i])
                self.current_phi_vec = phi_vecs[best_i]

            self.logger.statement("   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda, 0.001)
            self.logger.statement("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))

        self.logger.statement("**************************\n")
        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))
        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
        if self.raw_sweep_out is not None:
            self.raw_sweep_out.to_csv(self.pst.filename+"_raw{0}".\
                                        format(self.iter_num))
        self.logger.log("iteration {0}".format(self.iter_num))
コード例 #18
0
 def __init__(self, **kwargs):
     """Set up the base class, then create ensembles bound to the control file.

     All keyword arguments are forwarded unchanged to the parent
     constructor.  A pest control file must be available as ``self.pst``
     afterwards; an AssertionError is raised otherwise.
     """
     super(MonteCarlo, self).__init__(**kwargs)
     control_file = self.pst
     assert control_file is not None, "monte carlo requires a pest control file"
     # parameter and observation ensembles built from the same control file
     self.parensemble = ParameterEnsemble(pst=control_file)
     self.obsensemble = ObservationEnsemble(pst=control_file)
コード例 #19
0
ファイル: smoother.py プロジェクト: wkitlasten/pyemu
    def initialize(
        self,
        num_reals=1,
        init_lambda=None,
        enforce_bounds="reset",
        parensemble=None,
        obsensemble=None,
        restart_obsensemble=None,
    ):
        """(Re)initialize the iES process.  Depending on arguments, draws or
        loads the initial parameter and observation ensembles and runs the
        initial parameter ensemble

        Parameters
        ----------
            num_reals : int
                the number of realizations to draw.  Ignored if parensemble
                and obsensemble are both not None
            init_lambda : float
                the initial lambda to use.  If None, it is derived from the
                initial mean phi.  During subsequent updates, the lambda is
                updated according to upgrade success
            enforce_bounds : str
                how to enforce parameter bound transgression.  options are
                reset, drop, or None
            parensemble : pyemu.ParameterEnsemble or str
                a parameter ensemble or filename to use as the initial
                parameter ensemble.  If not None, then obsensemble must not
                be None (existing ensembles are only used when both are
                supplied)
            obsensemble : pyemu.ObservationEnsemble or str
                an observation ensemble or filename to use as the initial
                observation ensemble.  If not None, then parensemble must
                not be None
            restart_obsensemble : pyemu.ObservationEnsemble or str
                an observation ensemble or filename to use as an
                evaluated observation ensemble.  If not None, this will skip
                the initial parameter ensemble evaluation - user beware!

        Returns
        -------
            None


        Example
        -------
        ``>>>import pyemu``

        ``>>>es = pyemu.EnsembleSmoother(pst="pest.pst")``

        ``>>>es.initialize(num_reals=100)``

        """
        self.enforce_bounds = enforce_bounds

        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

        if parensemble is not None and obsensemble is not None:
            self.logger.log("initializing with existing ensembles")
            if isinstance(parensemble, str):
                self.logger.log("loading parensemble from file")
                # check the parameter ensemble file itself (the original
                # tested the obsensemble path here by mistake)
                if not os.path.exists(parensemble):
                    self.logger.lraise("can not find parensemble file: {0}".\
                                       format(parensemble))
                df = pd.read_csv(parensemble, index_col=0)
                self.parensemble_0 = ParameterEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading parensemble from file")

            elif isinstance(parensemble, ParameterEnsemble):
                self.parensemble_0 = parensemble.copy()
            else:
                raise Exception("unrecognized arg type for parensemble, " +\
                                "should be filename or ParameterEnsemble" +\
                                ", not {0}".format(type(parensemble)))
            self.parensemble = self.parensemble_0.copy()
            if isinstance(obsensemble, str):
                self.logger.log("loading obsensemble from file")
                if not os.path.exists(obsensemble):
                    self.logger.lraise("can not find obsensemble file: {0}".\
                                       format(obsensemble))
                # keep only the columns named by pst.nnz_obs_names
                df = pd.read_csv(obsensemble,
                                 index_col=0).loc[:, self.pst.nnz_obs_names]
                self.obsensemble_0 = ObservationEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading obsensemble from file")

            elif isinstance(obsensemble, ObservationEnsemble):
                self.obsensemble_0 = obsensemble.copy()
            else:
                raise Exception("unrecognized arg type for obsensemble, " +\
                                "should be filename or ObservationEnsemble" +\
                                ", not {0}".format(type(obsensemble)))

            assert self.parensemble_0.shape[0] == self.obsensemble_0.shape[0]
            # the supplied ensembles define the realization count
            num_reals = self.parensemble.shape[0]
            self.logger.log("initializing with existing ensembles")

        else:
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))
            # NOTE(review): this phrase is logged three times below; if the
            # logger treats calls as start/stop pairs, the last one is left
            # unmatched -- confirm against the logger implementation
            self.logger.log("initializing parensemble")
            self.parensemble_0 = pyemu.ParameterEnsemble.from_gaussian_draw(
                ParameterEnsemble(self.pst), self.parcov, num_reals=num_reals)
            self.parensemble_0.enforce(enforce_bounds=enforce_bounds)
            self.logger.log("initializing parensemble")
            self.parensemble = self.parensemble_0.copy()
            self.parensemble_0.to_csv(self.pst.filename +\
                                      self.paren_prefix.format(0))
            self.logger.log("initializing parensemble")
            self.logger.log("initializing obsensemble")
            self.obsensemble_0 = pyemu.ObservationEnsemble.from_id_gaussian_draw(
                ObservationEnsemble(self.pst), num_reals=num_reals)

            # save the base obsensemble
            self.obsensemble_0.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(-1))
            self.logger.log("initializing obsensemble")
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))

        self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

        # initialize the phi report csvs; the handles are stored on self
        # and are not closed here
        self.phi_csv = open(self.pst.filename + ".iobj.csv", 'w')
        self.phi_csv.write(
            "iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_csv.write(','.join(["{0:010d}".format(i + 1)
                                     for i in range(num_reals)]))
        self.phi_csv.write('\n')
        self.phi_act_csv = open(self.pst.filename + ".iobj.actual.csv", 'w')
        self.phi_act_csv.write(
            "iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_act_csv.write(','.join(["{0:010d}".format(i + 1)
                                         for i in range(num_reals)]))
        self.phi_act_csv.write('\n')

        if restart_obsensemble is not None:
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))
            failed_runs, self.obsensemble = self._load_obs_ensemble(
                restart_obsensemble)
            assert self.obsensemble.shape[0] == self.obsensemble_0.shape[0]
            assert list(self.obsensemble.columns) == list(
                self.obsensemble_0.columns)
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))

        else:
            # run the initial parameter ensemble
            self.logger.log("evaluating initial ensembles")
            failed_runs, self.obsensemble = self._calc_obs(self.parensemble)
            self.obsensemble.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(0))
            self.logger.log("evaluating initial ensembles")

        if failed_runs is not None:
            self.logger.warn("dropping failed realizations")
            # blank the failed rows, then drop them.  np.nan rather than
            # np.NaN: the latter alias was removed in NumPy 2.0
            self.parensemble.loc[failed_runs, :] = np.nan
            self.parensemble = self.parensemble.dropna()
            self.obsensemble.loc[failed_runs, :] = np.nan
            self.obsensemble = self.obsensemble.dropna()

        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)

        if self.drop_bad_reals is not None:
            # positions where phi exceeds the threshold, mapped to run ids
            drop_idx = np.argwhere(
                self.current_phi_vec > self.drop_bad_reals).flatten()
            run_ids = self.obsensemble.index.values
            drop_idx = run_ids[drop_idx]
            if len(drop_idx) == self.obsensemble.shape[0]:
                raise Exception("dropped all realizations as 'bad'")
            if len(drop_idx) > 0:
                self.logger.warn("{0} realizations dropped as 'bad' (indices :{1})".\
                                 format(len(drop_idx),','.join([str(d) for d in drop_idx])))
                self.parensemble.loc[drop_idx, :] = np.nan
                self.parensemble = self.parensemble.dropna()
                self.obsensemble.loc[drop_idx, :] = np.nan
                self.obsensemble = self.obsensemble.dropna()

                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)

        self._phi_report(self.phi_csv, self.current_phi_vec, 0.0)
        self._phi_report(self.phi_act_csv, self.obsensemble.phi_vector.values,
                         0.0)

        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        self.logger.statement("initial phi (mean, std): {0:15.6G},{1:15.6G}".\
                              format(self.last_best_mean,self.last_best_std))
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # if using the approximate form of the algorithm, let
        # the parameter scaling matrix be the identity matrix
        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now, just not using it, maybe
        # better choices of lambda will tame it
        self.logger.statement("current lambda:{0:15.6g}".format(
            self.current_lambda))

        if self.use_approx_prior:
            self.logger.statement("using approximate parcov in solution")
        # both settings currently use identity scaling (see the note above)
        self.half_parcov_diag = 1.0
        self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
        u, s, v = self.delta_par_prior.pseudo_inv_components()
        self.Am = u * s.inv

        self.__initialized = True
コード例 #20
0
class EnsembleSmoother():
    '''
    Iterative ensemble smoother that evaluates parameter ensembles with the
    external ``sweep`` program and adjusts them with lambda-damped upgrades.

    Call ``initialize()`` once, then ``update()`` repeatedly.  Parameter and
    observation ensembles, the upgrade matrices and a phi summary are written
    to csv files whose names start with the control file name.
    '''

    def __init__(self,pst,parcov=None,obscov=None,num_slaves=0,use_approx=True,
                 restart_iter=0,submit_file=None):
        '''
        Parameters
        ----------
        pst : Pst or str
            control file instance; a str is passed to ``Pst()``
        parcov : Cov
            parameter covariance.  If None, ``Cov.from_parameter_data(pst)``
        obscov : Cov
            observation covariance.  If None,
            ``Cov.from_observation_data(pst)``
        num_slaves : int
            if greater than zero, local runs start a sweep master plus this
            many slaves instead of a single serial sweep
        use_approx : bool
            if False, ``update()`` adds the parameter-based upgrade term
            after the first iteration
        restart_iter : int
            if greater than zero, the saved ensembles for that iteration must
            exist on disk and the restart flag is set (``initialize()``
            currently raises NotImplementedError for restarts)
        submit_file : str
            if not None, runs are farmed out through ``condor_submit``
        '''
        self.num_slaves = int(num_slaves)
        self.submit_file = submit_file
        self.use_approx = bool(use_approx)
        # suffix templates appended to the control file name;
        # {0} is the iteration number
        self.paren_prefix = ".parensemble.{0:04d}.csv"
        self.obsen_prefix = ".obsensemble.{0:04d}.csv"

        if isinstance(pst,str):
            pst = Pst(pst)
        assert isinstance(pst,Pst)
        self.pst = pst
        self.sweep_in_csv = pst.pestpp_options.get("sweep_parameter_csv_file","sweep_in.csv")
        self.sweep_out_csv = pst.pestpp_options.get("sweep_output_csv_file","sweep_out.csv")
        if parcov is not None:
            assert isinstance(parcov,Cov)
        else:
            parcov = Cov.from_parameter_data(self.pst)
        if obscov is not None:
            assert isinstance(obscov,Cov)
        else:
            obscov = Cov.from_observation_data(pst)

        self.parcov = parcov
        self.obscov = obscov
        self.restart = False

        if restart_iter > 0:
            # a restart needs the requested par ensemble, the iteration-0
            # obs ensemble and the requested obs ensemble on disk
            self.restart_iter = restart_iter
            paren = self.pst.filename+self.paren_prefix.format(restart_iter)
            assert os.path.exists(paren),\
                "could not find restart par ensemble {0}".format(paren)
            obsen0 = self.pst.filename+self.obsen_prefix.format(0)
            assert os.path.exists(obsen0),\
                "could not find restart obs ensemble 0 {0}".format(obsen0)
            obsen = self.pst.filename+self.obsen_prefix.format(restart_iter)
            assert os.path.exists(obsen),\
                "could not find restart obs ensemble {0}".format(obsen)
            self.restart = True

        self.__initialized = False
        self.num_reals = 0
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0

    def initialize(self,num_reals,init_lambda=None):
        '''
        (re)initialize the process: draw the prior ensembles, run the initial
        parameter ensemble and set the starting lambda.

        Parameters
        ----------
        num_reals : int
            number of realizations to draw; must be greater than 1
        init_lambda : float
            starting lambda.  If None, it is derived from the initial mean
            phi (following chen and oliver)

        Raises
        ------
        NotImplementedError
            if the instance was constructed with ``restart_iter > 0``
        '''
        assert num_reals > 1
        if self.restart:
            # bail out before the phi report is truncated or anything is run
            raise NotImplementedError("restarting from saved ensembles "
                                      "is not implemented")
        num_reals = int(num_reals)

        # do not leak the report handle of a previous initialize() call
        previous_csv = getattr(self,"phi_csv",None)
        if previous_csv is not None and not previous_csv.closed:
            previous_csv.close()

        # initialize the phi report csv; the handle stays open because
        # _phi_report() appends one row per accepted iteration
        self.phi_csv = open(self.pst.filename+".iobj.csv",'w')
        self.phi_csv.write("iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_csv.write(','.join(["{0:010d}".\
                                    format(i+1) for i in range(num_reals)]))
        self.phi_csv.write('\n')
        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

        self.num_reals = num_reals
        self.parensemble_0 = ParameterEnsemble(self.pst)
        self.parensemble_0.draw(cov=self.parcov,num_reals=num_reals)
        self.parensemble_0.enforce()
        self.parensemble = self.parensemble_0.copy()
        self.parensemble_0.to_csv(self.pst.filename +\
                                  self.paren_prefix.format(0))
        self.obsensemble_0 = ObservationEnsemble(self.pst)
        self.obsensemble_0.draw(cov=self.obscov,num_reals=num_reals)

        # save the base obsensemble
        self.obsensemble_0.to_csv(self.pst.filename +\
                                  self.obsen_prefix.format(-1))
        self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

        # run the initial parameter ensemble
        self.obsensemble = self._calc_obs(self.parensemble)
        self.obsensemble.to_csv(self.pst.filename +\
                                  self.obsen_prefix.format(0))

        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
        self._phi_report(self.current_phi_vec,0.0)
        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now the parameter scaling is always the
        # identity (1.0), maybe better choices of lambda will tame it
        self.half_parcov_diag = 1.0
        if not self.use_approx:
            # only the full form of the algorithm needs the prior term
            self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
            u,s,v = self.delta_par_prior.pseudo_inv_components()
            self.Am = u * s.inv
        self.__initialized = True

    def get_localizer(self):
        '''
        build an all-ones localizer Matrix with non-zero-weight observation
        names as rows and adjustable parameter names as columns
        '''
        onames = self.pst.nnz_obs_names
        pnames = self.pst.adj_par_names
        localizer = Matrix(x=np.ones((len(onames),len(pnames))),row_names=onames,col_names=pnames)
        return localizer

    def _calc_delta_par(self,parensemble):
        '''
        calc the scaled parameter ensemble differences from the mean
        '''
        return self._calc_delta(parensemble, self.half_parcov_diag)

    def _calc_delta_obs(self,obsensemble):
        '''
        calc the scaled observation ensemble differences from the mean
        '''
        return self._calc_delta(obsensemble.nonzero, self.obscov.inv.sqrt)

    def _calc_delta(self,ensemble,scaling_matrix):
        '''
        calc the scaled ensemble differences from the mean: subtract the
        column means from the first ``num_reals`` rows, transpose, apply
        ``scaling_matrix`` and divide by sqrt(num_reals - 1)
        '''
        mean = np.array(ensemble.mean(axis=0))
        delta = ensemble.as_pyemu_matrix()
        # broadcast the mean over the realization rows in one step
        delta.x[:self.num_reals,:] -= mean
        delta = scaling_matrix * delta.T
        delta *= (1.0 / np.sqrt(float(self.num_reals - 1.0)))
        return delta

    def _calc_obs(self,parensemble):
        '''
        run ``parensemble`` through the model, locally or through condor
        depending on ``submit_file``, and return the resulting
        ObservationEnsemble
        '''
        if self.submit_file is None:
            return self._calc_obs_local(parensemble)
        return self._calc_obs_condor(parensemble)

    def _run_sweep_master(self,submit_file=None,port=4004):
        '''
        start a sweep master in a background thread, start the slaves,
        optionally submit the condor job, then block until the master exits
        '''
        def master():
            os.system("sweep {0} /h :{1} >nul".format(self.pst.filename,port))
        master_thread = threading.Thread(target=master)
        master_thread.start()
        time.sleep(1.5) #just some time for the master to get up and running to take slaves
        pyemu.utils.start_slaves("template","sweep",self.pst.filename,
                                 self.num_slaves,slave_root='.',port=port)
        if submit_file is not None:
            os.system("condor_submit {0}".format(submit_file))
        master_thread.join()

    def _read_sweep_output(self):
        '''
        load the sweep output csv, lower-case its column names, add its row
        count to ``total_runs`` and return the columns named in the
        observation covariance as an ObservationEnsemble
        '''
        obs = pd.read_csv(self.sweep_out_csv)
        obs.columns = [item.lower() for item in obs.columns]
        self.total_runs += obs.shape[0]
        return ObservationEnsemble.from_dataframe(df=obs.loc[:,self.obscov.row_names],
                                                  pst=self.pst)

    def _calc_obs_condor(self,parensemble):
        '''
        propagate the ensemble forward using sweep with condor-submitted
        slaves; any existing condor jobs are removed first
        '''
        parensemble.to_csv(self.sweep_in_csv)
        os.system("condor_rm -all")
        self._run_sweep_master(submit_file=self.submit_file)
        return self._read_sweep_output()

    def _calc_obs_local(self,parensemble):
        '''
        propagate the ensemble forward using sweep.
        '''
        parensemble.to_csv(self.sweep_in_csv)
        if self.num_slaves > 0:
            self._run_sweep_master()
        else:
            os.system("sweep {0}".format(self.pst.filename))
        return self._read_sweep_output()

    def _calc_phi_vec(self,obsensemble):
        '''
        return the diagonal of residual * obscov_inv_sqrt * residual^T as a
        numpy array (one phi value per row of the residual matrix)
        '''
        obs_diff = self._get_residual_matrix(obsensemble)
        phi_vec = np.diagonal((obs_diff * self.obscov_inv_sqrt.get(row_names=obs_diff.col_names,
                                                                   col_names=obs_diff.col_names) * obs_diff.T).x)
        return phi_vec

    def _phi_report(self,phi_vec,cur_lam):
        '''
        append one row to the phi csv: iteration number, total runs, lambda,
        min, max, mean, median and std of ``phi_vec`` followed by the
        individual phi values.  ``phi_vec`` must have ``num_reals`` entries.
        '''
        assert phi_vec.shape[0] == self.num_reals
        # eight summary fields to match the header, plus the separator
        # before the per-realization values
        self.phi_csv.write("{0},{1},{2},{3},{4},{5},{6},{7},".format(self.iter_num,
                                                             self.total_runs,
                                                             cur_lam,
                                                             phi_vec.min(),
                                                             phi_vec.max(),
                                                             phi_vec.mean(),
                                                             np.median(phi_vec),
                                                             phi_vec.std()))
        self.phi_csv.write(",".join(["{0:20.8}".format(phi) for phi in phi_vec]))
        self.phi_csv.write("\n")
        self.phi_csv.flush()

    def _get_residual_matrix(self, obsensemble):
        '''
        return the non-zero-weight part of ``obsensemble`` minus the matching
        rows and columns of the base observation matrix
        '''
        obs_matrix = obsensemble.nonzero.as_pyemu_matrix()
        return  obs_matrix - self.obs0_matrix.get(col_names=obs_matrix.col_names,row_names=obs_matrix.row_names)

    def update(self,lambda_mults=(1.0,),localizer=None,run_subset=None):
        '''
        perform one smoother iteration: build an upgraded parameter ensemble
        for each lambda multiplier, run it, and keep the best one.

        Parameters
        ----------
        lambda_mults : sequence of float
            multipliers applied to the current lambda; one candidate
            ensemble is built and run per multiplier
        localizer : Matrix
            if not None, applied to the upgrade matrix as a hadamard product
        run_subset : int
            if not None, only this many randomly chosen realizations are run
            for each candidate; the accepted ensemble is then re-run in full

        Raises
        ------
        Exception
            if ``initialize()`` has not been called
        '''
        # check first so a rejected call does not consume an iteration number
        if not self.__initialized:
            raise Exception("must call initialize() before update()")
        self.iter_num += 1

        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        scaled_delta_par = self._calc_delta_par(self.parensemble)

        u,s,v = scaled_delta_obs.pseudo_inv_components()

        obs_diff = self._get_residual_matrix(self.obsensemble)

        if run_subset is not None:
            # randint's upper bound is exclusive, so use num_reals to keep
            # the last realization eligible
            subset_idx = ["{0:d}".format(i) for i in np.random.randint(0,self.num_reals,run_subset)]
            print("subset idxs: " + ','.join(subset_idx))

        paren_lam,obsen_lam = [],[]
        for cur_lam_mult in lambda_mults:

            parensemble_cur_lam = self.parensemble.copy()

            cur_lam = self.current_lambda * cur_lam_mult

            # ((lambda + 1) * I + s^2)^-1
            scaled_ident = Cov.identity_like(s) * (cur_lam+1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            upgrade_1 = -1.0 * (self.half_parcov_diag * scaled_delta_par) *\
                        v * s * scaled_ident * u.T

            # apply localization
            if localizer is not None:
                upgrade_1.hadamard_product(localizer)

            # apply residual information
            upgrade_1 *= (self.obscov_inv_sqrt * obs_diff.T)

            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            upgrade_1 = upgrade_1.T
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not self.use_approx and self.iter_num > 1:
                par_diff = (self.parensemble - self.parensemble_0).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.half_parcov_diag * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                upgrade_2 = -1.0 * (self.half_parcov_diag *
                                   scaled_delta_par * x7).to_dataframe()

                upgrade_2.index.name = "parnme"
                upgrade_2.T.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                parensemble_cur_lam += upgrade_2.T
            parensemble_cur_lam.enforce()
            paren_lam.append(parensemble_cur_lam)
            if run_subset is not None:
                parensemble_subset = parensemble_cur_lam.loc[subset_idx,:]
                obsensemble_cur_lam = self._calc_obs(parensemble_subset)
            else:
                obsensemble_cur_lam = self._calc_obs(parensemble_cur_lam)
            obsen_lam.append(obsensemble_cur_lam)


        # here is where we need to select out the "best" lambda par and obs
        # ensembles
        print("\n**************************")
        print(str(datetime.now()))
        print("total runs:{0}".format(self.total_runs))
        print("iteration: {0}".format(self.iter_num))
        print("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda,
                         self.last_best_mean,self.last_best_std))
        phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        mean_std = [(pv.mean(),pv.std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i,(phi_mean,phi_std) in enumerate(mean_std):
            print(" tested lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda * lambda_mults[i],phi_mean,phi_std))
            if phi_mean < best_mean:
                update_pars = True
                best_mean = phi_mean
                best_i = i
                if phi_std < best_std:
                    update_lambda = True
                    best_std = phi_std

        if not update_pars:
            # nothing improved: raise lambda (capped at 100000) and keep
            # the current ensembles
            self.current_lambda *= max(lambda_mults) * 3.0
            self.current_lambda = min(self.current_lambda,100000)
            print("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))

        else:

            self.parensemble = paren_lam[best_i]
            if run_subset is not None:
                # only a subset was run for the test, so evaluate the
                # accepted ensemble in full before reporting
                self.obsensemble = self._calc_obs(self.parensemble)
                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
                self._phi_report(self.current_phi_vec,self.current_lambda * lambda_mults[best_i])
                best_mean = self.current_phi_vec.mean()
                best_std = self.current_phi_vec.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                self._phi_report(phi_vecs[best_i],self.current_lambda * lambda_mults[best_i])
                self.current_phi_vec = phi_vecs[best_i]

            print("\n" + "   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive - scale the best lambda by 0.75
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda,0.001)
            print("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))


        print("**************************\n")

        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))

        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
コード例 #21
0
ファイル: mc.py プロジェクト: whejs/pyemu
    def draw(
        self,
        num_reals=1,
        par_file=None,
        obs=False,
        enforce_bounds=None,
        cov=None,
        how="gaussian",
    ):
        """Generate stochastic parameter realizations (and, on request,
        observation realizations), storing them in MonteCarlo.parensemble
        and MonteCarlo.obsensemble.

        Parameters
        ----------
        num_reals : int
            how many realizations to generate
        par_file : str
            parameter file that is loaded into the control file with
            ``pst.parrep`` before drawing. If None, the control file is
            used as it stands. Default is None
        obs : bool
            also draw observation realizations, forming
            MonteCarlo.obsensemble. Default is False
        enforce_bounds : str
            if not None, passed to ``parensemble.enforce`` after the draw.
            options are 'reset', 'drop' or None.  Default is None
        cov : Cov
            covariance used for the gaussian draw. If None,
            MonteCarlo.parcov is used.  Cannot be combined with
            how="uniform". Default is None
        how : str
            distribution to draw from (case and surrounding whitespace are
            ignored). Must be in ["gaussian","uniform"]; default is
            "gaussian".

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

        ``>>>mc.draw(1000)``

        """
        if par_file is not None:
            self.pst.parrep(par_file)

        how = how.lower().strip()
        assert how in ["gaussian", "uniform"]

        if cov is None:
            cov = self.parcov
        else:
            assert isinstance(cov, Cov)
            if how == "uniform":
                raise Exception(
                    "MonteCarlo.draw() error: 'how'='uniform', 'cov' arg cannot be passed")

        # the same message is logged before and after the work it describes
        par_message = "generating {0:d} parameter realizations".format(
            num_reals)
        self.log(par_message)

        if how == "uniform":
            self.parensemble = ParameterEnsemble.from_uniform_draw(
                pst=self.pst, num_reals=num_reals)
        elif how == "gaussian":
            # bounds are not enforced during the draw itself; that is
            # handled below when enforce_bounds is given
            self.parensemble = ParameterEnsemble.from_gaussian_draw(
                pst=self.pst,
                cov=cov,
                num_reals=num_reals,
                use_homegrown=True,
                enforce_bounds=False,
            )
        else:
            raise Exception(
                "MonteCarlo.draw(): unrecognized 'how' arg: {0}".format(how))

        if enforce_bounds is not None:
            self.parensemble.enforce(enforce_bounds)
        self.log(par_message)

        if not obs:
            return

        obs_message = "generating {0:d} observation realizations".format(
            num_reals)
        self.log(obs_message)
        self.obsensemble = ObservationEnsemble.from_id_gaussian_draw(
            pst=self.pst, num_reals=num_reals)
        self.log(obs_message)
コード例 #22
0
ファイル: smoother.py プロジェクト: wk1984/pyemu
class EnsembleSmoother():
    '''
    Minimal ensemble smoother: draws prior parameter and observation
    ensembles, runs the parameter ensemble through ``sweep`` and applies
    upgrade matrices to the parameter ensemble on every ``update()`` call.
    '''

    def __init__(self,pst,parcov=None,obscov=None):
        '''
        pst may be a Pst instance or a control file name (a str is passed
        to Pst()).  Missing covariance matrices are built from the control
        file's parameter / observation data.
        '''
        control = Pst(pst) if isinstance(pst,str) else pst
        assert isinstance(control,Pst)
        self.pst = control

        if parcov is None:
            parcov = Cov.from_parameter_data(self.pst)
        else:
            assert isinstance(parcov,Cov)
        if obscov is None:
            obscov = Cov.from_observation_data(control)
        else:
            assert isinstance(obscov,Cov)
        self.parcov = parcov
        self.obscov = obscov

        self.__initialized = False
        self.num_reals = 0
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0

    def initialize(self,num_reals):
        '''
        (re)start the smoother: draw and save the prior ensembles, set up
        the parameter scaling matrix and the prior deviation terms
        '''
        self.num_reals = int(num_reals)

        # prior parameter ensemble; a working copy is kept separately
        prior_pars = ParameterEnsemble(self.pst)
        self.parensemble_0 = prior_pars
        prior_pars.draw(cov=self.parcov,num_reals=num_reals)
        self.parensemble = prior_pars.copy()
        prior_pars.to_csv("parensemble.0.csv")

        # prior observation ensemble, handled the same way
        prior_obs = ObservationEnsemble(self.pst)
        self.obsensemble_0 = prior_obs
        prior_obs.draw(cov=self.obscov,num_reals=num_reals)
        self.obsensemble = prior_obs.copy()
        prior_obs.to_csv("obsensemble.0.csv")

        # scaling matrix is the inverse square root of a diagonal parcov;
        # a non-diagonal parcov is reduced to its diagonal first
        diag_cov = self.parcov
        if not diag_cov.isdiagonal:
            diag_cov = Cov(x=np.diag(self.parcov.x),
                           names=self.parcov.col_names,
                           isdiagonal=True)
        self.half_parcov_diag = diag_cov.inv.sqrt

        self.delta_par_prior = self._calc_delta_par()

        left,sing,_ = self.delta_par_prior.pseudo_inv_components()
        self.Am = left * sing.inv

        self.__initialized = True

    def _centered_transpose(self,ensemble):
        '''
        subtract the column means from the first num_reals rows of the
        ensemble matrix and return its transpose
        '''
        col_means = np.array(ensemble.mean(axis=0))
        centered = ensemble.as_pyemu_matrix()
        for ireal in range(self.num_reals):
            centered.x[ireal,:] -= col_means
        return centered.T

    def _calc_delta_par(self):
        '''
        scaled deviations of the current parameter ensemble from its mean
        '''
        deviations = self._centered_transpose(self.parensemble)
        scaled = self.half_parcov_diag * deviations
        return scaled * (1.0 / np.sqrt(float(self.num_reals - 1.0)))

    def _calc_delta_obs(self):
        '''
        scaled deviations of the current observation ensemble from its mean
        '''
        deviations = self._centered_transpose(self.obsensemble)
        scaled = self.obscov.inv.sqrt * deviations
        return scaled * (1.0 / np.sqrt(float(self.num_reals - 1.0)))

    def _calc_obs(self):
        '''
        run the current parameter ensemble through sweep and load the
        resulting observation ensemble into self.obsensemble
        '''
        sweep_in = os.path.join("sweep_in.csv")
        self.parensemble.to_csv(sweep_in)
        print(os.listdir('.'))
        os.system("sweep {0}".format(self.pst.filename))
        sweep_out = os.path.join('sweep_out.csv')
        raw_obs = ObservationEnsemble.from_csv(sweep_out)
        raw_obs.columns = [name.lower() for name in raw_obs.columns]
        wanted = raw_obs.loc[:,self.obscov.row_names]
        self.obsensemble = ObservationEnsemble.from_dataframe(df=wanted,pst=self.pst)
        #todo: modify sweep to be interactive...
        return

    @property
    def current_lambda(self):
        '''fixed damping factor used by update()'''
        return 1.0

    def update(self):
        '''
        run the model for the current parameter ensemble and apply the
        upgrade(s) to it, writing the intermediate and final ensembles to
        csv files tagged with the iteration number
        '''
        if not self.__initialized:
            raise Exception("must call initialize() before update()")

        self._calc_obs()
        self.iter_num += 1
        self.obsensemble.to_csv("obsensemble.{0}.csv".format(self.iter_num))

        obs_deviations = self._calc_delta_obs()
        u,s,v = obs_deviations.pseudo_inv_components()
        par_deviations = self._calc_delta_par()
        residuals = self.obsensemble.as_pyemu_matrix() -\
               self.obsensemble_0.as_pyemu_matrix()
        damped_inv = (self.current_lambda*Cov.identity_like(s) + s**2).inv

        # observation-based upgrade
        projected = u.T * self.obscov.inv.sqrt * residuals.T
        projected.autoalign = False
        damped = damped_inv * projected
        weighted = v * s * damped
        first_upgrade = -1.0 *  (self.half_parcov_diag * par_deviations *\
                                 weighted).to_dataframe()
        first_upgrade.index.name = "parnme"
        first_upgrade.T.to_csv("upgrade_1.{0}.csv".format(self.iter_num))
        self.parensemble += first_upgrade.T

        # parameter-based upgrade, skipped on the first iteration
        if self.iter_num > 1:
            drift = (self.parensemble - self.parensemble_0).\
                as_pyemu_matrix().T
            step_a = self.Am.T * self.half_parcov_diag * drift
            step_b = self.Am * step_a
            step_c = par_deviations.T * step_b
            step_d = v * damped_inv * v.T * step_c
            second_upgrade = -1.0 * (self.half_parcov_diag *
                                     par_deviations * step_d).to_dataframe()
            second_upgrade.index.name = "parnme"
            second_upgrade.T.to_csv("upgrade_2.{0}.csv".format(self.iter_num))
            self.parensemble += second_upgrade.T

        self.parensemble.to_csv("parensemble.{0}.csv".format(self.iter_num))
コード例 #23
0
ファイル: mc.py プロジェクト: mnfienen/pyemu
 def __init__(self, **kwargs):
     """Set up the base class from ``kwargs`` and create parameter and
     observation ensembles bound to the control file.

     A control file (``self.pst``) must be available after the base class
     initializer has run.
     """
     super(MonteCarlo, self).__init__(**kwargs)
     no_pst_message = "monte carlo requires a pest control file"
     assert self.pst is not None, no_pst_message
     # both ensembles are built from the same control file
     self.parensemble = ParameterEnsemble(pst=self.pst)
     self.obsensemble = ObservationEnsemble(pst=self.pst)
コード例 #24
0
ファイル: smoother.py プロジェクト: xuexianwu/pyemu
    def initialize(self,
                   num_reals=1,
                   init_lambda=None,
                   enforce_bounds="reset",
                   parensemble=None,
                   obsensemble=None,
                   restart_obsensemble=None):
        '''
        (re)initialize the process
        '''
        # initialize the phi report csv
        self.enforce_bounds = enforce_bounds
        self.phi_csv = open(self.pst.filename + ".iobj.csv", 'w')
        self.phi_csv.write(
            "iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_csv.write(','.join(["{0:010d}".\
                                    format(i+1) for i in range(num_reals)]))
        self.phi_csv.write('\n')
        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

        if parensemble is not None and obsensemble is not None:
            self.logger.log("initializing with existing ensembles")
            if isinstance(parensemble, str):
                self.logger.log("loading parensemble from file")
                if not os.path.exists(obsensemble):
                    self.logger.lraise("can not find parensemble file: {0}".\
                                       format(parensemble))
                df = pd.read_csv(parensemble, index_col=0)
                #df.index = [str(i) for i in df.index]
                self.parensemble_0 = ParameterEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading parensemble from file")

            elif isinstance(parensemble, ParameterEnsemble):
                self.parensemble_0 = parensemble.copy()
            else:
                raise Exception("unrecognized arg type for parensemble, " +\
                                "should be filename or ParameterEnsemble" +\
                                ", not {0}".format(type(parensemble)))
            self.parensemble = self.parensemble_0.copy()
            if isinstance(obsensemble, str):
                self.logger.log("loading obsensemble from file")
                if not os.path.exists(obsensemble):
                    self.logger.lraise("can not find obsensemble file: {0}".\
                                       format(obsensemble))
                df = pd.read_csv(obsensemble,
                                 index_col=0).loc[:, self.pst.nnz_obs_names]
                #df.index = [str(i) for i in df.index]
                self.obsensemble_0 = ObservationEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading obsensemble from file")

            elif isinstance(obsensemble, ObservationEnsemble):
                self.obsensemble_0 = obsensemble.copy()
            else:
                raise Exception("unrecognized arg type for obsensemble, " +\
                                "should be filename or ObservationEnsemble" +\
                                ", not {0}".format(type(obsensemble)))

            assert self.parensemble_0.shape[0] == self.obsensemble_0.shape[0]
            #self.num_reals = self.parensemble_0.shape[0]
            self.logger.log("initializing with existing ensembles")

        else:
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))
            #self.num_reals = int(num_reals)
            #assert self.num_reals > 1
            self.logger.log("initializing parensemble")
            self.parensemble_0 = ParameterEnsemble(self.pst)
            self.parensemble_0.draw(cov=self.parcov, num_reals=num_reals)
            self.parensemble_0.enforce(enforce_bounds=enforce_bounds)
            self.logger.log("initializing parensemble")
            self.parensemble = self.parensemble_0.copy()
            self.parensemble_0.to_csv(self.pst.filename +\
                                      self.paren_prefix.format(0))
            self.logger.log("initializing parensemble")
            self.logger.log("initializing obsensemble")
            self.obsensemble_0 = ObservationEnsemble(self.pst)
            self.obsensemble_0.draw(cov=self.obscov, num_reals=num_reals)
            #self.obsensemble = self.obsensemble_0.copy()

            # save the base obsensemble
            self.obsensemble_0.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(-1))
            self.logger.log("initializing obsensemble")
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))

        self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()
        self.enforce_bounds = enforce_bounds

        if restart_obsensemble is not None:
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))
            failed_runs, self.obsensemble = self._load_obs_ensemble(
                restart_obsensemble)
            assert self.obsensemble.shape[0] == self.obsensemble_0.shape[0]
            assert list(self.obsensemble.columns) == list(
                self.obsensemble_0.columns)
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))

        else:
            # run the initial parameter ensemble
            self.logger.log("evaluating initial ensembles")
            failed_runs, self.obsensemble = self._calc_obs(self.parensemble)
            self.obsensemble.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(0))
            self.logger.log("evaluating initial ensembles")

        if failed_runs is not None:
            self.logger.warn("dropping failed realizations")
            #failed_runs_str = [str(f) for f in failed_runs]
            self.parensemble = self.parensemble.drop(failed_runs)
            self.obsensemble = self.obsensemble.drop(failed_runs)
        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
        self._phi_report(self.current_phi_vec, 0.0)

        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        self.logger.statement("initial phi (mean, std): {0:15.6G},{1:15.6G}".\
                              format(self.last_best_mean,self.last_best_std))
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # if using the approximate form of the algorithm, let
        # the parameter scaling matrix be the identity matrix
        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now, just not using it, maybe
        # better choices of lambda will tame it
        self.logger.statement("current lambda:{0:15.6g}".format(
            self.current_lambda))

        if self.use_approx_prior:
            self.logger.statement("using approximate parcov in solution")
            self.half_parcov_diag = 1.0
        else:
            #self.logger.statement("using full parcov in solution")
            # if self.parcov.isdiagonal:
            #     self.half_parcov_diag = self.parcov.sqrt.inv
            # else:
            #     self.half_parcov_diag = Cov(x=np.diag(self.parcov.x),
            #                                 names=self.parcov.col_names,
            #                                 isdiagonal=True).inv.sqrt
            self.half_parcov_diag = 1.0
        self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
        u, s, v = self.delta_par_prior.pseudo_inv_components()
        self.Am = u * s.inv

        self.__initialized = True
コード例 #25
0
    def initialize(self,num_reals,init_lambda=None):
        '''
        (re)initialize the process

        Opens the phi report csv (``<pst filename>.iobj.csv``), draws the
        initial parameter and observation ensembles, evaluates the initial
        parameter ensemble with ``self._calc_obs`` and selects the starting
        lambda.

        Parameters:
        ----------
            num_reals (int): number of realizations to draw; must be > 1

            init_lambda (float): optional starting lambda.  If None, the
                starting lambda is 10**floor(log10(x)) with
                x = mean(phi) / (2 * number of obsensemble columns)

        Returns:
            None
        Raises:
            AssertionError: if num_reals is not greater than 1
            NotImplementedError: if self.restart is set
        '''
        assert num_reals > 1

        if self.restart:
            # restarting is not supported yet.  bail out before touching
            # any state: opening the phi report csv in 'w' mode first would
            # truncate the report of the run being restarted and leave an
            # open file handle behind when the exception propagates
            print("restarting...ignoring num_reals")
            raise NotImplementedError()

        # initialize the phi report csv - one column per realization
        # follows the summary columns.  the handle is kept open on self
        self.phi_csv = open(self.pst.filename+".iobj.csv",'w')
        self.phi_csv.write("iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_csv.write(','.join(["{0:010d}".\
                                    format(i+1) for i in range(num_reals)]))
        self.phi_csv.write('\n')
        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

        self.num_reals = int(num_reals)

        # draw the initial parameter ensemble, enforce it, and save it
        # as iteration 0
        self.parensemble_0 = ParameterEnsemble(self.pst)
        self.parensemble_0.draw(cov=self.parcov,num_reals=num_reals)
        self.parensemble_0.enforce()
        self.parensemble = self.parensemble_0.copy()
        self.parensemble_0.to_csv(self.pst.filename +\
                                  self.paren_prefix.format(0))

        # draw the base obsensemble and save it under index -1 so that
        # index 0 stays free for the evaluated initial ensemble
        self.obsensemble_0 = ObservationEnsemble(self.pst)
        self.obsensemble_0.draw(cov=self.obscov,num_reals=num_reals)
        self.obsensemble_0.to_csv(self.pst.filename +\
                                  self.obsen_prefix.format(-1))
        # this matrix gets used a lot, so only calc once
        self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

        # run the initial parameter ensemble
        self.obsensemble = self._calc_obs(self.parensemble)
        self.obsensemble.to_csv(self.pst.filename +\
                                  self.obsen_prefix.format(0))

        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
        self._phi_report(self.current_phi_vec,0.0)
        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # if using the approximate form of the algorithm, let
        # the parameter scaling matrix be the identity matrix
        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now, just not using it, maybe
        # better choices of lambda will tame it
        if self.use_approx:
            self.half_parcov_diag = 1.0
        else:
            self.half_parcov_diag = 1.0
            # the prior parameter deviations and their pseudo inverse
            # components are only formed for the non-approximate solution
            self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
            u,s,_ = self.delta_par_prior.pseudo_inv_components()
            self.Am = u * s.inv
        self.__initialized = True
コード例 #26
0
ファイル: mc.py プロジェクト: mnfienen/pyemu
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis

       Note: requires a pest control file, which can be
             derived from a jco argument
             MonteCarlo.project_parsensemble also
             requires a jacobian

    """
    def __init__(self,**kwargs):
        """ pass all keyword arguments to LinearAnalysis and create empty
            parameter and observation ensembles tied to the control file

        Raises:
            AssertionError if no pest control file is available
        """
        super(MonteCarlo,self).__init__(**kwargs)
        assert self.pst is not None, \
            "monte carlo requires a pest control file"
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

    @property
    def num_reals(self):
        """ the number of rows in the parameter ensemble """
        return self.parensemble.shape[0]

    def get_nsing(self,epsilon=1.0e-6):
        """ get the number of solution space dimensions given
            a machine floating point precision (epsilon)

        Parameters:
            epsilon: machine floating point precision
        Returns : integer
            number of singular components above the epsilon ratio threshold
        """
        # the singular values are scaled by the largest one and sorted
        # ascending; searchsorted then counts the ratios below epsilon
        nsing = self.xtqx.shape[0] - np.searchsorted(
                np.sort((self.xtqx.s.x / self.xtqx.s.x.max())[:,0]),epsilon)
        return nsing

    def get_null_proj(self,nsing=None):
        """ get a null-space projection matrix of XTQX

        Parameters:
        ----------
            nsing: optional number of singular components to use
                      if none, call self.get_nsing()
        Returns:
        -------
            Matrix instance : V2V2^T
        """
        if nsing is None:
            nsing = self.get_nsing()

        # V2: the columns of V from nsing onward
        v2 = self.xtqx.v[:,nsing:]
        v2_proj = (v2 * v2.T)
        return v2_proj

    def draw(self, num_reals=1, par_file = None, obs=False,
             enforce_bounds=False,cov=None):
        """draw stochastic realizations of parameters and
           optionally observations

        Parameters:
        ----------
            num_reals (int): number of realization to generate

            par_file (str): parameter file to use as mean values

            obs (bool): add a realization of measurement noise to obs

            enforce_bounds (bool): enforce parameter bounds in control file

            cov (Cov): optional covariance matrix to draw the parameter
                realizations from.  If None, self.parcov is used


        Returns:
            None
        Raises:
            AssertionError if cov is passed and is not a Cov instance
        """
        if par_file is not None:
            self.pst.parrep(par_file)

        if cov is not None:
            assert isinstance(cov,Cov)
        else:
            cov = self.parcov

        # NOTE(review): each self.log message is issued twice with identical
        # text, presumably marking the start and end of a logged phase -
        # confirm against the logger
        self.log("generating {0:d} parameter realizations".format(num_reals))
        self.parensemble.draw(cov,num_reals=num_reals)
        if enforce_bounds:
            self.parensemble.enforce()
        self.log("generating {0:d} parameter realizations".format(num_reals))
        if obs:
            self.log("generating {0:d} observation realizations".format(num_reals))
            self.obsensemble.draw(self.obscov,num_reals=num_reals)
            self.log("generating {0:d} observation realizations".format(num_reals))

    def project_parensemble(self,par_file=None,nsing=None,
                            inplace=True):
        """ perform the null-space projection operations for null-space monte carlo

        Parameters:
            par_file: str
                an optional file of parameter values to use
            nsing: int
                number of singular values to in forming null subspace matrix
            inplace: bool
                overwrite the existing parameter ensemble with the
                projected values
        Returns:
        -------
            if inplace is False, ParameterEnsemble instance, otherwise None
        Raises:
            AssertionError if there is no jacobian or par_file does not exist
        """
        assert self.jco is not None,"MonteCarlo.project_parensemble() " +\
                                    "requires a jacobian attribute"
        if par_file is not None:
            assert os.path.exists(par_file),\
                "MonteCarlo.project_parensemble() error: par_file not found:" +\
                par_file
            self.parensemble.pst.parrep(par_file)

        # project the ensemble
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),inplace=inplace,log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self,prefix):
        """ write parameter and optionally observation realizations
            to pest control files

        One control file named <prefix><i>.pst is written per realization.
        Observation values are only replaced when the observation ensemble
        has as many rows as the parameter ensemble.

        Parameters:
        ----------
            prefix: str
                pest control file prefix
        Returns:
        -------
            None
        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,obs_names=self.pst.obs_names)

        # set the indices
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        # parameter values are written back-transformed when the
        # ensemble is flagged as log transformed
        if self.parensemble.islog:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        # neither ensemble changes while writing, so decide once whether
        # the observation realizations line up with the parameter ones
        write_obs = self.obsensemble.shape[0] == self.num_reals

        for i in range(self.num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,"parval1"] = par_en.iloc[i, :].T
            if write_obs:
                pst.observation_data.loc[self.obsensemble.columns,"obsval"] = \
                    self.obsensemble.iloc[i, :].T
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")