Esempio n. 1
0
File: mc.py Progetto: whejs/pyemu
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis

    Parameters
    ----------
    **kwargs : dict
        dictionary of keyword arguments.  See pyemu.LinearAnalysis for
        complete definitions

    Attributes
    ----------
    parensemble : pyemu.ParameterEnsemble
        pyemu object derived from a pandas dataframe, the ensemble
        of parameters from the PEST control file with associated 
        starting value and bounds.  Object also exposes methods
        relevant to the dataframe and parameters-- see documentation.
    obsensemble : pyemu.ObservationEnsemble
        pyemu object derived from a pandas dataframe, the ensemble
        of observations from the PEST control file with associated 
        starting weights.  Object also exposes methods
        relevant to the dataframe and observations-- see documentation.
        
    Returns
    -------
    MonteCarlo
       pyEMU MonteCarlo object

    Example
    -------
    ``>>>import pyemu``

    ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

    """
    def __init__(self, **kwargs):
        """Warn about the deprecation, run the LinearAnalysis setup and
        create the parameter and observation ensembles.

        Parameters
        ----------
        **kwargs : dict
            keyword arguments forwarded unchanged to the parent constructor

        Note
        ----
        an assertion fails if no pest control file (``self.pst``) is
        available once the parent constructor has run

        """
        deprecation_msg = ("pyemu.MonteCarlo class is deprecated.  " +
                           "Please use the ensemble classes directly")
        warnings.warn(deprecation_msg, PyemuWarning)

        super(MonteCarlo, self).__init__(**kwargs)
        assert self.pst is not None, "monte carlo requires a pest control file"

        # both ensembles are built from the same control file
        control_file = self.pst
        self.parensemble = ParameterEnsemble(pst=control_file)
        self.obsensemble = ObservationEnsemble(pst=control_file)

    @property
    def num_reals(self):
        """ number of realizations currently held by the parameter ensemble

        Returns
        -------
        num_real : int
            first entry of ``self.parensemble.shape`` (the row count)

        """
        row_count = self.parensemble.shape[0]
        return row_count

    def get_nsing(self, epsilon=1.0e-4):
        """ count the singular values of XTQX whose ratio to the largest
        singular value is not below ``epsilon``

        Parameters
        ----------
        epsilon: float
            singular value ratio threshold

        Returns
        -------
        nsing : int
            number of singular components at or above the epsilon ratio
            threshold, or None (see Note)

        Note
        -----
            If nsing equals the dimension of XTQX, a warning is logged
            and None is returned

        """
        xtqx = self.xtqx
        dimension = xtqx.shape[0]

        # normalize by the largest singular value, then sort ascending so
        # searchsorted returns how many ratios fall below epsilon
        singular_values = xtqx.s.x
        ratios = np.sort((singular_values / singular_values.max())[:, 0])
        num_below = np.searchsorted(ratios, epsilon)

        nsing = dimension - num_below
        if nsing != dimension:
            return nsing
        self.logger.warn("optimal nsing=npar")
        return None

    def get_null_proj(self, nsing=None):
        """ form the null-space projection matrix of XTQX

        Parameters
        ----------
        nsing: int
            optional number of singular components to use.
            If None, then nsing is determined from a
            call to MonteCarlo.get_nsing()

        Returns
        -------
        v2_proj : pyemu.Matrix
            the null-space projection matrix (V2V2^T)

        Raises
        ------
        Exception
            if nsing is not supplied and MonteCarlo.get_nsing() returns None

        """
        if nsing is None:
            nsing = self.get_nsing()
            if nsing is None:
                raise Exception("nsing is None")
        print("using {0} singular components".format(nsing))

        # the same message is logged before and after the calculation
        log_msg = ("forming null space projection matrix with " +
                   "{0} of {1} singular components".format(
                       nsing, self.jco.shape[1]))
        self.log(log_msg)

        # columns of V from nsing onward, multiplied by their transpose
        null_vectors = self.xtqx.v[:, nsing:]
        v2_proj = null_vectors * null_vectors.T
        self.log(log_msg)

        return v2_proj

    def draw(
        self,
        num_reals=1,
        par_file=None,
        obs=False,
        enforce_bounds=None,
        cov=None,
        how="gaussian",
    ):
        """draw stochastic realizations of parameters and
           optionally observations, filling MonteCarlo.parensemble and
           optionally MonteCarlo.obsensemble.

        Parameters
        ----------
        num_reals : int
            number of realization to generate
        par_file : str
            parameter file passed to ``MonteCarlo.pst.parrep()`` before
            drawing.  If None, the control file is left as it is.
            Default is None
        obs : bool
            also draw observation realizations,
            forming MonteCarlo.obsensemble.  Default is False
        enforce_bounds : str
            passed to ``MonteCarlo.parensemble.enforce()`` after the draw.
            If None, enforce() is not called.  Default is None
        cov : pyemu.Cov
            covariance matrix for the gaussian draw.  If None,
            MonteCarlo.parcov is used.  Cannot be combined with
            how="uniform".  Default is None
        how : str
            type of distribution to draw from. Must be in ["gaussian","uniform"]
            (case and surrounding whitespace are ignored).
            Default is "gaussian".

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

        ``>>>mc.draw(1000)``

        """
        if par_file is not None:
            self.pst.parrep(par_file)

        how = how.lower().strip()
        assert how in ["gaussian", "uniform"]

        if cov is None:
            cov = self.parcov
        else:
            assert isinstance(cov, Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform'," +
                                " 'cov' arg cannot be passed")

        # the same message is logged before and after the parameter draw
        par_msg = "generating {0:d} parameter realizations".format(num_reals)
        self.log(par_msg)

        if how == "gaussian":
            # bounds are not enforced by the draw itself; see below
            drawn = ParameterEnsemble.from_gaussian_draw(
                pst=self.pst,
                cov=cov,
                num_reals=num_reals,
                use_homegrown=True,
                enforce_bounds=False,
            )
        elif how == "uniform":
            drawn = ParameterEnsemble.from_uniform_draw(
                pst=self.pst, num_reals=num_reals)
        else:
            raise Exception(
                "MonteCarlo.draw(): unrecognized 'how' arg: {0}".format(how))
        self.parensemble = drawn

        if enforce_bounds is not None:
            self.parensemble.enforce(enforce_bounds)
        self.log(par_msg)

        if not obs:
            return
        obs_msg = "generating {0:d} observation realizations".format(num_reals)
        self.log(obs_msg)
        self.obsensemble = ObservationEnsemble.from_id_gaussian_draw(
            pst=self.pst, num_reals=num_reals)
        self.log(obs_msg)
    def project_parensemble(self,
                            par_file=None,
                            nsing=None,
                            inplace=True,
                            enforce_bounds="reset"):
        """ perform the null-space projection operations for null-space monte carlo

        Parameters
        ----------
        par_file: str
            an optional file of parameter values; if given it must exist
            and is passed to ``MonteCarlo.parensemble.pst.parrep()``
        nsing: int
            number of singular values to use in forming the null subspace
            matrix.  Passed to MonteCarlo.get_null_proj()
        inplace: bool
            passed to ``ParameterEnsemble.project()``; overwrite the
            existing parameter ensemble with the projected values
        enforce_bounds: str
            how to enforce parameter bounds.  can be None, 'reset', or 'drop'.
            Default is 'reset'.  NOTE(review): this argument is accepted
            but is not used anywhere in this method -- confirm whether it
            should be forwarded to ``ParameterEnsemble.project()``

        Returns
        -------
        par_en : pyemu.ParameterEnsemble
            whatever ``ParameterEnsemble.project()`` returns (presumably
            the projected ensemble if inplace is False, otherwise None)

        Note
        ----
        to use this method, the MonteCarlo instance must have been constructed
        with the ``jco`` argument.

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(jco="pest.jcb")``

        ``>>>mc.draw(1000)``

        ``>>>mc.project_parensemble(par_file="final.par",nsing=100)``

        """
        assert self.jco is not None, ("MonteCarlo.project_parensemble() " +
                                      "requires a jacobian attribute")
        if par_file is not None:
            assert os.path.exists(par_file), (
                "MonteCarlo.project_parensemble() error: " +
                "par_file not found:" + par_file)
            self.parensemble.pst.parrep(par_file)

        # project the ensemble; the same message is logged before and after
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),
                                      inplace=inplace,
                                      log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self, prefix, existing_jco=None, noptmax=None):
        """ write parameter and optionally observation realizations
            to a series of pest control files

        One control file named ``<prefix><i>.pst`` is written for each
        realization ``i`` in the parameter ensemble.  Observation values
        are replaced as well when the observation ensemble has the same
        number of realizations as the parameter ensemble.

        Parameters
        ----------
        prefix: str
            pest control file prefix

        existing_jco: str
            filename of an existing jacobian matrix to add to the
            pest++ options in the control file.  This is useful for
            NSMC since this jco can be used to get the first set of
            parameter upgrades for free!  Needs to be the path the jco
            file as seen from the location where pest++ will be run

        noptmax: int
            value of NOPTMAX to set in new pest control files

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(jco="pest.jcb")``

        ``>>>mc.draw(1000, obs=True)``

        ``>>>mc.write_psts("mc_", existing_jco="pest.jcb", noptmax=1)``

        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,
                           obs_names=self.pst.obs_names)

        if noptmax is not None:
            pst.control_data.noptmax = noptmax

        if existing_jco is not None:
            pst.pestpp_options["BASE_JACOBIAN"] = existing_jco

        # set the indices so values can be assigned by name
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        # control files hold untransformed parameter values
        if self.parensemble.istransformed:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        # neither ensemble changes inside the loop, so evaluate these once
        num_reals = self.num_reals
        add_obs_noise = self.obsensemble.shape[0] == num_reals

        for i in range(num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,
                                   "parval1"] = par_en.iloc[i, :].T

            # add the obs noise realization if needed
            if add_obs_noise:
                pst.observation_data.loc[
                    self.obsensemble.columns,
                    "obsval"] = self.obsensemble.iloc[i, :].T

            # write
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")
Esempio n. 2
0
class EnsembleSmoother():

    def __init__(self,pst,parcov=None,obscov=None,num_slaves=0,use_approx=True,
                 restart_iter=0,submit_file=None):
        """Store the control file, covariance matrices and run options.

        Parameters
        ----------
        pst : str or Pst
            control file instance, or a filename that is loaded with Pst()
        parcov : Cov
            parameter covariance matrix.  If None, it is built with
            Cov.from_parameter_data()
        obscov : Cov
            observation covariance matrix.  If None, it is built with
            Cov.from_observation_data()
        num_slaves : int
            stored as an int on ``self.num_slaves``
        use_approx : bool
            stored as a bool on ``self.use_approx``
        restart_iter : int
            if greater than zero, the parameter ensemble csv for that
            iteration and the observation ensemble csvs for iteration 0
            and that iteration must exist, and the instance is flagged
            as a restart
        submit_file : str
            stored on ``self.submit_file``

        """
        self.num_slaves = int(num_slaves)
        self.submit_file = submit_file
        self.use_approx = bool(use_approx)
        # filename suffixes appended to the control file name
        self.paren_prefix = ".parensemble.{0:04d}.csv"
        self.obsen_prefix = ".obsensemble.{0:04d}.csv"

        if isinstance(pst,str):
            pst = Pst(pst)
        assert isinstance(pst,Pst)
        self.pst = pst

        options = pst.pestpp_options
        self.sweep_in_csv = options.get("sweep_parameter_csv_file","sweep_in.csv")
        self.sweep_out_csv = options.get("sweep_output_csv_file","sweep_out.csv")

        if parcov is None:
            parcov = Cov.from_parameter_data(self.pst)
        else:
            assert isinstance(parcov,Cov)
        if obscov is None:
            obscov = Cov.from_observation_data(pst)
        else:
            assert isinstance(obscov,Cov)
        self.parcov = parcov
        self.obscov = obscov

        self.restart = False
        if restart_iter > 0:
            self.restart_iter = restart_iter
            base_name = self.pst.filename
            # every file needed for a restart must already be on disk
            required_files = (
                (base_name + self.paren_prefix.format(restart_iter),
                 "could not find restart par ensemble {0}"),
                (base_name + self.obsen_prefix.format(0),
                 "could not find restart obs ensemble 0 {0}"),
                (base_name + self.obsen_prefix.format(restart_iter),
                 "could not find restart obs ensemble {0}"),
            )
            for file_name, message in required_files:
                assert os.path.exists(file_name), message.format(file_name)
            self.restart = True

        # state that initialize() and update() fill in later
        self.__initialized = False
        self.num_reals = 0
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0

    def initialize(self,num_reals,init_lambda=None):
        '''
        (re)initialize the process: open the phi report csv, build the
        initial parameter and observation ensembles, evaluate the initial
        parameter ensemble and choose the starting lambda.

        Parameters
        ----------
        num_reals : int
            number of realizations to draw; must be greater than 1
        init_lambda : float
            optional starting lambda.  If None, the starting lambda is
            derived from the mean phi of the initial ensemble

        Raises
        ------
        NotImplementedError
            if the instance was constructed in restart mode
        '''
        assert num_reals > 1
        # initialize the phi report csv; the handle is kept open on
        # self.phi_csv so that _phi_report() can append rows to it
        self.phi_csv = open(self.pst.filename+".iobj.csv",'w')
        self.phi_csv.write("iter_num,total_runs,lambda,min,max,mean,median,std,")
        # one zero-padded, 1-based column header per realization
        self.phi_csv.write(','.join(["{0:010d}".\
                                    format(i+1) for i in range(num_reals)]))
        self.phi_csv.write('\n')
        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt
        if self.restart:
            print("restarting...ignoring num_reals")
            # NOTE: restart is not implemented; every statement below the
            # raise in this branch is unreachable
            raise NotImplementedError()
            df = pd.read_csv(self.pst.filename+self.paren_prefix.format(self.restart_iter))
            self.parensemble_0 = ParameterEnsemble.from_dataframe(df=df,pst=self.pst)
            self.parensemble = self.parensemble_0.copy()
            df = pd.read_csv(self.pst.filename+self.obsen_prefix.format(0))
            self.obsensemble_0 = ObservationEnsemble.from_dataframe(df=df.loc[:,self.pst.nnz_obs_names],
                                                                    pst=self.pst)
            # this matrix gets used a lot, so only calc once
            self.obs0_matrix = self.obsensemble_0.as_pyemu_matrix()
            df = pd.read_csv(self.pst.filename+self.obsen_prefix.format(self.restart_iter))
            self.obsensemble = ObservationEnsemble.from_dataframe(df=df.loc[:,self.pst.nnz_obs_names],
                                                                  pst=self.pst)
            assert self.parensemble.shape[0] == self.obsensemble.shape[0]
            self.num_reals = self.parensemble.shape[0]

        else:
            self.num_reals = int(num_reals)
            # draw the initial parameter ensemble from parcov, enforce it,
            # and save it as iteration 0
            self.parensemble_0 = ParameterEnsemble(self.pst)
            self.parensemble_0.draw(cov=self.parcov,num_reals=num_reals)
            self.parensemble_0.enforce()
            self.parensemble = self.parensemble_0.copy()
            self.parensemble_0.to_csv(self.pst.filename +\
                                      self.paren_prefix.format(0))
            # draw the base observation ensemble from obscov
            self.obsensemble_0 = ObservationEnsemble(self.pst)
            self.obsensemble_0.draw(cov=self.obscov,num_reals=num_reals)
            #self.obsensemble = self.obsensemble_0.copy()

            # save the base obsensemble (written with iteration number -1)
            self.obsensemble_0.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(-1))
            self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

            # run the initial parameter ensemble
            self.obsensemble = self._calc_obs(self.parensemble)
            self.obsensemble.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(0))
        # phi of the initial ensemble is the baseline that update()
        # compares candidate ensembles against
        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
        self._phi_report(self.current_phi_vec,0.0)
        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            # lambda = 10**floor(log10(mean phi / (2 * number of
            # columns in the observation ensemble)))
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # if using the approximate form of the algorithm, let
        # the parameter scaling matrix be the identity matrix
        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now, just not using it, maybe
        # better choices of lambda will tame it
        if self.use_approx:
            self.half_parcov_diag = 1.0
        else:
            # if self.parcov.isdiagonal:
            #     self.half_parcov_diag = self.parcov.sqrt.inv
            # else:
            #     self.half_parcov_diag = Cov(x=np.diag(self.parcov.x),
            #                                 names=self.parcov.col_names,
            #                                 isdiagonal=True).inv.sqrt
            # the scaling is 1.0 in this branch as well; what differs is
            # that the prior parameter deltas and self.Am are stored for
            # the parameter-based part of the upgrade in update()
            self.half_parcov_diag = 1.0
            self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
            u,s,v = self.delta_par_prior.pseudo_inv_components()
            self.Am = u * s.inv
        self.__initialized = True

    def get_localizer(self):
        """Return a Matrix filled with ones, with one row per name in
        ``pst.nnz_obs_names`` and one column per name in
        ``pst.adj_par_names``.
        """
        obs_names = self.pst.nnz_obs_names
        par_names = self.pst.adj_par_names
        all_ones = np.ones((len(obs_names),len(par_names)))
        return Matrix(x=all_ones,row_names=obs_names,col_names=par_names)

    def _calc_delta_par(self,parensemble):
        '''
        calc the scaled parameter ensemble differences from the mean
        '''
        return self._calc_delta(parensemble, self.half_parcov_diag)

    def _calc_delta_obs(self,obsensemble):
        '''
        calc the scaled observation ensemble differences from the mean
        '''
        return self._calc_delta(obsensemble.nonzero, self.obscov.inv.sqrt)

    def _calc_delta(self,ensemble,scaling_matrix):
        '''
        calc the scaled  ensemble differences from the mean
        '''
        mean = np.array(ensemble.mean(axis=0))
        delta = ensemble.as_pyemu_matrix()
        for i in range(self.num_reals):
            delta.x[i,:] -= mean
        delta = scaling_matrix * delta.T
        delta *= (1.0 / np.sqrt(float(self.num_reals - 1.0)))
        return delta

    def _calc_obs(self,parensemble):
        if self.submit_file is None:
            self._calc_obs_local(parensemble)
        else:
            self._calc_obs_condor(parensemble)

    def _calc_obs_condor(self,parensemble):
        '''
        propagate the ensemble forward using sweep, submitting the
        condor job described by self.submit_file, and return the
        resulting observation ensemble.

        The sweep output csv is read back in the same way as in
        _calc_obs_local(), so that both run modes hand an
        ObservationEnsemble back to the caller.
        '''
        parensemble.to_csv(self.sweep_in_csv)
        # clear the condor queue before starting a new sweep
        os.system("condor_rm -all")
        port = 4004
        def master():
            os.system("sweep {0} /h :{1} >nul".format(self.pst.filename,port))
        master_thread = threading.Thread(target=master)
        master_thread.start()
        time.sleep(1.5) #just some time for the master to get up and running to take slaves
        pyemu.utils.start_slaves("template","sweep",self.pst.filename,
                                 self.num_slaves,slave_root='.',port=port)
        os.system("condor_submit {0}".format(self.submit_file))
        # wait for the master to finish before reading its output
        master_thread.join()

        obs = pd.read_csv(self.sweep_out_csv)
        obs.columns = [item.lower() for item in obs.columns]
        self.total_runs += obs.shape[0]
        return ObservationEnsemble.from_dataframe(df=obs.loc[:,self.obscov.row_names],
                                                  pst=self.pst)

    def _calc_obs_local(self,parensemble):
        '''
        propagate the ensemble forward using sweep and return the
        resulting observation ensemble.

        The parameter ensemble is written to the sweep input csv.  With
        no slaves, sweep is run directly; otherwise a sweep master is
        started in a thread and the slaves are started against it.  The
        sweep output csv is then read, its column names are lower-cased,
        the run counter is advanced by the number of rows, and the
        columns named in self.obscov.row_names are returned as an
        ObservationEnsemble.
        '''
        parensemble.to_csv(self.sweep_in_csv)

        if self.num_slaves <= 0:
            os.system("sweep {0}".format(self.pst.filename))
        else:
            port = 4004

            def run_master():
                command = "sweep {0} /h :{1} >nul".format(self.pst.filename,port)
                os.system(command)

            master_thread = threading.Thread(target=run_master)
            master_thread.start()
            # just some time for the master to get up and running to take slaves
            time.sleep(1.5)
            pyemu.utils.start_slaves("template","sweep",self.pst.filename,
                                     self.num_slaves,slave_root='.',port=port)
            master_thread.join()

        sweep_results = pd.read_csv(self.sweep_out_csv)
        sweep_results.columns = [name.lower() for name in sweep_results.columns]
        self.total_runs += sweep_results.shape[0]
        selected = sweep_results.loc[:,self.obscov.row_names]
        return ObservationEnsemble.from_dataframe(df=selected,pst=self.pst)

    def _calc_phi_vec(self,obsensemble):
        obs_diff = self._get_residual_matrix(obsensemble)
        phi_vec = np.diagonal((obs_diff * self.obscov_inv_sqrt.get(row_names=obs_diff.col_names,
                                                                   col_names=obs_diff.col_names) * obs_diff.T).x)
        return phi_vec

    def _phi_report(self,phi_vec,cur_lam):
        assert phi_vec.shape[0] == self.num_reals
        self.phi_csv.write("{0},{1},{2},{3},{4},{5},{6}".format(self.iter_num,
                                                             self.total_runs,
                                                             cur_lam,
                                                             phi_vec.min(),
                                                             phi_vec.max(),
                                                             phi_vec.mean(),
                                                             np.median(phi_vec),
                                                             phi_vec.std()))
        self.phi_csv.write(",".join(["{0:20.8}".format(phi) for phi in phi_vec]))
        self.phi_csv.write("\n")
        self.phi_csv.flush()

    def _get_residual_matrix(self, obsensemble):
        obs_matrix = obsensemble.nonzero.as_pyemu_matrix()
        return  obs_matrix - self.obs0_matrix.get(col_names=obs_matrix.col_names,row_names=obs_matrix.row_names)

    def update(self,lambda_mults=[1.0],localizer=None,run_subset=None):
        """Perform one smoother iteration.

        For each lambda multiplier a candidate parameter ensemble is
        formed from the current ensemble and run forward.  The candidate
        with the lowest mean phi is accepted if that mean is below 110%
        of the last accepted mean phi; lambda is then reduced if the
        candidate's phi standard deviation was also below 110% of the
        last accepted value.  If no candidate is accepted, lambda is
        increased.  The current parameter and observation ensembles are
        written to csv at the end of every call.

        Parameters
        ----------
        lambda_mults : list of float
            multipliers applied to the current lambda, one candidate
            ensemble per entry.  Must not be empty.  The default list is
            only read, never modified
        localizer : Matrix
            optional localizing matrix handed to the hadamard_product()
            method of the first upgrade matrix
        run_subset : int
            optional number of realizations, chosen at random with
            replacement, to run for each candidate lambda.  If given,
            the full accepted ensemble is re-run afterwards

        Raises
        ------
        Exception
            if initialize() has not been called
        ValueError
            if lambda_mults is empty
        """
        # validate before touching any state, so that a rejected call
        # does not advance the iteration counter
        if not self.__initialized:
            raise Exception("must call initialize() before update()")
        if len(lambda_mults) == 0:
            raise ValueError("lambda_mults must contain at least one multiplier")
        self.iter_num += 1

        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        scaled_delta_par = self._calc_delta_par(self.parensemble)

        u,s,v = scaled_delta_obs.pseudo_inv_components()

        obs_diff = self._get_residual_matrix(self.obsensemble)

        if run_subset is not None:
            # numpy's randint excludes the upper bound, so the bound must
            # be num_reals for the last realization to be selectable
            subset_idx = ["{0:d}".format(i) for i in np.random.randint(0,self.num_reals,run_subset)]
            print("subset idxs: " + ','.join(subset_idx))

        paren_lam,obsen_lam = [],[]
        for cur_lam_mult in lambda_mults:

            parensemble_cur_lam = self.parensemble.copy()

            cur_lam = self.current_lambda * cur_lam_mult

            # inverse of ((cur_lam + 1) * I + s**2)
            scaled_ident = Cov.identity_like(s) * (cur_lam+1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            upgrade_1 = -1.0 * (self.half_parcov_diag * scaled_delta_par) *\
                        v * s * scaled_ident * u.T

            # apply localization
            # NOTE(review): the return value of hadamard_product() is
            # discarded; this only localizes if that method works in
            # place -- confirm against the Matrix implementation
            if localizer is not None:
                upgrade_1.hadamard_product(localizer)

            # apply residual information
            upgrade_1 *= (self.obscov_inv_sqrt * obs_diff.T)

            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            upgrade_1 = upgrade_1.T
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not self.use_approx and self.iter_num > 1:
                par_diff = (self.parensemble - self.parensemble_0).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.half_parcov_diag * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                upgrade_2 = -1.0 * (self.half_parcov_diag *
                                   scaled_delta_par * x7).to_dataframe()

                upgrade_2.index.name = "parnme"
                upgrade_2.T.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                parensemble_cur_lam += upgrade_2.T
            parensemble_cur_lam.enforce()
            paren_lam.append(parensemble_cur_lam)
            if run_subset is not None:
                parensemble_subset = parensemble_cur_lam.loc[subset_idx,:]
                obsensemble_cur_lam = self._calc_obs(parensemble_subset)
            else:
                obsensemble_cur_lam = self._calc_obs(parensemble_cur_lam)
            obsen_lam.append(obsensemble_cur_lam)


        # here is where we need to select out the "best" lambda par and obs
        # ensembles
        print("\n**************************")
        print(str(datetime.now()))
        print("total runs:{0}".format(self.total_runs))
        print("iteration: {0}".format(self.iter_num))
        print("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda,
                         self.last_best_mean,self.last_best_std))
        phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        mean_std = [(pv.mean(),pv.std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i,(phi_mean,phi_std) in enumerate(mean_std):
            print(" tested lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda * lambda_mults[i],phi_mean,phi_std))
            if phi_mean < best_mean:
                update_pars = True
                best_mean = phi_mean
                best_i = i
                if phi_std < best_std:
                    update_lambda = True
                    best_std = phi_std

        if not update_pars:
            # nothing was accepted: raise lambda, capped at 100000
            self.current_lambda *= max(lambda_mults) * 3.0
            self.current_lambda = min(self.current_lambda,100000)
            print("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))

        else:

            self.parensemble = paren_lam[best_i]
            if run_subset is not None:
                # only a subset was run for the candidates, so run the
                # full accepted ensemble before reporting
                self.obsensemble = self._calc_obs(self.parensemble)
                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
                self._phi_report(self.current_phi_vec,self.current_lambda * lambda_mults[best_i])
                best_mean = self.current_phi_vec.mean()
                best_std = self.current_phi_vec.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                self._phi_report(phi_vecs[best_i],self.current_lambda * lambda_mults[best_i])
                self.current_phi_vec = phi_vecs[best_i]

            print("\n" + "   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive - move to 75% of the best lambda
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda,0.001)
            print("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))


        print("**************************\n")

        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))

        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
Esempio n. 3
0
File: mc.py Progetto: aleaf/pyemu
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis

    Parameters
    ----------
    **kwargs : dict
        dictionary of keyword arguments.  See pyemu.LinearAnalysis for
        complete definitions

    Attributes
    ----------
    parensemble : pyemu.ParameterEnsemble
        parameter ensemble, built from the control file in the
        constructor and replaced by ``draw()``
    obsensemble : pyemu.ObservationEnsemble
        observation ensemble, built from the control file in the
        constructor and replaced by ``draw(obs=True)``

    Returns
    -------
    MonteCarlo : MonteCarlo

    Example
    -------
    ``>>>import pyemu``

    ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

    """
    def __init__(self, **kwargs):
        super(MonteCarlo, self).__init__(**kwargs)
        assert self.pst is not None, \
            "monte carlo requires a pest control file"
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

    @property
    def num_reals(self):
        """ get the number of realizations in the parameter ensemble

        Returns
        -------
        num_real : int
            number of rows in MonteCarlo.parensemble

        """
        return self.parensemble.shape[0]

    def get_nsing(self, epsilon=1.0e-4):
        """ get the number of solution space dimensions given
            a ratio between the largest and smallest singular
            values

        Parameters
        ----------
        epsilon: float
            singular value ratio

        Returns
        -------
        nsing : int
            number of singular components above the epsilon ratio threshold

        Note
        -----
            If nsing equals the dimension of XTQX, a warning is logged
            and None is returned

        """
        mx = self.xtqx.shape[0]
        # singular values scaled by the largest one and sorted ascending,
        # so searchsorted yields how many ratios fall below epsilon
        ratios = np.sort((self.xtqx.s.x / self.xtqx.s.x.max())[:, 0])
        nsing = mx - np.searchsorted(ratios, epsilon)
        if nsing == mx:
            self.logger.warn("optimal nsing=npar")
            nsing = None
        return nsing

    def get_null_proj(self, nsing=None):
        """ get a null-space projection matrix of XTQX

        Parameters
        ----------
        nsing: int
            optional number of singular components to use
            If None, then nsing is determined from
            call to MonteCarlo.get_nsing()

        Returns
        -------
        v2_proj : pyemu.Matrix
            the null-space projection matrix (V2V2^T)

        Raises
        ------
        Exception
            if nsing is None and MonteCarlo.get_nsing() also returns None

        """
        if nsing is None:
            nsing = self.get_nsing()
        if nsing is None:
            raise Exception("nsing is None")
        print("using {0} singular components".format(nsing))

        # the same phrase is logged before and after the operation
        phrase = "forming null space projection matrix with " + \
                 "{0} of {1} singular components".format(nsing, self.jco.shape[1])
        self.log(phrase)
        # right singular vectors beyond the first nsing columns
        v2 = self.xtqx.v[:, nsing:]
        v2_proj = (v2 * v2.T)
        self.log(phrase)

        return v2_proj

    def draw(self, num_reals=1, par_file = None, obs=False,
             enforce_bounds=None, cov=None, how="gaussian"):
        """draw stochastic realizations of parameters and
           optionally observations, filling MonteCarlo.parensemble and
           optionally MonteCarlo.obsensemble.

        Parameters
        ----------
        num_reals : int
            number of realization to generate
        par_file : str
            parameter file to use as mean values. If None,
            use MonteCarlo.pst.parameter_data.parval1.
            Default is None
        obs : bool
            add a realization of measurement noise to observation values,
            forming MonteCarlo.obsensemble.Default is False
        enforce_bounds : str
            enforce parameter bounds based on control file information.
            options are 'reset', 'drop' or None.  Default is None
        cov : pyemu.Cov
            parameter covariance matrix to draw from.  If None,
            MonteCarlo.parcov is used.  Cannot be passed when
            how is "uniform".  Default is None
        how : str
            type of distribution to draw from. Must be in ["gaussian","uniform"]
            default is "gaussian".

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(pst="pest.pst")``

        ``>>>mc.draw(1000)``

        """
        if par_file is not None:
            self.pst.parrep(par_file)
        how = how.lower().strip()
        assert how in ["gaussian","uniform"]

        if cov is None:
            cov = self.parcov
        else:
            assert isinstance(cov, Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform',"
                                " 'cov' arg cannot be passed")

        # the same phrase is logged before and after the operation
        par_phrase = "generating {0:d} parameter realizations".format(num_reals)
        self.log(par_phrase)

        if how == "gaussian":
            self.parensemble = ParameterEnsemble.from_gaussian_draw(
                pst=self.pst, cov=cov, num_reals=num_reals,
                use_homegrown=True)
        elif how == "uniform":
            self.parensemble = ParameterEnsemble.from_uniform_draw(
                pst=self.pst, num_reals=num_reals)
        else:
            # only reachable when assertions are disabled
            raise Exception("MonteCarlo.draw(): unrecognized 'how' arg: {0}".format(how))

        if enforce_bounds is not None:
            self.parensemble.enforce(enforce_bounds)
        self.log(par_phrase)

        if obs:
            obs_phrase = "generating {0:d} observation realizations".format(num_reals)
            self.log(obs_phrase)
            self.obsensemble = ObservationEnsemble.from_id_gaussian_draw(
                pst=self.pst, num_reals=num_reals)
            self.log(obs_phrase)

    def project_parensemble(self,par_file=None,nsing=None,
                            inplace=True,enforce_bounds='reset'):
        """ perform the null-space projection operations for null-space monte carlo

        Parameters
        ----------
        par_file: str
            an optional file of parameter values to use
        nsing: int
            number of singular values to use in forming the null subspace
            matrix.  If None, MonteCarlo.get_nsing() is used
        inplace: bool
            overwrite the existing parameter ensemble with the
            projected values
        enforce_bounds: str
            how to enforce parameter bounds.  can be None, 'reset', or 'drop'.
            Default is 'reset'.  NOTE: this argument is accepted but is
            not currently used by this method

        Returns
        -------
        par_en : pyemu.ParameterEnsemble
            if inplace is False, otherwise None

        Note
        ----
        to use this method, the MonteCarlo instance must have been constructed
        with the ``jco`` argument.

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(jco="pest.jcb")``

        ``>>>mc.draw(1000)``

        ``>>>mc.project_parensemble(par_file="final.par",nsing=100)``

        """
        assert self.jco is not None, \
            "MonteCarlo.project_parensemble() requires a jacobian attribute"
        if par_file is not None:
            assert os.path.exists(par_file), \
                "MonteCarlo.project_parensemble() error: " + \
                "par_file not found: {0}".format(par_file)
            self.parensemble.pst.parrep(par_file)

        # project the ensemble
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),inplace=inplace,log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self,prefix,existing_jco=None,noptmax=None):
        """ write parameter and optionally observation realizations
            to a series of pest control files

        Parameters
        ----------
        prefix: str
            pest control file prefix

        existing_jco: str
            filename of an existing jacobian matrix to add to the
            pest++ options in the control file.  This is useful for
            NSMC since this jco can be used to get the first set of
            parameter upgrades for free!  Needs to be the path the jco
            file as seen from the location where pest++ will be run

        noptmax: int
            value of NOPTMAX to set in new pest control files

        Note
        ----
        one file named ``prefix + "<i>.pst"`` is written per realization.
        Observation values are only replaced when the observation
        ensemble has the same number of rows as the parameter ensemble.

        Example
        -------
        ``>>>import pyemu``

        ``>>>mc = pyemu.MonteCarlo(jco="pest.jcb")``

        ``>>>mc.draw(1000, obs=True)``

        ``>>>mc.write_psts("mc_", existing_jco="pest.jcb", noptmax=1)``

        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,obs_names=self.pst.obs_names)

        if noptmax is not None:
            pst.control_data.noptmax = noptmax

        if existing_jco is not None:
            pst.pestpp_options["BASE_JACOBIAN"] = existing_jco

        # set the indices
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        if self.parensemble.istransformed:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        for i in range(self.num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,"parval1"] = par_en.iloc[i, :].T

            # reset the regularization
            #if pst.control_data.pestmode == "regularization":
                #pst.zero_order_tikhonov(parbounds=True)
                #zero_order_tikhonov(pst,parbounds=True)
            # add the obs noise realization if needed
            if self.obsensemble.shape[0] == self.num_reals:
                pst.observation_data.loc[self.obsensemble.columns,"obsval"] = \
                    self.obsensemble.iloc[i, :].T

            # write
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")
Esempio n. 4
0
File: mc.py Progetto: mnfienen/pyemu
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis

       Note: requires a pest control file, which can be
             derived from a jco argument
             MonteCarlo.project_parensemble also
             requires a jacobian

    """
    def __init__(self, **kwargs):
        super(MonteCarlo, self).__init__(**kwargs)
        assert self.pst is not None, \
            "monte carlo requires a pest control file"
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

    @property
    def num_reals(self):
        """ the number of realizations (rows) in the parameter ensemble
        """
        return self.parensemble.shape[0]

    def get_nsing(self, epsilon=1.0e-6):
        """ get the number of solution space dimensions given
            a threshold on the ratio of each singular value to the
            largest singular value of XTQX

        Parameters:
            epsilon: singular value ratio threshold
        Returns : integer
            number of singular components above the epsilon ratio threshold
        """
        # singular values scaled by the largest one and sorted ascending,
        # so searchsorted yields how many ratios fall below epsilon
        ratios = np.sort((self.xtqx.s.x / self.xtqx.s.x.max())[:, 0])
        return self.xtqx.shape[0] - np.searchsorted(ratios, epsilon)

    def get_null_proj(self, nsing=None):
        """ get a null-space projection matrix of XTQX

        Parameters:
        ----------
            nsing: optional number of singular components to use
                      if none, call self.get_nsing()
        Returns:
        -------
            Matrix instance : V2V2^T
        """
        if nsing is None:
            nsing = self.get_nsing()

        # right singular vectors beyond the first nsing columns
        v2 = self.xtqx.v[:, nsing:]
        return (v2 * v2.T)

    def draw(self, num_reals=1, par_file = None, obs=False,
             enforce_bounds=False,cov=None):
        """draw stochastic realizations of parameters and
           optionally observations

        Parameters:
        ----------
            num_reals (int): number of realization to generate

            par_file (str): parameter file to use as mean values

            obs (bool): add a realization of measurement noise to obs

            enforce_bounds (bool): enforce parameter bounds in control file

            cov (Cov): parameter covariance matrix to draw from.  If None,
                self.parcov is used

        Returns:
            None
        Raises:
            AssertionError if cov is passed and is not a Cov instance
        """
        if par_file is not None:
            self.pst.parrep(par_file)

        if cov is None:
            cov = self.parcov
        else:
            assert isinstance(cov, Cov)

        # the same phrase is logged before and after each operation
        par_phrase = "generating {0:d} parameter realizations".format(num_reals)
        self.log(par_phrase)
        self.parensemble.draw(cov, num_reals=num_reals)
        if enforce_bounds:
            self.parensemble.enforce()
        self.log(par_phrase)

        if obs:
            obs_phrase = "generating {0:d} observation realizations".format(num_reals)
            self.log(obs_phrase)
            self.obsensemble.draw(self.obscov, num_reals=num_reals)
            self.log(obs_phrase)

    def project_parensemble(self,par_file=None,nsing=None,
                            inplace=True):
        """ perform the null-space projection operations for null-space monte carlo

        Parameters:
            par_file: str
                an optional file of parameter values to use
            nsing: int
                number of singular values to use in forming the null
                subspace matrix.  If None, self.get_nsing() is used
            inplace: bool
                overwrite the existing parameter ensemble with the
                projected values
        Returns:
        -------
            if inplace is False, ParameterEnsemble instance, otherwise None
        """
        assert self.jco is not None, \
            "MonteCarlo.project_parensemble() requires a jacobian attribute"
        if par_file is not None:
            assert os.path.exists(par_file), \
                "MonteCarlo.project_parensemble() error: " + \
                "par_file not found: {0}".format(par_file)
            self.parensemble.pst.parrep(par_file)

        # project the ensemble
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),inplace=inplace,log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self,prefix):
        """ write parameter and optionally observation realizations
            to pest control files, one file named prefix + "<i>.pst"
            per realization
        Parameters:
        ----------
            prefix: str
                pest control file prefix
        Returns:
        -------
            None
        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,obs_names=self.pst.obs_names)

        # set the indices
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        if self.parensemble.islog:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        for i in range(self.num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,"parval1"] = par_en.iloc[i, :].T
            # observation values are only replaced when the obs ensemble
            # has one row per parameter realization
            if self.obsensemble.shape[0] == self.num_reals:
                pst.observation_data.loc[self.obsensemble.columns,"obsval"] = \
                    self.obsensemble.iloc[i, :].T
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")
Esempio n. 5
0
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis

       Note: requires a pest control file, which can be
             derived from a jco argument
             MonteCarlo.project_parensemble also
             requires a jacobian

    """
    def __init__(self,**kwargs):
        super(MonteCarlo,self).__init__(**kwargs)
        assert self.pst is not None, \
            "monte carlo requires a pest control file"
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

    @property
    def num_reals(self):
        """ the number of realizations (rows) in the parameter ensemble
        """
        return self.parensemble.shape[0]

    def get_nsing(self,epsilon=1.0e-4):
        """ get the number of solution space dimensions given
            a ratio between the largest and smallest singular
            values

        Parameters:
            epsilon: ratio
        Returns : integer (or None)
            number of singular components above the epsilon ratio threshold
            If nsing equals the dimension of XTQX, a warning is logged
            and None is returned
        """
        mx = self.xtqx.shape[0]
        # singular values scaled by the largest one and sorted ascending,
        # so searchsorted yields how many ratios fall below epsilon
        scaled = np.sort((self.xtqx.s.x / self.xtqx.s.x.max())[:,0])
        nsing = mx - np.searchsorted(scaled,epsilon)
        if nsing == mx:
            self.logger.warn("optimal nsing=npar")
            return None
        return nsing

    def get_null_proj(self,nsing=None):
        """ get a null-space projection matrix of XTQX

        Parameters:
        ----------
            nsing: optional number of singular components to use
                      if none, call self.get_nsing()
        Returns:
        -------
            Matrix instance : V2V2^T
        Raises:
        -------
            Exception if nsing is None and self.get_nsing() returns None
        """
        if nsing is None:
            nsing = self.get_nsing()
        if nsing is None:
            raise Exception("nsing is None")
        print("using {0} singular components".format(nsing))

        # the same phrase is logged before and after the operation
        phrase = "forming null space projection matrix with " +\
                 "{0} of {1} singular components".format(nsing,self.jco.shape[1])
        self.log(phrase)
        # right singular vectors beyond the first nsing columns
        v2 = self.xtqx.v[:,nsing:]
        v2_proj = (v2 * v2.T)
        self.log(phrase)

        return v2_proj

    def draw(self, num_reals=1, par_file = None, obs=False,
             enforce_bounds=False,cov=None, how="gaussian"):
        """draw stochastic realizations of parameters and
           optionally observations.  Both ensembles are rebuilt from
           the control file before drawing.

        Parameters:
        ----------
            num_reals (int): number of realization to generate

            par_file (str): parameter file to use as mean values

            obs (bool): add a realization of measurement noise to obs

            enforce_bounds (bool): enforce parameter bounds in control file

            cov (Cov): parameter covariance matrix to draw from.  If None,
                self.parcov is used.  Cannot be passed with how="uniform"

            how (str): type of distribution.  Must be in ["gaussian","uniform"]
        Returns:
            None
        Raises:
            Exception if cov is passed together with how="uniform"
        """
        if par_file is not None:
            self.pst.parrep(par_file)
        how = how.lower().strip()
        assert how in ["gaussian","uniform"]

        if cov is None:
            cov = self.parcov
        else:
            assert isinstance(cov,Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform',"
                                " 'cov' arg cannot be passed")

        # start from fresh ensembles so earlier draws are discarded
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

        # the same phrase is logged before and after each operation
        par_phrase = "generating {0:d} parameter realizations".format(num_reals)
        self.log(par_phrase)
        self.parensemble.draw(cov,num_reals=num_reals, how=how)
        if enforce_bounds:
            self.parensemble.enforce()
        self.log(par_phrase)

        if obs:
            obs_phrase = "generating {0:d} observation realizations".format(num_reals)
            self.log(obs_phrase)
            self.obsensemble.draw(self.obscov,num_reals=num_reals)
            self.log(obs_phrase)

    def project_parensemble(self,par_file=None,nsing=None,
                            inplace=True):
        """ perform the null-space projection operations for null-space monte carlo

        Parameters:
            par_file: str
                an optional file of parameter values to use
            nsing: int
                number of singular values to use in forming the null
                subspace matrix.  If None, self.get_nsing() is used
            inplace: bool
                overwrite the existing parameter ensemble with the
                projected values
        Returns:
        -------
            if inplace is False, ParameterEnsemble instance, otherwise None
        """
        assert self.jco is not None, \
            "MonteCarlo.project_parensemble() requires a jacobian attribute"
        if par_file is not None:
            assert os.path.exists(par_file), \
                "MonteCarlo.project_parensemble() error: " + \
                "par_file not found: {0}".format(par_file)
            self.parensemble.pst.parrep(par_file)

        # project the ensemble
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),inplace=inplace,log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self,prefix,existing_jco=None,noptmax=None):
        """ write parameter and optionally observation realizations
            to pest control files, one file named prefix + "<i>.pst"
            per realization
        Parameters:
        ----------
            prefix: str
                pest control file prefix
            existing_jco: str
                filename of an existing jacobian matrix to add to the
                pest++ options in the control file.  This is useful for
                NSMC since this jco can be used to get the first set of
                parameter upgrades for free!  Needs to be the path the jco
                file as seen from the location where pest++ will be run
            noptmax: int
                value of NOPTMAX to set in new pest control files
        Returns:
        -------
            None
        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,obs_names=self.pst.obs_names)

        if noptmax is not None:
            pst.control_data.noptmax = noptmax

        if existing_jco is not None:
            pst.pestpp_options["BASE_JACOBIAN"] = existing_jco

        # set the indices
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        if self.parensemble.istransformed:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        for i in range(self.num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,"parval1"] = par_en.iloc[i, :].T

            # reset the regularization
            if pst.control_data.pestmode == "regularization":
                pst.zero_order_tikhonov(parbounds=True)

            # add the obs noise realization if needed: only when the obs
            # ensemble has one row per parameter realization
            if self.obsensemble.shape[0] == self.num_reals:
                pst.observation_data.loc[self.obsensemble.columns,"obsval"] = \
                    self.obsensemble.iloc[i, :].T

            # write
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")
Esempio n. 6
0
class MonteCarlo(LinearAnalysis):
    """LinearAnalysis derived type for monte carlo analysis

       Note: requires a pest control file, which can be
             derived from a jco argument
             MonteCarlo.project_parensemble also
             requires a jacobian

    """
    def __init__(self, **kwargs):
        super(MonteCarlo, self).__init__(**kwargs)
        assert self.pst is not None, \
            "monte carlo requires a pest control file"
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

    @property
    def num_reals(self):
        """ the number of realizations (rows) in the parameter ensemble
        """
        return self.parensemble.shape[0]

    def get_nsing(self, epsilon=1.0e-4):
        """ get the number of solution space dimensions given
            a ratio between the largest and smallest singular
            values

        Parameters:
            epsilon: ratio
        Returns : integer (or None)
            number of singular components above the epsilon ratio threshold
            If nsing equals the dimension of XTQX, a warning is logged
            and None is returned
        """
        mx = self.xtqx.shape[0]
        # singular values relative to the largest one, in ascending order;
        # searchsorted then counts the ratios that fall below epsilon
        sv_ratios = np.sort((self.xtqx.s.x / self.xtqx.s.x.max())[:, 0])
        n_below = np.searchsorted(sv_ratios, epsilon)
        nsing = mx - n_below
        if nsing == mx:
            self.logger.warn("optimal nsing=npar")
            nsing = None
        return nsing

    def get_null_proj(self, nsing=None):
        """ get a null-space projection matrix of XTQX

        Parameters:
        ----------
            nsing: optional number of singular components to use
                      if none, call self.get_nsing()
        Returns:
        -------
            Matrix instance : V2V2^T
        Raises:
        -------
            Exception if nsing is None and self.get_nsing() returns None
        """
        if nsing is None:
            nsing = self.get_nsing()
        if nsing is None:
            raise Exception("nsing is None")
        print("using {0} singular components".format(nsing))

        # the same phrase is logged before and after the operation
        log_phrase = "forming null space projection matrix with " +\
                     "{0} of {1} singular components".format(nsing,self.jco.shape[1])
        self.log(log_phrase)
        # right singular vectors beyond the first nsing columns
        null_vecs = self.xtqx.v[:, nsing:]
        v2_proj = (null_vecs * null_vecs.T)
        self.log(log_phrase)

        return v2_proj

    def draw(self,
             num_reals=1,
             par_file=None,
             obs=False,
             enforce_bounds=False,
             cov=None,
             how="gaussian"):
        """draw stochastic realizations of parameters and
           optionally observations.  Both ensembles are rebuilt from
           the control file before drawing.

        Parameters:
        ----------
            num_reals (int): number of realization to generate

            par_file (str): parameter file to use as mean values

            obs (bool): add a realization of measurement noise to obs

            enforce_bounds (bool): enforce parameter bounds in control file

            cov (Cov): parameter covariance matrix to draw from.  If None,
                self.parcov is used.  Cannot be passed with how="uniform"

            how (str): type of distribution.  Must be in ["gaussian","uniform"]
        Returns:
            None
        Raises:
            Exception if cov is passed together with how="uniform"
        """
        if par_file is not None:
            self.pst.parrep(par_file)
        how = how.lower().strip()
        assert how in ["gaussian", "uniform"]

        if cov is None:
            cov = self.parcov
        else:
            assert isinstance(cov, Cov)
            if how == "uniform":
                raise Exception("MonteCarlo.draw() error: 'how'='uniform',"
                                " 'cov' arg cannot be passed")

        # start from fresh ensembles so earlier draws are discarded
        self.parensemble = ParameterEnsemble(pst=self.pst)
        self.obsensemble = ObservationEnsemble(pst=self.pst)

        # the same phrase is logged before and after each operation
        par_phrase = "generating {0:d} parameter realizations".format(
            num_reals)
        self.log(par_phrase)
        self.parensemble.draw(cov, num_reals=num_reals, how=how)
        if enforce_bounds:
            self.parensemble.enforce()
        self.log(par_phrase)

        if obs:
            obs_phrase = "generating {0:d} observation realizations".format(
                num_reals)
            self.log(obs_phrase)
            self.obsensemble.draw(self.obscov, num_reals=num_reals)
            self.log(obs_phrase)

    def project_parensemble(self, par_file=None, nsing=None, inplace=True):
        """ perform the null-space projection operations for null-space monte carlo

        Parameters:
            par_file: str
                an optional file of parameter values to use
            nsing: int
                number of singular values to use in forming the null
                subspace matrix.  If None, self.get_nsing() is used
            inplace: bool
                overwrite the existing parameter ensemble with the
                projected values
        Returns:
        -------
            if inplace is False, ParameterEnsemble instance, otherwise None
        """
        assert self.jco is not None, \
            "MonteCarlo.project_parensemble() requires a jacobian attribute"
        if par_file is not None:
            assert os.path.exists(par_file), \
                "MonteCarlo.project_parensemble() error: " + \
                "par_file not found: {0}".format(par_file)
            self.parensemble.pst.parrep(par_file)

        # project the ensemble
        self.log("projecting parameter ensemble")
        en = self.parensemble.project(self.get_null_proj(nsing),
                                      inplace=inplace,
                                      log=self.log)
        self.log("projecting parameter ensemble")
        return en

    def write_psts(self, prefix, existing_jco=None, noptmax=None):
        """ write parameter and optionally observation realizations
            to pest control files, one file named prefix + "<i>.pst"
            per realization
        Parameters:
        ----------
            prefix: str
                pest control file prefix
            existing_jco: str
                filename of an existing jacobian matrix to add to the
                pest++ options in the control file.  This is useful for
                NSMC since this jco can be used to get the first set of
                parameter upgrades for free!  Needs to be the path the jco
                file as seen from the location where pest++ will be run
            noptmax: int
                value of NOPTMAX to set in new pest control files
        Returns:
        -------
            None
        """
        self.log("writing realized pest control files")
        # get a copy of the pest control file
        pst = self.pst.get(par_names=self.pst.par_names,
                           obs_names=self.pst.obs_names)

        if noptmax is not None:
            pst.control_data.noptmax = noptmax

        if existing_jco is not None:
            pst.pestpp_options["BASE_JACOBIAN"] = existing_jco

        # set the indices
        pst.parameter_data.index = pst.parameter_data.parnme
        pst.observation_data.index = pst.observation_data.obsnme

        if self.parensemble.istransformed:
            par_en = self.parensemble._back_transform(inplace=False)
        else:
            par_en = self.parensemble

        for i in range(self.num_reals):
            pst_name = prefix + "{0:d}.pst".format(i)
            self.log("writing realized pest control file " + pst_name)
            pst.parameter_data.loc[par_en.columns,
                                   "parval1"] = par_en.iloc[i, :].T

            # reset the regularization
            if pst.control_data.pestmode == "regularization":
                pst.zero_order_tikhonov(parbounds=True)

            # add the obs noise realization if needed: only when the obs
            # ensemble has one row per parameter realization
            if self.obsensemble.shape[0] == self.num_reals:
                pst.observation_data.loc[self.obsensemble.columns,"obsval"] = \
                    self.obsensemble.iloc[i, :].T

            # write
            pst.write(pst_name)
            self.log("writing realized pest control file " + pst_name)
        self.log("writing realized pest control files")
Esempio n. 7
0
class EnsembleSmoother():
    def __init__(self,
                 pst,
                 parcov=None,
                 obscov=None,
                 num_slaves=0,
                 use_approx_prior=True,
                 submit_file=None,
                 verbose=False,
                 port=4004,
                 slave_dir="template"):
        """Set up an ensemble smoother for a PEST control file.

        Parameters
        ----------
        pst : str or Pst
            control file name (loaded with ``Pst(pst)``) or an existing
            ``Pst`` instance
        parcov : Cov, optional
            parameter covariance.  If None, it is built with
            ``Cov.from_parameter_data``
        obscov : Cov, optional
            observation covariance.  If None, it is built with
            ``Cov.from_observation_data``
        num_slaves : int
            number of local slaves started when ensembles are evaluated
            locally; with 0, ``sweep`` is called directly
        use_approx_prior : bool
            stored on the instance and consulted by ``initialize()``
        submit_file : str, optional
            condor submit file.  When given, ensembles are evaluated through
            ``condor_submit`` rather than locally
        verbose : bool or str
            passed to ``Logger``; any value other than False also switches
            on echoing
        port : int
            port handed to the sweep master and to the local slaves
        slave_dir : str
            directory the local slaves are started from

        Raises
        ------
        Whatever ``Logger.lraise`` raises if `submit_file` (or, for local
        parallel runs, `slave_dir`) does not exist; AssertionError if `pst`,
        `parcov` or `obscov` have an unexpected type.
        """
        self.logger = Logger(verbose)
        if verbose is not False:
            self.logger.echo = True
        self.num_slaves = int(num_slaves)
        if submit_file is not None:
            if not os.path.exists(submit_file):
                self.logger.lraise(
                    "submit_file {0} not found".format(submit_file))
        # compare the converted value: the raw argument may be a string
        # (it is only required to be int()-able)
        elif self.num_slaves > 0:
            if not os.path.exists(slave_dir):
                self.logger.lraise(
                    "template dir {0} not found".format(slave_dir))

        self.slave_dir = slave_dir
        self.submit_file = submit_file
        self.port = int(port)
        self.use_approx_prior = bool(use_approx_prior)
        # suffixes appended to the control file name when ensembles are saved
        self.paren_prefix = ".parensemble.{0:04d}.csv"
        self.obsen_prefix = ".obsensemble.{0:04d}.csv"

        if isinstance(pst, str):
            pst = Pst(pst)
        assert isinstance(pst, Pst)
        self.pst = pst
        # file names sweep reads from / writes to, honoring any overrides
        # in the control file's pest++ options
        self.sweep_in_csv = pst.pestpp_options.get("sweep_parameter_csv_file",
                                                   "sweep_in.csv")
        self.sweep_out_csv = pst.pestpp_options.get("sweep_output_csv_file",
                                                    "sweep_out.csv")
        if parcov is not None:
            assert isinstance(parcov, Cov)
        else:
            parcov = Cov.from_parameter_data(self.pst)
        if obscov is not None:
            assert isinstance(obscov, Cov)
        else:
            obscov = Cov.from_observation_data(pst)

        self.parcov = parcov
        self.obscov = obscov

        # NOTE: restarting from a saved iteration is not handled here;
        # initialize() accepts existing ensembles and a restart_obsensemble

        # state that is filled in by initialize() / update()
        self.__initialized = False
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0
        self.raw_sweep_out = None

    @property
    def current_phi(self):
        """phi of every realization in the current observation ensemble

        Returns
        -------
        pandas.DataFrame
            a single "phi" column, indexed like ``self.obsensemble``
        """
        phi_values = self._calc_phi_vec(self.obsensemble)
        realization_index = self.obsensemble.index
        return pd.DataFrame(data={"phi": phi_values},
                            index=realization_index)

    def initialize(self,
                   num_reals=1,
                   init_lambda=None,
                   enforce_bounds="reset",
                   parensemble=None,
                   obsensemble=None,
                   restart_obsensemble=None):
        '''
        (re)initialize the process

        Builds (or loads) the initial parameter ensemble and the base
        observation ensemble, evaluates the initial parameter ensemble (or
        loads a previous evaluation), reports the initial phi and selects
        the initial lambda.

        Parameters
        ----------
        num_reals : int
            number of realizations to draw.  Only used when the ensembles
            are drawn, i.e. unless both `parensemble` and `obsensemble`
            are passed
        init_lambda : float, optional
            initial lambda.  If None, it is derived from the mean initial
            phi (following chen and oliver)
        enforce_bounds : str
            passed to ``ParameterEnsemble.enforce()`` for the drawn
            ensemble here and for every upgraded ensemble in ``update()``
        parensemble : str or ParameterEnsemble, optional
            existing initial parameter ensemble (csv file name or instance)
        obsensemble : str or ObservationEnsemble, optional
            existing base observation ensemble (csv file name or instance)
        restart_obsensemble : str, optional
            sweep output csv with a previous evaluation of the parameter
            ensemble; when given, the initial model runs are skipped
        '''
        self.enforce_bounds = enforce_bounds
        # on re-initialization, release the report file left open by the
        # previous call before truncating it
        previous_csv = getattr(self, "phi_csv", None)
        if previous_csv is not None and not previous_csv.closed:
            previous_csv.close()
        # initialize the phi report csv (the header is written below, once
        # the number of realizations is known)
        self.phi_csv = open(self.pst.filename + ".iobj.csv", 'w')
        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

        if parensemble is not None and obsensemble is not None:
            self.logger.log("initializing with existing ensembles")
            if isinstance(parensemble, str):
                self.logger.log("loading parensemble from file")
                # check the parameter ensemble file itself (the observation
                # ensemble is checked separately below)
                if not os.path.exists(parensemble):
                    self.logger.lraise("can not find parensemble file: {0}".\
                                       format(parensemble))
                df = pd.read_csv(parensemble, index_col=0)
                self.parensemble_0 = ParameterEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading parensemble from file")

            elif isinstance(parensemble, ParameterEnsemble):
                self.parensemble_0 = parensemble.copy()
            else:
                raise Exception("unrecognized arg type for parensemble, " +\
                                "should be filename or ParameterEnsemble" +\
                                ", not {0}".format(type(parensemble)))
            self.parensemble = self.parensemble_0.copy()
            if isinstance(obsensemble, str):
                self.logger.log("loading obsensemble from file")
                if not os.path.exists(obsensemble):
                    self.logger.lraise("can not find obsensemble file: {0}".\
                                       format(obsensemble))
                # only the non-zero weighted observations are kept
                df = pd.read_csv(obsensemble,
                                 index_col=0).loc[:, self.pst.nnz_obs_names]
                self.obsensemble_0 = ObservationEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading obsensemble from file")

            elif isinstance(obsensemble, ObservationEnsemble):
                self.obsensemble_0 = obsensemble.copy()
            else:
                raise Exception("unrecognized arg type for obsensemble, " +\
                                "should be filename or ObservationEnsemble" +\
                                ", not {0}".format(type(obsensemble)))

            assert self.parensemble_0.shape[0] == self.obsensemble_0.shape[0]
            self.logger.log("initializing with existing ensembles")

        else:
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))
            # log() calls come in start/finish pairs, so exactly one pair
            # wraps the whole parameter ensemble set up
            self.logger.log("initializing parensemble")
            self.parensemble_0 = ParameterEnsemble(self.pst)
            self.parensemble_0.draw(cov=self.parcov, num_reals=num_reals)
            self.parensemble_0.enforce(enforce_bounds=enforce_bounds)
            self.parensemble = self.parensemble_0.copy()
            self.parensemble_0.to_csv(self.pst.filename +\
                                      self.paren_prefix.format(0))
            self.logger.log("initializing parensemble")
            self.logger.log("initializing obsensemble")
            self.obsensemble_0 = ObservationEnsemble(self.pst)
            self.obsensemble_0.draw(cov=self.obscov, num_reals=num_reals)

            # save the base obsensemble
            self.obsensemble_0.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(-1))
            self.logger.log("initializing obsensemble")
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))

        # one header column per realization actually in the ensemble;
        # `num_reals` is not meaningful when existing ensembles were passed
        self.phi_csv.write(
            "iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_csv.write(','.join(
            ["{0:010d}".format(i + 1)
             for i in range(self.parensemble_0.shape[0])]))
        self.phi_csv.write('\n')

        self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

        if restart_obsensemble is not None:
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))
            failed_runs, self.obsensemble = self._load_obs_ensemble(
                restart_obsensemble)
            assert self.obsensemble.shape[0] == self.obsensemble_0.shape[0]
            assert list(self.obsensemble.columns) == list(
                self.obsensemble_0.columns)
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))

        else:
            # run the initial parameter ensemble
            self.logger.log("evaluating initial ensembles")
            failed_runs, self.obsensemble = self._calc_obs(self.parensemble)
            self.obsensemble.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(0))
            self.logger.log("evaluating initial ensembles")

        if failed_runs is not None:
            self.logger.warn("dropping failed realizations")
            self.parensemble = self.parensemble.drop(failed_runs)
            self.obsensemble = self.obsensemble.drop(failed_runs)
        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
        self._phi_report(self.current_phi_vec, 0.0)

        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        self.logger.statement("initial phi (mean, std): {0:15.6G},{1:15.6G}".\
                              format(self.last_best_mean,self.last_best_std))
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # if using the approximate form of the algorithm, let
        # the parameter scaling matrix be the identity matrix
        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now, just not using it, maybe
        # better choices of lambda will tame it
        self.logger.statement("current lambda:{0:15.6g}".format(
            self.current_lambda))

        if self.use_approx_prior:
            self.logger.statement("using approximate parcov in solution")
        # the full-parcov scaling is disabled for now (see the note above),
        # so the scaling is the identity whatever use_approx_prior is
        self.half_parcov_diag = 1.0
        self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
        u, s, v = self.delta_par_prior.pseudo_inv_components()
        self.Am = u * s.inv

        self.__initialized = True

    def get_localizer(self):
        """build a localizer that is filled with ones

        Returns
        -------
        Matrix
            rows are the non-zero weighted observation names, columns the
            adjustable parameter names of the control file
        """
        obs_names = self.pst.nnz_obs_names
        par_names = self.pst.adj_par_names
        ones = np.ones((len(obs_names), len(par_names)))
        return Matrix(x=ones, row_names=obs_names, col_names=par_names)

    def _calc_delta_par(self, parensemble):
        '''
        scaled differences of a parameter ensemble from its mean, using
        self.half_parcov_diag as the scaling
        '''
        par_scaling = self.half_parcov_diag
        return self._calc_delta(parensemble, par_scaling)

    def _calc_delta_obs(self, obsensemble):
        '''
        scaled differences of the non-zero part of an observation ensemble
        from its mean, using the inverse square root of obscov as the scaling
        '''
        nonzero_obs = obsensemble.nonzero
        obs_scaling = self.obscov.inv.sqrt
        return self._calc_delta(nonzero_obs, obs_scaling)

    def _calc_delta(self, ensemble, scaling_matrix):
        '''
        scaled ensemble differences from the mean

        The column means are removed from every realization, the result is
        transposed, pre-multiplied by `scaling_matrix` and scaled by
        1/sqrt(number of realizations - 1).
        '''
        num_reals = ensemble.shape[0]
        center = np.array(ensemble.mean(axis=0))
        deviations = ensemble.as_pyemu_matrix()
        # in-place, so the matrix keeps its own array; the means broadcast
        # across the realizations (rows)
        deviations.x[:, :] -= center
        scaled = scaling_matrix * deviations.T
        scaled *= (1.0 / np.sqrt(float(num_reals - 1.0)))
        return scaled

    def _calc_obs(self, parensemble):
        """evaluate a parameter ensemble with sweep and load the results

        Stale sweep input/output files are removed first (a failure to do
        so is only warned about), then the ensemble is run locally or
        through condor, depending on whether a submit file was given.

        Returns
        -------
        (failed_runs, obs)
            as returned by ``_load_obs_ensemble`` for the sweep output csv
        """
        self.logger.log("removing existing sweep in/out files")
        stale_files = (
            (self.sweep_in_csv, "error removing existing sweep in file:{0}"),
            (self.sweep_out_csv,
             "error removing existing sweep out file:{0}"),
        )
        for stale_path, warning in stale_files:
            try:
                os.remove(stale_path)
            except Exception as e:
                self.logger.warn(warning.format(str(e)))
        self.logger.log("removing existing sweep in/out files")

        # refuse to run NaNs, but leave the offending ensemble behind
        if parensemble.isnull().values.any():
            parensemble.to_csv("_nan.csv")
            self.logger.lraise(
                "_calc_obs() error: NaNs in parensemble (written to '_nan.csv')"
            )

        if self.submit_file is None:
            evaluate = self._calc_obs_local
        else:
            evaluate = self._calc_obs_condor
        evaluate(parensemble)

        read_msg = "reading sweep out csv {0}".format(self.sweep_out_csv)
        self.logger.log(read_msg)
        failed_runs, obs = self._load_obs_ensemble(self.sweep_out_csv)
        self.logger.log(read_msg)
        self.total_runs += obs.shape[0]
        self.logger.statement("total runs:{0}".format(self.total_runs))
        return failed_runs, obs

    def _load_obs_ensemble(self, filename):
        """load a sweep output csv as an observation ensemble

        The raw table (with lower-cased column names) is kept in
        ``self.raw_sweep_out``.

        Returns
        -------
        failed_runs : numpy.ndarray or None
            ``input_run_id`` values of the rows whose ``failed_flag`` is 1,
            None if no run failed
        obs : ObservationEnsemble
            the columns named by ``self.obscov.row_names``, indexed by
            ``input_run_id``
        """
        if not os.path.exists(filename):
            self.logger.lraise(
                "obsensemble file {0} does not exists".format(filename))
        raw = pd.read_csv(filename)
        raw.columns = [name.lower() for name in raw.columns]
        # save this for later to support restart
        self.raw_sweep_out = raw.copy()
        assert "input_run_id" in raw.columns,\
            "'input_run_id' col missing...need newer version of sweep"
        raw.index = raw.input_run_id

        failed_runs = None
        if 1 in raw.failed_flag.values:
            failed_runs = raw.loc[raw.failed_flag == 1].index.values
            failed_ids = ','.join([str(run) for run in failed_runs])
            self.logger.warn("{0} runs failed (indices: {1})".\
                             format(len(failed_runs), failed_ids))

        obs_columns = raw.loc[:, self.obscov.row_names]
        ensemble = ObservationEnsemble.from_dataframe(df=obs_columns,
                                                      pst=self.pst)
        if ensemble.isnull().values.any():
            self.logger.lraise("_calc_obs() error: NaNs in obsensemble")
        return failed_runs, ensemble

    def _get_master_thread(self):
        """start the sweep master in a background thread

        The master's stdout/stderr are redirected to files in the current
        directory; once the master returns, any stderr lines are reported
        as a warning.

        Returns
        -------
        threading.Thread
            the started thread (returned after a 2 second pause)
        """
        stdout_file = "_master_stdout.dat"
        stderr_file = "_master_stderr.dat"

        def master():
            try:
                command = "sweep {0} /h :{1} 1>{2} 2>{3}".format(
                    self.pst.filename, self.port, stdout_file, stderr_file)
                pyemu.helpers.run(command)
            except Exception as e:
                self.logger.lraise("error starting condor master: {0}".format(
                    str(e)))
            with open(stderr_file, 'r') as f:
                stderr_lines = f.readlines()
            if stderr_lines:
                stripped = [line.strip() for line in stderr_lines]
                self.logger.warn("master stderr lines: {0}".format(
                    ','.join(stripped)))

        worker = threading.Thread(target=master)
        worker.start()
        time.sleep(2.0)
        return worker

    def _calc_obs_condor(self, parensemble):
        """evaluate a parameter ensemble with sweep through condor

        Writes the ensemble to the sweep input csv, starts the sweep master
        in a background thread, submits ``self.submit_file`` with
        ``condor_submit``, waits for the master to finish and finally
        removes the condor cluster with ``condor_rm``.
        """
        self.logger.log("evaluating ensemble of size {0} with htcondor".\
                        format(parensemble.shape[0]))

        parensemble.to_csv(self.sweep_in_csv)
        master_thread = self._get_master_thread()
        condor_temp_file = "_condor_submit_stdout.dat"
        condor_err_file = "_condor_submit_stderr.dat"
        self.logger.log("calling condor_submit with submit file {0}".format(
            self.submit_file))
        try:
            os.system("condor_submit {0} 1>{1} 2>{2}".\
                      format(self.submit_file,condor_temp_file,condor_err_file))
        except Exception as e:
            self.logger.lraise("error in condor_submit: {0}".format(str(e)))
        self.logger.log("calling condor_submit with submit file {0}".format(
            self.submit_file))
        # some time for condor to submit the job and echo to stdout
        time.sleep(2.0)
        condor_submit_string = "submitted to cluster"
        with open(condor_temp_file, 'r') as f:
            lines = f.readlines()
        self.logger.statement("condor_submit stdout: {0}".\
                              format(','.join([l.strip() for l in lines])))
        with open(condor_err_file, 'r') as f:
            err_lines = f.readlines()
        if len(err_lines) > 0:
            self.logger.warn("stderr from condor_submit:{0}".\
                             format([l.strip() for l in err_lines]))
        # the cluster number is whatever follows the marker text; match and
        # split the same lower-cased line so capitalization in the
        # condor_submit output cannot break the split
        cluster_number = None
        for line in lines:
            lowered = line.lower()
            if condor_submit_string in lowered:
                cluster_number = int(
                    float(lowered.split(condor_submit_string)[-1]))
        if cluster_number is None:
            self.logger.lraise("couldn't find cluster number...")
        self.logger.statement("condor cluster: {0}".format(cluster_number))
        # the master returns once sweep is done
        master_thread.join()
        self.logger.statement("condor master thread exited")
        self.logger.log(
            "calling condor_rm on cluster {0}".format(cluster_number))
        os.system("condor_rm cluster {0}".format(cluster_number))
        self.logger.log(
            "calling condor_rm on cluster {0}".format(cluster_number))
        self.logger.log("evaluating ensemble of size {0} with htcondor".\
                        format(parensemble.shape[0]))

    def _calc_obs_local(self, parensemble):
        '''
        evaluate a parameter ensemble on this machine using sweep.

        The ensemble is written to the sweep input csv.  Without slaves,
        sweep is called directly; otherwise a sweep master thread and
        self.num_slaves local slaves are started and the master is waited on.
        '''
        log_msg = "evaluating ensemble of size {0} locally with sweep".\
            format(parensemble.shape[0])
        self.logger.log(log_msg)
        parensemble.to_csv(self.sweep_in_csv)
        if self.num_slaves <= 0:
            os.system("sweep {0}".format(self.pst.filename))
        else:
            master_thread = self._get_master_thread()
            pyemu.utils.start_slaves(self.slave_dir,
                                     "sweep",
                                     self.pst.filename,
                                     self.num_slaves,
                                     slave_root='..',
                                     port=self.port)
            master_thread.join()

        self.logger.log(log_msg)

    def _calc_phi_vec(self, obsensemble):
        """calculate phi for every realization of an observation ensemble

        Each residual (see ``_get_residual_matrix``) is multiplied by the
        matching diagonal entry of ``self.obscov_inv_sqrt``; phi is the sum
        of the squares of these weighted residuals.

        Returns
        -------
        numpy.ndarray
            one phi value per realization
        """
        obs_diff = self._get_residual_matrix(obsensemble)
        # only the diagonal of the inverse-sqrt obscov is used, ordered
        # like the residual columns
        q = np.diagonal(
            self.obscov_inv_sqrt.get(row_names=obs_diff.col_names,
                                     col_names=obs_diff.col_names).x)
        weighted = obs_diff.x * q
        # summed row by row so every realization is reduced on its own
        return np.array([(row**2).sum() for row in weighted])

    def _phi_report(self, phi_vec, cur_lam):
        self.phi_csv.write("{0},{1},{2},{3},{4},{5},{6}".format(
            self.iter_num, self.total_runs, cur_lam, phi_vec.min(),
            phi_vec.max(), phi_vec.mean(), np.median(phi_vec), phi_vec.std()))
        self.phi_csv.write(",".join(
            ["{0:20.8}".format(phi) for phi in phi_vec]))
        self.phi_csv.write("\n")
        self.phi_csv.flush()

    def _get_residual_matrix(self, obsensemble):
        """difference between an observation ensemble and the base one

        Returns the non-zero part of `obsensemble` (as a matrix) minus the
        entries of ``self.obs0_matrix`` with the same row and column names.
        """
        simulated = obsensemble.nonzero.as_pyemu_matrix()
        base = self.obs0_matrix.get(col_names=simulated.col_names,
                                    row_names=simulated.row_names)
        return simulated - base

    def update(self,
               lambda_mults=[1.0],
               localizer=None,
               run_subset=None,
               use_approx=True):
        """run one smoother iteration

        An upgraded parameter ensemble is built for every lambda tested,
        all candidates are evaluated in one sweep, and the candidate with
        the lowest mean phi is accepted if that mean is below 1.1 times the
        last accepted mean.  The current ensembles are written to csv at
        the end of the iteration, accepted or not.

        Parameters
        ----------
        lambda_mults : list of float
            multipliers of the current lambda; one candidate ensemble is
            built and evaluated per multiplier
        localizer : Matrix, optional
            handed to ``hadamard_product()`` of the upgrade matrix
        run_subset : int, optional
            number of realizations (the first ones of the current ensemble)
            evaluated per lambda.  If a candidate is accepted, its complete
            ensemble is evaluated afterwards.  Ignored if it is not smaller
            than the number of active realizations
        use_approx : bool
            if False, the parameter-based upgrade (upgrade_2) is added from
            the second iteration on

        Raises
        ------
        Whatever ``Logger.lraise`` raises if ``initialize()`` was not
        called, fewer than 2 realizations are active, an upgrade contains
        NaNs, all runs failed or the best mean/std phi is NaN.
        """
        # check this first: everything below relies on attributes that only
        # exist after initialize(), and a rejected call should not count as
        # an iteration
        if not self.__initialized:
            self.logger.lraise("must call initialize() before update()")

        if run_subset is not None:
            if run_subset >= self.obsensemble.shape[0]:
                self.logger.warn("run_subset ({0}) >= num of active reals ({1})...ignoring ".\
                                 format(run_subset,self.obsensemble.shape[0]))
                run_subset = None

        self.iter_num += 1
        self.logger.log("iteration {0}".format(self.iter_num))
        self.logger.statement("{0} active realizations".format(
            self.obsensemble.shape[0]))
        if self.obsensemble.shape[0] < 2:
            self.logger.lraise(
                "at least active 2 realizations (really like 300) are needed to update"
            )

        self.logger.log("calculate scaled delta obs")
        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        self.logger.log("calculate scaled delta obs")
        self.logger.log("calculate scaled delta par")
        scaled_delta_par = self._calc_delta_par(self.parensemble)
        self.logger.log("calculate scaled delta par")

        self.logger.log("calculate pseudo inv comps")
        u, s, v = scaled_delta_obs.pseudo_inv_components()
        self.logger.log("calculate pseudo inv comps")

        self.logger.log("calculate obs diff matrix")
        obs_diff = self.obscov_inv_sqrt * self._get_residual_matrix(
            self.obsensemble).T
        self.logger.log("calculate obs diff matrix")

        # here is the math part...calculate upgrade matrices
        mean_lam, std_lam, paren_lam, obsen_lam = [], [], [], []
        lam_vals = []
        for ilam, cur_lam_mult in enumerate(lambda_mults):

            parensemble_cur_lam = self.parensemble.copy()

            cur_lam = self.current_lambda * cur_lam_mult
            lam_vals.append(cur_lam)
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))
            # ((1 + lambda) * I + s**2) ** -1
            scaled_ident = Cov.identity_like(s) * (cur_lam + 1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            self.logger.log("building upgrade_1 matrix")
            upgrade_1 = -1.0 * (self.half_parcov_diag * scaled_delta_par) *\
                        v * s * scaled_ident * u.T
            self.logger.log("building upgrade_1 matrix")

            # apply localization
            if localizer is not None:
                self.logger.log("applying localization")
                # NOTE(review): the return value is discarded, so this only
                # localizes if hadamard_product() works in place - confirm
                # against the Matrix implementation
                upgrade_1.hadamard_product(localizer)
                self.logger.log("applying localization")

            # apply residual information
            self.logger.log("applying residuals")
            upgrade_1 *= obs_diff
            self.logger.log("applying residuals")

            self.logger.log("processing upgrade_1")
            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            # realizations as rows, like the parameter ensemble
            upgrade_1 = upgrade_1.T
            upgrade_1.index = [int(i) for i in upgrade_1.index]
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            if upgrade_1.isnull().values.any():
                self.logger.lraise("NaNs in upgrade_1")
            self.logger.log("processing upgrade_1")

            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not use_approx and self.iter_num > 1:
                self.logger.log("building upgrade_2 matrix")
                par_diff = (self.parensemble - self.parensemble_0.loc[self.parensemble.index,:]).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.half_parcov_diag * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                upgrade_2 = -1.0 * (self.half_parcov_diag * scaled_delta_par *
                                    x7).to_dataframe()
                upgrade_2.index.name = "parnme"
                upgrade_2 = upgrade_2.T
                upgrade_2.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                upgrade_2.index = [int(i) for i in upgrade_2.index]

                if upgrade_2.isnull().values.any():
                    self.logger.lraise("NaNs in upgrade_2")

                parensemble_cur_lam += upgrade_2
                self.logger.log("building upgrade_2 matrix")
            parensemble_cur_lam.enforce(self.enforce_bounds)

            # this is for testing failed runs on upgrade testing
            # works with the 10par_xsec smoother test
            #parensemble_cur_lam.iloc[:,:] = -1000000.0

            paren_lam.append(pd.DataFrame(parensemble_cur_lam.loc[:, :]))
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))

        # subset if needed
        # and combine lambda par ensembles into one par ensemble for evaluation
        if run_subset is not None and run_subset < self.parensemble.shape[0]:
            subset_idx = self.parensemble.iloc[:run_subset, :].index.values
            self.logger.statement("subset idxs: " +
                                  ','.join([str(s) for s in subset_idx]))
            paren_lam_subset = [pe.loc[subset_idx, :] for pe in paren_lam]
            paren_combine = pd.concat(paren_lam_subset, ignore_index=True)
            paren_lam_subset = None
        else:
            subset_idx = self.parensemble.index.values
            paren_combine = pd.concat(paren_lam, ignore_index=True)


        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        failed_runs, obsen_combine = self._calc_obs(paren_combine)
        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        paren_combine = None

        if failed_runs is not None and len(
                failed_runs) == obsen_combine.shape[0]:
            self.logger.lraise("all runs failed - cannot continue")

        # unpack lambda obs ensembles from combined obs ensemble
        nrun_per_lam = self.obsensemble.shape[0]
        if run_subset is not None:
            nrun_per_lam = run_subset
        obsen_lam = []
        for i in range(len(lam_vals)):
            # rows [sidx, eidx) of the combined ensemble belong to lambda i
            sidx = i * nrun_per_lam
            eidx = sidx + nrun_per_lam
            oe = ObservationEnsemble.from_dataframe(
                df=obsen_combine.iloc[sidx:eidx, :].copy(), pst=self.pst)
            oe.index = subset_idx
            # check for failed runs in this set - drop failed runs from obs ensembles
            if failed_runs is not None:
                failed_runs_this = np.array(
                    [f for f in failed_runs if f >= sidx and f < eidx]) - sidx
                if len(failed_runs_this) > 0:
                    if len(failed_runs_this) == oe.shape[0]:
                        self.logger.warn(
                            "all runs failed for lambda {0}".format(
                                lam_vals[i]))
                    else:
                        self.logger.warn("{0} run failed for lambda {1}".\
                                         format(len(failed_runs_this),lam_vals[i]))
                    # np.nan rather than the np.NaN alias, which newer
                    # numpy versions no longer provide
                    oe.iloc[failed_runs_this, :] = np.nan
                    oe = oe.dropna()
            obsen_lam.append(oe)
        obsen_combine = None

        # here is where we need to select out the "best" lambda par and obs
        # ensembles
        self.logger.statement("\n**************************")
        self.logger.statement(str(datetime.now()))
        self.logger.statement("total runs:{0}".format(self.total_runs))
        self.logger.statement("iteration: {0}".format(self.iter_num))
        self.logger.statement("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                              format(self.current_lambda,
                         self.last_best_mean,self.last_best_std))
        phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        mean_std = [(pv.mean(), pv.std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i, (m, s) in enumerate(mean_std):
            self.logger.statement(" tested lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                                 format(self.current_lambda * lambda_mults[i],m,s))
            if m < best_mean:
                update_pars = True
                best_mean = m
                best_i = i
                # lambda is only lowered if the spread improved as well
                if s < best_std:
                    update_lambda = True
                    best_std = s
        if np.isnan(best_mean):
            self.logger.lraise("best mean = NaN")
        if np.isnan(best_std):
            self.logger.lraise("best std = NaN")

        if not update_pars:
            self.current_lambda *= max(lambda_mults) * 10.0
            self.current_lambda = min(self.current_lambda, 100000)
            self.logger.statement("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))
        else:
            self.parensemble = ParameterEnsemble.from_dataframe(
                df=paren_lam[best_i], pst=self.pst)
            if run_subset is not None:
                # only a subset was tested, so evaluate the complete
                # accepted ensemble
                failed_runs, self.obsensemble = self._calc_obs(
                    self.parensemble)
                if failed_runs is not None:
                    self.logger.warn("dropping failed realizations")
                    self.parensemble = self.parensemble.drop(failed_runs)
                    self.obsensemble = self.obsensemble.drop(failed_runs)
                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
                self._phi_report(self.current_phi_vec,
                                 self.current_lambda * lambda_mults[best_i])
                best_mean = self.current_phi_vec.mean()
                best_std = self.current_phi_vec.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                # reindex parensemble in case failed runs
                self.parensemble = ParameterEnsemble.from_dataframe(
                    df=self.parensemble.loc[self.obsensemble.index],
                    pst=self.pst)
                self._phi_report(phi_vecs[best_i],
                                 self.current_lambda * lambda_mults[best_i])
                self.current_phi_vec = phi_vecs[best_i]

            self.logger.statement("   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda, 0.001)
            self.logger.statement("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))

        self.logger.statement("**************************\n")
        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))
        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
        if self.raw_sweep_out is not None:
            self.raw_sweep_out.to_csv(self.pst.filename+"_raw{0}".\
                                        format(self.iter_num))
        self.logger.log("iteration {0}".format(self.iter_num))