예제 #1
0
파일: smoother.py 프로젝트: xuexianwu/pyemu
class EnsembleSmoother():
    def __init__(self,
                 pst,
                 parcov=None,
                 obscov=None,
                 num_slaves=0,
                 use_approx_prior=True,
                 submit_file=None,
                 verbose=False,
                 port=4004,
                 slave_dir="template"):
        self.logger = Logger(verbose)
        if verbose is not False:
            self.logger.echo = True
        self.num_slaves = int(num_slaves)
        if submit_file is not None:
            if not os.path.exists(submit_file):
                self.logger.lraise(
                    "submit_file {0} not found".format(submit_file))
        elif num_slaves > 0:
            if not os.path.exists(slave_dir):
                self.logger.lraise(
                    "template dir {0} not found".format(slave_dir))

        self.slave_dir = slave_dir
        self.submit_file = submit_file
        self.port = int(port)
        self.use_approx_prior = bool(use_approx_prior)
        self.paren_prefix = ".parensemble.{0:04d}.csv"
        self.obsen_prefix = ".obsensemble.{0:04d}.csv"

        if isinstance(pst, str):
            pst = Pst(pst)
        assert isinstance(pst, Pst)
        self.pst = pst
        self.sweep_in_csv = pst.pestpp_options.get("sweep_parameter_csv_file",
                                                   "sweep_in.csv")
        self.sweep_out_csv = pst.pestpp_options.get("sweep_output_csv_file",
                                                    "sweep_out.csv")
        if parcov is not None:
            assert isinstance(parcov, Cov)
        else:
            parcov = Cov.from_parameter_data(self.pst)
        if obscov is not None:
            assert isinstance(obscov, Cov)
        else:
            obscov = Cov.from_observation_data(pst)

        self.parcov = parcov
        self.obscov = obscov

        # if restart_iter > 0:
        #     self.restart_iter = restart_iter
        #     paren = self.pst.filename+self.paren_prefix.format(restart_iter)
        #     assert os.path.exists(paren),\
        #         "could not find restart par ensemble {0}".format(paren)
        #     obsen0 = self.pst.filename+self.obsen_prefix.format(0)
        #     assert os.path.exists(obsen0),\
        #         "could not find restart obs ensemble 0 {0}".format(obsen0)
        #     obsen = self.pst.filename+self.obsen_prefix.format(restart_iter)
        #     assert os.path.exists(obsen),\
        #         "could not find restart obs ensemble {0}".format(obsen)
        #     self.restart = True

        self.__initialized = False
        #self.num_reals = 0
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0
        #self.enforce_bounds = None
        self.raw_sweep_out = None

    @property
    def current_phi(self):
        return pd.DataFrame(data={"phi":self._calc_phi_vec(self.obsensemble)},\
                            index=self.obsensemble.index)

    def initialize(self,
                   num_reals=1,
                   init_lambda=None,
                   enforce_bounds="reset",
                   parensemble=None,
                   obsensemble=None,
                   restart_obsensemble=None):
        '''
        (re)initialize the process
        '''
        # initialize the phi report csv
        self.enforce_bounds = enforce_bounds
        self.phi_csv = open(self.pst.filename + ".iobj.csv", 'w')
        self.phi_csv.write(
            "iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_csv.write(','.join(["{0:010d}".\
                                    format(i+1) for i in range(num_reals)]))
        self.phi_csv.write('\n')
        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt

        if parensemble is not None and obsensemble is not None:
            self.logger.log("initializing with existing ensembles")
            if isinstance(parensemble, str):
                self.logger.log("loading parensemble from file")
                if not os.path.exists(obsensemble):
                    self.logger.lraise("can not find parensemble file: {0}".\
                                       format(parensemble))
                df = pd.read_csv(parensemble, index_col=0)
                #df.index = [str(i) for i in df.index]
                self.parensemble_0 = ParameterEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading parensemble from file")

            elif isinstance(parensemble, ParameterEnsemble):
                self.parensemble_0 = parensemble.copy()
            else:
                raise Exception("unrecognized arg type for parensemble, " +\
                                "should be filename or ParameterEnsemble" +\
                                ", not {0}".format(type(parensemble)))
            self.parensemble = self.parensemble_0.copy()
            if isinstance(obsensemble, str):
                self.logger.log("loading obsensemble from file")
                if not os.path.exists(obsensemble):
                    self.logger.lraise("can not find obsensemble file: {0}".\
                                       format(obsensemble))
                df = pd.read_csv(obsensemble,
                                 index_col=0).loc[:, self.pst.nnz_obs_names]
                #df.index = [str(i) for i in df.index]
                self.obsensemble_0 = ObservationEnsemble.from_dataframe(
                    df=df, pst=self.pst)
                self.logger.log("loading obsensemble from file")

            elif isinstance(obsensemble, ObservationEnsemble):
                self.obsensemble_0 = obsensemble.copy()
            else:
                raise Exception("unrecognized arg type for obsensemble, " +\
                                "should be filename or ObservationEnsemble" +\
                                ", not {0}".format(type(obsensemble)))

            assert self.parensemble_0.shape[0] == self.obsensemble_0.shape[0]
            #self.num_reals = self.parensemble_0.shape[0]
            self.logger.log("initializing with existing ensembles")

        else:
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))
            #self.num_reals = int(num_reals)
            #assert self.num_reals > 1
            self.logger.log("initializing parensemble")
            self.parensemble_0 = ParameterEnsemble(self.pst)
            self.parensemble_0.draw(cov=self.parcov, num_reals=num_reals)
            self.parensemble_0.enforce(enforce_bounds=enforce_bounds)
            self.logger.log("initializing parensemble")
            self.parensemble = self.parensemble_0.copy()
            self.parensemble_0.to_csv(self.pst.filename +\
                                      self.paren_prefix.format(0))
            self.logger.log("initializing parensemble")
            self.logger.log("initializing obsensemble")
            self.obsensemble_0 = ObservationEnsemble(self.pst)
            self.obsensemble_0.draw(cov=self.obscov, num_reals=num_reals)
            #self.obsensemble = self.obsensemble_0.copy()

            # save the base obsensemble
            self.obsensemble_0.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(-1))
            self.logger.log("initializing obsensemble")
            self.logger.log(
                "initializing smoother with {0} realizations".format(
                    num_reals))

        self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()
        self.enforce_bounds = enforce_bounds

        if restart_obsensemble is not None:
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))
            failed_runs, self.obsensemble = self._load_obs_ensemble(
                restart_obsensemble)
            assert self.obsensemble.shape[0] == self.obsensemble_0.shape[0]
            assert list(self.obsensemble.columns) == list(
                self.obsensemble_0.columns)
            self.logger.log(
                "loading restart_obsensemble {0}".format(restart_obsensemble))

        else:
            # run the initial parameter ensemble
            self.logger.log("evaluating initial ensembles")
            failed_runs, self.obsensemble = self._calc_obs(self.parensemble)
            self.obsensemble.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(0))
            self.logger.log("evaluating initial ensembles")

        if failed_runs is not None:
            self.logger.warn("dropping failed realizations")
            #failed_runs_str = [str(f) for f in failed_runs]
            self.parensemble = self.parensemble.drop(failed_runs)
            self.obsensemble = self.obsensemble.drop(failed_runs)
        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
        self._phi_report(self.current_phi_vec, 0.0)

        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        self.logger.statement("initial phi (mean, std): {0:15.6G},{1:15.6G}".\
                              format(self.last_best_mean,self.last_best_std))
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # if using the approximate form of the algorithm, let
        # the parameter scaling matrix be the identity matrix
        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now, just not using it, maybe
        # better choices of lambda will tame it
        self.logger.statement("current lambda:{0:15.6g}".format(
            self.current_lambda))

        if self.use_approx_prior:
            self.logger.statement("using approximate parcov in solution")
            self.half_parcov_diag = 1.0
        else:
            #self.logger.statement("using full parcov in solution")
            # if self.parcov.isdiagonal:
            #     self.half_parcov_diag = self.parcov.sqrt.inv
            # else:
            #     self.half_parcov_diag = Cov(x=np.diag(self.parcov.x),
            #                                 names=self.parcov.col_names,
            #                                 isdiagonal=True).inv.sqrt
            self.half_parcov_diag = 1.0
        self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
        u, s, v = self.delta_par_prior.pseudo_inv_components()
        self.Am = u * s.inv

        self.__initialized = True

    def get_localizer(self):
        onames = self.pst.nnz_obs_names
        pnames = self.pst.adj_par_names
        localizer = Matrix(x=np.ones((len(onames), len(pnames))),
                           row_names=onames,
                           col_names=pnames)
        return localizer

    def _calc_delta_par(self, parensemble):
        '''
        calc the scaled parameter ensemble differences from the mean
        '''
        return self._calc_delta(parensemble, self.half_parcov_diag)

    def _calc_delta_obs(self, obsensemble):
        '''
        calc the scaled observation ensemble differences from the mean
        '''
        return self._calc_delta(obsensemble.nonzero, self.obscov.inv.sqrt)

    def _calc_delta(self, ensemble, scaling_matrix):
        '''
        calc the scaled  ensemble differences from the mean
        '''
        mean = np.array(ensemble.mean(axis=0))
        delta = ensemble.as_pyemu_matrix()
        for i in range(ensemble.shape[0]):
            delta.x[i, :] -= mean
        delta = scaling_matrix * delta.T
        delta *= (1.0 / np.sqrt(float(ensemble.shape[0] - 1.0)))
        return delta

    def _calc_obs(self, parensemble):
        self.logger.log("removing existing sweep in/out files")
        try:
            os.remove(self.sweep_in_csv)
        except Exception as e:
            self.logger.warn(
                "error removing existing sweep in file:{0}".format(str(e)))
        try:
            os.remove(self.sweep_out_csv)
        except Exception as e:
            self.logger.warn(
                "error removing existing sweep out file:{0}".format(str(e)))
        self.logger.log("removing existing sweep in/out files")

        if parensemble.isnull().values.any():
            parensemble.to_csv("_nan.csv")
            self.logger.lraise(
                "_calc_obs() error: NaNs in parensemble (written to '_nan.csv')"
            )

        if self.submit_file is None:
            self._calc_obs_local(parensemble)
        else:
            self._calc_obs_condor(parensemble)

        # make a copy of sweep out for restart purposes
        # sweep_out = str(self.iter_num)+"_raw_"+self.sweep_out_csv
        # if os.path.exists(sweep_out):
        #     os.remove(sweep_out)
        # shutil.copy2(self.sweep_out_csv,sweep_out)

        self.logger.log("reading sweep out csv {0}".format(self.sweep_out_csv))
        failed_runs, obs = self._load_obs_ensemble(self.sweep_out_csv)
        self.logger.log("reading sweep out csv {0}".format(self.sweep_out_csv))
        self.total_runs += obs.shape[0]
        self.logger.statement("total runs:{0}".format(self.total_runs))
        return failed_runs, obs

    def _load_obs_ensemble(self, filename):
        if not os.path.exists(filename):
            self.logger.lraise(
                "obsensemble file {0} does not exists".format(filename))
        obs = pd.read_csv(filename)
        obs.columns = [item.lower() for item in obs.columns]
        self.raw_sweep_out = obs.copy(
        )  # save this for later to support restart
        assert "input_run_id" in obs.columns,\
            "'input_run_id' col missing...need newer version of sweep"
        obs.index = obs.input_run_id
        failed_runs = None
        if 1 in obs.failed_flag.values:
            failed_runs = obs.loc[obs.failed_flag == 1].index.values
            self.logger.warn("{0} runs failed (indices: {1})".\
                             format(len(failed_runs),','.join([str(f) for f in failed_runs])))
        obs = ObservationEnsemble.from_dataframe(
            df=obs.loc[:, self.obscov.row_names], pst=self.pst)
        if obs.isnull().values.any():
            self.logger.lraise("_calc_obs() error: NaNs in obsensemble")
        return failed_runs, obs

    def _get_master_thread(self):
        master_stdout = "_master_stdout.dat"
        master_stderr = "_master_stderr.dat"

        def master():
            try:
                #os.system("sweep {0} /h :{1} 1>{2} 2>{3}". \
                #          format(self.pst.filename, self.port, master_stdout, master_stderr))
                pyemu.helpers.run("sweep {0} /h :{1} 1>{2} 2>{3}". \
                          format(self.pst.filename, self.port, master_stdout, master_stderr))

            except Exception as e:
                self.logger.lraise("error starting condor master: {0}".format(
                    str(e)))
            with open(master_stderr, 'r') as f:
                err_lines = f.readlines()
            if len(err_lines) > 0:
                self.logger.warn("master stderr lines: {0}".format(','.join(
                    [l.strip() for l in err_lines])))

        master_thread = threading.Thread(target=master)
        master_thread.start()
        time.sleep(2.0)
        return master_thread

    def _calc_obs_condor(self, parensemble):
        self.logger.log("evaluating ensemble of size {0} with htcondor".\
                        format(parensemble.shape[0]))

        parensemble.to_csv(self.sweep_in_csv)
        master_thread = self._get_master_thread()
        condor_temp_file = "_condor_submit_stdout.dat"
        condor_err_file = "_condor_submit_stderr.dat"
        self.logger.log("calling condor_submit with submit file {0}".format(
            self.submit_file))
        try:
            os.system("condor_submit {0} 1>{1} 2>{2}".\
                      format(self.submit_file,condor_temp_file,condor_err_file))
        except Exception as e:
            self.logger.lraise("error in condor_submit: {0}".format(str(e)))
        self.logger.log("calling condor_submit with submit file {0}".format(
            self.submit_file))
        time.sleep(
            2.0)  #some time for condor to submit the job and echo to stdout
        condor_submit_string = "submitted to cluster"
        with open(condor_temp_file, 'r') as f:
            lines = f.readlines()
        self.logger.statement("condor_submit stdout: {0}".\
                              format(','.join([l.strip() for l in lines])))
        with open(condor_err_file, 'r') as f:
            err_lines = f.readlines()
        if len(err_lines) > 0:
            self.logger.warn("stderr from condor_submit:{0}".\
                             format([l.strip() for l in err_lines]))
        cluster_number = None
        for line in lines:
            if condor_submit_string in line.lower():
                cluster_number = int(
                    float(line.split(condor_submit_string)[-1]))
        if cluster_number is None:
            self.logger.lraise("couldn't find cluster number...")
        self.logger.statement("condor cluster: {0}".format(cluster_number))
        master_thread.join()
        self.logger.statement("condor master thread exited")
        self.logger.log(
            "calling condor_rm on cluster {0}".format(cluster_number))
        os.system("condor_rm cluster {0}".format(cluster_number))
        self.logger.log(
            "calling condor_rm on cluster {0}".format(cluster_number))
        self.logger.log("evaluating ensemble of size {0} with htcondor".\
                        format(parensemble.shape[0]))

    def _calc_obs_local(self, parensemble):
        '''
        propagate the ensemble forward using sweep.
        '''
        self.logger.log("evaluating ensemble of size {0} locally with sweep".\
                        format(parensemble.shape[0]))
        parensemble.to_csv(self.sweep_in_csv)
        if self.num_slaves > 0:
            master_thread = self._get_master_thread()
            pyemu.utils.start_slaves(self.slave_dir,
                                     "sweep",
                                     self.pst.filename,
                                     self.num_slaves,
                                     slave_root='..',
                                     port=self.port)
            master_thread.join()
        else:
            os.system("sweep {0}".format(self.pst.filename))

        self.logger.log("evaluating ensemble of size {0} locally with sweep".\
                        format(parensemble.shape[0]))

    def _calc_phi_vec(self, obsensemble):
        obs_diff = self._get_residual_matrix(obsensemble)
        #phi_vec = np.diagonal((obs_diff * self.obscov_inv_sqrt.get(row_names=obs_diff.col_names,
        #                                                           col_names=obs_diff.col_names) * obs_diff.T).x)
        q = np.diagonal(
            self.obscov_inv_sqrt.get(row_names=obs_diff.col_names,
                                     col_names=obs_diff.col_names).x)
        phi_vec = []
        for i in range(obs_diff.shape[0]):
            o = obs_diff.x[i, :]
            phi_vec.append(((obs_diff.x[i, :] * q)**2).sum())
        return np.array(phi_vec)

    def _phi_report(self, phi_vec, cur_lam):
        self.phi_csv.write("{0},{1},{2},{3},{4},{5},{6}".format(
            self.iter_num, self.total_runs, cur_lam, phi_vec.min(),
            phi_vec.max(), phi_vec.mean(), np.median(phi_vec), phi_vec.std()))
        self.phi_csv.write(",".join(
            ["{0:20.8}".format(phi) for phi in phi_vec]))
        self.phi_csv.write("\n")
        self.phi_csv.flush()

    def _get_residual_matrix(self, obsensemble):
        obs_matrix = obsensemble.nonzero.as_pyemu_matrix()
        return obs_matrix - self.obs0_matrix.get(
            col_names=obs_matrix.col_names, row_names=obs_matrix.row_names)

    def update(self,
               lambda_mults=[1.0],
               localizer=None,
               run_subset=None,
               use_approx=True):

        if run_subset is not None:
            if run_subset >= self.obsensemble.shape[0]:
                self.logger.warn("run_subset ({0}) >= num of active reals ({1})...ignoring ".\
                                 format(run_subset,self.obsensemble.shape[0]))
                run_subset = None

        self.iter_num += 1
        self.logger.log("iteration {0}".format(self.iter_num))
        self.logger.statement("{0} active realizations".format(
            self.obsensemble.shape[0]))
        if self.obsensemble.shape[0] < 2:
            self.logger.lraise(
                "at least active 2 realizations (really like 300) are needed to update"
            )
        if not self.__initialized:
            #raise Exception("must call initialize() before update()")
            self.logger.lraise("must call initialize() before update()")

        self.logger.log("calculate scaled delta obs")
        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        self.logger.log("calculate scaled delta obs")
        self.logger.log("calculate scaled delta par")
        scaled_delta_par = self._calc_delta_par(self.parensemble)
        self.logger.log("calculate scaled delta par")

        self.logger.log("calculate pseudo inv comps")
        u, s, v = scaled_delta_obs.pseudo_inv_components()
        self.logger.log("calculate pseudo inv comps")

        self.logger.log("calculate obs diff matrix")
        obs_diff = self.obscov_inv_sqrt * self._get_residual_matrix(
            self.obsensemble).T
        self.logger.log("calculate obs diff matrix")

        # here is the math part...calculate upgrade matrices
        mean_lam, std_lam, paren_lam, obsen_lam = [], [], [], []
        lam_vals = []
        for ilam, cur_lam_mult in enumerate(lambda_mults):

            parensemble_cur_lam = self.parensemble.copy()
            #print(parensemble_cur_lam.isnull().values.any())

            cur_lam = self.current_lambda * cur_lam_mult
            lam_vals.append(cur_lam)
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))
            scaled_ident = Cov.identity_like(s) * (cur_lam + 1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            self.logger.log("building upgrade_1 matrix")
            upgrade_1 = -1.0 * (self.half_parcov_diag * scaled_delta_par) *\
                        v * s * scaled_ident * u.T
            self.logger.log("building upgrade_1 matrix")

            # apply localization
            if localizer is not None:
                self.logger.log("applying localization")
                upgrade_1.hadamard_product(localizer)
                self.logger.log("applying localization")

            # apply residual information
            self.logger.log("applying residuals")
            upgrade_1 *= obs_diff
            self.logger.log("applying residuals")

            self.logger.log("processing upgrade_1")
            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            upgrade_1 = upgrade_1.T
            upgrade_1.index = [int(i) for i in upgrade_1.index]
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            if upgrade_1.isnull().values.any():
                self.logger.lraise("NaNs in upgrade_1")
            self.logger.log("processing upgrade_1")

            #print(upgrade_1.isnull().values.any())
            #print(parensemble_cur_lam.index)
            #print(upgrade_1.index)
            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not use_approx and self.iter_num > 1:
                self.logger.log("building upgrade_2 matrix")
                par_diff = (self.parensemble - self.parensemble_0.loc[self.parensemble.index,:]).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.half_parcov_diag * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                upgrade_2 = -1.0 * (self.half_parcov_diag * scaled_delta_par *
                                    x7).to_dataframe()
                upgrade_2.index.name = "parnme"
                upgrade_2 = upgrade_2.T
                upgrade_2.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                upgrade_2.index = [int(i) for i in upgrade_2.index]

                if upgrade_2.isnull().values.any():
                    self.logger.lraise("NaNs in upgrade_2")

                parensemble_cur_lam += upgrade_2
                self.logger.log("building upgrade_2 matrix")
            parensemble_cur_lam.enforce(self.enforce_bounds)

            # this is for testing failed runs on upgrade testing
            # works with the 10par_xsec smoother test
            #parensemble_cur_lam.iloc[:,:] = -1000000.0

            paren_lam.append(pd.DataFrame(parensemble_cur_lam.loc[:, :]))
            self.logger.log("calcs for  lambda {0}".format(cur_lam_mult))

        # subset if needed
        # and combine lambda par ensembles into one par ensemble for evaluation
        if run_subset is not None and run_subset < self.parensemble.shape[0]:
            #subset_idx = ["{0:d}".format(i) for i in np.random.randint(0,self.parensemble.shape[0]-1,run_subset)]
            subset_idx = self.parensemble.iloc[:run_subset, :].index.values
            self.logger.statement("subset idxs: " +
                                  ','.join([str(s) for s in subset_idx]))
            paren_lam_subset = [pe.loc[subset_idx, :] for pe in paren_lam]
            paren_combine = pd.concat(paren_lam_subset, ignore_index=True)
            paren_lam_subset = None
        else:
            subset_idx = self.parensemble.index.values
            paren_combine = pd.concat(paren_lam, ignore_index=True)


        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        failed_runs, obsen_combine = self._calc_obs(paren_combine)
        #if failed_runs is not None:
        #    obsen_combine.loc[failed_runs,:] = np.NaN
        self.logger.log("evaluating ensembles for lambdas : {0}".\
                        format(','.join(["{0:8.3E}".format(l) for l in lam_vals])))
        paren_combine = None

        if failed_runs is not None and len(
                failed_runs) == obsen_combine.shape[0]:
            self.logger.lraise("all runs failed - cannot continue")

        # unpack lambda obs ensembles from combined obs ensemble
        nrun_per_lam = self.obsensemble.shape[0]
        if run_subset is not None:
            nrun_per_lam = run_subset
        obsen_lam = []
        for i in range(len(lam_vals)):
            sidx = i * nrun_per_lam
            eidx = sidx + nrun_per_lam
            oe = ObservationEnsemble.from_dataframe(
                df=obsen_combine.iloc[sidx:eidx, :].copy(), pst=self.pst)
            oe.index = subset_idx
            # check for failed runs in this set - drop failed runs from obs ensembles
            if failed_runs is not None:
                failed_runs_this = np.array(
                    [f for f in failed_runs if f >= sidx and f < eidx]) - sidx
                if len(failed_runs_this) > 0:
                    if len(failed_runs_this) == oe.shape[0]:
                        self.logger.warn(
                            "all runs failed for lambda {0}".format(
                                lam_vals[i]))
                    else:
                        self.logger.warn("{0} run failed for lambda {1}".\
                                         format(len(failed_runs_this),lam_vals[i]))
                    oe.iloc[failed_runs_this, :] = np.NaN
                    oe = oe.dropna()
            obsen_lam.append(oe)
        obsen_combine = None

        # here is where we need to select out the "best" lambda par and obs
        # ensembles
        self.logger.statement("\n**************************")
        self.logger.statement(str(datetime.now()))
        self.logger.statement("total runs:{0}".format(self.total_runs))
        self.logger.statement("iteration: {0}".format(self.iter_num))
        self.logger.statement("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                              format(self.current_lambda,
                         self.last_best_mean,self.last_best_std))
        phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        mean_std = [(pv.mean(), pv.std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i, (m, s) in enumerate(mean_std):
            self.logger.statement(" tested lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                                 format(self.current_lambda * lambda_mults[i],m,s))
            if m < best_mean:
                update_pars = True
                best_mean = m
                best_i = i
                if s < best_std:
                    update_lambda = True
                    best_std = s
        if np.isnan(best_mean):
            self.logger.lraise("best mean = NaN")
        if np.isnan(best_std):
            self.logger.lraise("best std = NaN")

        if not update_pars:
            self.current_lambda *= max(lambda_mults) * 10.0
            self.current_lambda = min(self.current_lambda, 100000)
            self.logger.statement("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))
        else:
            self.parensemble = ParameterEnsemble.from_dataframe(
                df=paren_lam[best_i], pst=self.pst)
            if run_subset is not None:
                failed_runs, self.obsensemble = self._calc_obs(
                    self.parensemble)
                if failed_runs is not None:
                    self.logger.warn("dropping failed realizations")
                    self.parensemble = self.parensemble.drop(failed_runs)
                    self.obsensemble = self.obsensemble.drop(failed_runs)
                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
                self._phi_report(self.current_phi_vec,
                                 self.current_lambda * lambda_mults[best_i])
                best_mean = self.current_phi_vec.mean()
                best_std = self.current_phi_vec.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                # reindex parensemble in case failed runs
                self.parensemble = ParameterEnsemble.from_dataframe(
                    df=self.parensemble.loc[self.obsensemble.index],
                    pst=self.pst)
                self._phi_report(phi_vecs[best_i],
                                 self.current_lambda * lambda_mults[best_i])
                self.current_phi_vec = phi_vecs[best_i]

            self.logger.statement("   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda, 0.001)
            self.logger.statement("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))

        self.logger.statement("**************************\n")
        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))
        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
        if self.raw_sweep_out is not None:
            self.raw_sweep_out.to_csv(self.pst.filename+"_raw{0}".\
                                        format(self.iter_num))
        self.logger.log("iteration {0}".format(self.iter_num))
예제 #2
0
class EnsembleSmoother():

    def __init__(self,pst,parcov=None,obscov=None,num_slaves=0,use_approx=True,
                 restart_iter=0,submit_file=None):
        self.num_slaves = int(num_slaves)
        self.submit_file = submit_file
        self.use_approx = bool(use_approx)
        self.paren_prefix = ".parensemble.{0:04d}.csv"
        self.obsen_prefix = ".obsensemble.{0:04d}.csv"

        if isinstance(pst,str):
            pst = Pst(pst)
        assert isinstance(pst,Pst)
        self.pst = pst
        self.sweep_in_csv = pst.pestpp_options.get("sweep_parameter_csv_file","sweep_in.csv")
        self.sweep_out_csv = pst.pestpp_options.get("sweep_output_csv_file","sweep_out.csv")
        if parcov is not None:
            assert isinstance(parcov,Cov)
        else:
            parcov = Cov.from_parameter_data(self.pst)
        if obscov is not None:
            assert isinstance(obscov,Cov)
        else:
            obscov = Cov.from_observation_data(pst)

        self.parcov = parcov
        self.obscov = obscov
        self.restart = False

        if restart_iter > 0:
            self.restart_iter = restart_iter
            paren = self.pst.filename+self.paren_prefix.format(restart_iter)
            assert os.path.exists(paren),\
                "could not find restart par ensemble {0}".format(paren)
            obsen0 = self.pst.filename+self.obsen_prefix.format(0)
            assert os.path.exists(obsen0),\
                "could not find restart obs ensemble 0 {0}".format(obsen0)
            obsen = self.pst.filename+self.obsen_prefix.format(restart_iter)
            assert os.path.exists(obsen),\
                "could not find restart obs ensemble {0}".format(obsen)
            self.restart = True


        self.__initialized = False
        self.num_reals = 0
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0

    def initialize(self,num_reals,init_lambda=None):
        '''
        (re)initialize the process
        '''
        assert num_reals > 1
        # initialize the phi report csv
        self.phi_csv = open(self.pst.filename+".iobj.csv",'w')
        self.phi_csv.write("iter_num,total_runs,lambda,min,max,mean,median,std,")
        self.phi_csv.write(','.join(["{0:010d}".\
                                    format(i+1) for i in range(num_reals)]))
        self.phi_csv.write('\n')
        self.total_runs = 0
        # this matrix gets used a lot, so only calc once and store
        self.obscov_inv_sqrt = self.obscov.get(self.pst.nnz_obs_names).inv.sqrt
        if self.restart:
            print("restarting...ignoring num_reals")
            raise NotImplementedError()
            df = pd.read_csv(self.pst.filename+self.paren_prefix.format(self.restart_iter))
            self.parensemble_0 = ParameterEnsemble.from_dataframe(df=df,pst=self.pst)
            self.parensemble = self.parensemble_0.copy()
            df = pd.read_csv(self.pst.filename+self.obsen_prefix.format(0))
            self.obsensemble_0 = ObservationEnsemble.from_dataframe(df=df.loc[:,self.pst.nnz_obs_names],
                                                                    pst=self.pst)
            # this matrix gets used a lot, so only calc once
            self.obs0_matrix = self.obsensemble_0.as_pyemu_matrix()
            df = pd.read_csv(self.pst.filename+self.obsen_prefix.format(self.restart_iter))
            self.obsensemble = ObservationEnsemble.from_dataframe(df=df.loc[:,self.pst.nnz_obs_names],
                                                                  pst=self.pst)
            assert self.parensemble.shape[0] == self.obsensemble.shape[0]
            self.num_reals = self.parensemble.shape[0]

        else:
            self.num_reals = int(num_reals)
            self.parensemble_0 = ParameterEnsemble(self.pst)
            self.parensemble_0.draw(cov=self.parcov,num_reals=num_reals)
            self.parensemble_0.enforce()
            self.parensemble = self.parensemble_0.copy()
            self.parensemble_0.to_csv(self.pst.filename +\
                                      self.paren_prefix.format(0))
            self.obsensemble_0 = ObservationEnsemble(self.pst)
            self.obsensemble_0.draw(cov=self.obscov,num_reals=num_reals)
            #self.obsensemble = self.obsensemble_0.copy()

            # save the base obsensemble
            self.obsensemble_0.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(-1))
            self.obs0_matrix = self.obsensemble_0.nonzero.as_pyemu_matrix()

            # run the initial parameter ensemble
            self.obsensemble = self._calc_obs(self.parensemble)
            self.obsensemble.to_csv(self.pst.filename +\
                                      self.obsen_prefix.format(0))
        self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
        self._phi_report(self.current_phi_vec,0.0)
        self.last_best_mean = self.current_phi_vec.mean()
        self.last_best_std = self.current_phi_vec.std()
        if init_lambda is not None:
            self.current_lambda = float(init_lambda)
        else:
            #following chen and oliver
            x = self.last_best_mean / (2.0 * float(self.obsensemble.shape[1]))
            self.current_lambda = 10.0**(np.floor(np.log10(x)))

        # if using the approximate form of the algorithm, let
        # the parameter scaling matrix be the identity matrix
        # jwhite - dec 5 2016 - using the actual parcov inv
        # for upgrades seems to be pushing parameters around
        # too much.  for now, just not using it, maybe
        # better choices of lambda will tame it
        if self.use_approx:
            self.half_parcov_diag = 1.0
        else:
            # if self.parcov.isdiagonal:
            #     self.half_parcov_diag = self.parcov.sqrt.inv
            # else:
            #     self.half_parcov_diag = Cov(x=np.diag(self.parcov.x),
            #                                 names=self.parcov.col_names,
            #                                 isdiagonal=True).inv.sqrt
            self.half_parcov_diag = 1.0
            self.delta_par_prior = self._calc_delta_par(self.parensemble_0)
            u,s,v = self.delta_par_prior.pseudo_inv_components()
            self.Am = u * s.inv
        self.__initialized = True

    def get_localizer(self):
        onames = self.pst.nnz_obs_names
        pnames = self.pst.adj_par_names
        localizer = Matrix(x=np.ones((len(onames),len(pnames))),row_names=onames,col_names=pnames)
        return localizer

    def _calc_delta_par(self,parensemble):
        '''
        calc the scaled parameter ensemble differences from the mean
        '''
        return self._calc_delta(parensemble, self.half_parcov_diag)

    def _calc_delta_obs(self,obsensemble):
        '''
        calc the scaled observation ensemble differences from the mean
        '''
        return self._calc_delta(obsensemble.nonzero, self.obscov.inv.sqrt)

    def _calc_delta(self,ensemble,scaling_matrix):
        '''
        calc the scaled  ensemble differences from the mean
        '''
        mean = np.array(ensemble.mean(axis=0))
        delta = ensemble.as_pyemu_matrix()
        for i in range(self.num_reals):
            delta.x[i,:] -= mean
        delta = scaling_matrix * delta.T
        delta *= (1.0 / np.sqrt(float(self.num_reals - 1.0)))
        return delta

    def _calc_obs(self,parensemble):
        if self.submit_file is None:
            self._calc_obs_local(parensemble)
        else:
            self._calc_obs_condor(parensemble)

    def _calc_obs_condor(self,parensemble):
        parensemble.to_csv(self.sweep_in_csv)
        os.system("condor_rm -all")
        port = 4004
        def master():
            os.system("sweep {0} /h :{1} >nul".format(self.pst.filename,port))
        master_thread = threading.Thread(target=master)
        master_thread.start()
        time.sleep(1.5) #just some time for the master to get up and running to take slaves
        pyemu.utils.start_slaves("template","sweep",self.pst.filename,
                                 self.num_slaves,slave_root='.',port=port)
        os.system("condor_submit {0}".format(self.submit_file))
        master_thread.join()

    def _calc_obs_local(self,parensemble):
        '''
        propagate the ensemble forward using sweep.
        '''
        parensemble.to_csv(self.sweep_in_csv)
        if self.num_slaves > 0:
            port = 4004
            def master():
                os.system("sweep {0} /h :{1} >nul".format(self.pst.filename,port))
            master_thread = threading.Thread(target=master)
            master_thread.start()
            time.sleep(1.5) #just some time for the master to get up and running to take slaves
            pyemu.utils.start_slaves("template","sweep",self.pst.filename,
                                     self.num_slaves,slave_root='.',port=port)
            master_thread.join()
        else:
            os.system("sweep {0}".format(self.pst.filename))

        obs = pd.read_csv(self.sweep_out_csv)
        obs.columns = [item.lower() for item in obs.columns]
        self.total_runs += obs.shape[0]
        return ObservationEnsemble.from_dataframe(df=obs.loc[:,self.obscov.row_names],
                                                  pst=self.pst)

    def _calc_phi_vec(self,obsensemble):
        obs_diff = self._get_residual_matrix(obsensemble)
        phi_vec = np.diagonal((obs_diff * self.obscov_inv_sqrt.get(row_names=obs_diff.col_names,
                                                                   col_names=obs_diff.col_names) * obs_diff.T).x)
        return phi_vec

    def _phi_report(self,phi_vec,cur_lam):
        assert phi_vec.shape[0] == self.num_reals
        self.phi_csv.write("{0},{1},{2},{3},{4},{5},{6}".format(self.iter_num,
                                                             self.total_runs,
                                                             cur_lam,
                                                             phi_vec.min(),
                                                             phi_vec.max(),
                                                             phi_vec.mean(),
                                                             np.median(phi_vec),
                                                             phi_vec.std()))
        self.phi_csv.write(",".join(["{0:20.8}".format(phi) for phi in phi_vec]))
        self.phi_csv.write("\n")
        self.phi_csv.flush()

    def _get_residual_matrix(self, obsensemble):
        obs_matrix = obsensemble.nonzero.as_pyemu_matrix()
        return  obs_matrix - self.obs0_matrix.get(col_names=obs_matrix.col_names,row_names=obs_matrix.row_names)

    def update(self,lambda_mults=[1.0],localizer=None,run_subset=None):

        self.iter_num += 1
        if not self.__initialized:
            raise Exception("must call initialize() before update()")

        scaled_delta_obs = self._calc_delta_obs(self.obsensemble)
        scaled_delta_par = self._calc_delta_par(self.parensemble)

        u,s,v = scaled_delta_obs.pseudo_inv_components()

        obs_diff = self._get_residual_matrix(self.obsensemble)

        if run_subset is not None:
            subset_idx = ["{0:d}".format(i) for i in np.random.randint(0,self.num_reals-1,run_subset)]
            print("subset idxs: " + ','.join(subset_idx))

        mean_lam,std_lam,paren_lam,obsen_lam = [],[],[],[]
        for ilam,cur_lam_mult in enumerate(lambda_mults):

            parensemble_cur_lam = self.parensemble.copy()

            cur_lam = self.current_lambda * cur_lam_mult

            scaled_ident = Cov.identity_like(s) * (cur_lam+1.0)
            scaled_ident += s**2
            scaled_ident = scaled_ident.inv

            # build up this matrix as a single element so we can apply
            # localization
            upgrade_1 = -1.0 * (self.half_parcov_diag * scaled_delta_par) *\
                        v * s * scaled_ident * u.T

            # apply localization
            #print(cur_lam,upgrade_1)
            if localizer is not None:
                upgrade_1.hadamard_product(localizer)

            # apply residual information
            upgrade_1 *= (self.obscov_inv_sqrt * obs_diff.T)

            upgrade_1 = upgrade_1.to_dataframe()
            upgrade_1.index.name = "parnme"
            upgrade_1 = upgrade_1.T
            upgrade_1.to_csv(self.pst.filename+".upgrade_1.{0:04d}.csv".\
                               format(self.iter_num))
            parensemble_cur_lam += upgrade_1

            # parameter-based upgrade portion
            if not self.use_approx and self.iter_num > 1:
                par_diff = (self.parensemble - self.parensemble_0).\
                    as_pyemu_matrix().T
                x4 = self.Am.T * self.half_parcov_diag * par_diff
                x5 = self.Am * x4
                x6 = scaled_delta_par.T * x5
                x7 = v * scaled_ident * v.T * x6
                upgrade_2 = -1.0 * (self.half_parcov_diag *
                                   scaled_delta_par * x7).to_dataframe()

                upgrade_2.index.name = "parnme"
                upgrade_2.T.to_csv(self.pst.filename+".upgrade_2.{0:04d}.csv".\
                                   format(self.iter_num))
                parensemble_cur_lam += upgrade_2.T
            parensemble_cur_lam.enforce()
            paren_lam.append(parensemble_cur_lam)
            if run_subset is not None:
                #phi_series = pd.Series(data=self.current_phi_vec)
                #phi_series.sort_values(inplace=True,ascending=False)
                #subset_idx = ["{0:d}".format(i) for i in phi_series.index.values[:run_subset]]

                parensemble_subset = parensemble_cur_lam.loc[subset_idx,:]
                obsensemble_cur_lam = self._calc_obs(parensemble_subset)
            else:
                obsensemble_cur_lam = self._calc_obs(parensemble_cur_lam)
            #print(obsensemble_cur_lam.head())
            obsen_lam.append(obsensemble_cur_lam)


        # here is where we need to select out the "best" lambda par and obs
        # ensembles
        print("\n**************************")
        print(str(datetime.now()))
        print("total runs:{0}".format(self.total_runs))
        print("iteration: {0}".format(self.iter_num))
        print("current lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda,
                         self.last_best_mean,self.last_best_std))
        phi_vecs = [self._calc_phi_vec(obsen) for obsen in obsen_lam]
        mean_std = [(pv.mean(),pv.std()) for pv in phi_vecs]
        update_pars = False
        update_lambda = False
        # accept a new best if its within 10%
        best_mean = self.last_best_mean * 1.1
        best_std = self.last_best_std * 1.1
        best_i = 0
        for i,(m,s) in enumerate(mean_std):
            print(" tested lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda * lambda_mults[i],m,s))
            if m < best_mean:
                update_pars = True
                best_mean = m
                best_i = i
                if s < best_std:
                    update_lambda = True
                    best_std = s

        if not update_pars:
            self.current_lambda *= max(lambda_mults) * 3.0
            self.current_lambda = min(self.current_lambda,100000)
            print("not accepting iteration, increased lambda:{0}".\
                  format(self.current_lambda))

        else:

            self.parensemble = paren_lam[best_i]
            if run_subset is not None:
                self.obsensemble = self._calc_obs(self.parensemble)
                self.current_phi_vec = self._calc_phi_vec(self.obsensemble)
                self._phi_report(self.current_phi_vec,self.current_lambda * lambda_mults[best_i])
                best_mean = self.current_phi_vec.mean()
                best_std = self.current_phi_vec.std()
            else:
                self.obsensemble = obsen_lam[best_i]
                self._phi_report(phi_vecs[best_i],self.current_lambda * lambda_mults[best_i])
                self.current_phi_vec = phi_vecs[best_i]

            print("\n" + "   best lambda:{0:15.6G}, mean:{1:15.6G}, std:{2:15.6G}".\
                  format(self.current_lambda*lambda_mults[best_i],
                         best_mean,best_std))
            self.last_best_mean = best_mean
            self.last_best_std = best_std

        if update_lambda:
            # be aggressive - cut best lambda in half
            self.current_lambda *= (lambda_mults[best_i] * 0.75)
            # but don't let lambda get too small
            self.current_lambda = max(self.current_lambda,0.001)
            print("updating lambda: {0:15.6G}".\
                  format(self.current_lambda ))


        print("**************************\n")

        self.parensemble.to_csv(self.pst.filename+self.paren_prefix.\
                                    format(self.iter_num))

        self.obsensemble.to_csv(self.pst.filename+self.obsen_prefix.\
                                    format(self.iter_num))
예제 #3
0
파일: smoother.py 프로젝트: wk1984/pyemu
class EnsembleSmoother():

    def __init__(self,pst,parcov=None,obscov=None):
        if isinstance(pst,str):
            pst = Pst(pst)
        assert isinstance(pst,Pst)
        self.pst = pst
        if parcov is not None:
            assert isinstance(parcov,Cov)
        else:
            parcov = Cov.from_parameter_data(self.pst)
        if obscov is not None:
            assert isinstance(obscov,Cov)
        else:
            obscov = Cov.from_observation_data(pst)

        self.parcov = parcov
        self.obscov = obscov

        self.__initialized = False
        self.num_reals = 0
        self.half_parcov_diag = None
        self.half_obscov_diag = None
        self.delta_par_prior = None
        self.iter_num = 0

    def initialize(self,num_reals):
        '''
        (re)initialize the process
        '''
        self.num_reals = int(num_reals)
        self.parensemble_0 = ParameterEnsemble(self.pst)
        self.parensemble_0.draw(cov=self.parcov,num_reals=num_reals)
        self.parensemble = self.parensemble_0.copy()
        self.parensemble_0.to_csv("parensemble.0.csv")


        self.obsensemble_0 = ObservationEnsemble(self.pst)
        self.obsensemble_0.draw(cov=self.obscov,num_reals=num_reals)
        self.obsensemble = self.obsensemble_0.copy()
        self.obsensemble_0.to_csv("obsensemble.0.csv")

        if self.parcov.isdiagonal:
            self.half_parcov_diag = self.parcov.inv.sqrt
        else:
            self.half_parcov_diag = Cov(x=np.diag(self.parcov.x),
                                        names=self.parcov.col_names,
                                        isdiagonal=True).inv.sqrt
        #if self.obscov.isdiagonal:
        #self.half_obscov_inv = self.obscov.inv.sqrt
       # else:
        #    self.half_obscov_diag = Cov(x=np.diag(self.obscov.x),
        #                                names=self.obscov.col_names,
        #                                isdiagonal=True)

        self.delta_par_prior = self._calc_delta_par()

        u,s,v = self.delta_par_prior.pseudo_inv_components()
        self.Am = u * s.inv


        self.__initialized = True

    def _calc_delta_par(self):
        '''
        calc the scaled parameter ensemble differences from the mean
        '''
        mean = np.array(self.parensemble.mean(axis=0))
        delta = self.parensemble.as_pyemu_matrix()
        for i in range(self.num_reals):
            delta.x[i,:] -= mean
        #delta = Matrix(x=(self.half_parcov_diag * delta.transpose()).x,
        #               row_names=self.parensemble.columns)
        delta = self.half_parcov_diag * delta.T
        return delta * (1.0 / np.sqrt(float(self.num_reals - 1.0)))

    def _calc_delta_obs(self):
        '''
        calc the scaled observation ensemble differences from the mean
        '''

        mean = np.array(self.obsensemble.mean(axis=0))
        delta = self.obsensemble.as_pyemu_matrix()
        for i in range(self.num_reals):
            delta.x[i,:] -= mean
        delta = self.obscov.inv.sqrt * delta.T
        return delta * (1.0 / np.sqrt(float(self.num_reals - 1.0)))


    def _calc_obs(self):
        '''
        propagate the ensemble forward...
        '''
        self.parensemble.to_csv(os.path.join("sweep_in.csv"))
        #os.chdir("smoother")
        print(os.listdir('.'))
        os.system("sweep {0}".format(self.pst.filename))
        #os.chdir('..')
        obs = ObservationEnsemble.from_csv(os.path.join('sweep_out.csv'))
        obs.columns = [item.lower() for item in obs.columns]
        self.obsensemble = ObservationEnsemble.from_dataframe(df=obs.loc[:,self.obscov.row_names],pst=self.pst)
        #todo: modifiy sweep to be interactive...
        return

    @property
    def current_lambda(self):
        return 1.0

    def update(self):
        if not self.__initialized:
            raise Exception("must call initialize() before update()")

        self._calc_obs()
        self.iter_num += 1
        self.obsensemble.to_csv("obsensemble.{0}.csv".format(self.iter_num))
        delta_obs = self._calc_delta_obs()

        u,s,v = delta_obs.pseudo_inv_components()
        scaled_par_diff = self._calc_delta_par()
        scaled_obs_diff = self.obsensemble.as_pyemu_matrix() -\
               self.obsensemble_0.as_pyemu_matrix()
        scaled_ident = (self.current_lambda*Cov.identity_like(s) + s**2).inv

        x1 = u.T * self.obscov.inv.sqrt * scaled_obs_diff.T
        x1.autoalign = False
        x2 = scaled_ident * x1
        x3 = v * s * x2
        upgrade_1 = -1.0 *  (self.half_parcov_diag * scaled_par_diff *\
                             x3).to_dataframe()
        upgrade_1.index.name = "parnme"
        upgrade_1.T.to_csv("upgrade_1.{0}.csv".format(self.iter_num))
        self.parensemble += upgrade_1.T
        if self.iter_num > 1:
            par_diff = (self.parensemble - self.parensemble_0).\
                as_pyemu_matrix().T
            x4 = self.Am.T * self.half_parcov_diag * par_diff
            x5 = self.Am * x4
            x6 = scaled_par_diff.T * x5
            x7 = v * scaled_ident * v.T * x6
            upgrade_2 = -1.0 * (self.half_parcov_diag *
                                scaled_par_diff * x7).to_dataframe()
            upgrade_2.index.name = "parnme"
            upgrade_2.T.to_csv("upgrade_2.{0}.csv".format(self.iter_num))
            self.parensemble += upgrade_2.T
        self.parensemble.to_csv("parensemble.{0}.csv".format(self.iter_num))