Exemple #1
0
    def draw(self, cov, num_reals=1, names=None):
        """Draw random realizations from a multivariate Gaussian distribution.

        The rows currently held by the ensemble are discarded and replaced,
        in place, by ``num_reals`` new realizations centered on
        ``self.mean_values``.

        Parameters
        ----------
        cov : Cov instance
            covariance structure to draw from
        num_reals : int
            number of realizations to generate
        names : list of names to draw for.  If None, values are drawn for
            all names (or, when ``self.names`` differs from
            ``cov.row_names``, for the names common to both)

        Returns
        -------
        None
        """
        # integer realization labels 0 .. num_reals-1
        real_names = np.arange(num_reals, dtype=np.int64)

        # make sure the mean vector and the covariance agree on names/ordering
        if names is not None:
            means = self.mean_values.loc[names]
            cov = cov.get(names)
        elif self.names != cov.row_names:
            names = get_common_elements(self.names, cov.row_names)
            means = self.mean_values.loc[names]
            cov = cov.get(names)
        else:
            means = self.mean_values
            names = self.names

        # generate random numbers
        if cov.isdiagonal:
            # independent univariate draws, one name at a time - much faster
            # than a full multivariate draw
            val_array = np.array([
                np.random.normal(mu, std, size=num_reals)
                for mu, std in zip(means, np.sqrt(cov.x))
            ]).transpose()
        else:
            val_array = np.random.multivariate_normal(means, cov.as_2d,
                                                      num_reals)

        # wipe the existing realizations: blank every entry, then drop
        # every row (np.nan rather than the np.NaN alias removed in NumPy 2.0)
        self.loc[:, :] = np.nan
        self.dropna(inplace=True)

        # this sucks - can only set by enlargement one row at a time
        for rname, real_vals in zip(real_names, val_array):
            self.loc[rname, names] = real_vals
            # entries that were not drawn fall back to the mean values
            idx = pd.isnull(self.loc[rname, :])
            self.loc[rname, idx] = self.mean_values[idx]
Exemple #2
0
    def draw(self,cov,num_reals=1):
        """Draw random realizations from a multivariate Gaussian distribution.

        Existing rows of the ensemble are discarded; ``num_reals`` new rows
        are added in place, labelled "0", "1", ...

        Parameters
        ----------
        cov : Cov instance
            covariance structure to draw from
        num_reals : int
            number of realizations to generate

        Returns
        -------
        None
        """
        # string labels for the new realizations
        real_names = ["{0:d}".format(i) for i in range(num_reals)]

        # make sure everything is cool WRT ordering: when the ensemble and
        # the covariance disagree, work only with the names they share
        if self.names != cov.row_names:
            common_names = get_common_elements(self.names,
                                               cov.row_names)
            means = self.mean_values.loc[common_names]
            cov = cov.get(common_names)
        else:
            means = self.mean_values
            common_names = self.names

        # generate random numbers - one row per realization
        val_array = np.random.multivariate_normal(means, cov.as_2d, num_reals)

        # blank every entry and drop all rows so the ensemble starts empty
        # (np.nan: the np.NaN alias no longer exists in NumPy 2.0)
        self.loc[:,:] = np.nan
        self.dropna(inplace=True)

        # this sucks - can only set by enlargement one row at a time
        for rname, real_vals in zip(real_names, val_array):
            self.loc[rname,common_names] = real_vals

            # entries that were not drawn are set to the mean values
            idx = pd.isnull(self.loc[rname,:])
            self.loc[rname,idx] = self.mean_values[idx]
Exemple #3
0
    def from_gaussian_draw(cls, pe, cov, num_reals=1):
        """ this is an experimental method to help speed up draws
        for a really large (>1E6) ensemble sizes.  gets around the
        dataframe expansion-by-loc that is one col at a time.  WARNING:
        this constructor transforms the pe argument and re-indexes
        pe.pst.parameter_data by parameter name!
        :param pe: ParameterEnsemble instance
        :param cov: Covariance instance
        :param num_reals: number of realizations to generate
        :return: ParameterEnsemble
        """

        # string labels for the realizations (the dataframe index)
        real_names = ["{0:d}".format(i) for i in range(num_reals)]

        if not pe.istransformed:
            pe._transform()
        # make sure everything is cool WRT ordering: the mean vector and
        # the covariance must cover exactly the same names
        if pe.names != cov.row_names:
            common_names = get_common_elements(pe.names, cov.row_names)
            vals = pe.mean_values.loc[common_names]
            cov = cov.get(common_names)
        else:
            vals = pe.mean_values
            common_names = pe.names

        # NOTE: vals must stay the (possibly subset) mean vector selected
        # above - resetting it to pe.mean_values here would make its length
        # disagree with the subset covariance
        draws = np.random.multivariate_normal(vals, cov.as_2d, num_reals)
        df = pd.DataFrame(data=draws, columns=common_names, index=real_names)

        # replace the realizations for fixed parameters with the original
        # parval1 in the control file
        pe.pst.parameter_data.index = pe.pst.parameter_data.parnme
        fixed_vals = pe.pst.parameter_data.loc[pe.fixed_indexer, "parval1"]
        for fname, fval in zip(fixed_vals.index, fixed_vals.values):
            df.loc[:, fname] = fval

        # back-transform the log-transformed parameters
        istransformed = pe.pst.parameter_data.loc[:, "partrans"] == "log"
        df.loc[:, istransformed] = 10.0**df.loc[:, istransformed]
        return cls.from_dataframe(pst=pe.pst, df=df)
Exemple #4
0
    def draw(self, cov, num_reals=1):
        """Draw random realizations from a multivariate Gaussian distribution.

        The ensemble is emptied and then refilled, in place, with
        ``num_reals`` realizations labelled "0", "1", ...

        Parameters
        ----------
        cov : Cov instance
            covariance structure to draw from
        num_reals : int
            number of realizations to generate

        Returns
        -------
        None
        """
        # string labels for the new realizations
        real_names = ["{0:d}".format(i) for i in range(num_reals)]

        # make sure everything is cool WRT ordering: fall back to the names
        # shared by the ensemble and the covariance when they differ
        if self.names != cov.row_names:
            common_names = get_common_elements(self.names, cov.row_names)
            means = self.mean_values.loc[common_names]
            cov = cov.get(common_names)
        else:
            means = self.mean_values
            common_names = self.names

        # generate random numbers - one row per realization
        val_array = np.random.multivariate_normal(means, cov.as_2d, num_reals)

        # blank every entry, then drop all rows, leaving an empty ensemble
        # (np.nan is used because NumPy 2.0 removed the np.NaN alias)
        self.loc[:, :] = np.nan
        self.dropna(inplace=True)

        # this sucks - can only set by enlargement one row at a time
        for rname, real_vals in zip(real_names, val_array):
            self.loc[rname, common_names] = real_vals
            # entries that were not drawn are set to the mean values
            idx = pd.isnull(self.loc[rname, :])
            self.loc[rname, idx] = self.mean_values[idx]
Exemple #5
0
    def project(self, projection_matrix, inplace=True, log=None, enforce=True):
        """Project the ensemble.

        Each realization's difference from the mean values is multiplied
        by the projection operator and added back onto the mean values.

        Parameters
        ----------
        projection_matrix : pyemu.Matrix
            projection operator - must already respect log transform
        inplace : bool
            project self or return a new ParameterEnsemble instance
        log : pyemu.la.logger instance
            for logging progress
        enforce : bool
            parameter bound enforcement flag (True)

        Returns
        -------
        ParameterEnsemble if not inplace, otherwise None

        Note
        ----
        self is transformed if it is not already.  When ``inplace`` is
        False only the returned ensemble is back-transformed by this method.
        """

        if not self.istransformed:
            self._transform()

        # make sure everything is cool WRT ordering
        common_names = get_common_elements(self.adj_names,
                                           projection_matrix.row_names)
        base = self.mean_values.loc[common_names]
        projection_matrix = projection_matrix.get(common_names, common_names)

        if not inplace:
            new_en = ParameterEnsemble(pst=self.pst.get(),
                                       data=self.loc[:, :].copy(),
                                       columns=self.columns,
                                       mean_values=self.mean_values.copy(),
                                       istransformed=self.istransformed)

        for real in self.index:
            # the same message is logged before and after the projection -
            # presumably the logger pairs them as start/finish; confirm
            if log is not None:
                log("projecting realization {0}".format(real))

            # null space projection of difference vector
            # (.values instead of Series.as_matrix(), removed in pandas 1.0)
            pdiff = np.dot(projection_matrix.x,
                           (self.loc[real, common_names] - base).values)

            if inplace:
                self.loc[real, common_names] = base + pdiff
            else:
                new_en.loc[real, common_names] = base + pdiff

            if log is not None:
                log("projecting realization {0}".format(real))

        if not inplace:
            if enforce:
                new_en.enforce()
            new_en._back_transform()
            return new_en

        if enforce:
            self.enforce()
        self._back_transform()
Exemple #6
0
    def project(self,projection_matrix,inplace=True,log=None,enforce=True):
        """Project the ensemble.

        For every realization, the difference from the mean values is
        passed through the projection operator and the result is added
        back onto the mean values.

        Parameters
        ----------
        projection_matrix : pyemu.Matrix
            projection operator - must already respect log transform
        inplace : bool
            project self or return a new ParameterEnsemble instance
        log : pyemu.la.logger instance
            for logging progress
        enforce : bool
            parameter bound enforcement flag (True)

        Returns
        -------
        ParameterEnsemble if not inplace, otherwise None

        Note
        ----
        self is transformed on entry when needed; with ``inplace=False``
        this method back-transforms only the returned ensemble.
        """

        if not self.istransformed:
            self._transform()

        # make sure everything is cool WRT ordering
        common_names = get_common_elements(self.adj_names,
                                           projection_matrix.row_names)
        base = self.mean_values.loc[common_names]
        projection_matrix = projection_matrix.get(common_names,common_names)

        if not inplace:
            new_en = ParameterEnsemble(pst=self.pst.get(),
                                       data=self.loc[:,:].copy(),
                                       columns=self.columns,
                                       mean_values=self.mean_values.copy(),
                                       istransformed=self.istransformed)

        for real in self.index:
            # logged once before and once after each realization -
            # presumably a start/finish pair for the logger; confirm
            if log is not None:
                log("projecting realization {0}".format(real))

            # null space projection of difference vector; Series.as_matrix()
            # no longer exists in pandas >= 1.0, .values is equivalent
            diff = self.loc[real,common_names] - base
            pdiff = np.dot(projection_matrix.x, diff.values)

            if inplace:
                self.loc[real,common_names] = base + pdiff
            else:
                new_en.loc[real,common_names] = base + pdiff

            if log is not None:
                log("projecting realization {0}".format(real))

        if not inplace:
            if enforce:
                new_en.enforce()
            new_en._back_transform()
            return new_en

        if enforce:
            self.enforce()
        self._back_transform()
Exemple #7
0
    def project(self,
                projection_matrix,
                inplace=True,
                log=None,
                enforce_bounds="reset"):
        """Project the ensemble.

        Columns whose ``partrans`` is "log" are converted to log10 space,
        each realization's difference from the mean values is multiplied
        by the projection operator and added back to the mean values, and
        the result is converted back with ``10.0**``.

        Parameters
        ----------
        projection_matrix : pyemu.Matrix
            projection operator - must already respect log transform
        inplace : bool
            project self or return a new ParameterEnsemble instance
        log : pyemu.la.logger instance
            for logging progress
        enforce_bounds : str
            parameter bound enforcement flag.  'drop' removes
            offending realizations, 'reset' resets offending values

        Returns
        -------
        ParameterEnsemble if not inplace, otherwise None

        Note
        ----
        self is always log-transformed by this method; when ``inplace`` is
        False only the returned ensemble is converted back.
        """

        if self.istransformed:
            self._back_transform()

        # log10-transform the log parameters by hand and flag the ensemble
        istransformed = self.pst.parameter_data.loc[:, "partrans"] == "log"
        self.loc[:, istransformed] = self.loc[:, istransformed].applymap(
            math.log10)
        self.__istransformed = True

        # make sure everything is cool WRT ordering
        common_names = get_common_elements(self.adj_names,
                                           projection_matrix.row_names)
        base = self.mean_values.loc[common_names]
        projection_matrix = projection_matrix.get(common_names, common_names)

        if not inplace:
            new_en = ParameterEnsemble(pst=self.pst.get(),
                                       data=self.loc[:, :].copy(),
                                       columns=self.columns,
                                       mean_values=self.mean_values.copy(),
                                       istransformed=self.istransformed)

        for real in self.index:
            # the same message is logged before and after the projection -
            # presumably the logger pairs them as start/finish; confirm
            if log is not None:
                log("projecting realization {0}".format(real))

            # null space projection of difference vector - the difference
            # is computed once, and .values replaces Series.as_matrix()
            # (removed in pandas 1.0)
            diff = self.loc[real, common_names] - base
            pdiff = np.dot(projection_matrix.x, diff.values)

            if inplace:
                self.loc[real, common_names] = base + pdiff
            else:
                new_en.loc[real, common_names] = base + pdiff

            if log is not None:
                log("projecting realization {0}".format(real))

        if not inplace:
            new_en.enforce(enforce_bounds)
            new_en.loc[:, istransformed] = 10.0**new_en.loc[:, istransformed]
            new_en.__istransformed = False
            return new_en

        self.enforce(enforce_bounds)
        self.loc[:, istransformed] = 10.0**self.loc[:, istransformed]
        self.__istransformed = False
Exemple #8
0
    def from_gaussian_draw(cls, pe, cov, num_reals=1):
        """ this is an experimental method to help speed up draws
        for a really large (>1E6) ensemble sizes.  gets around the
        dataframe expansion-by-loc that is one col at a time.  WARNING:
        this constructor transforms the pe argument and re-indexes
        pe.pst.parameter_data by parameter name!  Progress messages are
        printed to stdout.
        :param pe: ParameterEnsemble instance
        :param cov: Covariance instance
        :param num_reals: number of realizations to generate
        :return: ParameterEnsemble
        """

        # integer realization labels 0 .. num_reals-1 (the dataframe index)
        real_names = np.arange(num_reals, dtype=np.int64)

        if not pe.istransformed:
            pe._transform()
        # make sure everything is cool WRT ordering
        if pe.names != cov.row_names:
            common_names = get_common_elements(pe.names, cov.row_names)
            vals = pe.mean_values.loc[common_names]
            cov = cov.get(common_names)
        else:
            vals = pe.mean_values
            common_names = pe.names

        if cov.isdiagonal:
            print("making diagonal cov draws")
            stds = np.sqrt(cov.x.flatten())
            # build the lookup once instead of scanning a list per parameter
            adj_names = set(pe.pst.adj_par_names)
            # one column per common name, filled in common_names order so
            # the data lines up with the column labels used below
            arr = np.zeros((num_reals, len(common_names)))
            for i, (pname, mean, std) in enumerate(
                    zip(common_names, vals, stds)):
                if pname in adj_names:
                    arr[:, i] = np.random.normal(mean, std, size=num_reals)
                else:
                    # non-adjustable parameters stay at their mean value
                    arr[:, i] = mean

            df = pd.DataFrame(data=arr, columns=common_names, index=real_names)
        else:
            print("making full cov draws")
            df = pd.DataFrame(data=np.random.multivariate_normal(
                vals, cov.as_2d, num_reals),
                              columns=common_names,
                              index=real_names)

        # NOTE(review): this label lookup happens before the index is reset
        # to parnme below - assumes parameter_data is already indexed by
        # parameter name; confirm
        istransformed = pe.pst.parameter_data.loc[common_names,
                                                  "partrans"] == "log"
        print("back transforming")
        df.loc[:, istransformed] = 10.0**df.loc[:, istransformed]

        # replace the realizations for fixed parameters with the original
        # parval1 in the control file
        print("handling fixed pars")
        pe.pst.parameter_data.index = pe.pst.parameter_data.parnme
        fixed_vals = pe.pst.parameter_data.loc[pe.fixed_indexer, "parval1"]
        for fname, fval in zip(fixed_vals.index, fixed_vals.values):
            print(fname)
            df.loc[:, fname] = fval

        new_pe = cls.from_dataframe(pst=pe.pst, df=df)
        return new_pe
Exemple #9
0
    def from_gaussian_draw_homegrown(cls, pe, cov, num_reals=1):
        """ experimental constructor that speeds up draws for really
        large (>1E6) ensemble sizes.  Avoids the dataframe
        expansion-by-loc that is one col at a time and implements the
        multivariate normal draws itself to get around the 32-bit lapack
        limitations in scipy/numpy

        Parameters
        ----------
        pe : ParameterEnsemble
            existing ParameterEnsemble used to get information
            needed to call ParameterEnsemble constructor
        cov : (pyemu.Cov)
            covariance matrix to use for drawing
        num_reals : int
            number of realizations to generate

        Returns
        -------
        ParameterEnsemble : ParameterEnsemble

        Note
        ----
        this constructor transforms the pe argument and re-indexes
        pe.pst.parameter_data by parameter name!  Start and finish
        messages are printed to stdout.
        """

        start = datetime.now()
        print("{0} - starting home-grown multivariate draws".format(start))

        # integer realization labels 0 .. num_reals-1
        real_names = np.arange(num_reals, dtype=np.int64)

        if not pe.istransformed:
            pe._transform()

        # make sure everything is cool WRT ordering
        if pe.names == cov.row_names:
            vals = pe.mean_values
            common_names = pe.names
        else:
            common_names = get_common_elements(pe.names, cov.row_names)
            vals = pe.mean_values.loc[common_names]
            cov = cov.get(common_names)

        # standard normal vectors, one row per realization
        snv = np.random.randn(num_reals, cov.shape[0])

        # jwhite - 18-dec-17: the cholesky version is giving significantly
        # diff results compared to eigen solve, so it is turned off for
        # now - need to learn more about this...
        use_chol = False
        if use_chol:
            a = np.linalg.cholesky(cov.as_2d)
        else:
            # eigen-decompose and form the projection matrix
            # NOTE(review): a slightly negative round-off eigenvalue would
            # turn into NaN under np.sqrt - confirm cov is well conditioned
            eig_vals, eig_vecs = np.linalg.eigh(cov.as_2d)
            a = np.dot(eig_vecs, np.diag(np.sqrt(eig_vals)))

        # project each standard normal vector and shift it by the means
        reals = [vals + np.dot(a, vec) for vec in snv]
        df = pd.DataFrame(reals, columns=common_names, index=real_names)

        # back-transform the log-transformed parameters
        par_data = pe.pst.parameter_data
        is_log = par_data.loc[common_names, "partrans"] == "log"
        df.loc[:, is_log] = 10.0**df.loc[:, is_log]

        # replace the realizations for fixed parameters with the original
        # parval1 in the control file
        par_data.index = par_data.parnme
        fixed_vals = par_data.loc[pe.fixed_indexer, "parval1"]
        for fname, fval in zip(fixed_vals.index, fixed_vals.values):
            df.loc[:, fname] = fval

        new_pe = cls.from_dataframe(pst=pe.pst, df=df)

        end = datetime.now()
        print("{0} - done...took {1}".format(end,
                                             (end - start).total_seconds()))

        return new_pe
Exemple #10
0
    def project(self,
                projection_matrix,
                inplace=True,
                log=None,
                enforce_bounds="reset"):
        """ project the ensemble using the null-space Monte Carlo method

        Parameters
        ----------
        projection_matrix : pyemu.Matrix
            projection operator - must already respect log transform

        inplace : bool
            project self or return a new ParameterEnsemble instance

        log: pyemu.Logger
            for logging progress

        enforce_bounds : str
            parameter bound enforcement flag. 'drop' removes
            offending realizations, 'reset' resets offending values

        Returns
        -------
        ParameterEnsemble : ParameterEnsemble
            if inplace is False

        Note
        ----
        self is always log-transformed by this method; when inplace is
        False only the returned ensemble is converted back with ``10.0**``.

        """

        if self.istransformed:
            self._back_transform()

        # log10-transform the "log" parameters by hand and flag the ensemble
        istransformed = self.pst.parameter_data.loc[:, "partrans"] == "log"
        self.loc[:, istransformed] = self.loc[:, istransformed].applymap(
            math.log10)
        self.__istransformed = True

        # make sure everything is cool WRT ordering
        common_names = get_common_elements(self.adj_names,
                                           projection_matrix.row_names)
        base = self.mean_values.loc[common_names]
        projection_matrix = projection_matrix.get(common_names, common_names)

        if not inplace:
            new_en = ParameterEnsemble(pst=self.pst.get(),
                                       data=self.loc[:, :].copy(),
                                       columns=self.columns,
                                       mean_values=self.mean_values.copy(),
                                       istransformed=self.istransformed)

        for real in self.index:
            # logged once before and once after each realization -
            # presumably a start/finish pair for the logger; confirm
            if log is not None:
                log("projecting realization {0}".format(real))

            # null space projection of difference vector: the difference is
            # formed once, and .values stands in for Series.as_matrix(),
            # which pandas 1.0 removed
            diff = self.loc[real, common_names] - base
            pdiff = np.dot(projection_matrix.x, diff.values)

            if inplace:
                self.loc[real, common_names] = base + pdiff
            else:
                new_en.loc[real, common_names] = base + pdiff

            if log is not None:
                log("projecting realization {0}".format(real))

        if not inplace:
            new_en.enforce(enforce_bounds)
            new_en.loc[:, istransformed] = 10.0**new_en.loc[:, istransformed]
            new_en.__istransformed = False
            return new_en

        self.enforce(enforce_bounds)
        self.loc[:, istransformed] = 10.0**self.loc[:, istransformed]
        self.__istransformed = False