def draw(self, cov, num_reals=1, names=None):
    """ draw random realizations from a multivariate
        Gaussian distribution

    Parameters:
    ----------
        cov: a Cov instance
            covariance structure to draw from
        num_reals: int
            number of realizations to generate
        names : list
            names to draw for.  If None, the names shared by
            self.names and cov.row_names are drawn

    Returns:
    -------
        None

    Note:
    ----
        the existing rows of self are discarded and replaced by the
        new realizations, which are labelled 0..num_reals-1.  Entries
        that were not drawn are filled from self.mean_values.
    """
    # integer realization (row) labels
    real_names = np.arange(num_reals, dtype=np.int64)

    # make sure everything is cool WRT ordering
    if names is not None:
        means = self.mean_values.loc[names]
        cov = cov.get(names)
    elif self.names != cov.row_names:
        names = get_common_elements(self.names, cov.row_names)
        means = self.mean_values.loc[names]
        cov = cov.get(names)
    else:
        means = self.mean_values
        names = self.names

    # generate random numbers
    if cov.isdiagonal:
        # diagonal covariance: one univariate draw per name is much
        # faster than a full multivariate draw
        val_array = np.array([
            np.random.normal(mu, std, size=num_reals)
            for mu, std in zip(means, np.sqrt(cov.x))
        ]).transpose()
    else:
        val_array = np.random.multivariate_normal(means, cov.as_2d,
                                                  num_reals)

    # wipe the existing realizations: blank every entry, then drop the
    # rows that now contain NaN.  np.nan is used because the np.NaN
    # alias no longer exists in NumPy >= 2.0
    self.loc[:, :] = np.nan
    self.dropna(inplace=True)

    # this sucks - can only set by enlargement one row at a time
    for rname, real_vals in zip(real_names, val_array):
        self.loc[rname, names] = real_vals
        # set NaNs (columns that were not drawn) to mean_values
        missing = pd.isnull(self.loc[rname, :])
        self.loc[rname, missing] = self.mean_values[missing]
def draw(self, cov, num_reals=1):
    """ draw random realizations from a multivariate
        Gaussian distribution

    Parameters:
    ----------
        cov: a Cov instance
            covariance structure to draw from
        num_reals: int
            number of realizations to generate

    Returns:
    -------
        None

    Note:
    ----
        the existing rows of self are discarded and replaced by the
        new realizations, which are labelled "0".."num_reals-1".
        Entries that were not drawn are filled from self.mean_values.
    """
    # string realization (row) labels
    real_names = ["{0:d}".format(i) for i in range(num_reals)]

    # make sure everything is cool WRT ordering
    if self.names != cov.row_names:
        common_names = get_common_elements(self.names, cov.row_names)
        means = self.mean_values.loc[common_names]
        cov = cov.get(common_names)
    else:
        means = self.mean_values
        common_names = self.names

    # generate random numbers
    val_array = np.random.multivariate_normal(means, cov.as_2d, num_reals)

    # wipe the existing realizations: blank every entry, then drop the
    # rows that now contain NaN.  np.nan is used because the np.NaN
    # alias no longer exists in NumPy >= 2.0
    self.loc[:, :] = np.nan
    self.dropna(inplace=True)

    # this sucks - can only set by enlargement one row at a time
    for rname, real_vals in zip(real_names, val_array):
        self.loc[rname, common_names] = real_vals
        # set NaNs (columns that were not drawn) to mean_values
        missing = pd.isnull(self.loc[rname, :])
        self.loc[rname, missing] = self.mean_values[missing]
def from_gaussian_draw(cls, pe, cov, num_reals=1):
    """ this is an experimental method to help speed up draws
    for really large (>1E6) ensemble sizes.  It builds the whole
    dataframe of realizations in one call instead of expanding a
    dataframe by loc.
    WARNING: this constructor transforms the pe argument!

    :param pe: ParameterEnsemble instance
    :param cov: Covariance instance
    :param num_reals: number of realizations to generate
    :return: ParameterEnsemble
    """
    # string realization (row) labels
    real_names = ["{0:d}".format(i) for i in range(num_reals)]

    if not pe.istransformed:
        pe._transform()

    # make sure everything is cool WRT ordering
    if pe.names != cov.row_names:
        common_names = get_common_elements(pe.names, cov.row_names)
        vals = pe.mean_values.loc[common_names]
        cov = cov.get(common_names)
    else:
        vals = pe.mean_values
        common_names = pe.names
    # NOTE: vals must not be reset to the full pe.mean_values here - the
    # mean vector has to stay aligned with the (possibly reduced) cov

    df = pd.DataFrame(
        data=np.random.multivariate_normal(vals, cov.as_2d, num_reals),
        columns=common_names,
        index=real_names,
    )

    # replace the realizations for fixed parameters with the original
    # parval1 in the control file
    pe.pst.parameter_data.index = pe.pst.parameter_data.parnme
    fixed_vals = pe.pst.parameter_data.loc[pe.fixed_indexer, "parval1"]
    for fname, fval in zip(fixed_vals.index, fixed_vals.values):
        df.loc[:, fname] = fval

    # back-transform the log-transformed parameters out of log10 space
    istransformed = pe.pst.parameter_data.loc[:, "partrans"] == "log"
    df.loc[:, istransformed] = 10.0**df.loc[:, istransformed]
    return cls.from_dataframe(pst=pe.pst, df=df)
def draw(self, cov, num_reals=1):
    """ draw random realizations from a multivariate
        Gaussian distribution

    Parameters:
    ----------
        cov: a Cov instance
            covariance structure to draw from
        num_reals: int
            number of realizations to generate

    Returns:
    -------
        None

    Note:
    ----
        the rows currently held by self are thrown away; the new
        realizations are labelled "0".."num_reals-1" and any entry
        that was not drawn is taken from self.mean_values.
    """
    # string realization (row) labels
    real_names = ["{0:d}".format(i) for i in range(num_reals)]

    # make sure everything is cool WRT ordering
    if self.names != cov.row_names:
        common_names = get_common_elements(self.names, cov.row_names)
        mean_vec = self.mean_values.loc[common_names]
        cov = cov.get(common_names)
    else:
        mean_vec = self.mean_values
        common_names = self.names

    # generate random numbers
    val_array = np.random.multivariate_normal(mean_vec, cov.as_2d,
                                              num_reals)

    # empty the frame while keeping its columns: blank every entry and
    # drop the rows that now contain NaN.  np.nan replaces the np.NaN
    # alias, which was removed in NumPy 2.0
    self.loc[:, :] = np.nan
    self.dropna(inplace=True)

    # this sucks - can only set by enlargement one row at a time
    for rname, row in zip(real_names, val_array):
        self.loc[rname, common_names] = row
        # set NaNs (columns that were not drawn) to mean_values
        isnull = pd.isnull(self.loc[rname, :])
        self.loc[rname, isnull] = self.mean_values[isnull]
def project(self, projection_matrix, inplace=True, log=None, enforce=True):
    """ project the ensemble

    Parameters:
    ----------
        projection_matrix: (pyemu.Matrix)
            projection operator - must already respect log transform
        inplace: (boolean)
            project self or return a new ParameterEnsemble instance
        log: (pyemu.la.logger instance)
            for logging progress
        enforce: (bool)
            parameter bound enforcement flag (True)

    Returns:
    -------
        if not inplace, ParameterEnsemble, otherwise None

    Note:
    ----
        self is transformed if it is not already.  NOTE(review): when
        inplace is False only the returned ensemble is back-transformed;
        self is left transformed - confirm this is intended.
    """
    if not self.istransformed:
        self._transform()

    # make sure everything is cool WRT ordering
    common_names = get_common_elements(self.adj_names,
                                       projection_matrix.row_names)
    base = self.mean_values.loc[common_names]
    projection_matrix = projection_matrix.get(common_names, common_names)

    # the ensemble that receives the projected realizations
    if inplace:
        target = self
    else:
        target = ParameterEnsemble(pst=self.pst.get(),
                                   data=self.loc[:, :].copy(),
                                   columns=self.columns,
                                   mean_values=self.mean_values.copy(),
                                   istransformed=self.istransformed)

    for real in self.index:
        if log is not None:
            log("projecting realization {0}".format(real))

        # null space projection of difference vector
        # (.values replaces Series.as_matrix(), removed in pandas 1.0)
        diff = (self.loc[real, common_names] - base).values
        pdiff = np.dot(projection_matrix.x, diff)
        target.loc[real, common_names] = base + pdiff

        # same message logged again after the work is done - presumably
        # the logger pairs identical messages as start/finish; confirm
        if log is not None:
            log("projecting realization {0}".format(real))

    if enforce:
        target.enforce()
    target._back_transform()
    if not inplace:
        return target
def project(self, projection_matrix, inplace=True, log=None, enforce=True):
    """ project the ensemble

    Parameters:
    ----------
        projection_matrix: (pyemu.Matrix)
            projection operator - must already respect log transform
        inplace: (boolean)
            project self or return a new ParameterEnsemble instance
        log: (pyemu.la.logger instance)
            for logging progress
        enforce: (bool)
            parameter bound enforcement flag (True)

    Returns:
    -------
        if not inplace, ParameterEnsemble, otherwise None

    Note:
    ----
        self is transformed first when it is not already.
        NOTE(review): with inplace=False only the returned copy is
        back-transformed, so self stays transformed - verify callers
        expect that.
    """
    if not self.istransformed:
        self._transform()

    # make sure everything is cool WRT ordering
    common_names = get_common_elements(self.adj_names,
                                       projection_matrix.row_names)
    base = self.mean_values.loc[common_names]
    projection_matrix = projection_matrix.get(common_names, common_names)

    # project into self, or into a copy when inplace is False
    if inplace:
        projected = self
    else:
        projected = ParameterEnsemble(pst=self.pst.get(),
                                      data=self.loc[:, :].copy(),
                                      columns=self.columns,
                                      mean_values=self.mean_values.copy(),
                                      istransformed=self.istransformed)

    for real in self.index:
        if log is not None:
            log("projecting realization {0}".format(real))

        # null space projection of difference vector; Series.values is
        # used because Series.as_matrix() was removed in pandas 1.0
        offset = (self.loc[real, common_names] - base).values
        projected.loc[real, common_names] = base + np.dot(
            projection_matrix.x, offset)

        # the identical message is logged a second time once the
        # realization is done - presumably the logger treats the repeat
        # as the "finished" marker; confirm against the logger
        if log is not None:
            log("projecting realization {0}".format(real))

    if enforce:
        projected.enforce()
    projected._back_transform()
    if not inplace:
        return projected
def project(self, projection_matrix, inplace=True, log=None,
            enforce_bounds="reset"):
    """ project the ensemble

    Parameters:
    ----------
        projection_matrix: (pyemu.Matrix)
            projection operator - must already respect log transform
        inplace: (boolean)
            project self or return a new ParameterEnsemble instance
        log: (pyemu.la.logger instance)
            for logging progress
        enforce_bounds: (str)
            parameter bound enforcement flag. 'drop' removes
            offending realizations, 'reset' resets offending values

    Returns:
    -------
        if not inplace, ParameterEnsemble, otherwise None

    Note:
    ----
        self is always log10-transformed before projecting.
        NOTE(review): when inplace is False only the returned ensemble
        is converted back, so self is left in log space and flagged as
        transformed - confirm this is intended.
    """
    if self.istransformed:
        self._back_transform()

    # log10-transform the parameters flagged "log" in the control file
    istransformed = self.pst.parameter_data.loc[:, "partrans"] == "log"
    self.loc[:, istransformed] = \
        self.loc[:, istransformed].applymap(math.log10)
    self.__istransformed = True

    # make sure everything is cool WRT ordering
    common_names = get_common_elements(self.adj_names,
                                       projection_matrix.row_names)
    base = self.mean_values.loc[common_names]
    projection_matrix = projection_matrix.get(common_names, common_names)

    if not inplace:
        new_en = ParameterEnsemble(pst=self.pst.get(),
                                   data=self.loc[:, :].copy(),
                                   columns=self.columns,
                                   mean_values=self.mean_values.copy(),
                                   istransformed=self.istransformed)

    for real in self.index:
        if log is not None:
            log("projecting realization {0}".format(real))

        # null space projection of difference vector; the difference is
        # computed once, and .values replaces Series.as_matrix(), which
        # was removed in pandas 1.0
        pdiff = self.loc[real, common_names] - base
        pdiff = np.dot(projection_matrix.x, pdiff.values)

        if inplace:
            self.loc[real, common_names] = base + pdiff
        else:
            new_en.loc[real, common_names] = base + pdiff

        # same message logged again after the work - presumably the
        # logger pairs identical messages as start/finish; confirm
        if log is not None:
            log("projecting realization {0}".format(real))

    if not inplace:
        new_en.enforce(enforce_bounds)
        # undo the log10 transform on the returned ensemble
        new_en.loc[:, istransformed] = 10.0**new_en.loc[:, istransformed]
        new_en.__istransformed = False
        return new_en

    self.enforce(enforce_bounds)
    # undo the log10 transform applied at the top of this method
    self.loc[:, istransformed] = 10.0**self.loc[:, istransformed]
    self.__istransformed = False
def from_gaussian_draw(cls, pe, cov, num_reals=1):
    """ this is an experimental method to help speed up draws
    for really large (>1E6) ensemble sizes.  It builds the whole
    dataframe of realizations in one go instead of expanding a
    dataframe by loc.
    WARNING: this constructor transforms the pe argument!

    :param pe: ParameterEnsemble instance
    :param cov: Covariance instance
    :param num_reals: number of realizations to generate
    :return: ParameterEnsemble
    """
    # integer realization (row) labels
    real_names = np.arange(num_reals, dtype=np.int64)

    if not pe.istransformed:
        pe._transform()

    # make sure everything is cool WRT ordering
    if pe.names != cov.row_names:
        common_names = get_common_elements(pe.names, cov.row_names)
        vals = pe.mean_values.loc[common_names]
        cov = cov.get(common_names)
    else:
        vals = pe.mean_values
        common_names = pe.names

    if cov.isdiagonal:
        print("making diagonal cov draws")
        # the array is sized and filled in common_names order so that
        # it matches the column labels given to the dataframe below
        means = np.asarray(vals)
        stds = np.sqrt(cov.x.flatten())
        adj_names = set(pe.pst.adj_par_names)
        arr = np.zeros((num_reals, len(common_names)))
        for i, pname in enumerate(common_names):
            if pname in adj_names:
                arr[:, i] = np.random.normal(means[i], stds[i],
                                             size=num_reals)
            else:
                # non-adjustable parameters are held at their mean value
                arr[:, i] = means[i]
        df = pd.DataFrame(data=arr, columns=common_names, index=real_names)
    else:
        print("making full cov draws")
        df = pd.DataFrame(
            data=np.random.multivariate_normal(vals, cov.as_2d, num_reals),
            columns=common_names,
            index=real_names,
        )

    # back-transform the log-transformed parameters out of log10 space;
    # done before the fixed-parameter columns are written so the mask
    # covers exactly the columns present in df
    istransformed = \
        pe.pst.parameter_data.loc[common_names, "partrans"] == "log"
    print("back transforming")
    df.loc[:, istransformed] = 10.0**df.loc[:, istransformed]

    # replace the realizations for fixed parameters with the original
    # parval1 in the control file
    print("handling fixed pars")
    pe.pst.parameter_data.index = pe.pst.parameter_data.parnme
    fixed_vals = pe.pst.parameter_data.loc[pe.fixed_indexer, "parval1"]
    for fname, fval in zip(fixed_vals.index, fixed_vals.values):
        print(fname)
        df.loc[:, fname] = fval

    new_pe = cls.from_dataframe(pst=pe.pst, df=df)
    return new_pe
def from_gaussian_draw_homegrown(cls, pe, cov, num_reals=1):
    """ this is an experimental method to help speed up draws
    for really large (>1E6) ensemble sizes.  It builds the whole
    dataframe of realizations in one go instead of expanding a
    dataframe by loc.  Implements multivariate normal draws to get
    around the 32-bit lapack limitations in scipy/numpy

    Parameters
    ----------
    pe : ParameterEnsemble
        existing ParameterEnsemble used to get information
        needed to call ParameterEnsemble constructor
    cov : (pyemu.Cov)
        covariance matrix to use for drawing
    num_reals : int
        number of realizations to generate

    Returns
    -------
    ParameterEnsemble : ParameterEnsemble

    Note
    ----
    this constructor transforms the pe argument!
    """
    s = datetime.now()
    print("{0} - starting home-grown multivariate draws".format(s))

    # integer realization (row) labels
    real_names = np.arange(num_reals, dtype=np.int64)

    if not pe.istransformed:
        pe._transform()

    # make sure everything is cool WRT ordering
    if pe.names != cov.row_names:
        common_names = get_common_elements(pe.names, cov.row_names)
        vals = pe.mean_values.loc[common_names]
        cov = cov.get(common_names)
    else:
        vals = pe.mean_values
        common_names = pe.names

    # generate standard normal vectors, one row per realization
    snv = np.random.randn(num_reals, cov.shape[0])

    # jwhite - 18-dec-17: the cholesky version is giving significantly diff
    # results compared to eigen solve, so turning this off for now - need to
    # learn more about this...
    use_chol = False
    if use_chol:
        a = np.linalg.cholesky(cov.as_2d)
    else:
        # decompose...
        v, w = np.linalg.eigh(cov.as_2d)
        # roundoff can return tiny negative eigenvalues for a rank-deficient
        # covariance; clip them at zero so the square root cannot turn the
        # realizations into NaN
        v = np.maximum(v, 0.0)
        # form projection matrix: scaling the columns of w is the same as
        # w . diag(sqrt(v)) without building the dense diagonal matrix
        a = w * np.sqrt(v)

    # project every standard normal vector at once:
    # row i of reals is vals + a . snv[i]
    reals = np.asarray(vals) + np.dot(snv, a.T)
    df = pd.DataFrame(reals, columns=common_names, index=real_names)

    # back-transform the log-transformed parameters out of log10 space;
    # done before the fixed-parameter columns are written so the mask
    # covers exactly the columns present in df
    istransformed = \
        pe.pst.parameter_data.loc[common_names, "partrans"] == "log"
    df.loc[:, istransformed] = 10.0**df.loc[:, istransformed]

    # replace the realizations for fixed parameters with the original
    # parval1 in the control file
    pe.pst.parameter_data.index = pe.pst.parameter_data.parnme
    fixed_vals = pe.pst.parameter_data.loc[pe.fixed_indexer, "parval1"]
    for fname, fval in zip(fixed_vals.index, fixed_vals.values):
        df.loc[:, fname] = fval

    new_pe = cls.from_dataframe(pst=pe.pst, df=df)

    e = datetime.now()
    print("{0} - done...took {1}".format(e, (e - s).total_seconds()))
    return new_pe
def project(self, projection_matrix, inplace=True, log=None,
            enforce_bounds="reset"):
    """ project the ensemble using the null-space Monte Carlo method

    Parameters
    ----------
    projection_matrix : pyemu.Matrix
        projection operator - must already respect log transform
    inplace : bool
        project self or return a new ParameterEnsemble instance
    log : pyemu.Logger
        for logging progress
    enforce_bounds : str
        parameter bound enforcement flag. 'drop' removes
        offending realizations, 'reset' resets offending values

    Returns
    -------
    ParameterEnsemble : ParameterEnsemble
        if inplace is False, otherwise None

    Note
    ----
    self is always log10-transformed before projecting.
    NOTE(review): when inplace is False only the returned ensemble is
    converted back, so self is left in log space and flagged as
    transformed - confirm this is intended.
    """
    if self.istransformed:
        self._back_transform()

    # log10-transform the parameters flagged "log" in the control file
    istransformed = self.pst.parameter_data.loc[:, "partrans"] == "log"
    self.loc[:, istransformed] = \
        self.loc[:, istransformed].applymap(math.log10)
    self.__istransformed = True

    # make sure everything is cool WRT ordering
    common_names = get_common_elements(self.adj_names,
                                       projection_matrix.row_names)
    base = self.mean_values.loc[common_names]
    projection_matrix = projection_matrix.get(common_names, common_names)

    if not inplace:
        new_en = ParameterEnsemble(pst=self.pst.get(),
                                   data=self.loc[:, :].copy(),
                                   columns=self.columns,
                                   mean_values=self.mean_values.copy(),
                                   istransformed=self.istransformed)

    for real in self.index:
        if log is not None:
            log("projecting realization {0}".format(real))

        # null space projection of difference vector.  The difference is
        # formed once and reused; Series.values stands in for
        # Series.as_matrix(), which was removed in pandas 1.0
        diff = self.loc[real, common_names] - base
        pdiff = np.dot(projection_matrix.x, diff.values)

        if inplace:
            self.loc[real, common_names] = base + pdiff
        else:
            new_en.loc[real, common_names] = base + pdiff

        # the identical message is logged again after the work -
        # presumably the logger treats the repeat as the "finished"
        # marker; confirm against the logger
        if log is not None:
            log("projecting realization {0}".format(real))

    if not inplace:
        new_en.enforce(enforce_bounds)
        # undo the log10 transform on the returned ensemble
        new_en.loc[:, istransformed] = 10.0**new_en.loc[:, istransformed]
        new_en.__istransformed = False
        return new_en

    self.enforce(enforce_bounds)
    # undo the log10 transform applied at the top of this method
    self.loc[:, istransformed] = 10.0**self.loc[:, istransformed]
    self.__istransformed = False