def objective(x):
    """x = [var_fit]"""
    ## Evaluate model
    df_var = tran_outer(
        df_data[var_feat],
        concat(
            (df_nom[var_fix].iloc[[0]], df_make(**dict(zip(var_fit, x)))),
            axis=1,
        ),
    )
    df_tmp = eval_df(model, df=df_var)

    ## Compute joint MSE
    return ((df_tmp[out].values - df_data[out].values) ** 2).mean()
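
## A minimal sketch of how a closure like `objective` is typically driven
## (an assumption, not shown in this snippet): hand it to a numerical
## optimizer over the fitted variables. Here `x0` is a hypothetical initial
## guess with one entry per entry of var_fit.
##
##     from scipy.optimize import minimize
##
##     x0 = [0.0] * len(var_fit)  # hypothetical starting point
##     res = minimize(objective, x0, method="L-BFGS-B")
##     x_best = res.x  # least-MSE levels for var_fit, in order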
def var_outer(self, df_rand, df_det=None):
    """Outer product of random and deterministic samples

    Args:
        df_rand (DataFrame): random variable samples
        df_det (DataFrame): deterministic variable samples;
            set to "nom" for nominal evaluation

    Returns:
        DataFrame: Outer product of samples

    """
    ## Pass-through if no var_det
    if self.n_var_det == 0:
        return df_rand

    ## Error-throwing default value
    if df_det is None:
        raise ValueError("df_det must be DataFrame or 'nom'")
    ## String shortcut
    elif isinstance(df_det, str):
        if df_det == "nom":
            df_det = self.det_nom()
        else:
            raise ValueError("df_det shortcut string invalid")
    ## DataFrame
    else:
        ## Check invariant; model inputs must be subset of df columns
        if not set(self.var_det).issubset(set(df_det.columns)):
            raise ValueError("model.var_det not a subset of given columns")

    ## Pass-through if no var_rand
    if self.n_var_rand == 0:
        return df_det

    ## Outer product if both det and rand exist
    return gr.tran_outer(df_rand, df_det)
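
## Illustration of the outer-product semantics (not the library code itself):
## each row of df_rand is paired with each row of df_det, i.e. a cross join.
## A minimal pandas sketch, assuming pandas >= 1.2 for how="cross":
##
##     import pandas as pd
##
##     df_rand = pd.DataFrame({"x": [1, 2]})
##     df_det = pd.DataFrame({"c": [10, 20]})
##     df_outer = df_rand.merge(df_det, how="cross")
##     # 4 rows: (1, 10), (1, 20), (2, 10), (2, 20)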
def fit_nls(
    df_data,
    md=None,
    out=None,
    var_fix=None,
    df_init=None,
    verbose=True,
    uq_method=None,
    **kwargs,
):
    r"""Fit a model with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS),
    and return an executable model with those frozen best-fit levels.
    Optionally, fit a distribution on the parameters to quantify parametric
    uncertainty.

    Note: This is a *synonym* for eval_nls(); see the documentation for
    eval_nls() for keyword argument options available beyond those listed
    here.

    Args:
        df_data (DataFrame): Data for estimating best-fit variable levels.
            Variables not found in df_data are optimized for fitting.
        md (gr.Model): Model to analyze. All model variables selected for
            fitting must be bounded or random. Deterministic variables may
            have semi-infinite bounds.
        out (list or None): Outputs to fit; defaults to all model outputs
            (md.out).
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame): Initial guesses for parameters; overrides
            n_restart
        n_restart (int): Number of restarts to try; the first try is at the
            nominal conditions of the model. Returned model will use the
            least-error parameter set among restarts tested.
        n_maxiter (int): Optimizer maximum iterations
        verbose (bool): Print best-fit parameters to console?
        uq_method (str OR None): If string, select method to quantify
            parameter uncertainties. If None, provide best-fit values only.
            Methods:
            uq_method = "linpool": assume normal errors; linearly approximate
                parameter effects; equally pool variance matrices for each
                output

    Returns:
        gr.Model: Model for evaluation with best-fit variables frozen to
            optimized levels.

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_trajectory_windowed
        >>> from grama.models import make_trajectory_linear
        >>> X = gr.Intention()
        >>>
        >>> md_trajectory = make_trajectory_linear()
        >>> md_fitted = (
        >>>     df_trajectory_windowed
        >>>     >> gr.ft_nls(
        >>>         md=md_trajectory,
        >>>         uq_method="linpool",
        >>>     )
        >>> )

    """
    ## Check invariants; md must be checked before out, since the out
    ## default dereferences md
    if md is None:
        raise ValueError("Must provide model md")
    if out is None:
        out = md.out
        print("... fit_nls setting out = {}".format(out))

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in md.var_det:
        wid = md.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)

    ## Run eval_nls to fit model parameter values
    df_fit = eval_nls(
        md,
        df_data=df_data,
        var_fix=var_fix,
        df_init=df_init,
        append=True,
        verbose=verbose,
        **kwargs,
    )
    ## Select best-fit values
    df_best = df_fit.sort_values(by="mse", axis=0).iloc[[0]].reset_index(drop=True)
    if verbose:
        print(df_fit.sort_values(by="mse", axis=0))

    ## Determine variables that were fitted
    var_fitted = list(set(md.var).intersection(set(df_best.columns)))
    var_remain = list(set(md.var).difference(set(var_fitted)))

    if len(var_remain) == 0:
        raise ValueError("Resulting model is constant!")

    ## Assemble and return fitted model
    if md.name is None:
        name = "(Fitted Model)"
    else:
        name = md.name + " (Fitted)"

    ## Calibrate parametric uncertainty, if requested
    if uq_method == "linpool":
        ## Precompute data
        df_nom = eval_nominal(md, df_det="nom")
        df_base = tran_outer(
            df_data,
            concat((df_best[var_fitted], df_nom[var_fix]), axis=1),
        )
        df_pred = eval_df(md, df=df_base)
        df_grad = eval_grad_fd(md, df_base=df_base, var=var_fitted)

        ## Pool variance matrices
        n_obs = df_data.shape[0]
        n_fitted = len(var_fitted)
        Sigma_pooled = zeros((n_fitted, n_fitted))

        for output in out:
            ## Approximate sigma_sq
            sigma_sq = npsum(
                nppow(df_data[output].values - df_pred[output].values, 2)
            ) / (n_obs - n_fitted)
            ## Approximate (pseudo)-inverse hessian
            var_grad = list(map(lambda v: "D" + output + "_D" + v, var_fitted))
            Z = df_grad[var_grad].values
            Hinv = pinv(Z.T.dot(Z), hermitian=True)

            ## Add variance matrix to pooled Sigma
            Sigma_pooled = Sigma_pooled + sigma_sq * Hinv / n_fitted

        ## Check model for identifiability
        kappa_out = cond(Sigma_pooled)
        if kappa_out > 1e10:
            warn(
                "Model is locally unidentifiable as measured by the "
                + "condition number of the pooled covariance matrix; "
                + "kappa = {}".format(kappa_out),
                RuntimeWarning,
            )

        ## Convert to std deviations and correlation
        sigma_comp = npsqrt(diag(Sigma_pooled))
        corr_mat = Sigma_pooled / (
            atleast_2d(sigma_comp).T.dot(atleast_2d(sigma_comp))
        )
        corr_data = []
        I, J = triu_indices(n_fitted, k=1)
        for ind in range(len(I)):
            i = I[ind]
            j = J[ind]
            corr_data.append([var_fitted[i], var_fitted[j], corr_mat[i, j]])
        df_corr = DataFrame(data=corr_data, columns=["var1", "var2", "corr"])

        ## Assemble marginals
        marginals = {}
        for ind, var_ in enumerate(var_fitted):
            marginals[var_] = {
                "dist": "norm",
                "loc": df_best[var_].values[0],
                "scale": sigma_comp[ind],
            }

        ## Construct model with Gaussian copula
        if len(var_fix) > 0:
            md_res = (
                Model(name)
                >> cp_function(
                    lambda x: df_nom[var_fix].values,
                    var=set(var_remain).difference(var_fix),
                    out=var_fix,
                    name="Fix variable levels",
                )
                >> cp_md_det(md=md)
                >> cp_marginals(**marginals)
                >> cp_copula_gaussian(df_corr=df_corr)
            )
        else:
            md_res = (
                Model(name)
                >> cp_md_det(md=md)
                >> cp_marginals(**marginals)
                >> cp_copula_gaussian(df_corr=df_corr)
            )

    ## Return deterministic model
    elif uq_method is None:
        md_res = (
            Model(name)
            >> cp_function(
                lambda x: df_best[var_fitted].values,
                var=var_remain,
                out=var_fitted,
                name="Fix variable levels",
            )
            >> cp_md_det(md=md)
        )

    else:
        raise ValueError("uq_method option {} not recognized".format(uq_method))

    return md_res
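
## Note on the "linpool" estimate implemented above (annotation only): for
## each output j, the residual variance is estimated as
##
##     sigma_j^2 = sum_i (y_ij - yhat_ij)^2 / (n_obs - n_fitted)
##
## and, with Z_j the (n_obs x n_fitted) gradient matrix for output j, the
## covariance accumulated in the loop is
##
##     Sigma_pooled = sum_j sigma_j^2 * pinv(Z_j^T Z_j) / n_fitted
##
## The fitted marginals are then Normal with loc at the best-fit values and
## scale sqrt(diag(Sigma_pooled)); the Gaussian copula carries the
## off-diagonal correlations.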
def eval_grad_fd(model, h=1e-8, df_base=None, var=None, append=True, skip=False):
    r"""Finite-difference gradient approximation

    Evaluates a given model with a central-difference stencil to approximate
    the gradient.

    Args:
        model (gr.Model): Model to differentiate
        h (numeric): finite difference stepsize,
            single (scalar) or per-input (array)
        df_base (DataFrame): Base-points for gradient calculations
        var (list(str) or string): list of variables to differentiate,
            or flag; "rand" for var_rand, "det" for var_det
        append (bool): Append results to base point inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Gradient approximation or unevaluated design

    @pre (not isinstance(h, collections.Sequence)) |
         (h.shape[0] == df_base.shape[1])

    Examples:
        >>> import grama as gr
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> df_nom = md >> gr.ev_nominal(df_det="nom")
        >>> df_grad = md >> gr.ev_grad_fd(df_base=df_nom)
        >>> df_grad >> gr.tf_gather("var", "val", gr.everything())

    """
    ## Check invariants
    if not set(model.var).issubset(set(df_base.columns)):
        raise ValueError("model.var must be subset of df_base.columns")
    if var is None:
        var = model.var
    elif isinstance(var, str):
        if var == "rand":
            var = model.var_rand
        elif var == "det":
            var = model.var_det
        else:
            raise ValueError("var flag not recognized; use 'rand' or 'det'")
    else:
        if not set(var).issubset(set(model.var)):
            raise ValueError("var must be subset of model.var")
    var_fix = list(set(model.var).difference(set(var)))

    ## TODO
    if skip:
        raise NotImplementedError("skip not implemented")

    ## Build stencil
    n_var = len(var)
    stencil = eye(n_var) * h
    stepscale = tile(atleast_2d(0.5 / h).T, (1, model.n_out))

    outputs = model.out
    nested_labels = [
        list(map(lambda s_out: "D" + s_out + "_D" + s_var, outputs))
        for s_var in var
    ]
    grad_labels = list(itertools.chain.from_iterable(nested_labels))

    ## Loop over df_base
    results = []  # TODO: Preallocate?
    for row_i in range(df_base.shape[0]):
        ## Evaluate left and right stencil points
        df_left = eval_df(
            model,
            tran_outer(
                DataFrame(
                    columns=var,
                    data=-stencil + df_base[var].iloc[[row_i]].values,
                ),
                df_base[var_fix].iloc[[row_i]],
            ),
            append=False,
        )
        df_right = eval_df(
            model,
            tran_outer(
                DataFrame(
                    columns=var,
                    data=+stencil + df_base[var].iloc[[row_i]].values,
                ),
                df_base[var_fix].iloc[[row_i]],
            ),
            append=False,
        )

        ## Compute central differences
        res = (
            stepscale * (df_right[outputs] - df_left[outputs]).values
        ).flatten()

        df_grad = DataFrame(columns=grad_labels, data=[res])
        results.append(df_grad)

    return concat(results).reset_index(drop=True)
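
## Sketch of the stencil arithmetic above (illustration only): with
## stepscale = 0.5 / h, each entry is the standard central difference
## (f(x + h) - f(x - h)) / (2 * h). A self-contained check using numpy:
##
##     import numpy as np
##
##     f, x, h = np.sin, 0.3, 1e-6
##     approx = (f(x + h) - f(x - h)) / (2 * h)
##     assert abs(approx - np.cos(x)) < 1e-8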