def fit_nls(
    df_data,
    md=None,
    out=None,
    var_fix=None,
    df_init=None,
    verbose=True,
    uq_method=None,
    **kwargs,
):
    r"""Fit a model with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS), and
    return an executable model with those best-fit levels frozen. Optionally,
    fit a distribution on the parameters to quantify parametric uncertainty.

    Note: This is a *synonym* for eval_nls(); see the documentation for
    eval_nls() for keyword argument options available beyond those listed
    here.

    Args:
        df_data (DataFrame): Data for estimating best-fit variable levels.
            Variables not found in df_data are optimized during fitting.
        md (gr.Model): Model to analyze. All model variables selected for
            fitting must be bounded or random. Deterministic variables may
            have semi-infinite bounds.
        out (list or None): Output contributions to consider in computing MSE.
            Assumed to be md.out if left as None.
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame): Initial guesses for parameters; overrides
            n_restart
        n_restart (int): Number of restarts to try; the first try is at the
            nominal conditions of the model. The returned model will use the
            least-error parameter set among the restarts tested.
        n_maxiter (int): Optimizer maximum iterations
        verbose (bool): Print best-fit parameters to console?
        uq_method (str OR None): If a string, select the method used to
            quantify parameter uncertainties. If None, provide best-fit
            values only. Methods:
            uq_method = "linpool": assume normal errors; linearly approximate
            parameter effects; equally pool variance matrices for each output

    Returns:
        gr.Model: Model for evaluation with best-fit variables frozen to
            optimized levels.

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_trajectory_windowed
        >>> from grama.models import make_trajectory_linear
        >>> X = gr.Intention()
        >>>
        >>> md_trajectory = make_trajectory_linear()
        >>> md_fitted = (
        >>>     df_trajectory_windowed
        >>>     >> gr.ft_nls(
        >>>         md=md_trajectory,
        >>>         uq_method="linpool",
        >>>     )
        >>> )

    """
    ## Check invariants; md must be checked before any attribute access
    if md is None:
        raise ValueError("Must provide model md")
    ## Check `out` invariants
    if out is None:
        out = md.out
        if verbose:
            print("... fit_nls setting out = {}".format(out))

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in md.var_det:
        wid = md.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)
    # Convert to list; sets are not valid DataFrame column indexers
    var_fix = list(var_fix)

    ## Run eval_nls to fit model parameter values
    df_fit = eval_nls(
        md,
        df_data=df_data,
        var_fix=var_fix,
        df_init=df_init,
        append=True,
        verbose=verbose,
        **kwargs,
    )
    ## Select best-fit values
    df_best = df_fit.sort_values(by="mse", axis=0).iloc[[0]].reset_index(drop=True)
    if verbose:
        print(df_fit.sort_values(by="mse", axis=0))

    ## Determine variables that were fitted
    var_fitted = list(set(md.var).intersection(set(df_best.columns)))
    var_remain = list(set(md.var).difference(set(var_fitted)))

    if len(var_remain) == 0:
        raise ValueError("Resulting model is constant!")

    ## Assemble name for the fitted model
    if md.name is None:
        name = "(Fitted Model)"
    else:
        name = md.name + " (Fitted)"

    ## Calibrate parametric uncertainty, if requested
    if uq_method == "linpool":
        ## Precompute data
        df_nom = eval_nominal(md, df_det="nom")
        df_base = tran_outer(
            df_data,
            concat((df_best[var_fitted], df_nom[var_fix]), axis=1),
        )
        df_pred = eval_df(md, df=df_base)
        df_grad = eval_grad_fd(md, df_base=df_base, var=var_fitted)

        ## Pool variance matrices
        n_obs = df_data.shape[0]
        n_fitted = len(var_fitted)
        Sigma_pooled = zeros((n_fitted, n_fitted))

        for output in out:
            ## Approximate sigma_sq
            sigma_sq = npsum(
                nppow(df_data[output].values - df_pred[output].values, 2)
            ) / (n_obs - n_fitted)
            ## Approximate the (pseudo-)inverse Hessian
            var_grad = list(map(lambda v: "D" + output + "_D" + v, var_fitted))
            Z = df_grad[var_grad].values
            Hinv = pinv(Z.T.dot(Z), hermitian=True)

            ## Add variance matrix to pooled Sigma
            Sigma_pooled = Sigma_pooled + sigma_sq * Hinv / n_fitted

        ## Check model for identifiability
        kappa_out = cond(Sigma_pooled)
        if kappa_out > 1e10:
            warn(
                "Model is locally unidentifiable as measured by the "
                + "condition number of the pooled covariance matrix; "
                + "kappa = {}".format(kappa_out),
                RuntimeWarning,
            )

        ## Convert to standard deviations and correlations
        sigma_comp = npsqrt(diag(Sigma_pooled))
        corr_mat = Sigma_pooled / (
            atleast_2d(sigma_comp).T.dot(atleast_2d(sigma_comp))
        )
        corr_data = []
        I, J = triu_indices(n_fitted, k=1)
        for ind in range(len(I)):
            i = I[ind]
            j = J[ind]
            corr_data.append([var_fitted[i], var_fitted[j], corr_mat[i, j]])
        df_corr = DataFrame(data=corr_data, columns=["var1", "var2", "corr"])

        ## Assemble marginals
        marginals = {}
        for ind, var_ in enumerate(var_fitted):
            marginals[var_] = {
                "dist": "norm",
                "loc": df_best[var_].values[0],
                "scale": sigma_comp[ind],
            }

        ## Construct model with Gaussian copula
        if len(var_fix) > 0:
            md_res = (
                Model(name)
                >> cp_function(
                    lambda x: df_nom[var_fix].values,
                    var=list(set(var_remain).difference(var_fix)),
                    out=var_fix,
                    name="Fix variable levels",
                )
                >> cp_md_det(md=md)
                >> cp_marginals(**marginals)
                >> cp_copula_gaussian(df_corr=df_corr)
            )
        else:
            md_res = (
                Model(name)
                >> cp_md_det(md=md)
                >> cp_marginals(**marginals)
                >> cp_copula_gaussian(df_corr=df_corr)
            )

    ## Return deterministic model
    elif uq_method is None:
        md_res = (
            Model(name)
            >> cp_function(
                lambda x: df_best[var_fitted].values,
                var=var_remain,
                out=var_fitted,
                name="Fix variable levels",
            )
            >> cp_md_det(md=md)
        )

    else:
        raise ValueError("uq_method option {} not recognized".format(uq_method))

    return md_res
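# --------------------------------------------------------------------------
# Illustrative sketch, not part of grama's API: the "linpool" branch above
# linearizes each output about the best fit and pools per-output covariance
# estimates Sigma_k = sigma_k^2 * (Z_k^T Z_k)^+. The helper below reproduces
# that arithmetic with plain numpy; `residuals` and `gradients` are
# hypothetical stand-ins for (df_data - df_pred) and df_grad above.
def _linpool_cov_sketch(residuals, gradients, n_fitted):
    """Pool linearized covariance estimates across outputs (sketch).

    Args:
        residuals (list of 1d arrays): per-output residuals y_obs - y_pred
        gradients (list of 2d arrays): per-output gradient matrices Z, one
            row per observation, one column per fitted variable
        n_fitted (int): number of fitted variables

    Returns:
        2d array: pooled covariance matrix over the fitted variables
    """
    from numpy import zeros
    from numpy import sum as npsum
    from numpy.linalg import pinv

    Sigma_pooled = zeros((n_fitted, n_fitted))
    for r_k, Z_k in zip(residuals, gradients):
        # Noise-variance estimate with degrees-of-freedom correction
        sigma_sq = npsum(r_k ** 2) / (len(r_k) - n_fitted)
        # Pseudo-inverse of the Gauss-Newton Hessian approximation Z^T Z
        Hinv = pinv(Z_k.T.dot(Z_k), hermitian=True)
        # Same pooling weight as fit_nls above
        Sigma_pooled = Sigma_pooled + sigma_sq * Hinv / n_fitted
    return Sigma_pooled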
def eval_nls(
    model,
    df_data=None,
    out=None,
    var_fix=None,
    df_init=None,
    append=False,
    tol=1e-6,
    ftol=1e-9,
    gtol=1e-5,
    n_maxiter=100,
    n_restart=1,
    n_process=1,
    method="L-BFGS-B",
    seed=None,
    verbose=True,
):
    r"""Estimate with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS).

    Args:
        model (gr.Model): Model to analyze. All model variables selected for
            fitting must be bounded or random. Deterministic variables may
            have semi-infinite bounds.
        df_data (DataFrame): Data for estimating parameters. Variables not
            found in df_data are optimized during fitting.
        out (list or None): Output contributions to consider in computing MSE.
            Assumed to be model.out if left as None.
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame): Initial guesses for parameters; overrides
            n_restart
        append (bool): Append metadata? (Initial guess, MSE, optimizer status)
        tol (float): Optimizer convergence tolerance
        ftol (float): Optimizer function-value convergence tolerance
        gtol (float): Optimizer gradient convergence tolerance
        n_maxiter (int): Optimizer maximum iterations
        n_restart (int): Number of restarts; beyond n_restart=1 random
            restarts are used.
        n_process (int): Number of parallel processes; reserved, not used by
            this implementation.
        method (str): Optimization method; see the documentation for
            scipy.optimize.minimize for options.
        seed (int OR None): Random seed for restarts
        verbose (bool): Print messages to console?

    Returns:
        DataFrame: Results of estimation

    Examples:
        >>> import grama as gr
        >>> from grama.data import df_trajectory_full
        >>> from grama.models import make_trajectory_linear
        >>>
        >>> md_trajectory = make_trajectory_linear()
        >>>
        >>> df_fit = (
        >>>     md_trajectory
        >>>     >> gr.ev_nls(df_data=df_trajectory_full)
        >>> )
        >>>
        >>> print(df_fit)

    """
    ## Check `out` invariants
    if out is None:
        out = model.out
        if verbose:
            print("... eval_nls setting out = {}".format(out))
    set_diff = set(out).difference(set(df_data.columns))
    if len(set_diff) > 0:
        raise ValueError(
            "out must be subset of df_data.columns\n"
            + "difference = {}".format(set_diff)
        )

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in model.var_det:
        wid = model.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)
    if verbose:
        print("... eval_nls setting var_fix = {}".format(list(var_fix)))
    var_fix = list(var_fix)

    ## Determine variables for evaluation
    var_feat = set(model.var).intersection(set(df_data.columns))
    if verbose:
        print("... eval_nls setting var_feat = {}".format(list(var_feat)))
    var_feat = list(var_feat)

    ## Determine variables for fitting
    var_fit = set(model.var).difference(set(var_fix).union(set(var_feat)))
    if len(var_fit) == 0:
        raise ValueError(
            "No var selected for fitting!\n"
            + "Try checking model bounds and df_data.columns."
        )
    var_fit = list(var_fit)

    ## Separate var_fit into det and rand
    var_fit_det = list(set(model.var_det).intersection(var_fit))
    var_fit_rand = list(set(model.var_rand).intersection(var_fit))

    ## Construct bounds; this also fixes the var_fit order
    var_fit = var_fit_det + var_fit_rand
    bounds = []
    var_prob = []
    for var in var_fit_det:
        if not isfinite(model.domain.get_nominal(var)):
            var_prob.append(var)
        bounds.append(model.domain.get_bound(var))
    if len(var_prob) > 0:
        raise ValueError(
            "All variables to be fitted must have a finite nominal value.\n"
            + "Offending var = {}".format(var_prob)
        )
    for var in var_fit_rand:
        bounds.append(
            (
                model.density.marginals[var].q(0),
                model.density.marginals[var].q(1),
            )
        )

    ## Determine initial guess points
    df_nom = eval_nominal(model, df_det="nom", skip=True)

    ## Use specified initial guess(es)
    if not (df_init is None):
        # Check invariants
        set_diff = list(set(var_fit).difference(set(df_init.columns)))
        if len(set_diff) > 0:
            raise ValueError(
                "var_fit must be subset of df_init.columns\n"
                + "difference = {}".format(set_diff)
            )
        # Pull n_restart
        n_restart = df_init.shape[0]

    ## Generate initial guess(es)
    else:
        df_init = df_nom[var_fit]
        if n_restart > 1:
            if not (seed is None):
                setseed(seed)
            ## Collect sweep-able deterministic variables
            var_sweep = list(
                filter(
                    lambda v: isfinite(model.domain.get_width(v))
                    & (model.domain.get_width(v) > 0),
                    model.var_det,
                )
            )
            ## Generate pseudo-marginals
            dicts_var = {}
            for v in var_sweep:
                dicts_var[v] = {
                    "dist": "uniform",
                    "loc": model.domain.get_bound(v)[0],
                    "scale": model.domain.get_width(v),
                }
            ## Overwrite model
            md_sweep = comp_marginals(model, **dicts_var)
            md_sweep = comp_copula_independence(md_sweep)
            ## Generate random start points
            df_rand = eval_sample(
                md_sweep,
                n=n_restart - 1,
                df_det="nom",
                skip=True,
            )
            df_init = concat(
                (df_init, df_rand[var_fit]),
                axis=0,
            ).reset_index(drop=True)

    ## Fit from a single initial guess
    def fun_mp(i):
        x0 = df_init[var_fit].iloc[i].values

        ## Build evaluator
        def objective(x):
            """x = [var_fit]"""
            ## Evaluate model
            df_var = tran_outer(
                df_data[var_feat],
                concat(
                    (df_nom[var_fix].iloc[[0]], df_make(**dict(zip(var_fit, x)))),
                    axis=1,
                ),
            )
            df_tmp = eval_df(model, df=df_var)

            ## Compute joint MSE
            return ((df_tmp[out].values - df_data[out].values) ** 2).mean()

        ## Run optimization
        res = minimize(
            objective,
            x0,
            args=(),
            method=method,
            jac=False,
            tol=tol,
            options={
                "maxiter": n_maxiter,
                "disp": False,
                "ftol": ftol,
                "gtol": gtol,
            },
            bounds=bounds,
        )

        df_tmp = df_make(
            **dict(zip(var_fit, res.x)),
            **dict(zip(map(lambda s: s + "_0", var_fit), x0)),
        )
        df_tmp["success"] = [res.success]
        df_tmp["message"] = [res.message]
        df_tmp["n_iter"] = [res.nit]
        df_tmp["mse"] = [res.fun]

        return df_tmp

    ## Iterate over initial guesses
    df_res = DataFrame()
    for i in range(n_restart):
        df_tmp = fun_mp(i)
        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    ## Post-process
    if append:
        return df_res
    return df_res[var_fit]
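# --------------------------------------------------------------------------
# Illustrative usage sketch, not called anywhere: multiple random restarts
# with a fixed seed, keeping optimizer metadata via append=True so the
# per-restart MSE values can be compared. Uses the trajectory example
# shipped with grama, as in the docstring above.
def _demo_eval_nls_restarts():
    import grama as gr
    from grama.data import df_trajectory_full
    from grama.models import make_trajectory_linear

    md = make_trajectory_linear()
    df_fit = gr.eval_nls(
        md,
        df_data=df_trajectory_full,
        n_restart=4,
        seed=101,
        append=True,
    )
    # Lowest-MSE restart; fit_nls performs this same selection internally
    return df_fit.sort_values("mse").iloc[[0]]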
def eval_min(
    model,
    out_min=None,
    out_geq=None,
    out_leq=None,
    out_eq=None,
    method="SLSQP",
    tol=1e-6,
    n_restart=1,
    n_maxiter=50,
    seed=None,
    df_start=None,
):
    r"""Constrained minimization using functions from a model

    Perform constrained minimization using functions from a model. The model
    must have deterministic variables only.

    Wrapper for scipy.optimize.minimize

    Args:
        model (gr.Model): Model to analyze. All model variables must be
            deterministic.
        out_min (str): Output to use as minimization objective.
        out_geq (None OR list of str): Outputs to use as geq constraints;
            out >= 0
        out_leq (None OR list of str): Outputs to use as leq constraints;
            out <= 0
        out_eq (None OR list of str): Outputs to use as equality constraints;
            out == 0
        method (str): Optimization method; see the documentation for
            scipy.optimize.minimize for options.
        tol (float): Optimization objective convergence tolerance
        n_restart (int): Number of restarts; beyond n_restart=1 random
            restarts are used.
        n_maxiter (int): Optimizer maximum iterations
        seed (int OR None): Random seed for restarts
        df_start (None OR DataFrame): Specific starting values to use;
            overrides n_restart if provided.

    Returns:
        DataFrame: Results of optimization

    Examples:
        >>> import grama as gr
        >>> md = (
        >>>     gr.Model("Constrained Rosenbrock")
        >>>     >> gr.cp_function(
        >>>         fun=lambda x: (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2,
        >>>         var=["x", "y"],
        >>>         out=["c"],
        >>>     )
        >>>     >> gr.cp_function(
        >>>         fun=lambda x: (x[0] - 1)**3 - x[1] + 1,
        >>>         var=["x", "y"],
        >>>         out=["g1"],
        >>>     )
        >>>     >> gr.cp_function(
        >>>         fun=lambda x: x[0] + x[1] - 2,
        >>>         var=["x", "y"],
        >>>         out=["g2"],
        >>>     )
        >>>     >> gr.cp_bounds(
        >>>         x=(-1.5, +1.5),
        >>>         y=(-0.5, +2.5),
        >>>     )
        >>> )
        >>> md >> gr.ev_min(
        >>>     out_min="c",
        >>>     out_leq=["g1", "g2"],
        >>> )

    """
    ## Check that the model has only deterministic variables
    if model.n_var_rand > 0:
        raise ValueError("model must have no random variables")
    ## Check that the objective is in the model
    if not (out_min in model.out):
        raise ValueError("model must contain out_min")
    ## Check that the constraints are in the model
    if not (out_geq is None):
        out_diff = set(out_geq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_geq; missing {}".format(out_diff)
            )
    if not (out_leq is None):
        out_diff = set(out_leq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_leq; missing {}".format(out_diff)
            )
    if not (out_eq is None):
        out_diff = set(out_eq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_eq; missing {}".format(out_diff)
            )

    ## Formulate initial guess
    df_nom = eval_nominal(model, df_det="nom", skip=True)
    if df_start is None:
        df_start = df_nom[model.var]

        if n_restart > 1:
            if not (seed is None):
                setseed(seed)
            ## Collect sweep-able deterministic variables
            var_sweep = list(
                filter(
                    lambda v: isfinite(model.domain.get_width(v))
                    & (model.domain.get_width(v) > 0),
                    model.var_det,
                )
            )
            ## Generate pseudo-marginals
            dicts_var = {}
            for v in var_sweep:
                dicts_var[v] = {
                    "dist": "uniform",
                    "loc": model.domain.get_bound(v)[0],
                    "scale": model.domain.get_width(v),
                }
            ## Overwrite model
            md_sweep = comp_marginals(model, **dicts_var)
            md_sweep = comp_copula_independence(md_sweep)
            ## Generate random start points
            df_rand = eval_sample(
                md_sweep,
                n=n_restart - 1,
                df_det="nom",
                skip=True,
            )
            df_start = concat(
                (df_start, df_rand[model.var]),
                axis=0,
            ).reset_index(drop=True)
    else:
        n_restart = df_start.shape[0]

    ## Factory for wrapping the model's outputs
    def make_fun(out, sign=+1):
        def fun(x):
            df = DataFrame([x], columns=model.var)
            df_res = eval_df(model, df)
            return sign * df_res[out]

        return fun

    ## Create helper functions for constraints
    constraints = []

    if not (out_geq is None):
        for out in out_geq:
            constraints.append(
                {
                    "type": "ineq",
                    "fun": make_fun(out),
                }
            )
    if not (out_leq is None):
        for out in out_leq:
            constraints.append(
                {
                    "type": "ineq",
                    "fun": make_fun(out, sign=-1),
                }
            )
    if not (out_eq is None):
        for out in out_eq:
            constraints.append(
                {
                    "type": "eq",
                    "fun": make_fun(out),
                }
            )

    ## Parse the bounds for minimize
    bounds = list(map(lambda k: model.domain.bounds[k], model.var))

    ## Run optimization
    df_res = DataFrame()
    for i in range(n_restart):
        x0 = df_start[model.var].iloc[i].values
        res = minimize(
            make_fun(out_min),
            x0,
            args=(),
            method=method,
            jac=False,
            tol=tol,
            options={
                "maxiter": n_maxiter,
                "disp": False,
            },
            constraints=constraints,
            bounds=bounds,
        )

        df_opt = df_make(
            **dict(zip(model.var, res.x)),
            **dict(zip(map(lambda s: s + "_0", model.var), x0)),
        )
        df_tmp = eval_df(model, df=df_opt)
        df_tmp["success"] = [res.success]
        df_tmp["message"] = [res.message]
        df_tmp["n_iter"] = [res.nit]

        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    return df_res
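# --------------------------------------------------------------------------
# Illustrative sketch, plain scipy: the constrained Rosenbrock problem from
# the eval_min docstring, expressed directly against scipy.optimize.minimize.
# This is the call that eval_min assembles above; note the sign flip for
# "leq" constraints, since scipy's "ineq" means fun(x) >= 0.
def _demo_scipy_constrained_rosenbrock():
    from scipy.optimize import minimize

    objective = lambda x: (1 - x[0]) ** 2 + 100 * (x[1] - x[0] ** 2) ** 2
    constraints = [
        # g1(x) = (x - 1)^3 - y + 1 <= 0 becomes -g1(x) >= 0
        {"type": "ineq", "fun": lambda x: -((x[0] - 1) ** 3 - x[1] + 1)},
        # g2(x) = x + y - 2 <= 0 becomes -g2(x) >= 0
        {"type": "ineq", "fun": lambda x: -(x[0] + x[1] - 2)},
    ]
    bounds = [(-1.5, +1.5), (-0.5, +2.5)]
    res = minimize(
        objective,
        [0.0, 0.0],
        method="SLSQP",
        bounds=bounds,
        constraints=constraints,
        tol=1e-6,
    )
    return res.x  # converges near the constrained optimum (1.0, 1.0)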