def eval_contour(
    model,
    var=None,
    out=None,
    df=None,
    levels=None,
    n_side=20,
    n_levels=5,
):
    r"""Generate contours from a model

    Generates contours from a model. Evaluates the model on a dense grid, then
    runs marching squares to generate contours. Supports targeting multiple
    outputs and handling auxiliary inputs not included in the contour map.

    Args:
        model (gr.Model): Model to evaluate.
        var (list of str): Model inputs to target; must provide exactly two
            inputs, and both must have finite domain width.
        out (list of str): Model output(s) for contour generation.
        df (DataFrame or None): Levels for model variables not included in
            var (auxiliary inputs). If `var` and model.var contain the same
            values, then df may be None.
        levels (dict): Specific output levels for contour generation;
            overrides n_levels.
        n_side (int): Side resolution for grid; n_side**2 total evaluations.
        n_levels (int): Number of contour levels.

    Returns:
        DataFrame: Points along contours, organized by output and auxiliary
            variable levels.

    Examples::

        import grama as gr
        ## Multiple outputs
        (
            gr.Model()
            >> gr.cp_vec_function(
                fun=lambda df: gr.df_make(
                    f=df.x**2 + df.y**2,
                    g=df.x + df.y,
                ),
                var=["x", "y"],
                out=["f", "g"],
            )
            >> gr.cp_bounds(
                x=(-1, +1),
                y=(-1, +1),
            )
            >> gr.ev_contour(
                var=["x", "y"],
                out=["f", "g"],
            )
            # Contours with no auxiliary variables can autoplot
            >> gr.pt_auto()
        )

        ## Auxiliary inputs
        (
            gr.Model()
            >> gr.cp_vec_function(
                fun=lambda df: gr.df_make(
                    f=df.c * df.x + (1 - df.c) * df.y,
                ),
                var=["x", "y", "c"],
                out=["f"],
            )
            >> gr.cp_bounds(
                x=(-1, +1),
                y=(-1, +1),
            )
            >> gr.ev_contour(
                var=["x", "y"],
                out=["f"],
                df=gr.df_make(c=[0, 1]),
            )
            # Contours with auxiliary variables should be manually plotted
            >> gr.ggplot(gr.aes("x", "y"))
            + gr.geom_segment(gr.aes(xend="x_end", yend="y_end", group="level", color="c"))
        )

    """
    ## Check invariants
    invariants_eval_model(model)
    invariants_eval_df(df, acc_none=True)
    # Argument given
    if var is None:
        raise ValueError("No `var` given")
    # Correct number of inputs
    if len(var) != 2:
        raise ValueError("Must provide exactly 2 inputs in `var`.")
    # Inputs available
    var_diff = set(var).difference(set(model.var))
    if len(var_diff) > 0:
        raise ValueError(
            "`var` must be a subset of model.var; missing: {}".format(var_diff))
    # All inputs supported
    var_diff = set(model.var).difference(set(var))
    if len(var_diff) > 0:
        if df is None:
            raise ValueError(
                "Must provide values for remaining model variables using df; "
                + "missing values: {}".format(var_diff))
        # Drop the swept variables
        df = df.drop(columns=var, errors="ignore")
        # Check for unsupported inputs
        var_diff2 = var_diff.difference(set(df.columns))
        if len(var_diff2) > 0:
            raise ValueError(
                "All model variables need values in provided df; "
                + "missing values: {}".format(var_diff2))

        if df.shape[0] > 1:
            has_aux = True
        else:
            has_aux = False
    else:
        has_aux = False

    # Finite bound width
    if not all([
            isfinite(model.domain.get_width(v))
            and (model.domain.get_width(v) > 0) for v in var
    ]):
        raise ValueError("All model bounds for `var` must be finite and nonzero")

    # Argument given
    if out is None:
        raise ValueError("No `out` given")
    # Outputs available
    out_diff = set(out).difference(set(model.out))
    if len(out_diff) > 0:
        raise ValueError(
            "`out` must be a subset of model.out; missing: {}".format(out_diff))

    ## Generate data
    xv = linspace(*model.domain.get_bound(var[0]), n_side)
    yv = linspace(*model.domain.get_bound(var[1]), n_side)
    df_x = DataFrame({var[0]: xv})
    df_y = DataFrame({var[1]: yv})
    df_input = (df_x >> tf_outer(df_outer=df_y))

    # Create singleton level if necessary
    if df is None:
        df = DataFrame({"_foo": [0]})

    ## Loop over provided auxiliary levels
    df_res = DataFrame()
    for i in range(df.shape[0]):
        df_in_tmp = (df_input >> tf_outer(df_outer=df.iloc[[i]]))
        df_out = eval_df(
            model,
            df=df_in_tmp,
        )

        ## Set output threshold levels
        if levels is None:
            # Do not overwrite `levels`, to adapt per loop
            levels_wk = dict(
                zip(out, [
                    linspace(df_out[o].min(), df_out[o].max(), n_levels + 2)[1:-1]
                    for o in out
                ]))
        else:
            levels_wk = levels

        ## Run marching squares
        # Output quantity
        for o in out:
            # Reshape data
            Data = reshape(df_out[o].values, (n_side, n_side))

            # Threshold level
            for t in levels_wk[o]:
                # Run marching squares
                segments = marching_square(xv, yv, Data, t)
                sqdata = array(segments).squeeze()

                if len(sqdata) > 0:
                    # Package
                    df_tmp = DataFrame(
                        data=sqdata,
                        columns=[
                            var[0], var[1], var[0] + "_end", var[1] + "_end"
                        ],
                    )
                    df_tmp["out"] = [o] * df_tmp.shape[0]
                    df_tmp["level"] = [t] * df_tmp.shape[0]
                    df_tmp = (df_tmp >> tf_outer(df_outer=df.iloc[[i]]))

                    df_res = concat((df_res, df_tmp), axis=0)
                else:
                    warn("Output {0:} had no contours at level {1:}".format(o, t))

    ## Remove dummy column, if present
    if "_foo" in df_res.columns:
        df_res.drop("_foo", axis=1, inplace=True)
    # Drop index
    df_res = df_res.reset_index(drop=True)

    ## Attach metadata
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "contour",
            "var": var,
            "out": "out",
            "level": "level",
            "aux": has_aux,
        }

    ## Return the results
    return df_res

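# Illustrative sketch (not part of the original module): when `levels` is None,
# eval_contour() spaces n_levels thresholds evenly between each output's observed
# min and max, then drops the two endpoints. The helper name below is hypothetical.
def _sketch_default_levels(df_out, out, n_levels):
    """Return interior contour levels per output; mirrors the linspace logic above."""
    from numpy import linspace
    return {
        o: linspace(df_out[o].min(), df_out[o].max(), n_levels + 2)[1:-1]
        for o in out
    }
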
def eval_form_ria(
    model,
    limits=None,
    cons=None,
    df_corr=None,
    df_det=None,
    append=True,
    tol=1e-3,
    n_maxiter=25,
    n_restart=1,
    verbose=False,
):
    r"""Tail reliability via FORM RIA

    Approximate the desired tail probability using the reliability index
    approach (RIA) of the first-order reliability method (FORM) [1]. Select
    limit states to analyze with list input `limits`. Provide confidence
    levels `cons` and estimator covariance `df_corr` to compute with margin
    in beta [2].

    Note that the reliability index approach (RIA) is generally less stable
    than the performance measure approach (PMA). Consider using
    ``gr.eval_form_pma()`` instead, particularly when using FORM to optimize
    a design.

    Args:
        model (gr.Model): Model to analyze
        limits (list): Target limit states; must be in model.out; limit
            state assumed to be critical at g == 0.
        cons (dict or None): Target confidence levels;
            key = limit state name; must be in model.out
            value = confidence level, \in (0, 1)
        df_corr (DataFrame or None): Sampling distribution covariance entries;
            parameters with no information assumed to be known exactly.
        df_det (DataFrame): Deterministic levels for evaluation;
            use "nom" for nominal deterministic levels.
        tol (float): Optimization convergence tolerance
        n_maxiter (int): Maximum iterations for each optimization run
        n_restart (int): Number of restarts (== number of optimization runs)
        append (bool): Append MPP results for random values?
        verbose (bool): Print optimization results?

    Returns:
        DataFrame: Results of MPP search

    Notes:
        Since FORM RIA relies on optimization over the limit state, it is
        often beneficial to scale your limit state to keep values near unity.

    References:
        [1] Tu, Choi, and Park, "A new study on reliability-based design
            optimization," Journal of Mechanical Design, 1999
        [2] del Rosario, Fenrich, and Iaccarino, "Fast precision margin with
            the first-order reliability method," AIAA Journal, 2019

    Examples::

        import grama as gr
        from grama.models import make_cantilever_beam
        md_beam = make_cantilever_beam()

        ## Evaluate the reliability of specified designs
        (
            md_beam
            >> gr.ev_form_ria(
                # Specify limit states to analyze
                limits=("g_stress", "g_disp"),
                # Analyze three different thicknesses
                df_det=gr.df_make(t=[2, 3, 4], w=3)
            )
        )

    """
    ## Check invariants
    invariants_eval_model(model)
    invariants_eval_df(df_corr, arg_name="df_corr", acc_none=True)
    invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"])
    if limits is None:
        raise ValueError(
            "Must provide `limits` keyword argument to define reliability targets"
        )
    if not set(limits).issubset(set(model.out)):
        raise ValueError("`limits` must be subset of model.out")

    if not cons is None:
        if not (set(cons.keys()) == set(limits)):
            raise ValueError("cons.keys() must be same as limits")
        else:
            if df_corr is None:
                raise ValueError("Must provide df_corr if using cons")
        raise NotImplementedError

    df_det = model.var_outer(
        DataFrame(data=zeros((1, model.n_var_rand)), columns=model.var_rand),
        df_det=df_det,
    )
    df_det = df_det[model.var_det]

    # df_return = DataFrame(columns=model.var_rand + model.var_det + limits)
    df_return = DataFrame()
    for ind in range(df_det.shape[0]):
        ## Loop over objectives
        for key in limits:
            ## Temp dataframe
            df_inner = df_det.iloc[[ind]].reset_index(drop=True)

            ## Construct lambdas
            def fun_jac(z):
                ## Squared reliability index
                fun = z.dot(z)
                jac = 2 * z * length(z)

                return (fun, jac)

            def con_limit(z):
                ## Transform: standard normal-to-random variable
                df_norm = DataFrame(data=[z], columns=model.var_rand)
                df_rand = model.norm2rand(df_norm)
                df = model.var_outer(df_rand, df_det=df_inner)

                ## Eval limit state
                df_res = eval_df(model, df=df)
                g = df_res[key].iloc[0]

                return g

            ## Use conservative direction for initial guess
            signs = array(
                [model.density.marginals[k].sign for k in model.var_rand])
            if length(signs) > 0:
                z0 = signs / length(signs)
            else:
                z0 = ones(model.n_var_rand) / sqrt(model.n_var_rand)

            ## Minimize
            res_all = []
            for jnd in range(n_restart):
                res = minimize(
                    fun_jac,
                    z0,
                    args=(),
                    method="SLSQP",
                    jac=True,
                    tol=tol,
                    options={"maxiter": n_maxiter, "disp": False},
                    constraints=[{"type": "eq", "fun": con_limit}],
                )
                # Append only a successful result
                if res["status"] == 0:
                    res_all.append(res)
                # Set a random start; repeat
                z0 = multivariate_normal([0] * model.n_var_rand,
                                         eye(model.n_var_rand))
                z0 = z0 / length(z0)

            # Choose value among restarts
            n_iter_total = sum([res_all[i].nit for i in range(len(res_all))])
            if len(res_all) > 0:
                i_star = argmin([res.fun for res in res_all])
                x_star = res_all[i_star].x
                fun_star = sqrt(res_all[i_star].fun)
                if verbose:
                    print("out = {}: Optimization successful".format(key))
                    print("n_iter = {}".format(res_all[i_star].nit))
                    print("n_iter_total = {}".format(n_iter_total))
            else:
                ## WARNING
                x_star = [NaN] * model.n_var_rand
                fun_star = NaN
                if verbose:
                    print("out = {}: Optimization unsuccessful".format(key))
                    print("n_iter_total = {}".format(n_iter_total))

            ## Extract results
            if append:
                df_inner = concat(
                    (
                        df_inner,
                        model.norm2rand(
                            DataFrame(data=[x_star], columns=model.var_rand)),
                    ),
                    axis=1,
                    sort=False,
                )
            df_inner["beta_" + key] = [fun_star]
            df_return = concat((df_return, df_inner), axis=0, sort=False)

    if not append:
        df_return = (
            df_return.groupby(model.var_det)
            .agg({"beta_" + s: max for s in limits})
            .reset_index()
        )

    return df_return

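# Conceptual sketch (not part of the original module): FORM RIA searches standard
# normal space for the most probable point (MPP) on the limit state, i.e.
#     beta = min ||z||  subject to  g(z) == 0,
# which is why the routine above minimizes z.dot(z) and reports the square root of
# the optimum. The helper below is a hypothetical, scipy-only restatement.
def _sketch_ria_beta(g, n_dim):
    """Return an approximate reliability index for limit state g(z) in standard normal space."""
    from numpy import ones, sqrt
    from scipy.optimize import minimize
    z0 = ones(n_dim) / sqrt(n_dim)  # arbitrary unit-norm starting point
    res = minimize(
        lambda z: z.dot(z),
        z0,
        method="SLSQP",
        constraints=[{"type": "eq", "fun": g}],
    )
    return sqrt(res.fun)
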
def eval_form_pma(
    model,
    betas=None,
    cons=None,
    df_corr=None,
    df_det=None,
    append=True,
    tol=1e-3,
    n_maxiter=25,
    n_restart=1,
    verbose=False,
):
    r"""Tail quantile via FORM PMA

    Approximate the desired tail quantiles using the performance measure
    approach (PMA) of the first-order reliability method (FORM) [1]. Select
    limit states to minimize at desired quantile with `betas`. Provide
    confidence levels `cons` and estimator covariance `df_corr` to compute
    with margin in beta [2].

    Note that under the performance measure approach, the optimized limit
    state value `g` is sought to be non-negative, $g \geq 0$. This is usually
    included as a constraint in optimization, which can be accomplished by
    using ``gr.eval_form_pma()`` *within* a model definition---see the
    Examples below for more details.

    Args:
        model (gr.Model): Model to analyze
        betas (dict): Target reliability indices;
            key = limit state name; must be in model.out
            value = reliability index; beta = Phi^{-1}(reliability)
        cons (dict or None): Target confidence levels;
            key = limit state name; must be in model.out
            value = confidence level, \in (0, 1)
        df_corr (DataFrame or None): Sampling distribution covariance entries;
            parameters with no information assumed to be known exactly.
        df_det (DataFrame): Deterministic levels for evaluation;
            use "nom" for nominal deterministic levels.
        tol (float): Optimization convergence tolerance
        n_maxiter (int): Maximum iterations for each optimization run
        n_restart (int): Number of restarts (== number of optimization runs)
        append (bool): Append MPP results for random values?
        verbose (bool): Print optimization results?

    Returns:
        DataFrame: Results of MPP search

    Notes:
        Since FORM PMA relies on optimization over the limit state, it is
        often beneficial to scale your limit state to keep values near unity.

    References:
        [1] Tu, Choi, and Park, "A new study on reliability-based design
            optimization," Journal of Mechanical Design, 1999
        [2] del Rosario, Fenrich, and Iaccarino, "Fast precision margin with
            the first-order reliability method," AIAA Journal, 2019

    Examples::

        import grama as gr
        from grama.models import make_cantilever_beam
        md_beam = make_cantilever_beam()

        ## Evaluate the reliability of specified designs
        (
            md_beam
            >> gr.ev_form_pma(
                # Specify target reliability
                betas=dict(g_stress=3, g_disp=3),
                # Analyze three different thicknesses
                df_det=gr.df_make(t=[2, 3, 4], w=3)
            )
        )

        ## Build a nested model for optimization under uncertainty
        md_opt = (
            gr.Model("Beam Optimization")
            >> gr.cp_vec_function(
                fun=lambda df: gr.df_make(c_area=df.w * df.t),
                var=["w", "t"],
                out=["c_area"],
                name="Area objective",
            )
            >> gr.cp_vec_function(
                fun=lambda df: gr.eval_form_pma(
                    md_beam,
                    betas=dict(g_stress=3, g_disp=3),
                    df_det=df,
                    append=False,
                ),
                var=["w", "t"],
                out=["g_stress", "g_disp"],
                name="Reliability constraints",
            )
            >> gr.cp_bounds(w=(2, 4), t=(2, 4))
        )
        # Run the optimization
        (
            md_opt
            >> gr.ev_min(
                out_min="c_area",
                out_geq=["g_stress", "g_disp"],
            )
        )

    """
    ## Check invariants
    invariants_eval_model(model)
    invariants_eval_df(df_corr, arg_name="df_corr", acc_none=True)
    invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"])
    if betas is None:
        raise ValueError(
            "Must provide `betas` keyword argument to define reliability targets"
        )
    if not set(betas.keys()).issubset(set(model.out)):
        raise ValueError("betas.keys() must be subset of model.out")

    if not cons is None:
        if not (set(cons.keys()) == set(betas.keys())):
            raise ValueError("cons.keys() must be same as betas.keys()")
        else:
            if df_corr is None:
                raise ValueError("Must provide df_corr if using cons")
        raise NotImplementedError

    df_det = model.var_outer(
        DataFrame(data=zeros((1, model.n_var_rand)), columns=model.var_rand),
        df_det=df_det,
    )
    df_det = df_det[model.var_det]

    df_return = DataFrame()
    for ind in range(df_det.shape[0]):
        ## Loop over objectives
        for key in betas.keys():
            ## Temp dataframe
            df_inner = df_det.iloc[[ind]].reset_index(drop=True)

            ## Construct lambdas
            def objective(z):
                ## Transform: standard normal-to-random variable
                df_norm = DataFrame(data=[z], columns=model.var_rand)
                df_rand = model.norm2rand(df_norm)
                df = model.var_outer(df_rand, df_det=df_inner)

                df_res = eval_df(model, df=df)
                g = df_res[key].iloc[0]

                # return (g, jac)
                return g

            def con_beta(z):
                return z.dot(z) - (betas[key])**2

            ## Use conservative direction for initial guess
            signs = array(
                [model.density.marginals[k].sign for k in model.var_rand])
            if length(signs) > 0:
                z0 = betas[key] * signs / length(signs)
            else:
                z0 = betas[key] * ones(model.n_var_rand) / sqrt(model.n_var_rand)

            ## Minimize
            res_all = []
            for jnd in range(n_restart):
                res = minimize(
                    objective,
                    z0,
                    args=(),
                    method="SLSQP",
                    jac=False,
                    tol=tol,
                    options={"maxiter": n_maxiter, "disp": False},
                    constraints=[{"type": "eq", "fun": con_beta}],
                )
                # Append only a successful result
                if res["status"] == 0:
                    res_all.append(res)
                # Set a random start; repeat
                z0 = multivariate_normal([0] * model.n_var_rand,
                                         eye(model.n_var_rand))
                z0 = z0 / length(z0) * betas[key]

            # Choose value among restarts
            n_iter_total = sum([res_all[i].nit for i in range(len(res_all))])
            if len(res_all) > 0:
                i_star = argmin([res.fun for res in res_all])
                x_star = res_all[i_star].x
                fun_star = res_all[i_star].fun
                if verbose:
                    print("out = {}: Optimization successful".format(key))
                    print("n_iter = {}".format(res_all[i_star].nit))
                    print("n_iter_total = {}".format(n_iter_total))
            else:
                ## WARNING
                x_star = [NaN] * model.n_var_rand
                fun_star = NaN
                if verbose:
                    print("out = {}: Optimization unsuccessful".format(key))
                    print("n_iter_total = {}".format(n_iter_total))

            ## Extract results
            if append:
                df_inner = concat(
                    (
                        df_inner,
                        model.norm2rand(
                            DataFrame(data=[x_star], columns=model.var_rand)),
                    ),
                    axis=1,
                    sort=False,
                )
            df_inner[key] = [fun_star]
            df_return = concat((df_return, df_inner), axis=0, sort=False)

    if not append:
        df_return = (
            df_return.groupby(model.var_det)
            .agg({s: max for s in betas.keys()})
            .reset_index()
        )

    return df_return

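# Conceptual sketch (not part of the original module): FORM PMA fixes the
# reliability level and minimizes the limit state on the sphere ||z|| == beta,
#     g* = min g(z)  subject to  z.dot(z) == beta**2,
# matching `objective` and `con_beta` above. Hypothetical helper for illustration.
def _sketch_pma_worst_case(g, beta, n_dim):
    """Return the minimum limit-state value at reliability index beta."""
    from numpy import ones, sqrt
    from scipy.optimize import minimize
    z0 = beta * ones(n_dim) / sqrt(n_dim)  # point on the beta-sphere
    res = minimize(
        g,
        z0,
        method="SLSQP",
        constraints=[{"type": "eq", "fun": lambda z: z.dot(z) - beta**2}],
    )
    return res.fun
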
def eval_sinews(
    model,
    n_density=10,
    n_sweeps=3,
    seed=None,
    df_det=None,
    varname="sweep_var",
    indname="sweep_ind",
    append=True,
    skip=False,
):
    r"""Sweep study

    Perform coordinate sweeps over each model random variable ("sinew"
    design). Use random starting points drawn from the joint density.
    Optionally sweep the deterministic variables.

    For more expensive models, it can be helpful to tune n_density and
    n_sweeps to achieve a reasonable runtime.

    Use gr.plot_auto() to construct a quick visualization of the output
    dataframe. Use the `skip` version to visualize the design, and the
    non-skipped version to visualize the results.

    Args:
        model (gr.Model): Model to evaluate
        n_density (numeric): Number of points along each sweep
        n_sweeps (numeric): Number of sweeps per-random variable
        seed (int): Random seed to use
        df_det (DataFrame): Deterministic levels for evaluation;
            use "nom" for nominal deterministic levels,
            use "swp" to sweep deterministic variables
        varname (str): Column name to give for sweep variable; default="sweep_var"
        indname (str): Column name to give for sweep index; default="sweep_ind"
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Examples::

        import grama as gr
        md = gr.make_cantilever_beam()
        # Skip evaluation, used to visualize the design (input points)
        df_design = md >> gr.ev_sinews(df_det="nom", skip=True)
        df_design >> gr.pt_auto()
        # Visualize the input-to-output relationships of the model
        df_sinew = md >> gr.ev_sinews(df_det="nom")
        df_sinew >> gr.pt_auto()

    """
    ## Common invariant checks
    invariants_eval_model(model, skip)
    invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom", "swp"])

    ## Override model if deterministic sweeps desired
    if df_det == "swp":
        ## Collect sweep-able deterministic variables
        var_sweep = list(
            filter(
                lambda v: isfinite(model.domain.get_width(v))
                & (model.domain.get_width(v) > 0),
                model.var_det,
            ))
        ## Generate pseudo-marginals
        dicts_var = {}
        for v in var_sweep:
            dicts_var[v] = {
                "dist": "uniform",
                "loc": model.domain.get_bound(v)[0],
                "scale": model.domain.get_width(v),
            }
        ## Overwrite model
        model = comp_marginals(model, **dicts_var)
        ## Restore flag
        df_det = "nom"

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n_density, Integral):
        print("eval_sinews() is rounding n_density...")
        n_density = int(n_density)
    if not isinstance(n_sweeps, Integral):
        print("eval_sinews() is rounding n_sweeps...")
        n_sweeps = int(n_sweeps)

    ## Build quantile sweep data
    q_random = tile(random((1, model.n_var_rand, n_sweeps)), (n_density, 1, 1))
    q_dense = linspace(0, 1, num=n_density)
    Q_all = zeros((n_density * n_sweeps * model.n_var_rand, model.n_var_rand))
    C_var = ["tmp"] * (n_density * n_sweeps * model.n_var_rand)
    C_ind = [0] * (n_density * n_sweeps * model.n_var_rand)

    ## Interlace
    for i_input in range(model.n_var_rand):
        ind_base = i_input * n_density * n_sweeps
        for i_sweep in range(n_sweeps):
            ind_start = ind_base + i_sweep * n_density
            ind_end = ind_base + (i_sweep + 1) * n_density

            Q_all[ind_start:ind_end] = q_random[:, :, i_sweep]
            Q_all[ind_start:ind_end, i_input] = q_dense
            C_var[ind_start:ind_end] = [model.var_rand[i_input]] * n_density
            C_ind[ind_start:ind_end] = [i_sweep] * n_density

            ## Modify endpoints for infinite support
            if not isfinite(
                    model.density.marginals[model.var_rand[i_input]].q(0)):
                Q_all[ind_start, i_input] = 1 / n_density / 10
            if not isfinite(
                    model.density.marginals[model.var_rand[i_input]].q(1)):
                Q_all[ind_end - 1, i_input] = 1 - 1 / n_density / 10

    ## Assemble sampling plan
    df_pr = DataFrame(data=Q_all, columns=model.var_rand)
    df_rand = model.density.pr2sample(df_pr)
    df_rand[varname] = C_var
    df_rand[indname] = C_ind
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        ## Evaluation estimate
        runtime_est = model.runtime(df_samp.shape[0])
        if runtime_est > 0:
            print(
                "Estimated runtime for design with model ({0:1}):\n  {1:4.3} sec"
                .format(model.name, runtime_est))
        else:
            print("Design runtime estimates unavailable; model has no timing data.")

        ## For autoplot
        with catch_warnings():
            simplefilter("ignore")
            df_samp._plot_info = {
                "type": "sinew_inputs",
                "var": model.var_rand,
            }

        ## Pass-through
        return df_samp

    ## Apply
    df_res = eval_df(model, df=df_samp, append=append)
    ## For autoplot
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "sinew_outputs",
            "var": model.var_rand,
            "out": model.out,
        }

    return df_res

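# Illustrative sketch (not part of the original module): each "sinew" fixes a
# random starting quantile for all variables and sweeps one variable's quantile
# from 0 to 1. A minimal, single-sweep version of the interlacing above:
def _sketch_single_sinew(q_start, i_swept, n_density=10):
    """Return an (n_density, n_var) array of quantiles for one sweep."""
    from numpy import linspace, tile
    Q = tile(q_start, (n_density, 1)).astype(float)  # repeat the random start
    Q[:, i_swept] = linspace(0, 1, num=n_density)    # sweep one coordinate
    return Q
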
def eval_hybrid(
    model,
    n=1,
    plan="first",
    df_det=None,
    varname="hybrid_var",
    seed=None,
    append=True,
    skip=False,
):
    r"""Hybrid points for Sobol' indices

    Use the "hybrid point" design (Sobol', 1999) to support estimating Sobol'
    indices. Use gr.tran_sobol() to post-process the results and compute
    estimates.

    Args:
        model (gr.Model): Model to evaluate; must have CopulaIndependence
        n (numeric): Number of points along each sweep
        plan (str): Sobol' index to compute; plan={"first", "total"}
        seed (int): Random seed to use
        df_det (DataFrame): Deterministic levels for evaluation;
            use "nom" for nominal deterministic levels.
        varname (str): Column name to give for sweep variable; default="hybrid_var"
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    References:
        I.M. Sobol', "Sensitivity Estimates for Nonlinear Mathematical Models"
        (1999) MMCE, Vol 1.

    Examples::

        import grama as gr
        md = gr.make_cantilever_beam()
        ## Compute the first-order indices
        df_first = md >> gr.ev_hybrid(df_det="nom", plan="first")
        df_first >> gr.tf_sobol()
        ## Compute the total-order indices
        df_total = md >> gr.ev_hybrid(df_det="nom", plan="total")
        df_total >> gr.tf_sobol()

    """
    ## Check invariants
    invariants_eval_model(model, skip)
    invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"])
    if not isinstance(model.density.copula, CopulaIndependence):
        raise ValueError(
            "model must have CopulaIndependence structure;\n"
            + "Sobol' indices only defined for independent variables")

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    if not isinstance(n, Integral):
        print("eval_hybrid() is rounding n...")
        n = int(n)

    ## Draw hybrid points
    X = random((n, model.n_var_rand))
    Z = random((n, model.n_var_rand))

    ## Reserve space
    Q_all = zeros((n * (model.n_var_rand + 1), model.n_var_rand))
    Q_all[:n] = X  # Base samples
    C_var = ["_"] * (n * (model.n_var_rand + 1))

    ## Interleave samples
    for i_in in range(model.n_var_rand):
        i_start = (i_in + 1) * n
        i_end = (i_in + 2) * n

        if plan == "first":
            Q_all[i_start:i_end, :] = Z
            Q_all[i_start:i_end, i_in] = X[:, i_in]
        elif plan == "total":
            Q_all[i_start:i_end, :] = X
            Q_all[i_start:i_end, i_in] = Z[:, i_in]
        else:
            raise ValueError("plan must be `first` or `total`")

        C_var[i_start:i_end] = [model.var_rand[i_in]] * n

    ## Construct sampling plan
    df_pr = DataFrame(data=Q_all, columns=model.var_rand)
    ## Convert samples to desired marginals
    df_rand = model.density.pr2sample(df_pr)
    df_rand[varname] = C_var
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        with catch_warnings():
            simplefilter("ignore")
            df_samp._meta = dict(
                type="eval_hybrid",
                varname=varname,
                plan=plan,
                var_rand=model.var_rand,
                out=model.out,
            )

        return df_samp

    df_res = eval_df(model, df=df_samp, append=append)
    with catch_warnings():
        simplefilter("ignore")
        df_res._meta = dict(
            type="eval_hybrid",
            varname=varname,
            plan=plan,
            var_rand=model.var_rand,
            out=model.out,
        )

    return df_res

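# Illustrative sketch (not part of the original module): for the "first" plan the
# hybrid block for variable i copies the alternate matrix Z but re-uses column i
# of the base matrix X, so only variable i is shared with the base sample; the
# "total" plan swaps the roles of X and Z. A minimal restatement of the loop above:
def _sketch_hybrid_block(X, Z, i, plan="first"):
    """Return one hybrid block of quantiles for variable index i."""
    Q = Z.copy() if plan == "first" else X.copy()
    Q[:, i] = X[:, i] if plan == "first" else Z[:, i]
    return Q
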
def eval_pnd(
    model,
    df_train,
    df_test,
    signs,
    n=int(1e4),
    seed=None,
    append=True,
    mean_prefix="_mean",
    sd_prefix="_sd",
):
    """Approximate the probability non-dominated (PND)

    Approximates the probability non-dominated (PND) for a set of training
    points given a fitted probabilistic model. Used to rank a set of
    candidates in the context of multiobjective optimization.

    Args:
        model (gr.model): predictive model to evaluate
        df_train (DataFrame): dataframe with training data
        df_test (DataFrame): dataframe with test data
        signs (dict): dict mapping each output to use to its minimization or
            maximization direction (e.g. +1 or -1)
        append (bool): Append df_test to pnd algorithm outputs

    Kwargs:
        n (int): Number of draws for importance sampler
        seed (int): seed value for reproducibility

    Returns:
        DataFrame: Results of the predictive model run through the PND
        algorithm. Contains both the candidate values and their scores.

    References:
        del Rosario, Zachary, et al. "Assessing the frontier: Active learning,
        model accuracy, and multi-objective candidate discovery and
        optimization." The Journal of Chemical Physics 153.2 (2020): 024112.

    Examples::

        import grama as gr

        ## Define a ground-truth model
        md_true = gr.make_pareto_random()
        df_data = (
            md_true
            >> gr.ev_sample(n=2e3, seed=101, df_det="nom")
        )
        ## Generate test/train data
        df_train = (
            df_data
            >> gr.tf_sample(n=10)
        )
        df_test = (
            df_data
            >> gr.anti_join(
                df_train,
                by=["x1", "x2"],
            )
            >> gr.tf_sample(n=200)
        )
        ## Fit a model to training data
        md_fit = (
            df_train
            >> gr.ft_gp(
                var=["x1", "x2"],
                out=["y1", "y2"],
            )
        )
        ## Rank training points by PND algorithm
        df_pnd = (
            md_fit
            >> gr.ev_pnd(
                df_train,
                df_test,
                signs={"y1": 1, "y2": 1},
                seed=101,
            )
            >> gr.tf_arrange(gr.desc(DF.pr_scores))
        )

    """
    invariants_eval_model(model)
    invariants_eval_df(df_train, arg_name="df_train")
    invariants_eval_df(df_test, arg_name="df_test")

    # Check content
    if len(model.out) / 2 < 2:
        raise ValueError("Given model needs multiple outputs")

    if not set(model.var).issubset(set(df_train.columns)):
        raise ValueError("model.var must be subset of df_train.columns")

    if not set(model.var).issubset(set(df_test.columns)):
        raise ValueError("model.var must be subset of df_test.columns")

    for key in signs.keys():
        if key + mean_prefix not in model.out:
            raise ValueError(
                f"signs.{key} implies output {key + mean_prefix}, "
                "which is not found in provided model.out")
        if key + sd_prefix not in model.out:
            raise ValueError(
                f"signs.{key} implies output {key + sd_prefix}, "
                "which is not found in provided model.out")

    ## Compute predictions and predicted uncertainties
    df_pred = (
        df_test
        >> tf_md(md=model)
    )

    ## Setup for reshaping
    means = []
    sds = []
    columns = df_train.columns.values
    length = int(len(signs.keys()))
    outputs = [key for key in signs.keys() if key in columns]
    signs = [value for value in signs.values()]

    ## Append mean and sd prefixes
    for i, value in enumerate(outputs):
        means.append(value + mean_prefix)
        sds.append(value + sd_prefix)

    ## Keep only the needed prediction columns
    df_pred = df_pred[means + sds]

    ## Reshape data for PND algorithm
    X_pred = df_pred[means].values   # Predicted response values
    X_sig = df_pred[sds].values      # Predictive uncertainties
    X_train = df_train[outputs].values  # Training data

    ### Create covariance matrices
    X_cov = zeros((X_sig.shape[0], length, length))
    for l in range(length):
        for i in range(X_sig.shape[0]):
            X_cov[i, l, l] = X_sig[i, l]

    ### Apply pnd
    pr_scores, var_values = approx_pnd(
        X_pred,
        X_cov,
        X_train,
        signs=signs,
        n=n,
        seed=seed,
    )

    ### Package outputs
    df_pnd = DataFrame(
        {
            "pr_scores": pr_scores,
            "var_values": var_values,
        }
    )

    if append:
        return df_test.reset_index(drop=True).merge(
            df_pnd, left_index=True, right_index=True)

    return df_pnd

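# Illustrative sketch (not part of the original module): eval_pnd() packs each
# candidate's predictive uncertainties into a diagonal matrix before calling
# approx_pnd(); a vectorized equivalent of the nested loop above:
def _sketch_diag_cov(X_sig):
    """Return an (n, k, k) stack of diagonal matrices from an (n, k) array."""
    from numpy import apply_along_axis, diag
    return apply_along_axis(diag, 1, X_sig)
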
def eval_min(
    model,
    out_min=None,
    out_geq=None,
    out_leq=None,
    out_eq=None,
    method="SLSQP",
    tol=1e-6,
    n_restart=1,
    n_maxiter=50,
    seed=None,
    df_start=None,
):
    r"""Constrained minimization using functions from a model

    Perform constrained minimization using functions from a model. Model must
    have deterministic variables only.

    Wrapper for scipy.optimize.minimize

    Args:
        model (gr.Model): Model to analyze. All model variables must be
            deterministic.
        out_min (str): Output to use as minimization objective.
        out_geq (None OR list of str): Outputs to use as geq constraints; out >= 0
        out_leq (None OR list of str): Outputs to use as leq constraints; out <= 0
        out_eq (None OR list of str): Outputs to use as equality constraints; out == 0
        method (str): Optimization method; see the documentation for
            scipy.optimize.minimize for options.
        tol (float): Optimization objective convergence tolerance
        n_restart (int): Number of restarts; beyond n_restart=1 random
            restarts are used.
        n_maxiter (int): Maximum iterations for each optimization run
        seed (int OR None): Random seed for restarts
        df_start (None or DataFrame): Specific starting values to use;
            overrides n_restart if not None.

    Returns:
        DataFrame: Results of optimization

    Examples::

        import grama as gr
        ## Define a model with objective and constraints
        md = (
            gr.Model("Constrained Rosenbrock")
            >> gr.cp_function(
                fun=lambda x: (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2,
                var=["x", "y"],
                out=["c"],
            )
            >> gr.cp_function(
                fun=lambda x: (x[0] - 1)**3 - x[1] + 1,
                var=["x", "y"],
                out=["g1"],
            )
            >> gr.cp_function(
                fun=lambda x: x[0] + x[1] - 2,
                var=["x", "y"],
                out=["g2"],
            )
            >> gr.cp_bounds(
                x=(-1.5, +1.5),
                y=(-0.5, +2.5),
            )
        )
        ## Run the optimizer
        md >> gr.ev_min(
            out_min="c",
            out_leq=["g1", "g2"]
        )

    """
    ## Common invariant checks
    invariants_eval_model(model)
    invariants_eval_df(df_start, arg_name="df_start", acc_none=True)

    ## Check that model has only deterministic variables
    if model.n_var_rand > 0:
        raise ValueError("model must have no random variables")
    ## Check that objective is in model
    if not (out_min in model.out):
        raise ValueError("model must contain out_min")
    ## Check that constraints are in model
    if not (out_geq is None):
        out_diff = set(out_geq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_geq; missing {}".format(out_diff))
    if not (out_leq is None):
        out_diff = set(out_leq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_leq; missing {}".format(out_diff))
    if not (out_eq is None):
        out_diff = set(out_eq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_eq; missing {}".format(out_diff))

    ## Formulate initial guess
    df_nom = eval_nominal(model, df_det="nom", skip=True)
    if df_start is None:
        df_start = df_nom[model.var]

        if n_restart > 1:
            if not (seed is None):
                setseed(seed)
            ## Collect sweep-able deterministic variables
            var_sweep = list(
                filter(
                    lambda v: isfinite(model.domain.get_width(v))
                    & (model.domain.get_width(v) > 0),
                    model.var_det,
                ))
            ## Generate pseudo-marginals
            dicts_var = {}
            for v in var_sweep:
                dicts_var[v] = {
                    "dist": "uniform",
                    "loc": model.domain.get_bound(v)[0],
                    "scale": model.domain.get_width(v),
                }
            ## Overwrite model
            md_sweep = comp_marginals(model, **dicts_var)
            md_sweep = comp_copula_independence(md_sweep)
            ## Generate random start points
            df_rand = eval_sample(
                md_sweep,
                n=n_restart - 1,
                df_det="nom",
                skip=True,
            )
            df_start = concat((df_start, df_rand[model.var]),
                              axis=0).reset_index(drop=True)
    else:
        n_restart = df_start.shape[0]

    ## Factory for wrapping model's output
    def make_fun(out, sign=+1):
        def fun(x):
            df = DataFrame([x], columns=model.var)
            df_res = eval_df(model, df)
            return sign * df_res[out]

        return fun

    ## Create helper functions for constraints
    constraints = []

    if not (out_geq is None):
        for out in out_geq:
            constraints.append({
                "type": "ineq",
                "fun": make_fun(out),
            })
    if not (out_leq is None):
        for out in out_leq:
            constraints.append({
                "type": "ineq",
                "fun": make_fun(out, sign=-1),
            })
    if not (out_eq is None):
        for out in out_eq:
            constraints.append({
                "type": "eq",
                "fun": make_fun(out),
            })

    ## Parse the bounds for minimize
    bounds = list(map(lambda k: model.domain.bounds[k], model.var))

    ## Run optimization
    df_res = DataFrame()
    for i in range(n_restart):
        x0 = df_start[model.var].iloc[i].values
        res = minimize(
            make_fun(out_min),
            x0,
            args=(),
            method=method,
            jac=False,
            tol=tol,
            options={"maxiter": n_maxiter, "disp": False},
            constraints=constraints,
            bounds=bounds,
        )

        df_opt = df_make(
            **dict(zip(model.var, res.x)),
            **dict(zip(map(lambda s: s + "_0", model.var), x0)),
        )
        df_tmp = eval_df(model, df=df_opt)
        df_tmp["success"] = [res.success]
        df_tmp["message"] = [res.message]
        df_tmp["n_iter"] = [res.nit]

        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    return df_res

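# Illustrative note (not part of the original module): scipy.optimize.minimize
# treats every "ineq" constraint as fun(x) >= 0, so eval_min() passes out_geq
# outputs through unchanged and negates out_leq outputs. A minimal restatement
# of that sign flip, with a hypothetical constraint callable:
def _sketch_leq_to_ineq(fun_leq):
    """Wrap an out <= 0 constraint in the fun(x) >= 0 form scipy expects."""
    return lambda x: -fun_leq(x)
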
def eval_nls(
    model,
    df_data=None,
    out=None,
    var_fix=None,
    df_init=None,
    append=False,
    tol=1e-6,
    ftol=1e-9,
    gtol=1e-5,
    n_maxiter=100,
    n_restart=1,
    n_process=1,
    method="L-BFGS-B",
    seed=None,
    verbose=True,
):
    r"""Estimate with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS).

    Args:
        model (gr.Model): Model to analyze. All model variables selected for
            fitting must be bounded or random. Deterministic variables may
            have semi-infinite bounds.
        df_data (DataFrame): Data for estimating parameters. Variables not
            found in df_data are optimized in fitting.
        out (list or None): Output contributions to consider in computing MSE.
            Assumed to be model.out if left as None.
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame or None): Initial guesses for parameters;
            overrides n_restart
        append (bool): Append metadata? (Initial guess, MSE, optimizer status)
        tol (float): Optimizer convergence tolerance
        ftol (float): Optimizer function-value tolerance
        gtol (float): Optimizer gradient tolerance
        n_maxiter (int): Optimizer maximum iterations
        n_restart (int): Number of restarts; beyond n_restart=1 random
            restarts are used.
        n_process (int): Number of processes (not used by this implementation)
        method (str): Optimization method; see scipy.optimize.minimize
        seed (int OR None): Random seed for restarts
        verbose (bool): Print messages to console?

    Returns:
        DataFrame: Results of estimation

    Examples::

        import grama as gr
        from grama.data import df_trajectory_full
        from grama.models import make_trajectory_linear

        md_trajectory = make_trajectory_linear()

        df_fit = (
            md_trajectory
            >> gr.ev_nls(df_data=df_trajectory_full)
        )

        print(df_fit)

    """
    ## Common invariant checks
    invariants_eval_model(model)
    invariants_eval_df(df_data, arg_name="df_data")
    invariants_eval_df(df_init, arg_name="df_init", acc_none=True)

    ## Check `out` invariants
    if out is None:
        out = model.out
        if verbose:
            print("... eval_nls setting out = {}".format(out))
    set_diff = set(out).difference(set(df_data.columns))
    if len(set_diff) > 0:
        raise ValueError("out must be subset of df_data.columns\n"
                         + "difference = {}".format(set_diff))

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in model.var_det:
        wid = model.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)
    if verbose:
        print("... eval_nls setting var_fix = {}".format(list(var_fix)))
    var_fix = list(var_fix)

    ## Determine variables for evaluation
    var_feat = set(model.var).intersection(set(df_data.columns))
    if verbose:
        print("... eval_nls setting var_feat = {}".format(var_feat))
    var_feat = list(var_feat)

    ## Determine variables for fitting
    var_fit = set(model.var).difference(set(var_fix).union(set(var_feat)))
    if len(var_fit) == 0:
        raise ValueError("No var selected for fitting!\n"
                         + "Try checking model bounds and df_data.columns.")
    var_fit = list(var_fit)

    ## Separate var_fit into det and rand
    var_fit_det = list(set(model.var_det).intersection(var_fit))
    var_fit_rand = list(set(model.var_rand).intersection(var_fit))

    ## Construct bounds, fix var_fit order
    var_fit = var_fit_det + var_fit_rand
    bounds = []
    var_prob = []
    for var in var_fit_det:
        if not isfinite(model.domain.get_nominal(var)):
            var_prob.append(var)
        bounds.append(model.domain.get_bound(var))
    if len(var_prob) > 0:
        raise ValueError(
            "all variables to be fitted must have a finite nominal value\n"
            + "offending var = {}".format(var_prob))
    for var in var_fit_rand:
        bounds.append((
            model.density.marginals[var].q(0),
            model.density.marginals[var].q(1),
        ))

    ## Determine initial guess points
    df_nom = eval_nominal(model, df_det="nom", skip=True)

    ## Use specified initial guess(es)
    if not (df_init is None):
        # Check invariants
        set_diff = list(set(var_fit).difference(set(df_init.columns)))
        if len(set_diff) > 0:
            raise ValueError("var_fit must be subset of df_init.columns\n"
                             + "difference = {}".format(set_diff))
        # Pull n_restart
        n_restart = df_init.shape[0]

    ## Generate initial guess(es)
    else:
        df_init = df_nom[var_fit]

        if n_restart > 1:
            if not (seed is None):
                setseed(seed)
            ## Collect sweep-able deterministic variables
            var_sweep = list(
                filter(
                    lambda v: isfinite(model.domain.get_width(v))
                    & (model.domain.get_width(v) > 0),
                    model.var_det,
                ))
            ## Generate pseudo-marginals
            dicts_var = {}
            for v in var_sweep:
                dicts_var[v] = {
                    "dist": "uniform",
                    "loc": model.domain.get_bound(v)[0],
                    "scale": model.domain.get_width(v),
                }
            ## Overwrite model
            md_sweep = comp_marginals(model, **dicts_var)
            md_sweep = comp_copula_independence(md_sweep)
            ## Generate random start points
            df_rand = eval_sample(
                md_sweep,
                n=n_restart - 1,
                df_det="nom",
                skip=True,
            )
            df_init = concat((df_init, df_rand[var_fit]),
                             axis=0).reset_index(drop=True)

    ## Iterate over initial guesses
    df_res = DataFrame()

    def fun_mp(i):
        x0 = df_init[var_fit].iloc[i].values

        ## Build evaluator
        def objective(x):
            """x = [var_fit]"""
            ## Evaluate model
            df_var = tran_outer(
                df_data[var_feat],
                concat(
                    (df_nom[var_fix].iloc[[0]], df_make(**dict(zip(var_fit, x)))),
                    axis=1,
                ),
            )
            df_tmp = eval_df(model, df=df_var)

            ## Compute joint MSE
            return ((df_tmp[out].values - df_data[out].values)**2).mean()

        ## Run optimization
        res = minimize(
            objective,
            x0,
            args=(),
            method=method,
            jac=False,
            tol=tol,
            options={
                "maxiter": n_maxiter,
                "disp": False,
                "ftol": ftol,
                "gtol": gtol,
            },
            bounds=bounds,
        )

        df_tmp = df_make(
            **dict(zip(var_fit, res.x)),
            **dict(zip(map(lambda s: s + "_0", var_fit), x0)),
        )
        df_tmp["success"] = [res.success]
        df_tmp["message"] = [res.message]
        df_tmp["n_iter"] = [res.nit]
        df_tmp["mse"] = [res.fun]

        return df_tmp

    df_res = DataFrame()
    for i in range(n_restart):
        df_tmp = fun_mp(i)
        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    ## Post-process
    if append:
        return df_res
    return df_res[var_fit]

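# Illustrative sketch (not part of the original module): the NLS objective above
# is the mean squared error between model outputs and observed data, pooled over
# all requested outputs. A stripped-down, hypothetical restatement, where
# `predict` stands in for the model evaluation at parameter vector x:
def _sketch_nls_mse(predict, x, df_data, out):
    """Return the pooled MSE of predictions against df_data[out] for parameters x."""
    df_pred = predict(x)  # assumed to return a DataFrame with columns `out`
    return ((df_pred[out].values - df_data[out].values) ** 2).mean()
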