## Construct model with Gaussian copula if len(var_fix) > 0: md_res = (Model(name) >> cp_function( lambda x: df_nom[var_fix].values, var=set(var_remain).difference(var_fix), out=var_fix, name="Fix variable levels", ) >> cp_md_det(md=md) >> cp_marginals(**marginals) >> cp_copula_gaussian(df_corr=df_corr)) else: md_res = (Model(name) >> cp_md_det(md=md) >> cp_marginals( **marginals) >> cp_copula_gaussian(df_corr=df_corr)) ## Return deterministic model elif uq_method is None: md_res = (Model(name) >> cp_function( lambda x: df_best[var_fitted].values, var=var_remain, out=var_fitted, name="Fix variable levels", ) >> cp_md_det(md=md)) else: raise ValueError( "uq_method option {} not recognized".format(uq_method)) return md_res ft_nls = add_pipe(fit_nls)
### Add index column to dataset longer = df.reset_index().melt(id_vars="index", var_name=names_to, value_vars=columns, value_name=values_to) ### rename index column to desired: index_to longer.rename(columns={'index': index_to}, inplace=True) longer = index_to_cleanup(df, longer, data_index) return longer ###################################### tf_pivot_longer = add_pipe(tran_pivot_longer) def tran_pivot_wider( df, #id_cols, names_from, indexes_from=None, #names_prefix, #names_sep, #names_glue = None, #names_sort = False, #names_glue, values_from=None, #values_fill = None, #values_fn = None
df_res = concat((df_res, df_tmp), axis=0) else: warn("Output {0:} had no contours at level {1:}".format( o, t, )) ## Remove dummy column, if present if "_foo" in df_res.columns: df_res.drop("_foo", axis=1, inplace=True) # Drop index df_res = df_res.reset_index(drop=True) ## Attach metadata with catch_warnings(): simplefilter("ignore") df_res._plot_info = { "type": "contour", "var": var, "out": "out", "level": "level", "aux": has_aux, } ## Return the results return df_res ev_contour = add_pipe(eval_contour)
"Deterministic variables of md_base and md_var must match:\n" + "md_base is missing: {}\n".format(var_new.difference(var_base)) + "md_new is missing: {}".format(var_base.difference(var_new))) # Check that `weights` name does not collide if (var_weight in df_base.columns) and append: raise ValueError( "Weight name {} already in df_base.columns; ".format(var_weight) + "choose a new name.") ## Compute weight values # Use base model for importance distribution q = md_base.density.d(df_base) # Use new model for nominal distribution p = md_new.density.d(df_base) # Compute likelihood ratio w = p / q ## Return results df_res = DataFrame({var_weight: w}) if append: df_res = concat( [df_base.reset_index(drop=True), df_res], axis=1, ) return df_res tf_reweight = add_pipe(tran_reweight)
## Apply df_res = eval_df(model, df=df_samp, append=append) ## For autoplot with catch_warnings(): simplefilter("ignore") df_res._plot_info = { "type": "sinew_outputs", "var": model.var_rand, "out": model.out, } return df_res ev_sinews = add_pipe(eval_sinews) ## Hybrid points for Sobol' indices # -------------------------------------------------- @curry def eval_hybrid( model, n=1, plan="first", df_det=None, varname="hybrid_var", seed=None, append=True, skip=False, ):
raise ValueError("out must be subset of df.columns") ## Default input value if var is None: var = list(set(df.columns).difference(set(out))) ## Check more invariants set_inter = set(out).intersection(set(var)) if len(set_inter) > 0: raise ValueError( "outputs and inputs must be disjoint; intersect = {}".format( set_inter)) if not set(var).issubset(set(df.columns)): raise ValueError("var must be subset of df.columns") ## Construct gaussian process for each output functions = [] for output in out: rf = RandomForestRegressor(**kwargs) set_seed(seed) rf.fit(df[var].values, df[output].values) name = "RF" fun = FunctionRFR(rf, var, [output], name, 0, return_std) functions.append(fun) ## Construct model return gr.Model(functions=functions, domain=domain, density=density) ft_lolo = add_pipe(fit_lolo)
"-2": "blue", "-1": "darkturquoise", "0": "black", "+1": "salmon", "+2": "red" }, ) + scale_shape_manual( name="Patterns", values={ "Below Limit": "s", "Above Limit": "s", "Low Run": "X", "High Run": "X", "Increasing Run": "^", "Decreasing Run": "v", "None": "." }, ) + scale_linetype_manual( name="Guideline", values=dict(LCL="dashed", UCL="dashed", center="solid"), ) + guides(color=None) + facet_grid( "_var~.", scales="free_y", labeller=labeller(dict(X="Mean", S="Variability")), ) + labs( x="Group variable ({})".format(group), y="Value ({})".format(var), )) pt_xbs = add_pipe(plot_xbs)
    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Notes:
        - Wrapper on pyDOE.lhs

    """
    ## Set seed only if given
    # NOTE(review): set_seed mutates global RNG state — affects other samplers
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    # Warn-by-print and truncate rather than raise on a non-integral n
    if not isinstance(n, Integral):
        print("eval_lhs() is rounding n...")
        n = int(n)

    ## Draw samples
    # Latin Hypercube sample in quantile (probability) space, one column
    # per random variable of the model
    df_quant = DataFrame(data=lhs(model.n_var_rand, samples=n),
                         columns=model.var_rand)
    ## Convert samples to desired marginals
    # Push quantiles through the model's marginal distributions
    df_rand = model.density.pr2sample(df_quant)
    ## Construct outer-product DOE
    # Cross the random sample against the deterministic levels
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        # Return the design without evaluating the model
        return df_samp
    else:
        return gr.eval_df(model, df=df_samp, append=append)


ev_lhs = add_pipe(eval_lhs)
## Featurize try: featurizer = ElementProperty.from_preset(preset_name=preset_name) except NameError as e: error_string = str(e) raise NameError( error_string + "\n\nThis function requires the `matminer` package. " + "Try running the following to install the package:\n" " pip install matminer" ) df_res = StrToComposition().featurize_dataframe( df[[var_formula]], var_formula, ignore_errors=ignore_errors, ) df_res = featurizer.featurize_dataframe( df_res, col_id="composition", ignore_errors=ignore_errors, **kwargs, ) df_res.drop(columns=[var_formula, "composition"], inplace=True) ## Concatenate as necessary if append: df_res = concat((df, df_res), axis=1) return df_res tf_feat_composition = add_pipe(tran_feat_composition)
    df_res = model.evaluate_df(df)

    if append:
        # Drop any model-output columns already present in the input
        # (errors="ignore" tolerates their absence), then append the
        # fresh results so output columns are not duplicated
        df_res = concat(
            [
                df.reset_index(drop=True).drop(
                    model.out, axis=1, errors="ignore"),
                df_res,
            ],
            axis=1,
        )

    return df_res


ev_df = add_pipe(eval_df)


## Nominal evaluation
# --------------------------------------------------
@curry
def eval_nominal(model, df_det=None, append=True, skip=False):
    r"""Evaluate model at nominal values

    Evaluates a given model at a model nominal conditions (median).

    Args:
        model (gr.Model): Model to evaluate
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        append (bool): Append results to nominal inputs?
>>> from grama.models import make_cantilever_beam >>> md = make_cantilever_beam() >>> md >> \ >>> gr.ev_monte_carlo(n=100, df_det="nom", skip=True) >> \ >>> gr.pt_scattermat(var=md.var) >>> plt.show() """ if var is None: raise ValueError("Must provide input columns list as keyword var") ## Plot return pairplot(data=df, vars=var) pt_scattermat = add_pipe(plot_scattermat) @curry def plot_hists(df, out=None): r"""Construct histograms Create a set of histograms. Often used to visualize the results of random sampling for multiple outputs. Args: out (list of strings): Variables to plot Returns: Seaborn histogram plot
fun = FunctionGPR(gpr, var, [output], name, 0, var_min, var_max) functions.append(fun) except NameError as e: error_string = str(e) raise NameError(error_string + "\n\nThis function requires the `sklearn` package. " + "Try running the following to install the package:\n" " pip install scikit-learn") ## Construct model return Model(functions=functions, domain=domain, density=density) ft_gp = add_pipe(fit_gp) ## Fit random forest model with sklearn # -------------------------------------------------- @curry def fit_rf(df, md=None, var=None, out=None, domain=None, density=None, seed=None, suppress_warnings=True, **kwargs): r"""Fit a random forest
""" n_obs, n_in = df.shape ## Parse formulae for output names n_out = len(formulae) outputs = [""] * n_out for ind in range(n_out): ind_start = formulae[ind].find("~") outputs[ind] = formulae[ind][:ind_start].strip() ## Construct fits fits = [] for ind in range(n_out): fits.append(smf.ols(formulae[ind], data=df).fit()) def fit_all(df_new): n_obs_new, _ = df_new.shape result = zeros((n_obs_new, n_out)) for ind in range(n_out): result[:, ind] = fits[ind].predict(df_new) return DataFrame(data=result, columns=outputs) ## Construct model return gr.model_vectorized(function=fit_all, outputs=outputs, domain=domain, density=density) ft_ols = add_pipe(fit_ols)
        # Attach optimizer diagnostics for this restart
        df_tmp["n_iter"] = [res.nit]
        df_tmp["mse"] = [res.fun]

        return df_tmp

    # Run every restart and stack the per-restart results row-wise
    df_res = DataFrame()
    for i in range(n_restart):
        df_tmp = fun_mp(i)
        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    ## Post-process
    if append:
        # Keep the diagnostic columns (n_iter, mse) with the fitted values
        return df_res
    # Otherwise return only the fitted-variable columns
    return df_res[var_fit]


ev_nls = add_pipe(eval_nls)


## Minimize
# --------------------------------------------------
@curry
def eval_min(
    model,
    out_min=None,
    out_geq=None,
    out_leq=None,
    out_eq=None,
    method="SLSQP",
    tol=1e-6,
    n_restart=1,
    n_maxiter=50,
tf(**summaries_all) # >> tf_mutate(_kfold=i) ) if var_fold is None: df_summary_tmp = df_summary_tmp >> tf_mutate(_kfold=i) else: df_summary_tmp[var_fold] = levels[i] df_res = concat((df_res, df_summary_tmp), axis=0).reset_index(drop=True) return df_res tf_kfolds = add_pipe(tran_kfolds) ## Bootstrap utility # -------------------------------------------------- @curry def tran_bootstrap(df, tran=None, n_boot=500, n_sub=25, con=0.90, col_sel=None, seed=None): r"""Estimate bootstrap confidence intervals Estimate bootstrap confidence intervals for a given transform. Uses the
    # Separate the index-type label column from the numeric output columns
    outputs = df_res.drop(typename, axis=1).columns
    # NOTE(review): `round(..., decimals=digits)` requires numpy.round;
    # the builtin round has no `decimals` keyword — confirm the import
    df_res[outputs] = df_res[outputs].apply(lambda row: round(row, decimals=digits))
    df_res.sort_values(typename, inplace=True)

    ## Filter, if necessary
    if not full:
        # Keep only normalized indices: rows whose type label starts with "S"
        I_normalized = list(map(lambda s: s[0] == "S", df_res[typename]))
        df_res = df_res[I_normalized]

    ## Fill NaN's
    df_res.fillna(value=0, inplace=True)

    return df_res


tf_sobol = add_pipe(tran_sobol)

## Linear algebra tools
##################################################

## Principal Component Analysis (PCA)
@curry
def tran_pca(df, var=None, lamvar="lam", standardize=False):
    r"""Principal Component Analysis

    Compute principal directions and eigenvalues for a dataset. Can specify
    columns to analyze, or just analyze all numerical columns.

    Args:
        df (DataFrame): Data to analyze
        var (list of str or None): List of columns to analyze
        lamvar (str): Name to give eigenvalue column; default="lam"
poset = powerset(set(range(n)).difference({j})) data = zeros((s, len(out))) df_tmp = DataFrame(columns=out, data=data) for p in poset: den = n * comb(n - 1, len(p)) for t in range(s): if t in inds: t1 = cohort_mean(t, list(set(p).union({j}))) t0 = cohort_mean(t, p) df_tmp.iloc[t] = df_tmp.iloc[t] + (t1 - t0).loc[0] / den else: df_tmp.iloc[t] = NaN return df_tmp ## Compute cohort shapley over all variables df_res = DataFrame() for j in range(n): df_tmp = cohort_shapley(j) df_tmp.columns = [df_tmp.columns[i] + "_" + var[j] for i in range(len(out))] df_res = concat((df_res, df_tmp), axis=1) return df_res tf_shapley_cohort = add_pipe(tran_shapley_cohort)
## Concatenate as necessary if keep: df_res = concat( (df_res.reset_index(drop=True), df[var_leftover].reset_index(drop=True)), axis=1, ) if append: df_res = concat( (df_res.reset_index(drop=True), df[var].reset_index(drop=True)), axis=1) return df_res tf_tsne = add_pipe(tran_tsne) # -------------------------------------------------- @curry def tran_poly(df, degree=None, var=None, keep=True, **kwargs): r"""Compute polynomial features of a dataset Compute polynomial features of a dataset. Args: df (DataFrame): Hybrid point results from gr.eval_hybrid() Kwargs: degree (int): Maximum degree of polynomial features var (list or None): Variables in df on which to perform dimension reduction.
return df_samp else: df_res = gr.eval_df(model, df=df_samp, append=append) ## Attach metadata with warnings.catch_warnings(): warnings.simplefilter("ignore") df_res._plot_info = { "type": "monte_carlo_outputs", "out": model.out } return df_res ev_monte_carlo = add_pipe(eval_monte_carlo) ## Marginal sweeps with random origins # -------------------------------------------------- @curry def eval_sinews( model, n_density=10, n_sweeps=3, seed=None, df_det=None, varname="sweep_var", indname="sweep_ind", append=True, skip=False,
), axis=1, sort=False, ) df_inner[key] = [fun_star] df_return = concat((df_return, df_inner), axis=0, sort=False) if not append: df_return = (df_return.groupby(model.var_det).agg( {s: max for s in betas.keys()}).reset_index()) return df_return ev_form_pma = add_pipe(eval_form_pma) @curry def eval_form_ria( model, limits=None, cons=None, df_corr=None, df_det=None, append=True, tol=1e-3, n_maxiter=25, n_restart=1, verbose=False, ):
# Extract values Y = df[var].values if standardize: Y_mean = Y.mean(axis=0) Y_sd = Y.std(axis=0) Y = (Y - Y_mean) / Y_sd # Generate initial proposal points X0 = _perturbed_choice(Y, n) ## Run sp.ccp algorithm X, d, iter_c = _sp_cpp(X0, Y, delta=tol, iter_max=n_maxiter) if verbose: print( "tran_sp finished in {0:} iterations with distance criterion {1:4.3e}" .format(iter_c, d)) if d > tol: warn( "Convergence tolerance not met; d = {0:4.3e} > tol = {1:4.3e}". format(d, tol), RuntimeWarning, ) if standardize: X = X * Y_sd + Y_mean ## Package results return DataFrame(data=X, columns=var) tf_sp = add_pipe(tran_sp)
""" model_new = model.copy() ## Dispatch to core builder for consistent behavior fun, var, out, name, runtime = _comp_function_data(model, fun, var, out, name, runtime) ## Add new function model_new.functions.append(gr.Function(fun, var, out, name, runtime)) model_new.update() return model_new cp_function = add_pipe(comp_function) # Add vectorized function # ------------------------- @curry def comp_vec_function(model, fun=None, var=None, out=None, name=None, runtime=0): r"""Add a vectorized function to a model Composition. Add a function to an existing model. Function must be vectorized over DataFrames, and must add new columns matching its `out`
seed=seed) ### Package outputs df_pnd = DataFrame({ "pr_scores": pr_scores, "var_values": var_values, }) if append: return df_test.reset_index(drop=True).merge(df_pnd, left_index=True, right_index=True) return df_pnd ev_pnd = add_pipe(eval_pnd) # Relative Pareto frontier calculation def pareto_min_rel(X_test, X_base=None): r"""Determine if rows in X_test are optimal, compared to X_base Finds the Pareto-efficient test-points that minimize the column values, relative to a given set of base-points. Args: X_test (2d numpy array): Test point observations; rows are observations, columns are features X_base (2d numpy array): Base point observations; rows are observations, columns are features Returns: array of boolean values: Indicates if test observation is Pareto-efficient, relative to base points
try: df_res = DataFrame( data=UMAP(n_components=n_dim, random_state=seed, **kwargs).fit_transform(df[var].values), columns=[out + "{}".format(i) for i in range(n_dim)], ) except NameError as e: error_string = str(e) raise NameError(error_string + "\n\nThis function requires the `umap` package. " + "Try running the following to install the package:\n" " pip install umap-learn") ## Concatenate as necessary if keep: df_res = concat( (df_res.reset_index(drop=True), df[var_leftover].reset_index(drop=True)), axis=1, ) if append: df_res = concat( (df_res.reset_index(drop=True), df[var].reset_index(drop=True)), axis=1) return df_res tf_umap = add_pipe(tran_umap)
Returns: gr.model: Metamodel """ ## Extract model information inputs = model.domain.inputs outputs = model.outputs ## Assign default arguments if ev is None: ev = gr.eval_lhs if ft is None: # Linear features for each output sum_inputs = "+".join(inputs) formulae = list(map(lambda output: output + "~" + sum_inputs, outputs)) ft = lambda df: gr.fit_ols( df, formulae=formulae, domain=model.domain, density=model.density) ## Generate data df_results = ev(model, n_samples=n, seed=seed) ## Fit a model model = ft(df_results) return model cp_metamodel = add_pipe(comp_metamodel)
        df >> ggplot() + geom_segment(
            aes(
                var[0],
                var[1],
                xend=var[0]+"_end",
                yend=var[1]+"_end",
                linetype=out,
                color=level,
            )
        )
    )


pt_contour = add_pipe(plot_contour)

## tran_iocorr
# --------------------------------------------------
@curry
def plot_corrtile(df, var=None, out=None, corr=None):
    r"""Plot correlations as a tile plot

    Visualize a long-format correlation table as a colored tile plot;
    presumably intended for the results of tran_iocorr (per the section
    comment above) — TODO confirm against callers.

    Args:
        df (DataFrame): Long-format correlation data
        var (str): Column of df mapped to the x axis (input labels)
        out (str): Column of df mapped to the y axis (output labels)
        corr (str): Column of df mapped to tile fill (correlation values)

    Returns:
        ggplot: Tile plot of correlation values
    """
    return (
        df >> ggplot(aes(var, out)) + geom_tile(aes(fill=corr))
        # Diverging fill scale centered at zero correlation
        + scale_fill_gradient2(name="Corr", midpoint=0)
        # Rotate x labels so long input names remain legible
        + theme(axis_text_x=element_text(angle=270))
    )