Example 1
def eval_contour(
    model,
    var=None,
    out=None,
    df=None,
    levels=None,
    n_side=20,
    n_levels=5,
):
    r"""Generate contours from a model

    Generates contours from a model. Evaluates the model on a dense grid, then runs marching squares to generate contours. Supports targeting multiple outputs and handling auxiliary inputs not included in the contour map.

    Args:
        model (gr.Model): Model to evaluate.
        var (list of str): Model inputs to target; must provide exactly two inputs, and both must have finite domain width.
        out (list of str): Model output(s) for contour generation.
        df (DataFrame or None): Levels for model variables not included in var
            (auxiliary inputs). If var and model.var contain the same values,
            then df may be None.
        levels (dict): Specific output levels for contour generation; overrides n_levels.
        n_side (int): Side resolution for grid; n_side**2 total evaluations.
        n_levels (int): Number of contour levels.

    Returns:
        DataFrame: Points along contours, organized by output and auxiliary variable levels.

    Examples::

        import grama as gr
        ## Multiple outputs
        (
            gr.Model()
            >> gr.cp_vec_function(
                fun=lambda df: gr.df_make(
                    f=df.x**2 + df.y**2,
                    g=df.x + df.y,
                ),
                var=["x", "y"],
                out=["f", "g"],
            )
            >> gr.cp_bounds(
                x=(-1, +1),
                y=(-1, +1),
            )
            >> gr.ev_contour(
                var=["x", "y"],
                out=["f", "g"],
            )
            # Contours with no auxiliary variables can autoplot
            >> gr.pt_auto()
        )

        ## Auxiliary inputs
        (
            gr.Model()
            >> gr.cp_vec_function(
                fun=lambda df: gr.df_make(
                    f=df.c * df.x + (1 - df.c) * df.y,
                ),
                var=["x", "y"],
                out=["f", "g"],
            )
            >> gr.cp_bounds(
                x=(-1, +1),
                y=(-1, +1),
            )
            >> gr.ev_contour(
                var=["x", "y"],
                out=["f"],
                df=gr.df_make(c=[0, 1])
            )

            # Contours with auxiliary variables should be manually plotted
            >> gr.ggplot(gr.aes("x", "y"))
            + gr.geom_segment(gr.aes(xend="x_end", yend="y_end", group="level", color="c"))
        )
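
        ## Specific contour levels (a hedged sketch): per the `levels` argument,
        ## pass a dict mapping each targeted output to its desired level values;
        ## this overrides n_levels. The model below is illustrative.
        (
            gr.Model()
            >> gr.cp_vec_function(
                fun=lambda df: gr.df_make(f=df.x**2 + df.y**2),
                var=["x", "y"],
                out=["f"],
            )
            >> gr.cp_bounds(x=(-1, +1), y=(-1, +1))
            >> gr.ev_contour(
                var=["x", "y"],
                out=["f"],
                levels=dict(f=[0.5, 1.0]),
            )
            >> gr.pt_auto()
        )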

    """
    ## Check invariants
    invariants_eval_model(model)
    invariants_eval_df(df, acc_none=True)
    # Argument given
    if var is None:
        raise ValueError("No `var` given")
    # Correct number of inputs
    if len(var) != 2:
        raise ValueError("Must provide exactly 2 inputs in `var`.")
    # Inputs available
    var_diff = set(var).difference(set(model.var))
    if len(var_diff) > 0:
        raise ValueError(
            "`var` must be a subset of model.var; missing: {}".format(
                var_diff))
    # All inputs supported
    var_diff = set(model.var).difference(set(var))
    if len(var_diff) > 0:
        if df is None:
            raise ValueError(
                "Must provide values for remaining model variables using df; "
                + "missing values: {}".format(var_diff))
        # Drop the swept variables
        df = df.drop(columns=var, errors="ignore")

        # Check for unsupported inputs
        var_diff2 = var_diff.difference(set(df.columns))
        if len(var_diff2) > 0:
            raise ValueError(
                "All model variables need values in provided df; " +
                "missing values: {}".format(var_diff2))

        if df.shape[0] > 1:
            has_aux = True
        else:
            has_aux = False
    else:
        has_aux = False

    # Finite bound width
    if not all(
        isfinite(model.domain.get_width(v)) and (model.domain.get_width(v) > 0)
        for v in var
    ):
        raise ValueError(
            "All model bounds for `var` must be finite and nonzero")

    # Argument given
    if out is None:
        raise ValueError("No `out` given")
    # Outputs available
    out_diff = set(out).difference(set(model.out))
    if len(out_diff) > 0:
        raise ValueError(
            "`out` must be a subset of model.out; missing: {}".format(
                out_diff))

    ## Generate data
    xv = linspace(*model.domain.get_bound(var[0]), n_side)
    yv = linspace(*model.domain.get_bound(var[1]), n_side)
    df_x = DataFrame({var[0]: xv})
    df_y = DataFrame({var[1]: yv})
    df_input = (df_x >> tf_outer(df_outer=df_y))
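    # df_input is the dense evaluation grid: the outer product of the two
    # swept variables, n_side**2 points in total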

    # Create singleton level if necessary
    if df is None:
        df = DataFrame({"_foo": [0]})

    ## Loop over provided auxiliary levels
    df_res = DataFrame()
    for i in range(df.shape[0]):
        df_in_tmp = (df_input >> tf_outer(df_outer=df.iloc[[i]]))
        df_out = eval_df(
            model,
            df=df_in_tmp,
        )

        ## Set output threshold levels
        if levels is None:
            # Do not overwrite `levels`, to adapt per loop
            levels_wk = dict(
                zip(out, [
                    linspace(df_out[o].min(), df_out[o].max(),
                             n_levels + 2)[1:-1] for o in out
                ]))
        else:
            levels_wk = levels

        ## Run marching squares
        # Output quantity
        for o in out:
            # Reshape data
            Data = reshape(df_out[o].values, (n_side, n_side))
            # Threshold level
            for t in levels_wk[o]:
                # Run marching squares
                segments = marching_square(xv, yv, Data, t)
                sqdata = array(segments).squeeze()

                if len(sqdata) > 0:
                    # Package
                    df_tmp = DataFrame(
                        data=sqdata,
                        columns=[
                            var[0], var[1], var[0] + "_end", var[1] + "_end"
                        ],
                    )
                    df_tmp["out"] = [o] * df_tmp.shape[0]
                    df_tmp["level"] = [t] * df_tmp.shape[0]
                    df_tmp = (df_tmp >> tf_outer(df_outer=df.iloc[[i]]))

                    df_res = concat((df_res, df_tmp), axis=0)
                else:
                    warn("Output {0:} had no contours at level {1:}".format(
                        o,
                        t,
                    ))

    ## Remove dummy column, if present
    if "_foo" in df_res.columns:
        df_res.drop("_foo", axis=1, inplace=True)

    # Drop index
    df_res = df_res.reset_index(drop=True)

    ## Attach metadata
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "contour",
            "var": var,
            "out": "out",
            "level": "level",
            "aux": has_aux,
        }

    ## Return the results
    return df_res
Example 2
def eval_form_ria(
    model,
    limits=None,
    cons=None,
    df_corr=None,
    df_det=None,
    append=True,
    tol=1e-3,
    n_maxiter=25,
    n_restart=1,
    verbose=False,
):
    r"""Tail reliability via FORM RIA

    Approximate the desired tail probability using the reliability index approach (RIA) of the first-order reliability method (FORM) [1]. Select limit states to analyze with list input `limits`. Provide confidence levels `cons` and estimator covariance `df_corr` to compute with margin in beta [2].

    Note that the reliability index approach (RIA) is generally less stable than the performance measure approach (PMA). Consider using ``gr.eval_form_pma()`` instead, particularly when using FORM to optimize a design.

    Args:
        model (gr.Model): Model to analyze
        limits (list): Target limit states; must be in model.out; limit state assumed to be critical at g == 0.
        cons (dict or None): Target confidence levels;
            key   = limit state name; must be in model.out
            value = confidence level, \in (0, 1)
        df_corr (DataFrame or None): Sampling distribution covariance entries; parameters with no information assumed to be known exactly.
        df_det (DataFrame): Deterministic levels for evaluation; use "nom" for nominal deterministic levels.
        tol (float): Optimization convergence tolerance
        n_maxiter (int): Maximum iterations for each optimization run
        n_restart (int): Number of restarts (== number of optimization runs)
        append (bool): Append MPP results for random values?
        verbose (bool): Print optimization results?

    Returns:
        DataFrame: Results of MPP search

    Notes:
        Since FORM RIA relies on optimization over the limit state, it is often beneficial to scale your limit state to keep values near unity.

    References:
        [1] Tu, Choi, and Park, "A new study on reliability-based design optimization," Journal of Mechanical Design, 1999
        [2] del Rosario, Fenrich, and Iaccarino, "Fast precision margin with the first-order reliability method," AIAA Journal, 2019

    Examples::

        import grama as gr
        from grama.models import make_cantilever_beam
        md_beam = make_cantilever_beam()
        ## Evaluate the reliability of specified designs
        (
            md_beam
            >> gr.ev_form_ria(
                # Specify limit states to analyze
                limits=("g_stress", "g_disp"),
                # Analyze three different thicknesses
                df_det=gr.df_make(t=[2, 3, 4], w=3)
            )
        )
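
        ## Interpret the resulting reliability index (a hedged sketch; assumes
        ## scipy is available): FORM estimates POF ~= Phi(-beta)
        from scipy.stats import norm
        beta = 3.0                 # e.g. a value from the beta_* output columns
        pof_est = norm.cdf(-beta)  # first-order probability-of-failure estimate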

    """
    ## Check invariants
    invariants_eval_model(model)
    invariants_eval_df(df_corr, arg_name="df_corr", acc_none=True)
    invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"])
    if limits is None:
        raise ValueError(
            "Must provide `limits` keyword argument to define reliability targets"
        )
    if not set(limits).issubset(set(model.out)):
        raise ValueError("`limits` must be subset of model.out")
    if not cons is None:
        if not (set(cons.keys()) == set(limits)):
            raise ValueError("cons.keys() must be same as limits")
        else:
            if df_corr is None:
                raise ValueError("Must provide df_corr is using cons")
        raise NotImplementedError

    df_det = model.var_outer(
        DataFrame(data=zeros((1, model.n_var_rand)), columns=model.var_rand),
        df_det=df_det,
    )
    df_det = df_det[model.var_det]

    # df_return = DataFrame(columns=model.var_rand + model.var_det + limits)
    df_return = DataFrame()
    for ind in range(df_det.shape[0]):
        ## Loop over objectives
        for key in limits:
            ## Temp dataframe
            df_inner = df_det.iloc[[ind]].reset_index(drop=True)

            ## Construct lambdas
            def fun_jac(z):
                ## Squared reliability index
                fun = z.dot(z)
                jac = 2 * z

                return (fun, jac)

            def con_limit(z):
                ## Transform: standard normal-to-random variable
                df_norm = DataFrame(data=[z], columns=model.var_rand)
                df_rand = model.norm2rand(df_norm)
                df = model.var_outer(df_rand, df_det=df_inner)

                ## Eval limit state
                df_res = eval_df(model, df=df)
                g = df_res[key].iloc[0]

                return g

            ## Use conservative direction for initial guess
            signs = array(
                [model.density.marginals[k].sign for k in model.var_rand])
            if length(signs) > 0:
                z0 = signs / length(signs)
            else:
                z0 = ones(model.n_var_rand) / sqrt(model.n_var_rand)

            ## Minimize
            res_all = []
            for jnd in range(n_restart):
                res = minimize(
                    fun_jac,
                    z0,
                    args=(),
                    method="SLSQP",
                    jac=True,
                    tol=tol,
                    options={
                        "maxiter": n_maxiter,
                        "disp": False
                    },
                    constraints=[{
                        "type": "eq",
                        "fun": con_limit
                    }],
                )
                # Append only a successful result
                if res["status"] == 0:
                    res_all.append(res)
                # Set a random start; repeat
                z0 = multivariate_normal([0] * model.n_var_rand,
                                         eye(model.n_var_rand))
                z0 = z0 / length(z0)

            # Choose value among restarts
            n_iter_total = sum([res_all[i].nit for i in range(len(res_all))])
            if len(res_all) > 0:
                i_star = argmin([res.fun for res in res_all])
                x_star = res_all[i_star].x
                fun_star = sqrt(res_all[i_star].fun)
                if verbose:
                    print("out = {}: Optimization successful".format(key))
                    print("n_iter = {}".format(res_all[i_star].nit))
                    print("n_iter_total = {}".format(n_iter_total))
            else:
                ## WARNING
                x_star = [NaN] * model.n_var_rand
                fun_star = NaN
                if verbose:
                    print("out = {}: Optimization unsuccessful".format(key))
                    print("n_iter = {}".format(res_all[i_star].nit))
                    print("n_iter_total = {}".format(n_iter_total))

            ## Extract results
            if append:
                df_inner = concat(
                    (
                        df_inner,
                        model.norm2rand(
                            DataFrame(data=[x_star], columns=model.var_rand)),
                    ),
                    axis=1,
                    sort=False,
                )
            df_inner["beta_" + key] = [fun_star]
            df_return = concat((df_return, df_inner), axis=0, sort=False)

    if not append:
        df_return = (
            df_return.groupby(model.var_det) \
                     .agg({"beta_" + s: max for s in limits}).reset_index()
        )

    return df_return
Example 3
def eval_form_pma(
    model,
    betas=None,
    cons=None,
    df_corr=None,
    df_det=None,
    append=True,
    tol=1e-3,
    n_maxiter=25,
    n_restart=1,
    verbose=False,
):
    r"""Tail quantile via FORM PMA

    Approximate the desired tail quantiles using the performance measure approach (PMA) of the first-order reliability method (FORM) [1]. Select limit states to minimize at desired quantile with `betas`. Provide confidence levels `cons` and estimator covariance `df_corr` to compute with margin in beta [2].

    Note that under the performance measure approach, the optimized limit state value `g` is sought to be non-negative $g \geq 0$. This is usually included as a constraint in optimization, which can be accomplished by using ``gr.eval_form_pma()`` *within* a model definition---see the Examples below for more details.

    Args:
        model (gr.Model): Model to analyze
        betas (dict): Target reliability indices;
            key   = limit state name; must be in model.out
            value = reliability index; beta = Phi^{-1}(reliability)
        cons (dict or None): Target confidence levels;
            key   = limit state name; must be in model.out
            value = confidence level, \in (0, 1)
        df_corr (DataFrame or None): Sampling distribution covariance entries; parameters with no information assumed to be known exactly.
        df_det (DataFrame): Deterministic levels for evaluation; use "nom" for nominal deterministic levels.
        tol (float): Optimization convergence tolerance
        n_maxiter (int): Maximum iterations for each optimization run
        n_restart (int): Number of restarts (== number of optimization runs)
        append (bool): Append MPP results for random values?
        verbose (bool): Print optimization results?

    Returns:
        DataFrame: Results of MPP search

    Notes:
        Since FORM PMA relies on optimization over the limit state, it is often beneficial to scale your limit state to keep values near unity.

    References:
        [1] Tu, Choi, and Park, "A new study on reliability-based design optimization," Journal of Mechanical Design, 1999
        [2] del Rosario, Fenrich, and Iaccarino, "Fast precision margin with the first-order reliability method," AIAA Journal, 2019

    Examples::

        import grama as gr
        from grama.models import make_cantilever_beam
        md_beam = make_cantilever_beam()
        ## Evaluate the reliability of specified designs
        (
            md_beam
            >> gr.ev_form_pma(
                # Specify target reliability
                betas=dict(g_stress=3, g_disp=3),
                # Analyze three different thicknesses
                df_det=gr.df_make(t=[2, 3, 4], w=3)
            )
        )

        ## Build a nested model for optimization under uncertainty
        md_opt = (
            gr.Model("Beam Optimization")
            >> gr.cp_vec_function(
                fun=lambda df: gr.df_make(c_area=df.w * df.t),
                var=["w", "t"],
                out=["c_area"],
                name="Area objective",
            )
            >> gr.cp_vec_function(
                fun=lambda df: gr.eval_form_pma(
                    md_beam,
                    betas=dict(g_stress=3, g_disp=3),
                    df_det=df,
                    append=False,
                ),
                var=["w", "t"],
                out=["g_stress", "g_disp"],
                name="Reliability constraints",
            )
            >> gr.cp_bounds(w=(2, 4), t=(2, 4))
        )
        # Run the optimization
        (
            md_opt
            >> gr.ev_min(
                out_min="c_area",
                out_geq=["g_stress", "g_disp"],
            )
        )
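
        ## Choose `betas` from a target reliability (a hedged sketch; assumes
        ## scipy is available): beta = Phi^{-1}(reliability)
        from scipy.stats import norm
        beta_target = float(norm.ppf(0.999))  # about 3.09 for 99.9% reliability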

    """
    ## Check invariants
    invariants_eval_model(model)
    invariants_eval_df(df_corr, arg_name="df_corr", acc_none=True)
    invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"])
    if betas is None:
        raise ValueError(
            "Must provide `betas` keyword argument to define reliability targets"
        )
    if not set(betas.keys()).issubset(set(model.out)):
        raise ValueError("betas.keys() must be subset of model.out")
    if not cons is None:
        if not (set(cons.keys()) == set(betas.keys())):
            raise ValueError("cons.keys() must be same as betas.keys()")
        else:
            if df_corr is None:
                raise ValueError("Must provide df_corr is using cons")
        raise NotImplementedError

    df_det = model.var_outer(
        DataFrame(data=zeros((1, model.n_var_rand)), columns=model.var_rand),
        df_det=df_det,
    )
    df_det = df_det[model.var_det]

    df_return = DataFrame()
    for ind in range(df_det.shape[0]):
        ## Loop over objectives
        for key in betas.keys():
            ## Temp dataframe
            df_inner = df_det.iloc[[ind]].reset_index(drop=True)

            ## Construct lambdas
            def objective(z):
                ## Transform: standard normal-to-random variable
                df_norm = DataFrame(data=[z], columns=model.var_rand)
                df_rand = model.norm2rand(df_norm)
                df = model.var_outer(df_rand, df_det=df_inner)

                df_res = eval_df(model, df=df)
                g = df_res[key].iloc[0]

                # return (g, jac)
                return g

            def con_beta(z):
                return z.dot(z) - (betas[key])**2

            ## Use conservative direction for initial guess
            signs = array(
                [model.density.marginals[k].sign for k in model.var_rand])
            if length(signs) > 0:
                z0 = betas[key] * signs / length(signs)
            else:
                z0 = betas[key] * ones(model.n_var_rand) / sqrt(
                    model.n_var_rand)

            ## Minimize
            res_all = []
            for jnd in range(n_restart):
                res = minimize(
                    objective,
                    z0,
                    args=(),
                    method="SLSQP",
                    jac=False,
                    tol=tol,
                    options={
                        "maxiter": n_maxiter,
                        "disp": False
                    },
                    constraints=[{
                        "type": "eq",
                        "fun": con_beta
                    }],
                )
                # Append only a successful result
                if res["status"] == 0:
                    res_all.append(res)
                # Set a random start; repeat
                z0 = multivariate_normal([0] * model.n_var_rand,
                                         eye(model.n_var_rand))
                z0 = z0 / length(z0) * betas[key]

            # Choose value among restarts
            n_iter_total = sum([res_all[i].nit for i in range(len(res_all))])
            if len(res_all) > 0:
                i_star = argmin([res.fun for res in res_all])
                x_star = res_all[i_star].x
                fun_star = res_all[i_star].fun
                if verbose:
                    print("out = {}: Optimization successful".format(key))
                    print("n_iter = {}".format(res_all[i_star].nit))
                    print("n_iter_total = {}".format(n_iter_total))
            else:
                ## WARNING
                x_star = [NaN] * model.n_var_rand
                fun_star = NaN
                if verbose:
                    print("out = {}: Optimization unsuccessful".format(key))
                    print("n_iter = {}".format(res_all[i_star].nit))
                    print("n_iter_total = {}".format(n_iter_total))

            ## Extract results
            if append:
                df_inner = concat(
                    (
                        df_inner,
                        model.norm2rand(
                            DataFrame(data=[x_star], columns=model.var_rand)),
                    ),
                    axis=1,
                    sort=False,
                )
            df_inner[key] = [fun_star]
            df_return = concat((df_return, df_inner), axis=0, sort=False)

    if not append:
        df_return = (df_return.groupby(model.var_det).agg(
            {s: max
             for s in betas.keys()}).reset_index())

    return df_return
Example 4
def eval_sinews(
    model,
    n_density=10,
    n_sweeps=3,
    seed=None,
    df_det=None,
    varname="sweep_var",
    indname="sweep_ind",
    append=True,
    skip=False,
):
    r"""Sweep study

    Perform coordinate sweeps over each model random variable ("sinew" design). Use random starting points drawn from the joint density. Optionally sweep the deterministic variables.

    For more expensive models, it can be helpful to tune n_density and n_sweeps to achieve a reasonable runtime.

    Use gr.plot_auto() to construct a quick visualization of the output dataframe. Use the `skip` version to visualize the design, and the non-skipped version to visualize the results.

    Args:
        model (gr.Model): Model to evaluate
        n_density (numeric): Number of points along each sweep
        n_sweeps (numeric): Number of sweeps per-random variable
        seed (int): Random seed to use
        df_det (DataFrame): Deterministic levels for evaluation;
            use "nom" for nominal deterministic levels,
            use "swp" to sweep deterministic variables
        varname (str): Column name to give for sweep variable; default="sweep_var"
        indname (str): Column name to give for sweep index; default="sweep_ind"
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    Examples::

        import grama as gr
        md = gr.make_cantilever_beam()
        # Skip evaluation, used to visualize the design (input points)
        df_design = md >> gr.ev_sinews(df_det="nom", skip=True)
        df_design >> gr.pt_auto()
        # Visualize the input-to-output relationships of the model
        df_sinew = md >> gr.ev_sinews(df_det="nom")
        df_sinew >> gr.pt_auto()
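        # Also sweep the deterministic variables (a hedged sketch; requires
        # finite, nonzero-width bounds on the deterministic variables)
        df_sinew_swp = md >> gr.ev_sinews(df_det="swp")
        df_sinew_swp >> gr.pt_auto()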

    """
    ## Common invariant checks
    invariants_eval_model(model, skip)
    invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom", "swp"])
    ## Override model if deterministic sweeps desired
    if df_det == "swp":
        ## Collect sweep-able deterministic variables
        var_sweep = list(
            filter(
                lambda v: isfinite(model.domain.get_width(v))
                & (model.domain.get_width(v) > 0),
                model.var_det,
            ))
        ## Generate pseudo-marginals
        dicts_var = {}
        for v in var_sweep:
            dicts_var[v] = {
                "dist": "uniform",
                "loc": model.domain.get_bound(v)[0],
                "scale": model.domain.get_width(v),
            }
        ## Overwrite model
        model = comp_marginals(model, **dicts_var)
        ## Restore flag
        df_det = "nom"

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n_density, Integral):
        print("eval_sinews() is rounding n_density...")
        n_density = int(n_density)
    if not isinstance(n_sweeps, Integral):
        print("eval_sinews() is rounding n_sweeps...")
        n_sweeps = int(n_sweeps)

    ## Build quantile sweep data
    q_random = tile(random((1, model.n_var_rand, n_sweeps)), (n_density, 1, 1))
    q_dense = linspace(0, 1, num=n_density)
    Q_all = zeros((n_density * n_sweeps * model.n_var_rand, model.n_var_rand))
    C_var = ["tmp"] * (n_density * n_sweeps * model.n_var_rand)
    C_ind = [0] * (n_density * n_sweeps * model.n_var_rand)
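    # Q_all holds quantile coordinates for every sweep point; C_var and C_ind
    # record the swept variable and sweep replicate for each row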

    ## Interlace
    for i_input in range(model.n_var_rand):
        ind_base = i_input * n_density * n_sweeps
        for i_sweep in range(n_sweeps):
            ind_start = ind_base + i_sweep * n_density
            ind_end = ind_base + (i_sweep + 1) * n_density

            Q_all[ind_start:ind_end] = q_random[:, :, i_sweep]
            Q_all[ind_start:ind_end, i_input] = q_dense
            C_var[ind_start:ind_end] = [model.var_rand[i_input]] * n_density
            C_ind[ind_start:ind_end] = [i_sweep] * n_density

            ## Modify endpoints for infinite support
            if not isfinite(
                    model.density.marginals[model.var_rand[i_input]].q(0)):
                Q_all[ind_start, i_input] = 1 / n_density / 10
            if not isfinite(
                    model.density.marginals[model.var_rand[i_input]].q(1)):
                Q_all[ind_end - 1, i_input] = 1 - 1 / n_density / 10

    ## Assemble sampling plan
    df_pr = DataFrame(data=Q_all, columns=model.var_rand)
    df_rand = model.density.pr2sample(df_pr)
    df_rand[varname] = C_var
    df_rand[indname] = C_ind
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        ## Evaluation estimate
        runtime_est = model.runtime(df_samp.shape[0])
        if runtime_est > 0:
            print(
                "Estimated runtime for design with model ({0:1}):\n  {1:4.3} sec"
                .format(model.name, runtime_est))
        else:
            print(
                "Design runtime estimates unavailable; model has no timing data."
            )

        ## For autoplot
        with catch_warnings():
            simplefilter("ignore")
            df_samp._plot_info = {
                "type": "sinew_inputs",
                "var": model.var_rand
            }

        ## Pass-through
        return df_samp

    ## Apply
    df_res = eval_df(model, df=df_samp, append=append)
    ## For autoplot
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "sinew_outputs",
            "var": model.var_rand,
            "out": model.out,
        }

    return df_res
Example 5
def eval_hybrid(
    model,
    n=1,
    plan="first",
    df_det=None,
    varname="hybrid_var",
    seed=None,
    append=True,
    skip=False,
):
    r"""Hybrid points for Sobol' indices

    Use the "hybrid point" design (Sobol', 1999) to support estimating Sobol'
    indices. Use gr.tran_sobol() to post-process the results and compute
    estimates.

    Args:
        model (gr.Model): Model to evaluate; must have CopulaIndependence
        n (numeric): Number of base samples; total evaluations are n * (n_var_rand + 1)
        plan (str): Sobol' index to compute; plan={"first", "total"}
        seed (int): Random seed to use
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        varname (str): Column name to give for sweep variable; default="hybrid_var"
        append (bool): Append results to conservative inputs?
        skip (bool): Skip evaluation of the functions?

    Returns:
        DataFrame: Results of evaluation or unevaluated design

    References:
        I.M. Sobol', "Sensitivity Estimates for Nonlinear Mathematical Models" (1999) MMCE, Vol 1.

    Examples::

        import grama as gr
        md = gr.make_cantilever_beam()
        ## Compute the first-order indices
        df_first = md >> gr.ev_hybrid(df_det="nom", plan="first")
        df_first >> gr.tf_sobol()
        ## Compute the total-order indices
        df_total = md >> gr.ev_hybrid(df_det="nom", plan="total")
        df_total >> gr.tf_sobol()

    """
    ## Check invariants
    invariants_eval_model(model, skip)
    invariants_eval_df(df_det, arg_name="df_det", valid_str=["nom"])
    if not isinstance(model.density.copula, CopulaIndependence):
        raise ValueError(
            "model must have CopulaIndependence structure;\n" +
            "Sobol' indices only defined for independent variables")

    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    if not isinstance(n, Integral):
        print("eval_hybrid() is rounding n...")
        n = int(n)

    ## Draw hybrid points
    X = random((n, model.n_var_rand))
    Z = random((n, model.n_var_rand))
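    # X: base samples; Z: companion samples used to re-draw all-but-one input
    # (plan="first") or exactly one input (plan="total") in each block below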

    ## Reserve space
    Q_all = zeros((n * (model.n_var_rand + 1), model.n_var_rand))
    Q_all[:n] = X  # Base samples
    C_var = ["_"] * (n * (model.n_var_rand + 1))

    ## Interleave samples
    for i_in in range(model.n_var_rand):
        i_start = (i_in + 1) * n
        i_end = (i_in + 2) * n

        if plan == "first":
            Q_all[i_start:i_end, :] = Z
            Q_all[i_start:i_end, i_in] = X[:, i_in]
        elif plan == "total":
            Q_all[i_start:i_end, :] = X
            Q_all[i_start:i_end, i_in] = Z[:, i_in]
        else:
            raise ValueError("plan must be `first` or `total`")

        C_var[i_start:i_end] = [model.var_rand[i_in]] * n

    ## Construct sampling plan
    df_pr = DataFrame(data=Q_all, columns=model.var_rand)
    ## Convert samples to desired marginals
    df_rand = model.density.pr2sample(df_pr)
    df_rand[varname] = C_var
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        with catch_warnings():
            simplefilter("ignore")
            df_samp._meta = dict(
                type="eval_hybrid",
                varname=varname,
                plan=plan,
                var_rand=model.var_rand,
                out=model.out,
            )

        return df_samp

    df_res = eval_df(model, df=df_samp, append=append)
    with catch_warnings():
        simplefilter("ignore")
        df_res._meta = dict(
            type="eval_hybrid",
            varname=varname,
            plan=plan,
            var_rand=model.var_rand,
            out=model.out,
        )

    return df_res
Example 6
def eval_pnd(model, df_train, df_test, signs, n=int(1e4), seed=None, append=True, \
    mean_prefix="_mean", sd_prefix="_sd"):
    """Approximate the probability non-dominated (PND)

    Approximates the probability non-dominated (PND) for a set of candidate (test) points, relative to a set of training points, given a fitted probabilistic model. Used to rank candidates in the context of multiobjective optimization.

    Args:
        model (gr.model): predictive model to evaluate
        df_train (DataFrame): dataframe with training data
        df_test (DataFrame): dataframe with test data
        signs (dict): Output variables to use, each mapped to a sign (+1 or -1)
            indicating the direction of improvement for that output
        append (bool): Append df_test to the PND outputs?

    Kwargs:
        n (int): Number of draws for importance sampler
        seed (int): Random seed for reproducibility
        mean_prefix (str): Suffix appended to output names to locate mean
            predictions in model.out
        sd_prefix (str): Suffix appended to output names to locate standard
            deviation predictions in model.out

    Returns:
        DataFrame: PND results for the candidate (test) points; contains both
        the candidate values and their scores (pr_scores).

    References:
        del Rosario, Zachary, et al. "Assessing the frontier: Active learning, model accuracy, and multi-objective candidate discovery and optimization." The Journal of Chemical Physics 153.2 (2020): 024112.

    Examples::

        import grama as gr

        ## Define a ground-truth model
        md_true = gr.make_pareto_random()
        df_data = (
            md_true
            >> gr.ev_sample(n=2e3, seed=101, df_det="nom")
        )
        ## Generate test/train data
        df_train = (
            df_data
            >> gr.tf_sample(n=10)
        )

        df_test = (
            df_data
            >> gr.anti_join(
                df_train,
                by = ["x1","x2"]
            )
            >> gr.tf_sample(n=200)
        )
        ## Fit a model to training data
        md_fit = (
            df_train
            >> gr.ft_gp(
                var=["x1","x2"]
                out=["y1","y2"]
            )
        )
        ## Rank training points by PND algorithm
        df_pnd = (
            md_fit
            >> gr.ev_pnd(
                df_train,
                df_test,
                signs = {"y1":1, "y2":1},
                seed = 101
            )
            >> gr.tf_arrange(gr.desc(DF.pr_scores))
        )

    """
    invariants_eval_model(model)
    invariants_eval_df(df_train, arg_name="df_train")
    invariants_eval_df(df_test, arg_name="df_test")


    # Check content
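    # model.out carries a mean and sd column per candidate output
    # (e.g. "y1_mean", "y1_sd"), so len(model.out)/2 counts the outputs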
    if len(model.out)/2 < 2:
        raise ValueError('Given Model needs multiple outputs')

    if not set(model.var).issubset(set(df_train.columns)):
        raise ValueError("model.var must be subset of df_train.columns")

    if not set(model.var).issubset(set(df_test.columns)):
        raise ValueError("model.var must be subset of df_test.columns")

    for key in signs.keys():
        if key+mean_prefix not in model.out:
            raise ValueError(f"signs.{key} implies output {key+mean_prefix}, which is not found in provided md.out")
        if key+sd_prefix not in model.out:
            raise ValueError(f"signs{key} implies output {key+sd_prefix}, which is not found in provided sd.out")

    ## Compute predictions and predicted uncertainties
    df_pred = (
        df_test
        >> tf_md(md=model)
    )

    ## Setup for reshaping
    means = []
    sds = []
    columns = df_train.columns.values
    length = int(len(signs.keys()))
    outputs = [key for key in signs.keys() if key in columns]
    signs = [value for value in signs.values()]

    ## append mean and sd prefixes
    for i, value in enumerate(outputs):
        means.append(value+mean_prefix)
        sds.append(value+sd_prefix)

    ## Remove extra columns from df_test
    df_pred = df_pred[means + sds]

    ## Reshape data for PND algorithm
    X_pred = df_pred[means].values      # Predicted response values
    X_sig = df_pred[sds].values         # Predictive uncertainties
    X_train = df_train[outputs].values  # Training

    ### Create covariance matrices
    X_cov = zeros((X_sig.shape[0], length, length))
    for l in range(length):
        for i in range(X_sig.shape[0]):
            X_cov[i, l, l] = X_sig[i, l]

    ### Apply pnd
    pr_scores, var_values = approx_pnd(
        X_pred,
        X_cov,
        X_train,
        signs = signs,
        n = n,
        seed = seed
    )

    ### Package outputs
    df_pnd = DataFrame(
        {
            "pr_scores": pr_scores,
            "var_values": var_values,
        }
    )

    if append:
        return df_test.reset_index(drop=True).merge(df_pnd, left_index=True, right_index=True)
    return df_pnd
Example 7
def eval_min(
    model,
    out_min=None,
    out_geq=None,
    out_leq=None,
    out_eq=None,
    method="SLSQP",
    tol=1e-6,
    n_restart=1,
    n_maxiter=50,
    seed=None,
    df_start=None,
):
    r"""Constrained minimization using functions from a model

    Perform constrained minimization using functions from a model. Model must
    have deterministic variables only.

    Wrapper for scipy.optimize.minimize

    Args:
        model (gr.Model): Model to analyze. All model variables must be
            deterministic.
        out_min (str): Output to use as minimization objective.
        out_geq (None OR list of str): Outputs to use as geq constraints; out >= 0
        out_leq (None OR list of str): Outputs to use as leq constraints; out <= 0
        out_eq (None OR list of str): Outputs to use as equality constraints; out == 0

        method (str): Optimization method; see the documentation for
            scipy.optimize.minimize for options.
        tol (float): Optimization objective convergence tolerance
        n_restart (int): Number of restarts; beyond n_restart=1 random
            restarts are used.
        df_start (None or DataFrame): Specific starting values to use; overrides
            n_restart if provided.

    Returns:
        DataFrame: Results of optimization

    Examples::

        import grama as gr
        ## Define a model with objective and constraints
        md = (
            gr.Model("Constrained Rosenbrock")
            >> gr.cp_function(
                fun=lambda x: (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2,
                var=["x", "y"],
                out=["c"],
            )
            >> gr.cp_function(
                fun=lambda x: (x[0] - 1)**3 - x[1] + 1,
                var=["x", "y"],
                out=["g1"],
            )
            >> gr.cp_function(
                fun=lambda x: x[0] + x[1] - 2,
                var=["x", "y"],
                out=["g2"],
            )
            >> gr.cp_bounds(
                x=(-1.5, +1.5),
                y=(-0.5, +2.5),
            )
        )

        ## Run the optimizer
        md >> gr.ev_min(
            out_min="c",
            out_leq=["g1", "g2"]
        )
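
        ## Provide explicit starting points (a hedged sketch; df_start
        ## overrides n_restart)
        md >> gr.ev_min(
            out_min="c",
            out_leq=["g1", "g2"],
            df_start=gr.df_make(x=[0.0, -1.0], y=[0.0, +2.0]),
        )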

    """
    ## Common invariant checks
    invariants_eval_model(model)
    invariants_eval_df(df_start, arg_name="df_start", acc_none=True)

    ## Check that model has only deterministic variables
    if model.n_var_rand > 0:
        raise ValueError("model must have no random variables")
    ## Check that objective is in model
    if not (out_min in model.out):
        raise ValueError("model must contain out_min")
    ## Check that constraints are in model
    if not (out_geq is None):
        out_diff = set(out_geq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_geq; missing {}".format(out_diff))
    if not (out_leq is None):
        out_diff = set(out_leq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_leq; missing {}".format(out_diff))
    if not (out_eq is None):
        out_diff = set(out_eq).difference(set(model.out))
        if len(out_diff) > 0:
            raise ValueError(
                "model must contain each out_eq; missing {}".format(out_diff))

    ## Formulate initial guess
    df_nom = eval_nominal(model, df_det="nom", skip=True)
    if df_start is None:
        df_start = df_nom[model.var]

        if n_restart > 1:
            if not (seed is None):
                setseed(seed)
            ## Collect sweep-able deterministic variables
            var_sweep = list(
                filter(
                    lambda v: isfinite(model.domain.get_width(v))
                    & (model.domain.get_width(v) > 0),
                    model.var_det,
                ))
            ## Generate pseudo-marginals
            dicts_var = {}
            for v in var_sweep:
                dicts_var[v] = {
                    "dist": "uniform",
                    "loc": model.domain.get_bound(v)[0],
                    "scale": model.domain.get_width(v),
                }
            ## Overwrite model
            md_sweep = comp_marginals(model, **dicts_var)
            md_sweep = comp_copula_independence(md_sweep)
            ## Generate random start points
            df_rand = eval_sample(
                md_sweep,
                n=n_restart - 1,
                df_det="nom",
                skip=True,
            )
            df_start = concat((df_start, df_rand[model.var]),
                              axis=0).reset_index(drop=True)
    else:
        n_restart = df_start.shape[0]

    ## Factory for wrapping model's output
    def make_fun(out, sign=+1):
        def fun(x):
            df = DataFrame([x], columns=model.var)
            df_res = eval_df(model, df)
            return sign * df_res[out]

        return fun

    ## Create helper functions for constraints
    constraints = []

    if not (out_geq is None):
        for out in out_geq:
            constraints.append({
                "type": "ineq",
                "fun": make_fun(out),
            })

    if not (out_leq is None):
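        # scipy's "ineq" constraints require fun(x) >= 0, so leq outputs are
        # negated (sign=-1) below to express out <= 0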
        for out in out_leq:
            constraints.append({
                "type": "ineq",
                "fun": make_fun(out, sign=-1),
            })

    if not (out_eq is None):
        for out in out_eq:
            constraints.append({
                "type": "eq",
                "fun": make_fun(out),
            })

    ## Parse the bounds for minimize
    bounds = list(map(lambda k: model.domain.bounds[k], model.var))

    ## Run optimization
    df_res = DataFrame()
    for i in range(n_restart):
        x0 = df_start[model.var].iloc[i].values
        res = minimize(
            make_fun(out_min),
            x0,
            args=(),
            method=method,
            jac=False,
            tol=tol,
            options={
                "maxiter": n_maxiter,
                "disp": False
            },
            constraints=constraints,
            bounds=bounds,
        )

        df_opt = df_make(
            **dict(zip(model.var, res.x)),
            **dict(zip(map(lambda s: s + "_0", model.var), x0)),
        )
        df_tmp = eval_df(model, df=df_opt)
        df_tmp["success"] = [res.success]
        df_tmp["message"] = [res.message]
        df_tmp["n_iter"] = [res.nit]

        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    return df_res
Example 8
def eval_nls(
    model,
    df_data=None,
    out=None,
    var_fix=None,
    df_init=None,
    append=False,
    tol=1e-6,
    ftol=1e-9,
    gtol=1e-5,
    n_maxiter=100,
    n_restart=1,
    n_process=1,
    method="L-BFGS-B",
    seed=None,
    verbose=True,
):
    r"""Estimate with Nonlinear Least Squares (NLS)

    Estimate best-fit variable levels with nonlinear least squares (NLS).

    Args:
        model (gr.Model): Model to analyze. All model variables
            selected for fitting must be bounded or random. Deterministic
            variables may have semi-infinite bounds.
        df_data (DataFrame): Data for estimating parameters. Variables not
            found in df_data optimized in fitting.
        out (list or None): Output contributions to consider in computing MSE.
            Assumed to be model.out if left as None.
        var_fix (list or None): Variables to fix to nominal levels. Note that
            variables with domain width zero will automatically be fixed.
        df_init (DataFrame or None): Initial guesses for parameters; overrides
            n_restart
        append (bool): Append metadata? (Initial guess, MSE, optimizer status)
        tol (float): Optimizer convergence tolerance
        n_maxiter (int): Optimizer maximum iterations
        n_restart (int): Number of restarts; beyond n_restart=1 random
            restarts are used.
        method (str): Optimization method; see scipy.optimize.minimize for options
        ftol (float): Optimizer ftol option; see scipy.optimize.minimize
        gtol (float): Optimizer gtol option; see scipy.optimize.minimize
        seed (int OR None): Random seed for restarts
        verbose (bool): Print messages to console?

    Returns:
        DataFrame: Results of estimation

    Examples::

        import grama as gr
        from grama.data import df_trajectory_full
        from grama.models import make_trajectory_linear

        md_trajectory = make_trajectory_linear()

        df_fit = (
            md_trajectory
            >> gr.ev_nls(df_data=df_trajectory_full)
        )

        print(df_fit)

    """
    ## Common invariant checks
    invariants_eval_model(model)
    invariants_eval_df(df_data, arg_name="df_data")
    invariants_eval_df(df_init, arg_name="df_init", acc_none=True)

    ## Check `out` invariants
    if out is None:
        out = model.out
        if verbose:
            print("... eval_nls setting out = {}".format(out))
    set_diff = set(out).difference(set(df_data.columns))
    if len(set_diff) > 0:
        raise ValueError("out must be subset of df_data.columns\n" +
                         "difference = {}".format(set_diff))

    ## Determine variables to be fixed
    if var_fix is None:
        var_fix = set()
    else:
        var_fix = set(var_fix)
    for var in model.var_det:
        wid = model.domain.get_width(var)
        if wid == 0:
            var_fix.add(var)
    if verbose:
        print("... eval_nls setting var_fix = {}".format(list(var_fix)))
    var_fix = list(var_fix)

    ## Determine variables for evaluation
    var_feat = set(model.var).intersection(set(df_data.columns))
    if verbose:
        print("... eval_nls setting var_feat = {}".format(var_feat))
    var_feat = list(var_feat)

    ## Determine variables for fitting
    var_fit = set(model.var).difference(set(var_fix).union(set(var_feat)))
    if len(var_fit) == 0:
        raise ValueError("No var selected for fitting!\n" +
                         "Try checking model bounds and df_data.columns.")
    var_fit = list(var_fit)

    ## Separate var_fit into det and rand
    var_fit_det = list(set(model.var_det).intersection(var_fit))
    var_fit_rand = list(set(model.var_rand).intersection(var_fit))

    ## Construct bounds, fix var_fit order
    var_fit = var_fit_det + var_fit_rand
    bounds = []
    var_prob = []
    for var in var_fit_det:
        if not isfinite(model.domain.get_nominal(var)):
            var_prob.append(var)
        bounds.append(model.domain.get_bound(var))
    if len(var_prob) > 0:
        raise ValueError(
            "all variables to be fitted must finite nominal value\n" +
            "offending var = {}".format(var_prob))

    for var in var_fit_rand:
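        # Random-variable bounds span the marginal's support, q(0) to q(1);
        # these may be infinite for unbounded marginals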
        bounds.append((
            model.density.marginals[var].q(0),
            model.density.marginals[var].q(1),
        ))

    ## Determine initial guess points
    df_nom = eval_nominal(model, df_det="nom", skip=True)

    ## Use specified initial guess(es)
    if not (df_init is None):
        # Check invariants
        set_diff = list(set(var_fit).difference(set(df_init.columns)))
        if len(set_diff) > 0:
            raise ValueError("var_fit must be subset of df_init.columns\n" +
                             "difference = {}".format(set_diff))
        # Pull n_restart
        n_restart = df_init.shape[0]

    ## Generate initial guess(es)
    else:

        df_init = df_nom[var_fit]
        if n_restart > 1:
            if not (seed is None):
                setseed(seed)
            ## Collect sweep-able deterministic variables
            var_sweep = list(
                filter(
                    lambda v: isfinite(model.domain.get_width(v))
                    & (model.domain.get_width(v) > 0),
                    model.var_det,
                ))
            ## Generate pseudo-marginals
            dicts_var = {}
            for v in var_sweep:
                dicts_var[v] = {
                    "dist": "uniform",
                    "loc": model.domain.get_bound(v)[0],
                    "scale": model.domain.get_width(v),
                }
            ## Overwrite model
            md_sweep = comp_marginals(model, **dicts_var)
            md_sweep = comp_copula_independence(md_sweep)
            ## Generate random start points
            df_rand = eval_sample(
                md_sweep,
                n=n_restart - 1,
                df_det="nom",
                skip=True,
            )
            df_init = concat((df_init, df_rand[var_fit]),
                             axis=0).reset_index(drop=True)

    ## Iterate over initial guesses
    df_res = DataFrame()

    def fun_mp(i):
        x0 = df_init[var_fit].iloc[i].values

        ## Build evaluator
        def objective(x):
            """x = [var_fit]"""
            ## Evaluate model
            df_var = tran_outer(
                df_data[var_feat],
                concat(
                    (df_nom[var_fix].iloc[[0]],
                     df_make(**dict(zip(var_fit, x)))),
                    axis=1,
                ),
            )
            df_tmp = eval_df(model, df=df_var)

            ## Compute joint MSE
            return ((df_tmp[out].values - df_data[out].values)**2).mean()

        ## Run optimization
        res = minimize(
            objective,
            x0,
            args=(),
            method=method,
            jac=False,
            tol=tol,
            options={
                "maxiter": n_maxiter,
                "disp": False,
                "ftol": ftol,
                "gtol": gtol,
            },
            bounds=bounds,
        )

        df_tmp = df_make(
            **dict(zip(var_fit, res.x)),
            **dict(zip(map(lambda s: s + "_0", var_fit), x0)),
        )
        df_tmp["success"] = [res.success]
        df_tmp["message"] = [res.message]
        df_tmp["n_iter"] = [res.nit]
        df_tmp["mse"] = [res.fun]
        return df_tmp

    df_res = DataFrame()
    for i in range(n_restart):
        df_tmp = fun_mp(i)
        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    ## Post-process
    if append:
        return df_res
    return df_res[var_fit]