Example 1
        ## Construct model with Gaussian copula
        if len(var_fix) > 0:
            md_res = (Model(name) >> cp_function(
                lambda x: df_nom[var_fix].values,
                var=set(var_remain).difference(var_fix),
                out=var_fix,
                name="Fix variable levels",
            ) >> cp_md_det(md=md) >> cp_marginals(**marginals) >>
                      cp_copula_gaussian(df_corr=df_corr))
        else:
            md_res = (Model(name) >> cp_md_det(md=md) >> cp_marginals(
                **marginals) >> cp_copula_gaussian(df_corr=df_corr))

    ## Return deterministic model
    elif uq_method is None:
        md_res = (Model(name) >> cp_function(
            lambda x: df_best[var_fitted].values,
            var=var_remain,
            out=var_fitted,
            name="Fix variable levels",
        ) >> cp_md_det(md=md))

    else:
        raise ValueError(
            "uq_method option {} not recognized".format(uq_method))

    return md_res


ft_nls = add_pipe(fit_nls)
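
# A minimal usage sketch (an assumption, not from this source): calibrate a
# one-parameter model against data via nonlinear least squares. The dataset,
# the model, and the uq_method literal "linpool" are hypothetical here.
import grama as gr

df_data = gr.df_make(x=[0.0, 1.0, 2.0, 3.0], y=[0.1, 1.9, 4.2, 5.8])
md_guess = (
    gr.Model("line")
    >> gr.cp_function(
        fun=lambda x: x[1] * x[0],  # y = slope * x; slope is to be fitted
        var=["x", "slope"],
        out=["y"],
    )
    >> gr.cp_bounds(slope=(-10, 10))
)
md_cal = df_data >> gr.ft_nls(md=md_guess, uq_method="linpool")
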
Example 2
    ### Add index column to dataset
    longer = df.reset_index().melt(id_vars="index",
                                   var_name=names_to,
                                   value_vars=columns,
                                   value_name=values_to)
    ### rename index column to desired: index_to
    longer.rename(columns={'index': index_to}, inplace=True)

    longer = index_to_cleanup(df, longer, data_index)

    return longer

    ######################################


tf_pivot_longer = add_pipe(tran_pivot_longer)
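
# Usage sketch (assumed, not from this source): gather wide columns into
# tidy name/value pairs, tidyr-style.
import grama as gr

df_wide = gr.df_make(y1=[1, 2], y2=[3, 4])
df_long = (
    df_wide
    >> gr.tf_pivot_longer(
        columns=["y1", "y2"],
        names_to="name",
        values_to="value",
    )
)
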


def tran_pivot_wider(
    df,
    #id_cols,
    names_from,
    indexes_from=None,
    #names_prefix,
    #names_sep,
    #names_glue = None,
    #names_sort = False,
    #names_glue,
    values_from=None,
    #values_fill = None,
    #values_fn = None
Example 3
                    df_res = concat((df_res, df_tmp), axis=0)
                else:
                    warn("Output {0:} had no contours at level {1:}".format(
                        o,
                        t,
                    ))

    ## Remove dummy column, if present
    if "_foo" in df_res.columns:
        df_res.drop("_foo", axis=1, inplace=True)

    # Drop index
    df_res = df_res.reset_index(drop=True)

    ## Attach metadata
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "contour",
            "var": var,
            "out": "out",
            "level": "level",
            "aux": has_aux,
        }

    ## Return the results
    return df_res


ev_contour = add_pipe(eval_contour)
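
# Usage sketch (assumed): evaluate output contours over two inputs, then
# visualize with autoplot, which dispatches on the _plot_info attached above.
# `md` and the column names are hypothetical.
df_contours = md >> gr.ev_contour(var=["x0", "x1"], out=["y"])
df_contours >> gr.pt_auto()
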
Example 4
            "Deterministic variables of md_base and md_var must match:\n" +
            "md_base is missing: {}\n".format(var_new.difference(var_base)) +
            "md_new is missing: {}".format(var_base.difference(var_new)))
    # Check that `weights` name does not collide
    if (var_weight in df_base.columns) and append:
        raise ValueError(
            "Weight name {} already in df_base.columns; ".format(var_weight) +
            "choose a new name.")

    ## Compute weight values
    # Use base model for importance distribution
    q = md_base.density.d(df_base)
    # Use new model for nominal distribution
    p = md_new.density.d(df_base)
    # Compute likelihood ratio
    w = p / q

    ## Return results
    df_res = DataFrame({var_weight: w})

    if append:
        df_res = concat(
            [df_base.reset_index(drop=True), df_res],
            axis=1,
        )

    return df_res


tf_reweight = add_pipe(tran_reweight)
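
# Usage sketch (assumed): importance-reweight a Monte Carlo sample drawn
# under md_base so weighted averages target md_new; the weights are the
# likelihood ratio w = p_new / p_base computed above. The models and output
# name "y" are hypothetical.
df_mc = md_base >> gr.ev_monte_carlo(n=1000, df_det="nom", seed=101)
df_w = df_mc >> gr.tf_reweight(md_base=md_base, md_new=md_new, var_weight="weight")
mean_new = (df_w["y"] * df_w["weight"]).mean()  # weighted estimate of E_new[y]
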
Example 5
    ## Apply
    df_res = eval_df(model, df=df_samp, append=append)
    ## For autoplot
    with catch_warnings():
        simplefilter("ignore")
        df_res._plot_info = {
            "type": "sinew_outputs",
            "var": model.var_rand,
            "out": model.out,
        }

    return df_res


ev_sinews = add_pipe(eval_sinews)
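
# Usage sketch: sweep each random input from a few random starting points
# and autoplot the sweeps via the _plot_info attached above.
# make_cantilever_beam ships with grama; df_det="swp" is assumed to request
# sweeps over the deterministic variables as well.
import grama as gr
from grama.models import make_cantilever_beam

md_beam = make_cantilever_beam()
md_beam >> gr.ev_sinews(df_det="swp", n_sweeps=3) >> gr.pt_auto()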


## Hybrid points for Sobol' indices
# --------------------------------------------------
@curry
def eval_hybrid(
    model,
    n=1,
    plan="first",
    df_det=None,
    varname="hybrid_var",
    seed=None,
    append=True,
    skip=False,
):
Example 6
        raise ValueError("out must be subset of df.columns")
    ## Default input value
    if var is None:
        var = list(set(df.columns).difference(set(out)))
    ## Check more invariants
    set_inter = set(out).intersection(set(var))
    if len(set_inter) > 0:
        raise ValueError(
            "outputs and inputs must be disjoint; intersect = {}".format(
                set_inter))
    if not set(var).issubset(set(df.columns)):
        raise ValueError("var must be subset of df.columns")

    ## Construct a random forest for each output
    functions = []

    for output in out:
        rf = RandomForestRegressor(**kwargs)
        set_seed(seed)
        rf.fit(df[var].values, df[output].values)
        name = "RF"

        fun = FunctionRFR(rf, var, [output], name, 0, return_std)
        functions.append(fun)

    ## Construct model
    return gr.Model(functions=functions, domain=domain, density=density)


ft_lolo = add_pipe(fit_lolo)
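
# Usage sketch (assumed): fit a lolo random forest; requires the `lolopy`
# package. `df_data` with inputs x0, x1 and output y is hypothetical.
md_rf = df_data >> gr.ft_lolo(out=["y"], seed=101)
df_pred = md_rf >> gr.ev_df(df=df_data[["x0", "x1"]])
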
Example 7
        "-2": "blue",
        "-1": "darkturquoise",
        "0": "black",
        "+1": "salmon",
        "+2": "red"
    }, ) + scale_shape_manual(
        name="Patterns",
        values={
            "Below Limit": "s",
            "Above Limit": "s",
            "Low Run": "X",
            "High Run": "X",
            "Increasing Run": "^",
            "Decreasing Run": "v",
            "None": "."
        },
    ) + scale_linetype_manual(
        name="Guideline",
        values=dict(LCL="dashed", UCL="dashed", center="solid"),
    ) + guides(color=None) + facet_grid(
        "_var~.",
        scales="free_y",
        labeller=labeller(dict(X="Mean", S="Variability")),
    ) + labs(
        x="Group variable ({})".format(group),
        y="Value ({})".format(var),
    ))


pt_xbs = add_pipe(plot_xbs)
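
# Usage sketch (assumed): Xbar-and-S control chart of a measured value
# grouped by batch; the DataFrame and column names are hypothetical.
df_measurements >> gr.pt_xbs(group="batch", var="measurement")
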
Example 8
    Returns:
        DataFrame: Results of evaluation or unevaluated design
    Notes:
        - Wrapper on pyDOE.lhs
    """
    ## Set seed only if given
    if seed is not None:
        set_seed(seed)

    ## Ensure sample count is int
    if not isinstance(n, Integral):
        print("eval_lhs() is rounding n...")
        n = int(n)

    ## Draw samples
    df_quant = DataFrame(data=lhs(model.n_var_rand, samples=n),
                         columns=model.var_rand)

    ## Convert samples to desired marginals
    df_rand = model.density.pr2sample(df_quant)
    ## Construct outer-product DOE
    df_samp = model.var_outer(df_rand, df_det=df_det)

    if skip:
        return df_samp
    else:
        return gr.eval_df(model, df=df_samp, append=append)


ev_lhs = add_pipe(eval_lhs)
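
# Usage sketch: Latin hypercube sample of the random inputs at nominal
# deterministic levels. make_cantilever_beam ships with grama.
import grama as gr
from grama.models import make_cantilever_beam

md_beam = make_cantilever_beam()
df_lhs = md_beam >> gr.ev_lhs(n=100, df_det="nom", seed=101)
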
Example 9
    ## Featurize
    try:
        featurizer = ElementProperty.from_preset(preset_name=preset_name)

    except NameError as e:
        error_string = str(e)
        raise NameError(
            error_string +
            "\n\nThis function requires the `matminer` package. " +
            "Try running the following to install the package:\n"
            "    pip install matminer"
        )

    df_res = StrToComposition().featurize_dataframe(
        df[[var_formula]], var_formula, ignore_errors=ignore_errors,
    )
    df_res = featurizer.featurize_dataframe(
        df_res, col_id="composition", ignore_errors=ignore_errors, **kwargs,
    )
    df_res.drop(columns=[var_formula, "composition"], inplace=True)

    ## Concatenate as necessary
    if append:
        df_res = concat((df, df_res), axis=1)

    return df_res


tf_feat_composition = add_pipe(tran_feat_composition)
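
# Usage sketch (assumed): featurize chemical-formula strings with matminer's
# "magpie" preset; requires the `matminer` package.
import grama as gr

df_comp = gr.df_make(FORMULA=["C6H12O6", "NaCl"])
df_feat = df_comp >> gr.tf_feat_composition(var_formula="FORMULA")
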
Example 10
    df_res = model.evaluate_df(df)

    if append:
        df_res = concat(
            [
                df.reset_index(drop=True).drop(
                    model.out, axis=1, errors="ignore"),
                df_res,
            ],
            axis=1,
        )

    return df_res


ev_df = add_pipe(eval_df)
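
# Usage sketch: evaluate a model at caller-chosen input values; gr.df_make
# broadcasts scalars against lists. `md` and the input names are
# hypothetical.
df_inputs = gr.df_make(x0=[0.0, 0.5, 1.0], x1=1.0)
df_out = md >> gr.ev_df(df=df_inputs)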


## Nominal evaluation
# --------------------------------------------------
@curry
def eval_nominal(model, df_det=None, append=True, skip=False):
    r"""Evaluate model at nominal values

    Evaluates a given model at its nominal conditions (median values).

    Args:
        model (gr.Model): Model to evaluate
        df_det (DataFrame): Deterministic levels for evaluation; use "nom"
            for nominal deterministic levels.
        append (bool): Append results to nominal inputs?
Example 11
        >>> from grama.models import make_cantilever_beam
        >>> md = make_cantilever_beam()
        >>> md >> \
        >>>     gr.ev_monte_carlo(n=100, df_det="nom", skip=True) >> \
        >>>     gr.pt_scattermat(var=md.var)
        >>> plt.show()

    """
    if var is None:
        raise ValueError("Must provide input columns list as keyword var")

    ## Plot
    return pairplot(data=df, vars=var)


pt_scattermat = add_pipe(plot_scattermat)
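
# Usage sketch, mirroring the docstring above: scatterplot matrix of a
# skipped (unevaluated) Monte Carlo design.
import grama as gr
from grama.models import make_cantilever_beam

md_beam = make_cantilever_beam()
(
    md_beam
    >> gr.ev_monte_carlo(n=100, df_det="nom", skip=True)
    >> gr.pt_scattermat(var=md_beam.var)
)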


@curry
def plot_hists(df, out=None):
    r"""Construct histograms

    Create a set of histograms. Often used to visualize the results of random
    sampling for multiple outputs.

    Args:
        df (DataFrame): Data to plot
        out (list of strings): Output variables to plot

    Returns:
        Seaborn histogram plot
Example 12
            fun = FunctionGPR(gpr, var, [output], name, 0, var_min, var_max)
            functions.append(fun)

    except NameError as e:
        error_string = str(e)
        raise NameError(error_string +
                        "\n\nThis function requires the `sklearn` package. " +
                        "Try running the following to install the package:\n"
                        "    pip install scikit-learn")

    ## Construct model
    return Model(functions=functions, domain=domain, density=density)


ft_gp = add_pipe(fit_gp)
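
# Usage sketch (assumed): fit a Gaussian process regressor to data; requires
# scikit-learn. `df_data` with output column y is hypothetical.
md_gp = df_data >> gr.ft_gp(out=["y"])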


## Fit random forest model with sklearn
# --------------------------------------------------
@curry
def fit_rf(df,
           md=None,
           var=None,
           out=None,
           domain=None,
           density=None,
           seed=None,
           suppress_warnings=True,
           **kwargs):
    r"""Fit a random forest
Example 13
    """
    n_obs, n_in = df.shape

    ## Parse formulae for output names
    n_out = len(formulae)
    outputs = [""] * n_out
    for ind in range(n_out):
        ind_start = formulae[ind].find("~")
        outputs[ind] = formulae[ind][:ind_start].strip()

    ## Construct fits
    fits = []
    for ind in range(n_out):
        fits.append(smf.ols(formulae[ind], data=df).fit())

    def fit_all(df_new):
        n_obs_new, _ = df_new.shape
        result = zeros((n_obs_new, n_out))
        for ind in range(n_out):
            result[:, ind] = fits[ind].predict(df_new)
        return DataFrame(data=result, columns=outputs)

    ## Construct model
    return gr.model_vectorized(function=fit_all,
                               outputs=outputs,
                               domain=domain,
                               density=density)


ft_ols = add_pipe(fit_ols)
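
# Usage sketch: ordinary least squares fits from patsy-style formulae, one
# per output, backed by statsmodels. The data here are hypothetical.
import grama as gr

df_data = gr.df_make(x=[0.0, 1.0, 2.0, 3.0], y=[0.1, 2.1, 3.9, 6.2])
md_ols = df_data >> gr.ft_ols(formulae=["y ~ x"])
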
Example 14
        df_tmp["n_iter"] = [res.nit]
        df_tmp["mse"] = [res.fun]
        return df_tmp

    df_res = DataFrame()
    for i in range(n_restart):
        df_tmp = fun_mp(i)
        df_res = concat((df_res, df_tmp), axis=0).reset_index(drop=True)

    ## Post-process
    if append:
        return df_res
    return df_res[var_fit]


ev_nls = add_pipe(eval_nls)
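
# Usage sketch (assumed): run the NLS fit itself to inspect per-restart
# parameter estimates, iteration counts, and MSE. `md_guess` and `df_data`
# are hypothetical, e.g. as constructed for ft_nls above.
df_fitted = md_guess >> gr.ev_nls(df_data=df_data, n_restart=3, append=True)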


## Minimize
# --------------------------------------------------
@curry
def eval_min(
    model,
    out_min=None,
    out_geq=None,
    out_leq=None,
    out_eq=None,
    method="SLSQP",
    tol=1e-6,
    n_restart=1,
    n_maxiter=50,
Example 15
            tf(**summaries_all)
            # >> tf_mutate(_kfold=i)
        )

        if var_fold is None:
            df_summary_tmp = df_summary_tmp >> tf_mutate(_kfold=i)
        else:
            df_summary_tmp[var_fold] = levels[i]

        df_res = concat((df_res, df_summary_tmp),
                        axis=0).reset_index(drop=True)

    return df_res


tf_kfolds = add_pipe(tran_kfolds)
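
# Usage sketch (assumed): k-fold cross-validation of a fitting procedure;
# the fit gr.ft_rf and the output column "y" are hypothetical for this data.
df_cv = df_data >> gr.tf_kfolds(k=5, ft=gr.ft_rf(out=["y"]), out=["y"])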


## Bootstrap utility
# --------------------------------------------------
@curry
def tran_bootstrap(df,
                   tran=None,
                   n_boot=500,
                   n_sub=25,
                   con=0.90,
                   col_sel=None,
                   seed=None):
    r"""Estimate bootstrap confidence intervals

    Estimate bootstrap confidence intervals for a given transform. Uses the
Example 16
    outputs = df_res.drop(typename, axis=1).columns
    df_res[outputs] = df_res[outputs].round(digits)
    df_res.sort_values(typename, inplace=True)

    ## Filter, if necessary
    if not full:
        I_normalized = list(map(lambda s: s[0] == "S", df_res[typename]))
        df_res = df_res[I_normalized]

    ## Fill NaN's
    df_res.fillna(value=0, inplace=True)

    return df_res


tf_sobol = add_pipe(tran_sobol)
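
# Usage sketch: estimate first-order Sobol' indices from a hybrid-point
# design (see eval_hybrid elsewhere in this module).
import grama as gr
from grama.models import make_cantilever_beam

md_beam = make_cantilever_beam()
df_sobol = (
    md_beam
    >> gr.ev_hybrid(n=1000, df_det="nom", plan="first", seed=101)
    >> gr.tf_sobol()
)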

## Linear algebra tools
##################################################
## Principal Component Analysis (PCA)
@curry
def tran_pca(df, var=None, lamvar="lam", standardize=False):
    r"""Principal Component Analysis

    Compute principal directions and eigenvalues for a dataset. Can specify
    columns to analyze, or just analyze all numerical columns.

    Args:
        df (DataFrame): Data to analyze
        var (list of str or None): List of columns to analyze
        lamvar (str): Name to give eigenvalue column; default="lam"
Example 17
        poset = powerset(set(range(n)).difference({j}))
        data = zeros((s, len(out)))
        df_tmp = DataFrame(columns=out, data=data)

        for p in poset:
            den = n * comb(n - 1, len(p))

            for t in range(s):
                if t in inds:
                    t1 = cohort_mean(t, list(set(p).union({j})))
                    t0 = cohort_mean(t, p)

                    df_tmp.iloc[t] = df_tmp.iloc[t] + (t1 - t0).loc[0] / den
                else:
                    df_tmp.iloc[t] = NaN

        return df_tmp

    ## Compute cohort shapley over all variables
    df_res = DataFrame()
    for j in range(n):
        df_tmp = cohort_shapley(j)
        df_tmp.columns = [df_tmp.columns[i] + "_" + var[j] for i in range(len(out))]

        df_res = concat((df_res, df_tmp), axis=1)

    return df_res


tf_shapley_cohort = add_pipe(tran_shapley_cohort)
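
# Usage sketch (assumed): per-observation cohort Shapley attribution of each
# output to each input; the DataFrame and column names are hypothetical.
df_shap = df_data >> gr.tf_shapley_cohort(var=["x0", "x1"], out=["y"])
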
Example 18
    ## Concatenate as necessary
    if keep:
        df_res = concat(
            (df_res.reset_index(drop=True),
             df[var_leftover].reset_index(drop=True)),
            axis=1,
        )
    if append:
        df_res = concat(
            (df_res.reset_index(drop=True), df[var].reset_index(drop=True)),
            axis=1)

    return df_res


tf_tsne = add_pipe(tran_tsne)
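
# Usage sketch (assumed): embed selected columns into two dimensions with
# t-SNE; requires scikit-learn. The n_dim keyword is assumed by analogy with
# tran_umap elsewhere in this module; data and columns are hypothetical.
df_embedded = df_data >> gr.tf_tsne(var=["x0", "x1", "x2"], n_dim=2, seed=101)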


# --------------------------------------------------
@curry
def tran_poly(df, degree=None, var=None, keep=True, **kwargs):
    r"""Compute polynomial features of a dataset

    Compute polynomial features of a dataset, e.g. for use in polynomial
    regression models.

    Args:
        df (DataFrame): Data on which to compute polynomial features

    Kwargs:
        degree (int): Maximum degree of polynomial features
        var (list or None): Variables in df on which to perform dimension reduction.
Example 19
        return df_samp
    else:
        df_res = gr.eval_df(model, df=df_samp, append=append)

        ## Attach metadata
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            df_res._plot_info = {
                "type": "monte_carlo_outputs",
                "out": model.out
            }

        return df_res


ev_monte_carlo = add_pipe(eval_monte_carlo)
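
# Usage sketch, per the docstring elsewhere in this module: draw a Monte
# Carlo sample and autoplot the outputs via the attached _plot_info.
import grama as gr
from grama.models import make_cantilever_beam

md_beam = make_cantilever_beam()
md_beam >> gr.ev_monte_carlo(n=1000, df_det="nom", seed=101) >> gr.pt_auto()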


## Marginal sweeps with random origins
# --------------------------------------------------
@curry
def eval_sinews(
    model,
    n_density=10,
    n_sweeps=3,
    seed=None,
    df_det=None,
    varname="sweep_var",
    indname="sweep_ind",
    append=True,
    skip=False,
Example 20
                    ),
                    axis=1,
                    sort=False,
                )
            df_inner[key] = [fun_star]
            df_return = concat((df_return, df_inner), axis=0, sort=False)

    if not append:
        df_return = (df_return.groupby(model.var_det).agg(
            {s: max
             for s in betas.keys()}).reset_index())

    return df_return


ev_form_pma = add_pipe(eval_form_pma)
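
# Usage sketch (assumed): FORM performance-measure-approach evaluation at
# target reliability indices; the limit-state names g_stress and g_disp
# follow make_cantilever_beam, but the betas mapping here is hypothetical.
import grama as gr
from grama.models import make_cantilever_beam

md_beam = make_cantilever_beam()
df_pma = md_beam >> gr.ev_form_pma(betas=dict(g_stress=3, g_disp=3), df_det="nom")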


@curry
def eval_form_ria(
    model,
    limits=None,
    cons=None,
    df_corr=None,
    df_det=None,
    append=True,
    tol=1e-3,
    n_maxiter=25,
    n_restart=1,
    verbose=False,
):
Example 21
    # Extract values
    Y = df[var].values
    if standardize:
        Y_mean = Y.mean(axis=0)
        Y_sd = Y.std(axis=0)
        Y = (Y - Y_mean) / Y_sd
    # Generate initial proposal points
    X0 = _perturbed_choice(Y, n)

    ## Run the support points convex-concave procedure (CCP)
    X, d, iter_c = _sp_cpp(X0, Y, delta=tol, iter_max=n_maxiter)
    if verbose:
        print(
            "tran_sp finished in {0:} iterations with distance criterion {1:4.3e}"
            .format(iter_c, d))
    # Warn on failed convergence regardless of verbosity
    if d > tol:
        warn(
            "Convergence tolerance not met; d = {0:4.3e} > tol = {1:4.3e}".format(
                d, tol),
            RuntimeWarning,
        )

    if standardize:
        X = X * Y_sd + Y_mean

    ## Package results
    return DataFrame(data=X, columns=var)


tf_sp = add_pipe(tran_sp)
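
# Usage sketch (assumed): compact a large sample into n representative
# support points; `df_big` is a hypothetical DataFrame of numeric columns.
df_support = df_big >> gr.tf_sp(n=50)
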
Example 22
    """
    model_new = model.copy()

    ## Dispatch to core builder for consistent behavior
    fun, var, out, name, runtime = _comp_function_data(model, fun, var, out,
                                                       name, runtime)

    ## Add new function
    model_new.functions.append(gr.Function(fun, var, out, name, runtime))

    model_new.update()
    return model_new


cp_function = add_pipe(comp_function)
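
# Usage sketch: compose a model from scratch with a scalar function; var
# names the inputs in the order the lambda receives them.
import grama as gr

md_demo = (
    gr.Model("demo")
    >> gr.cp_function(
        fun=lambda x: x[0] + x[1] ** 2,
        var=["x0", "x1"],
        out=["y"],
    )
)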


# Add vectorized function
# -------------------------
@curry
def comp_vec_function(model,
                      fun=None,
                      var=None,
                      out=None,
                      name=None,
                      runtime=0):
    r"""Add a vectorized function to a model

    Composition. Add a function to an existing model. Function must be
    vectorized over DataFrames, and must add new columns matching its `out`
Example 23
                                       seed=seed)

    ### Package outputs
    df_pnd = DataFrame({
        "pr_scores": pr_scores,
        "var_values": var_values,
    })

    if append:
        return df_test.reset_index(drop=True).merge(df_pnd,
                                                    left_index=True,
                                                    right_index=True)
    return df_pnd


ev_pnd = add_pipe(eval_pnd)


# Relative Pareto frontier calculation
def pareto_min_rel(X_test, X_base=None):
    r"""Determine if rows in X_test are optimal, compared to X_base

    Finds the Pareto-efficient test-points that minimize the column values,
    relative to a given set of base-points.

    Args:
        X_test (2d numpy array): Test point observations; rows are observations, columns are features
        X_base (2d numpy array): Base point observations; rows are observations, columns are features

    Returns:
        array of boolean values: Indicates if test observation is Pareto-efficient, relative to base points
Example 24
    try:
        df_res = DataFrame(
            data=UMAP(n_components=n_dim, random_state=seed,
                      **kwargs).fit_transform(df[var].values),
            columns=[out + "{}".format(i) for i in range(n_dim)],
        )

    except NameError as e:
        error_string = str(e)
        raise NameError(error_string +
                        "\n\nThis function requires the `umap` package. " +
                        "Try running the following to install the package:\n"
                        "    pip install umap-learn")

    ## Concatenate as necessary
    if keep:
        df_res = concat(
            (df_res.reset_index(drop=True),
             df[var_leftover].reset_index(drop=True)),
            axis=1,
        )
    if append:
        df_res = concat(
            (df_res.reset_index(drop=True), df[var].reset_index(drop=True)),
            axis=1)

    return df_res


tf_umap = add_pipe(tran_umap)
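
# Usage sketch (assumed): embed selected columns into n_dim dimensions with
# UMAP; requires the `umap-learn` package. Data and columns are hypothetical.
df_embedded = df_data >> gr.tf_umap(var=["x0", "x1", "x2"], n_dim=2, seed=101)
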
Example 25
    Returns:
        gr.model: Metamodel

    """
    ## Extract model information
    inputs = model.domain.inputs
    outputs = model.outputs

    ## Assign default arguments
    if ev is None:
        ev = gr.eval_lhs

    if ft is None:
        # Linear features for each output
        sum_inputs = "+".join(inputs)
        formulae = list(map(lambda output: output + "~" + sum_inputs, outputs))

        ft = lambda df: gr.fit_ols(
            df, formulae=formulae, domain=model.domain, density=model.density)

    ## Generate data
    df_results = ev(model, n=n, seed=seed)

    ## Fit a model
    model = ft(df_results)

    return model


cp_metamodel = add_pipe(comp_metamodel)
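
# Usage sketch (assumed): replace a model with a cheap metamodel fitted to an
# LHS sample of n points, using the default OLS fit constructed above.
import grama as gr
from grama.models import make_cantilever_beam

md_meta = make_cantilever_beam() >> gr.cp_metamodel(n=50, seed=101)
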
Example 26
        df
        >> ggplot()
        + geom_segment(
            aes(
                var[0],
                var[1],
                xend=var[0]+"_end",
                yend=var[1]+"_end",
                linetype=out,
                color=level,
            )
        )
    )


pt_contour = add_pipe(plot_contour)

## Correlation tile plot (companion to tran_iocorr)
# --------------------------------------------------
@curry
def plot_corrtile(df, var=None, out=None, corr=None):
    r"""
    """
    return (
        df
        >> ggplot(aes(var, out))
        + geom_tile(aes(fill=corr))
        + scale_fill_gradient2(name="Corr", midpoint=0)
        + theme(axis_text_x=element_text(angle=270))
    )
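
# Usage sketch (assumed): compute input/output correlations with
# gr.tf_iocorr(), presumed to emit "var", "out", and "corr" columns matching
# the aesthetics above, then tile-plot them. Data are hypothetical.
(
    df_data
    >> gr.tf_iocorr(var=["x0", "x1"], out=["y"])
    >> gr.pt_corrtile(var="var", out="out", corr="corr")
)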