Exemple #1
0
def _error_scatter_axis(
    arms: List[Union[PlotInSampleArm, PlotOutOfSampleArm]],
    axis_var: PlotMetric,
    rel: bool,
    status_quo_arm: Optional[PlotInSampleArm],
) -> Tuple[List[float], List[float]]:
    """Collect the values and standard errors of one plot axis.

    Args:
        arms: Arms to read from. Each arm must expose the dict attributes
            ``y``/``se`` (used when ``axis_var.pred`` is falsy) or
            ``y_hat``/``se_hat`` (used when ``axis_var.pred`` is truthy).
        axis_var: Describes the axis through its ``metric`` name and ``pred``
            flag.
        rel: If True, pass the values through ``relativize`` against
            ``status_quo_arm`` with ``as_percent=True``.
        status_quo_arm: Control arm; required when ``rel`` is True.

    Returns:
        ``(values, sems)`` with one entry per arm. An arm that has no entry
        for the metric contributes ``np.nan``.

    Raises:
        ValueError: If ``rel`` is True and ``status_quo_arm`` is None.
    """
    mean_key = "y_hat" if axis_var.pred else "y"
    sem_key = "se_hat" if axis_var.pred else "se"
    metric = axis_var.metric

    values = [getattr(arm, mean_key).get(metric, np.nan) for arm in arms]
    sems = [getattr(arm, sem_key).get(metric, np.nan) for arm in arms]
    if not rel:
        return values, sems

    if status_quo_arm is None:
        raise ValueError(
            "`status_quo_arm` cannot be None for relative effects.")
    # Delta method relative to the status quo arm. The status quo lookups
    # have no default, so a missing metric reaches `relativize` as None.
    rel_values, rel_sems = relativize(
        means_t=values,
        sems_t=sems,
        mean_c=getattr(status_quo_arm, mean_key).get(metric),
        sem_c=getattr(status_quo_arm, sem_key).get(metric),
        as_percent=True,
    )
    return rel_values.tolist(), rel_sems.tolist()


def _error_scatter_data(
    arms: List[Union[PlotInSampleArm, PlotOutOfSampleArm]],
    y_axis_var: PlotMetric,
    x_axis_var: Optional[PlotMetric] = None,
    rel: bool = False,
    status_quo_arm: Optional[PlotInSampleArm] = None,
) -> Tuple[
        Union[List[str], List[float]],
        Optional[List[float]],
        List[float],
        List[float],
]:
    """Build the x/y values and standard errors for an error scatter plot.

    Args:
        arms: Arms to plot.
        y_axis_var: Metric shown on the y axis; its ``pred`` flag selects
            between the ``y``/``se`` and ``y_hat``/``se_hat`` arm attributes.
        x_axis_var: Metric shown on the x axis. If None, the arm names are
            used as x values and no x standard errors are returned.
        rel: If True, express every metric axis as a percent change relative
            to ``status_quo_arm``.
        status_quo_arm: Control arm; required when ``rel`` is True.

    Returns:
        A tuple ``(x, x_se, y, y_se)``. ``x`` is the list of arm names and
        ``x_se`` is None when ``x_axis_var`` is None; otherwise both are
        lists of floats with one entry per arm.

    Raises:
        ValueError: If ``rel`` is True and ``status_quo_arm`` is None.
    """
    # y is resolved first so that a missing status quo arm is reported
    # before any work is done on the x axis.
    y, y_se = _error_scatter_axis(arms, y_axis_var, rel, status_quo_arm)

    # x can be the values of a metric or the arm names.
    if x_axis_var is None:
        return [arm.name for arm in arms], None, y, y_se

    x, x_se = _error_scatter_axis(arms, x_axis_var, rel, status_quo_arm)
    return x, x_se, y, y_se
Exemple #2
0
def _extract_pareto_frontier_results(
    param_dicts: List[TParameterization],
    means: Mu,
    variances: Cov,
    primary_metric: str,
    secondary_metric: str,
    absolute_metrics: List[str],
    outcome_constraints: Optional[List[OutcomeConstraint]],
    status_quo_prediction: Optional[Tuple[Mu, Cov]],
) -> ParetoFrontierResults:
    """Package model predictions into a ParetoFrontierResults structure.

    Args:
        param_dicts: Parameterizations of the predicted points; passed
            through unchanged.
        means: Predicted means, keyed by metric name.
        variances: Predicted covariances, keyed by metric name twice; only
            the ``variances[m][m]`` entries are read and converted to
            standard errors with ``np.sqrt``.
        primary_metric: Name of the primary metric.
        secondary_metric: Name of the secondary metric.
        absolute_metrics: Metrics that must NOT be relativized.
        outcome_constraints: Accepted but not used by this function.
        status_quo_prediction: ``(means, covariances)`` predicted for the
            status quo; the first element of each per-metric entry is used
            as the control value.

    Returns:
        A ParetoFrontierResults with ``objective_thresholds`` and
        ``arm_names`` set to None.

    Raises:
        AxError: If the primary or secondary metric is relative and no
            status quo prediction was supplied.
    """
    out_means = {name: values.copy() for name, values in means.items()}
    out_sems = {name: np.sqrt(cov[name]) for name, cov in variances.items()}

    # A status quo is only needed when at least one of the two plotted
    # metrics is relative (i.e. not listed as absolute).
    both_absolute = (primary_metric in absolute_metrics
                     and secondary_metric in absolute_metrics)
    if not both_absolute:
        if status_quo_prediction is None:
            raise AxError(
                "Relativized metrics require a valid status quo prediction")
        sq_means, sq_covs = status_quo_prediction

        for name in list(means.keys()):
            # Absolute metrics, and metrics the status quo prediction does
            # not cover, are left as they are.
            if name in absolute_metrics or name not in sq_means:
                continue
            control_mean = sq_means[name][0]
            control_sem = np.sqrt(sq_covs[name][name][0])
            out_means[name], out_sems[name] = relativize(
                means_t=out_means[name],
                sems_t=out_sems[name],
                mean_c=control_mean,
                sem_c=control_sem,
                as_percent=True,
            )

    return ParetoFrontierResults(
        param_dicts=param_dicts,
        means=dict(out_means),
        sems=dict(out_sems),
        primary_metric=primary_metric,
        secondary_metric=secondary_metric,
        absolute_metrics=absolute_metrics,
        objective_thresholds=None,
        arm_names=None,
    )
Exemple #3
0
    def _get_relative_data(
            data: ObservationData,
            status_quo_data: ObservationData) -> ObservationData:
        """Relativize observation data against status quo observation data.

        Each metric is relativized independently with ``relativize`` using
        ``as_percent=True``. Only the diagonal of the covariance is filled
        in; off-diagonal entries of the result stay zero.

        Args:
            data: Observation data to relativize.
            status_quo_data: Observation data of the status quo. It must
                contain every metric present in ``data``; it may contain
                additional metrics, in any order.

        Returns:
            A new ObservationData with the same metric names as ``data``.

        Raises:
            ValueError: If a metric of ``data`` is absent from
                ``status_quo_data``.
        """
        n_metrics = len(data.metric_names)
        result = ObservationData(
            metric_names=data.metric_names,
            # zeros are just to create the shape so values can be set by index
            means=np.zeros(n_metrics),
            covariance=np.zeros((n_metrics, n_metrics)),
        )
        # Search the status quo's own metric list. Bounding the search by
        # len(data.metric_names) would miss metrics stored at a higher index
        # whenever the status quo carries more metrics than `data`.
        sq_metric_names = list(status_quo_data.metric_names)
        for i, metric in enumerate(data.metric_names):
            try:
                j = sq_metric_names.index(metric)
            except ValueError as err:
                raise ValueError(
                    "Relativization cannot be performed because "
                    "ObservationData for status quo is missing metrics"
                ) from err

            means_t = data.means[i]
            sems_t = sqrt(data.covariance[i][i])
            mean_c = status_quo_data.means[j]
            sem_c = sqrt(status_quo_data.covariance[j][j])

            # If this is the status quo itself, the relative effect is
            # exactly zero.
            if means_t == mean_c and sems_t == sem_c:
                means_rel, sems_rel = 0, 0
            else:
                means_rel, sems_rel = relativize(
                    means_t=means_t,
                    sems_t=sems_t,
                    mean_c=mean_c,
                    sem_c=sem_c,
                    as_percent=True,
                )
            result.means[i] = means_rel
            result.covariance[i][i] = sems_rel**2
        return result
Exemple #4
0
def get_observed_pareto_frontiers(
    experiment: Experiment,
    data: Optional[Data] = None,
    rel: bool = True,
) -> List[ParetoFrontierResults]:
    """
    Find all Pareto points from an experiment.

    Uses only values as observed in the data; no modeling is involved. Makes no
    assumption about the search space or types of parameters. If "data" is provided will
    use that, otherwise will use all data attached to the experiment.

    Uses all arms present in data; does not filter according to experiment
    search space.

    Assumes experiment has a multiobjective optimization config from which the
    objectives and outcome constraints will be extracted.

    Will generate a ParetoFrontierResults for every pair of metrics in the experiment's
    multiobjective optimization config.

    Args:
        experiment: Experiment to read the optimization config, status quo
            and (if ``data`` is None) the data from.
        data: Data to use instead of ``experiment.fetch_data()``.
        rel: If True and the experiment has a status quo, objective values
            are expressed as percent change relative to the status quo.
            Without a status quo the values stay absolute.

    Returns:
        One ParetoFrontierResults per pair of objective metrics. All of them
        share the same means, sems, parameterizations and arm names.

    Raises:
        ValueError: If the experiment has no optimization config, or if an
            objective threshold's ``relative`` flag disagrees with whether
            its metric was relativized here.
    """
    if data is None:
        data = experiment.fetch_data()
    if experiment.optimization_config is None:
        raise ValueError("Experiment must have an optimization config")
    mb = get_tensor_converter_model(experiment=experiment, data=data)
    pareto_observations = observed_pareto_frontier(modelbridge=mb)
    # Convert to ParetoFrontierResults
    metric_names = [
        metric.name for metric in
        experiment.optimization_config.objective.metrics  # pyre-ignore
    ]
    pfr_means = {name: [] for name in metric_names}
    pfr_sems = {name: [] for name in metric_names}

    for obs in pareto_observations:
        for i, name in enumerate(obs.data.metric_names):
            # Observations may carry metrics that are not objectives; only
            # objective metrics have a slot in the result dicts.
            if name not in pfr_means:
                continue
            pfr_means[name].append(obs.data.means[i])
            pfr_sems[name].append(np.sqrt(obs.data.covariance[i, i]))

    # Relativize as needed
    if rel and experiment.status_quo is not None:
        # Get status quo values
        sq_df = data.df[data.df["arm_name"] ==
                        experiment.status_quo.name  # pyre-ignore
                        ]
        sq_df = sq_df.to_dict(orient="list")  # pyre-ignore
        sq_means = {}
        sq_sems = {}
        # If the status quo has several rows for a metric, the last row wins.
        for i, metric in enumerate(sq_df["metric_name"]):
            sq_means[metric] = sq_df["mean"][i]
            sq_sems[metric] = sq_df["sem"][i]
        # Relativize
        for name in metric_names:
            if np.isnan(sq_sems[name]) or np.isnan(pfr_sems[name]).any():
                # Just relativize means; the sems are left untouched because
                # at least one of them is unknown.
                pfr_means[name] = [(mu / sq_means[name] - 1) * 100
                                   for mu in pfr_means[name]]
            else:
                # Use delta method
                pfr_means[name], pfr_sems[name] = relativize(
                    means_t=pfr_means[name],
                    sems_t=pfr_sems[name],
                    mean_c=sq_means[name],
                    sem_c=sq_sems[name],
                    as_percent=True,
                )
        absolute_metrics = []
    else:
        absolute_metrics = metric_names

    objective_thresholds = {}
    if experiment.optimization_config.objective_thresholds is not None:  # pyre-ignore
        for objth in experiment.optimization_config.objective_thresholds:
            # A threshold is only comparable to the plotted values if it is
            # expressed on the same (relative or absolute) scale.
            is_rel = objth.metric.name not in absolute_metrics
            if objth.relative != is_rel:
                raise ValueError(
                    f"Objective threshold for {objth.metric.name} has "
                    f"rel={objth.relative} but was specified here as rel={is_rel}"
                )
            objective_thresholds[objth.metric.name] = objth.bound

    # Construct ParetoFrontierResults for each pair
    pfr_list = []
    param_dicts = [obs.features.parameters for obs in pareto_observations]
    arm_names = [obs.arm_name for obs in pareto_observations]

    for metric_a, metric_b in combinations(metric_names, 2):
        pfr_list.append(
            ParetoFrontierResults(
                param_dicts=param_dicts,
                means=pfr_means,
                sems=pfr_sems,
                primary_metric=metric_a,
                secondary_metric=metric_b,
                absolute_metrics=absolute_metrics,
                objective_thresholds=objective_thresholds,
                arm_names=arm_names,
            ))
    return pfr_list