Example 1
def interact_empirical_model_validation(batch: BatchTrial, data: Data) -> AxPlotConfig:
    """Compare the model predictions for the batch arms against observed data.

    Relies on the model predictions stored on the generator runs of the batch.

    Args:
        batch: Batch on which to perform analysis.
        data: Observed data for the batch.
    Returns:
        AxPlotConfig for the plot.
    """
    insample_data: Dict[str, PlotInSampleArm] = {}
    metric_names = list(data.df["metric_name"].unique())
    for struct in batch.generator_run_structs:
        generator_run = struct.generator_run
        if generator_run.model_predictions is None:
            continue
        for i, arm in enumerate(generator_run.arms):
            arm_data = {
                "name": arm.name_or_short_signature,
                "y": {},
                "se": {},
                "parameters": arm.parameters,
                "y_hat": {},
                "se_hat": {},
                "context_stratum": None,
            }
            predictions = generator_run.model_predictions
            for _, row in data.df[
                data.df["arm_name"] == arm.name_or_short_signature
            ].iterrows():
                metric_name = row["metric_name"]
                # pyre-fixme[16]: Optional type has no attribute `__setitem__`.
                arm_data["y"][metric_name] = row["mean"]
                # pyre-fixme[16]: Item `None` of `Union[None, Dict[typing.Any,
                #  typing.Any], Dict[str, typing.Union[None, bool, float, int, str]],
                #  str]` has no attribute `__setitem__`.
                arm_data["se"][metric_name] = row["sem"]
                # pyre-fixme[16]: `Optional` has no attribute `__getitem__`.
                arm_data["y_hat"][metric_name] = predictions[0][metric_name][i]
                # pyre-fixme[16]: Item `None` of `Union[None, Dict[typing.Any,
                #  typing.Any], Dict[str, typing.Union[None, bool, float, int, str]],
                #  str]` has no attribute `__setitem__`.
                arm_data["se_hat"][metric_name] = predictions[1][metric_name][
                    metric_name
                ][i]
            # pyre-fixme[6]: Expected `Optional[Dict[str, Union[float, str]]]` for 1s...
            insample_data[arm.name_or_short_signature] = PlotInSampleArm(**arm_data)
    if not insample_data:
        raise ValueError("No model predictions present on the batch.")
    plot_data = PlotData(
        metrics=metric_names,
        in_sample=insample_data,
        out_of_sample=None,
        status_quo_name=None,
    )

    fig = _obs_vs_pred_dropdown_plot(data=plot_data, rel=False)
    fig["layout"]["title"] = "Cross-validation"
    return AxPlotConfig(data=fig, plot_type=AxPlotTypes.GENERIC)
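
A minimal usage sketch, assuming an existing Ax experiment (the `experiment` name is a placeholder) whose first trial is a BatchTrial carrying model predictions on its generator runs:

from ax.utils.notebook.plotting import render

batch = experiment.trials[0]    # placeholder: a BatchTrial with generator runs
data = experiment.fetch_data()  # observed Data for the experiment
config = interact_empirical_model_validation(batch=batch, data=data)
render(config)                  # draw the AxPlotConfig in a notebook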
Example 2
def _get_cv_plot_data(cv_results: List[CVResult]) -> PlotData:
    if len(cv_results) == 0:
        return PlotData(
            metrics=[], in_sample={}, out_of_sample=None, status_quo_name=None
        )

    # arm_name -> Arm data
    insample_data: Dict[str, PlotInSampleArm] = {}

    # Assume the input is well formed, so metric names are consistent across results
    metric_names = cv_results[0].predicted.metric_names

    for rid, cv_result in enumerate(cv_results):
        arm_name = cv_result.observed.arm_name
        arm_data = {
            "name": cv_result.observed.arm_name,
            "y": {},
            "se": {},
            "parameters": cv_result.observed.features.parameters,
            "y_hat": {},
            "se_hat": {},
            "context_stratum": None,
        }
        for i, mname in enumerate(cv_result.observed.data.metric_names):
            # pyre-fixme[16]: Optional type has no attribute `__setitem__`.
            arm_data["y"][mname] = cv_result.observed.data.means[i]
            # pyre-fixme[16]: Item `None` of `Union[None, Dict[typing.Any,
            #  typing.Any], Dict[str, typing.Union[None, bool, float, int, str]], str]`
            #  has no attribute `__setitem__`.
            arm_data["se"][mname] = np.sqrt(cv_result.observed.data.covariance[i][i])
        for i, mname in enumerate(cv_result.predicted.metric_names):
            # pyre-fixme[16]: Item `None` of `Union[None, Dict[typing.Any,
            #  typing.Any], Dict[str, typing.Union[None, bool, float, int, str]], str]`
            #  has no attribute `__setitem__`.
            arm_data["y_hat"][mname] = cv_result.predicted.means[i]
            # pyre-fixme[16]: Item `None` of `Union[None, Dict[typing.Any,
            #  typing.Any], Dict[str, typing.Union[None, bool, float, int, str]], str]`
            #  has no attribute `__setitem__`.
            arm_data["se_hat"][mname] = np.sqrt(cv_result.predicted.covariance[i][i])

        # pyre-fixme[6]: Expected `str` for 2nd anonymous parameter to call
        #  `dict.__setitem__` but got `Optional[str]`.
        insample_data[f"{arm_name}_{rid}"] = PlotInSampleArm(**arm_data)
    return PlotData(
        metrics=metric_names,
        in_sample=insample_data,
        out_of_sample=None,
        status_quo_name=None,
    )
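
A sketch of how this helper is typically fed: the CVResult list comes from cross_validate in ax.modelbridge.cross_validation, run against a fitted ModelBridge (the `model` name is a placeholder):

from ax.modelbridge.cross_validation import cross_validate

cv_results = cross_validate(model=model)  # placeholder: a fitted ModelBridge
plot_data = _get_cv_plot_data(cv_results)
print(plot_data.metrics)                  # metric names taken from the first CV result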
Example 3
def _get_in_sample_arms(
    model: ModelBridge,
    metric_names: Set[str],
    fixed_features: Optional[ObservationFeatures] = None,
) -> Tuple[Dict[str, PlotInSampleArm], RawData, Dict[str, TParameterization]]:
    """Get in-sample arms from a model with observed and predicted values
    for specified metrics.

    Returns PlotInSampleArm objects in which repeated observations are merged
    with IVW, and a RawData object in which every observation is listed.

    The fixed_features input can be used to override fields of the in-sample
    arms when making model predictions.

    Args:
        model: An instance of the model bridge.
        metric_names: Restrict predictions to these metrics. If None, uses all
            metrics in the model.
        fixed_features: Features that should be fixed in the arms this function
            will obtain predictions for.

    Returns:
        A tuple containing

        - Map from arm name to PlotInSampleArm.
        - List of the data for each observation like::

            {'metric_name': 'likes', 'arm_name': '0_0', 'mean': 1., 'sem': 0.1}

        - Map from arm name to parameters
    """
    observations = model.get_training_data()
    # Calculate raw data
    raw_data = []
    arm_name_to_parameters = {}
    for obs in observations:
        arm_name_to_parameters[obs.arm_name] = obs.features.parameters
        for j, metric_name in enumerate(obs.data.metric_names):
            if metric_name in metric_names:
                raw_data.append({
                    "metric_name": metric_name,
                    "arm_name": obs.arm_name,
                    "mean": obs.data.means[j],
                    "sem": np.sqrt(obs.data.covariance[j, j]),
                })

    # Check that we have one ObservationFeatures per arm name, since we
    # key by arm name and the model is not multi-task.
    # If the "TrialAsTask" transform is present, the same arm name can
    # legitimately appear with different features across trials, so the
    # check is skipped and an arbitrary observation is plotted.
    if ("TrialAsTask" not in model.transforms.keys()) and (
            len(arm_name_to_parameters) != len(observations)):
        logger.error(
            "Have observations of arms with different features but same"
            " name. Arbitrary one will be plotted.")

    # Merge multiple measurements within each Observation with IVW
    # (inverse-variance weighting) to get an un-modeled estimate.
    t = IVW(None, [], [])
    obs_data = t.transform_observation_data([obs.data for obs in observations],
                                            [])
    # Start filling in plot data
    in_sample_plot: Dict[str, PlotInSampleArm] = {}
    for i, obs in enumerate(observations):
        if obs.arm_name is None:
            raise ValueError("Observation must have arm name for plotting.")

        # Extract raw measurement
        obs_y = {}  # Observed metric means.
        obs_se = {}  # Observed metric standard errors.
        # Use the IVW data, not obs.data
        for j, metric_name in enumerate(obs_data[i].metric_names):
            if metric_name in metric_names:
                obs_y[metric_name] = obs_data[i].means[j]
                obs_se[metric_name] = np.sqrt(obs_data[i].covariance[j, j])
        # Make a prediction.
        if model.training_in_design[i]:
            features = obs.features
            if fixed_features is not None:
                features.update_features(fixed_features)
            pred_y, pred_se = _predict_at_point(model, features, metric_names)
        else:
            # Use raw data for out-of-design points
            pred_y = obs_y
            pred_se = obs_se
        in_sample_plot[not_none(obs.arm_name)] = PlotInSampleArm(
            name=not_none(obs.arm_name),
            y=obs_y,
            se=obs_se,
            parameters=obs.features.parameters,
            y_hat=pred_y,
            se_hat=pred_se,
            context_stratum=None,
        )
    return in_sample_plot, raw_data, arm_name_to_parameters
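
A hedged usage sketch, assuming a fitted ModelBridge bound to the placeholder name `model` and a metric called "likes" as in the docstring example:

from ax.core.observation import ObservationFeatures

in_sample, raw_data, arm_params = _get_in_sample_arms(
    model=model,             # placeholder: a fitted ModelBridge
    metric_names={"likes"},  # restrict plotting to this metric
    # Optionally pin features (e.g. a trial index) for all predictions:
    fixed_features=ObservationFeatures(parameters={}, trial_index=0),
)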
Example 4
def _get_batch_comparison_plot_data(
    observations: List[Observation],
    batch_x: int,
    batch_y: int,
    rel: bool = False,
    status_quo_name: Optional[str] = None,
) -> PlotData:
    """Compute PlotData for comparing repeated arms across trials.

    Args:
        observations: List of observations.
        batch_x: Batch for x-axis.
        batch_y: Batch for y-axis.
        rel: Whether to relativize data against status_quo arm.
        status_quo_name: Name of the status_quo arm.

    Returns:
        PlotData: a plot data object.
    """
    if rel and status_quo_name is None:
        raise ValueError("Experiment status quo must be set for rel=True")
    x_observations = {
        observation.arm_name: observation
        for observation in observations
        if observation.features.trial_index == batch_x
    }
    y_observations = {
        observation.arm_name: observation
        for observation in observations
        if observation.features.trial_index == batch_y
    }

    # Assume input is well formed and metric_names are consistent across observations
    metric_names = observations[0].data.metric_names
    insample_data: Dict[str, PlotInSampleArm] = {}
    for arm_name, x_observation in x_observations.items():
        # Restrict to arms present in both trials
        if arm_name not in y_observations:
            continue

        y_observation = y_observations[arm_name]
        arm_data = {
            "name": arm_name,
            "y": {},
            "se": {},
            "parameters": x_observation.features.parameters,
            "y_hat": {},
            "se_hat": {},
            "context_stratum": None,
        }
        for i, mname in enumerate(x_observation.data.metric_names):
            # pyre-fixme[16]: Optional type has no attribute `__setitem__`.
            arm_data["y"][mname] = x_observation.data.means[i]
            arm_data["se"][mname] = np.sqrt(
                x_observation.data.covariance[i][i])
        for i, mname in enumerate(y_observation.data.metric_names):
            arm_data["y_hat"][mname] = y_observation.data.means[i]
            arm_data["se_hat"][mname] = np.sqrt(
                y_observation.data.covariance[i][i])
        # pyre-fixme[6]: Expected `str` for 2nd anonymous parameter to call
        #  `dict.__setitem__` but got `Optional[str]`.
        insample_data[arm_name] = PlotInSampleArm(**arm_data)

    return PlotData(
        metrics=metric_names,
        in_sample=insample_data,
        out_of_sample=None,
        status_quo_name=status_quo_name,
    )
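
A usage sketch under the assumption that the observations were already pulled off a fitted ModelBridge (the `model` name is a placeholder), comparing trials 0 and 1 relative to a status quo arm:

plot_data = _get_batch_comparison_plot_data(
    observations=model.get_training_data(),  # placeholder: a fitted ModelBridge
    batch_x=0,                               # trial index plotted on the x-axis
    batch_y=1,                               # trial index plotted on the y-axis
    rel=True,
    status_quo_name="status_quo",            # placeholder status quo arm name
)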
Example 5
def _get_in_sample_arms(
    model: ModelBridge, metric_names: Set[str]
) -> Tuple[Dict[str, PlotInSampleArm], RawData, Dict[str, TParameterization]]:
    """Get in-sample arms from a model with observed and predicted values
    for specified metrics.

    Returns PlotInSampleArm objects in which repeated observations are merged
    with IVW, and a RawData object in which every observation is listed.

    Args:
        model: An instance of the model bridge.
        metric_names: Restrict predictions to these metrics. If None, uses all
            metrics in the model.

    Returns:
        A tuple containing

        - Map from arm name to PlotInSampleArm.
        - List of the data for each observation like::

            {'metric_name': 'likes', 'arm_name': '0_0', 'mean': 1., 'sem': 0.1}

        - Map from arm name to parameters
    """
    observations = model.get_training_data()
    # Calculate raw data
    raw_data = []
    cond_name_to_parameters = {}
    for obs in observations:
        cond_name_to_parameters[obs.arm_name] = obs.features.parameters
        for j, metric_name in enumerate(obs.data.metric_names):
            if metric_name in metric_names:
                raw_data.append({
                    "metric_name": metric_name,
                    "arm_name": obs.arm_name,
                    "mean": obs.data.means[j],
                    "sem": np.sqrt(obs.data.covariance[j, j]),
                })
    # Check that we have one ObservationFeatures per arm name since we
    # key by arm name.
    if len(cond_name_to_parameters) != len(observations):
        logger.error(
            "Have observations of arms with different features but same"
            " name. Arbitrary one will be plotted.")
    # Merge multiple measurements within each Observation with IVW
    # (inverse-variance weighting) to get an un-modeled estimate.
    t = IVW(None, [], [])
    obs_data = t.transform_observation_data([obs.data for obs in observations],
                                            [])
    # Start filling in plot data
    in_sample_plot: Dict[str, PlotInSampleArm] = {}
    for i, obs in enumerate(observations):
        if obs.arm_name is None:
            raise ValueError("Observation must have arm name for plotting.")

        # Extract raw measurement
        obs_y = {}  # Observed metric means.
        obs_se = {}  # Observed metric standard errors.
        # Use the IVW data, not obs.data
        for j, metric_name in enumerate(obs_data[i].metric_names):
            if metric_name in metric_names:
                obs_y[metric_name] = obs_data[i].means[j]
                obs_se[metric_name] = np.sqrt(obs_data[i].covariance[j, j])
        # Make a prediction.
        if model.training_in_design[i]:
            pred_y, pred_se = _predict_at_point(model, obs.features,
                                                metric_names)
        else:
            # Use raw data for out-of-design points
            pred_y = obs_y
            pred_se = obs_se
        in_sample_plot[obs.arm_name] = PlotInSampleArm(
            name=obs.arm_name,
            y=obs_y,
            se=obs_se,
            parameters=obs.features.parameters,
            y_hat=pred_y,
            se_hat=pred_se,
            context_stratum=None,
        )
    return in_sample_plot, raw_data, cond_name_to_parameters
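
This variant predates Example 3: it lacks the fixed_features argument and the TrialAsTask escape hatch, so a call is correspondingly simpler (the `model` name is again a placeholder):

in_sample, raw_data, cond_params = _get_in_sample_arms(
    model=model,             # placeholder: a fitted ModelBridge
    metric_names={"likes"},
)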