Exemple #1
0
 def testUpdateValidation(self):
     """Check that ``update`` rejects new data keyed on different map keys.

     The base data is keyed on ``"epoch"`` and the incoming data on
     ``"iteration"``; ``update`` is expected to raise ``ValueError``.
     """
     # Columns common to both single-row frames; only the map key differs.
     shared_columns = {
         "arm_name": "0_1",
         "mean": 3.7,
         "sem": 0.5,
         "metric_name": "b",
     }
     epoch_frame = pd.DataFrame([{**shared_columns, "epoch": 0}])
     iteration_frame = pd.DataFrame([{**shared_columns, "iteration": 0}])

     base_data = MapData(df=epoch_frame, map_keys=["epoch"])
     new_data_wrong_map_keys = MapData(
         df=iteration_frame, map_keys=["iteration"]
     )

     expected_message = "Inconsistent map_keys found in new data."
     with self.assertRaisesRegex(ValueError, expected_message):
         base_data.update(new_data=new_data_wrong_map_keys)
Exemple #2
0
    def fetch_trial_data(self,
                         trial: BaseTrial,
                         noisy: bool = True,
                         **kwargs: Any) -> MapData:
        """Evaluate every arm at each recorded timestamp and merge the results.

        The per-trial counter in ``self._trial_index_to_timestamp`` is bumped
        by one when it is still zero or when the trial is running; ``self.f``
        is then called for every arm at each timestamp below that counter.

        Args:
            trial: Trial whose arms are evaluated.
            noisy: When true the ``sem`` column holds ``self.noise_sd``,
                otherwise ``0.0``.
            **kwargs: Accepted but not used by this method.

        Returns:
            The result of ``MapData.from_multiple_map_data`` applied to the
            per-timestamp ``MapData`` objects.
        """
        counters = self._trial_index_to_timestamp
        never_fetched = counters[trial.index] == 0
        if never_fetched or trial.status.is_running:
            counters[trial.index] += 1

        per_timestamp = []
        for timestamp in range(counters[trial.index]):
            outputs = []
            for arm in trial.arms:
                params = np.fromiter(arm.parameters.values(), dtype=float)
                outputs.append(self.f(params, timestamp=timestamp))

            columns = {
                "arm_name": [arm.name for arm in trial.arms],
                "metric_name": self.name,
                "sem": self.noise_sd if noisy else 0.0,
                "trial_index": trial.index,
                "mean": [out["mean"] for out in outputs],
            }
            # One extra column per map key, read from each evaluation result.
            for info in self.map_key_infos:
                columns[info.key] = [out[info.key] for out in outputs]

            per_timestamp.append(
                MapData(df=pd.DataFrame(columns),
                        map_key_infos=self.map_key_infos))

        return MapData.from_multiple_map_data(per_timestamp)
Exemple #3
0
    def test_init(self):
        """A bare ``MapData`` is empty; a ``df`` without map key infos fails."""
        self.assertTrue(MapData().map_df.empty)

        expected_message = "map_key_infos may be `None` iff"
        with self.assertRaisesRegex(ValueError, expected_message):
            MapData(df=self.df, map_key_infos=None)
Exemple #4
0
 def testUpdate(self):
     """Update one-row data with two-row data and expect three rows."""

     def row(epoch):
         # All rows are identical apart from the epoch they belong to.
         return {
             "arm_name": "0_1",
             "mean": 3.7,
             "sem": 0.5,
             "metric_name": "b",
             "epoch": epoch,
         }

     base_data = MapData(df=pd.DataFrame([row(0)]), map_keys=["epoch"])
     new_data = MapData(
         df=pd.DataFrame([row(0), row(1)]),
         map_keys=["epoch"],
     )

     base_data.update(new_data=new_data)

     row_count = base_data.df.shape[0]
     self.assertEqual(3, row_count)
Exemple #5
0
 def testDeduplicateData(self):
     """Deduplicate the fixture data and check what survives.

     Expects four rows after ``deduplicate_data`` and a mean of ``0.5`` for
     arm ``"0_1"`` / metric ``"a"``.
     """
     map_data = MapData(df=self.df, map_keys=self.map_keys)
     df = map_data.deduplicate_data().df
     self.assertEqual(df.shape[0], 4)

     # Use a single combined mask: chaining ``df[mask_a][mask_b]`` applies a
     # mask built on the full frame to an already-filtered frame, which
     # pandas only tolerates by reindexing (with a warning).
     selected = (df["arm_name"] == "0_1") & (df["metric_name"] == "a")
     # ``.item()`` requires exactly one matching row, like ``float(Series)``
     # did, without relying on the deprecated Series-to-float conversion.
     self.assertEqual(df.loc[selected, "mean"].item(), 0.5)
Exemple #6
0
    def fetch_experiment_data_multi(
        cls,
        experiment: Experiment,
        metrics: Iterable[Metric],
        trials: Optional[Iterable[BaseTrial]] = None,
        **kwargs: Any,
    ) -> AbstractDataFrameData:
        """Fetch multiple metrics data for an experiment.

        Args:
            experiment: Experiment whose trials are used when ``trials`` is
                not given.
            metrics: Metrics to fetch; each one's ``curve_name`` selects the
                curve and its ``name`` labels the resulting rows.
            trials: Trials to fetch data for. Defaults to all trials of
                ``experiment``.
            **kwargs: Accepted but not used by this method.

        Returns:
            ``MapData`` keyed on ``cls.MAP_KEY``. It is empty when no ids, no
            curves, or no requested curve could be found.

        Raises:
            RuntimeError: If any of the trials is not a ``Trial``.
        """
        # Both iterables are walked more than once below; materialise them so
        # one-shot iterators (e.g. generators) are not silently exhausted.
        metrics = list(metrics)
        if trials is None:
            trials = experiment.trials.values()
        trials = list(trials)
        if any(not isinstance(trial, Trial) for trial in trials):
            raise RuntimeError(
                f"Only (non-batch) Trials are supported by {cls.__name__}")
        ids = cls.get_ids_from_trials(trials=trials)

        trials_filtered, ids_filtered = [], []
        for trial, id_ in zip(trials, ids):
            if id_ is None:
                logger.info(
                    f"Could not get id for Trial {trial.index}. Ignoring.")
            else:
                trials_filtered.append(trial)
                ids_filtered.append(id_)

        if not ids_filtered:
            logger.info("Could not get ids from trials. Returning empty data.")
            return MapData(map_keys=[cls.MAP_KEY])
        # Only request curves for ids that were actually resolved; the
        # unfiltered sequence may still contain ``None`` entries.
        all_curve_series = cls.get_curves_from_ids(ids=ids_filtered)
        if all(id_ not in all_curve_series for id_ in ids_filtered):
            logger.info("Could not get curves from ids. Returning empty data.")
            return MapData(map_keys=[cls.MAP_KEY])

        for id_, curve_series in all_curve_series.items():
            for m in metrics:
                if m.curve_name not in curve_series:  # pyre-ignore [16]
                    logger.info(
                        f"{m.curve_name} not (yet) present in curves from {id_}. "
                        "Returning data without this metric.")

        dfs = []
        for trial, id_ in zip(trials_filtered, ids_filtered):
            if id_ not in all_curve_series:
                logger.info(
                    f"Could not get curve data for id {id_}. Ignoring.")
                continue
            curve_series = all_curve_series[id_]
            for m in metrics:
                if m.curve_name not in curve_series:  # pyre-ignore [16]
                    # Already reported above; skip instead of raising KeyError
                    # so data is returned without this metric, as promised.
                    continue
                cs = curve_series[m.curve_name].rename(
                    "mean")  # pyre-ignore [6]
                dfi = cs.reset_index().rename(  # pyre-ignore [16]
                    columns={"index": cls.MAP_KEY})
                dfi["trial_index"] = trial.index
                dfi["arm_name"] = trial.arm.name
                dfi["metric_name"] = m.name
                dfi["sem"] = float("nan")
                dfs.append(dfi.drop_duplicates())

        if not dfs:
            # ``pd.concat`` raises on an empty list, so return empty data.
            logger.info(
                "None of the requested curves are available. "
                "Returning empty data.")
            return MapData(map_keys=[cls.MAP_KEY])
        df = pd.concat(dfs, axis=0, ignore_index=True)
        return MapData(df, map_keys=[cls.MAP_KEY])
Exemple #7
0
 def testBadMapData(self):
     """Constructing ``MapData`` from a malformed frame raises ``ValueError``.

     Two cases are checked in order: no ``map_keys`` at all, and
     ``map_keys`` given but the frame lacking the required columns.
     """
     bad_row = {"bad_field": "0_0", "bad_field_2": {"x": 0, "y": "a"}}
     df = pd.DataFrame([bad_row])

     # (constructor keyword arguments, expected error message pattern)
     failing_cases = (
         (
             {"df": df},
             "map_keys may only be `None` when `df` is also None ",
         ),
         (
             {"map_keys": ["bad_field"], "df": df},
             "Dataframe must contain required columns",
         ),
     )
     for constructor_kwargs, expected_message in failing_cases:
         with self.assertRaisesRegex(ValueError, expected_message):
             MapData(**constructor_kwargs)
Exemple #8
0
    def testFetchDataWithMapData(self):
        """Attach map data in two steps and compare with ``lookup_data``.

        Epoch 1 is attached first and epochs 2-4 afterwards; the test expects
        ``lookup_data(keep_latest_map_values_only=False)`` to equal the data
        attached last.
        """
        metric_name = "no_fetch_impl_metric"
        means = (3.7, 3.8, 3.9, 4.0)
        evaluations = {
            "0_0": [
                ({"epoch": epoch}, {metric_name: (mean, 0.5)})
                for epoch, mean in enumerate(means, start=1)
            ],
        }

        def evaluations_between(start, stop):
            # Same arms, restricted to the partial results in [start, stop).
            return {
                arm_name: partial_results[start:stop]
                for arm_name, partial_results in evaluations.items()
            }

        tracking_metric = MapMetric(name="no_fetch_impl_metric")
        self.experiment.add_tracking_metric(metric=tracking_metric)
        self.experiment.new_trial()
        self.experiment.trials[0].mark_running(no_runner_required=True)

        first_epoch = MapData.from_map_evaluations(
            evaluations=evaluations_between(0, 1),
            trial_index=0,
        )
        self.experiment.attach_data(first_epoch)

        remaining_epochs = MapData.from_map_evaluations(
            evaluations=evaluations_between(1, 4),
            trial_index=0,
        )
        self.experiment.attach_data(remaining_epochs)
        self.experiment.trials[0].mark_completed()

        actual_data = self.experiment.lookup_data(
            keep_latest_map_values_only=False)
        self.assertEqual(remaining_epochs, actual_data)
Exemple #9
0
 def testFetchDataWithMapData(self):
     """Attach map data in two steps and compare with ``fetch_data``.

     Epoch 1 is attached first and epochs 2-4 afterwards; the test expects
     ``fetch_data`` to equal data built from all four epochs at once.
     """
     means = (3.7, 3.8, 3.9, 4.0)
     evaluations = {
         "0_0": [
             ({"epoch": epoch}, {"tracking": (mean, 0.5)})
             for epoch, mean in enumerate(means, start=1)
         ],
     }

     def evaluations_between(start, stop):
         # Same arms, restricted to the partial results in [start, stop).
         return {
             arm_name: partial_results[start:stop]
             for arm_name, partial_results in evaluations.items()
         }

     self.experiment.new_trial()
     self.experiment.trials[0].mark_running(no_runner_required=True)

     first_epoch = MapData.from_map_evaluations(
         evaluations=evaluations_between(0, 1),
         trial_index=0,
     )
     self.experiment.attach_data(first_epoch)

     remaining_epochs = MapData.from_map_evaluations(
         evaluations=evaluations_between(1, 4),
         trial_index=0,
     )
     self.experiment.attach_data(remaining_epochs)
     self.experiment.trials[0].mark_completed()

     expected_data = MapData.from_map_evaluations(
         evaluations=evaluations,
         trial_index=0,
     )
     actual_data = self.experiment.fetch_data()
     self.assertEqual(expected_data, actual_data)
Exemple #10
0
    def testMapData(self):
        """Check equality, hashing and cell lookups on fixture ``MapData``.

        Two empty ``MapData`` objects compare equal, the fixture data equals
        itself and exposes the expected ``df_hash``, and two individual cells
        of its ``df`` hold the expected values.
        """
        self.assertEqual(MapData(), MapData())
        map_data = MapData(df=self.df, map_keys=self.map_keys)
        self.assertEqual(map_data, map_data)
        self.assertEqual(map_data.df_hash, self.df_hash)

        df = map_data.df
        # Combine the conditions into one mask: chaining ``df[a][b]`` applies
        # a mask built on the full frame to an already-filtered frame, which
        # pandas only tolerates by reindexing (with a warning).
        arm_0_0_metric_a = (df["arm_name"] == "0_0") & (
            df["metric_name"] == "a")
        # ``.item()`` requires exactly one matching row, like ``float(Series)``
        # did, without relying on the deprecated Series-to-float conversion.
        self.assertEqual(df.loc[arm_0_0_metric_a, "mean"].item(), 2.0)

        arm_0_1_metric_b_epoch_0 = ((df["arm_name"] == "0_1")
                                    & (df["metric_name"] == "b")
                                    & (df["epoch"] == 0))
        self.assertEqual(
            df.loc[arm_0_1_metric_b_epoch_0, "sem"].item(),
            0.5,
        )
Exemple #11
0
def get_map_data(trial_index: int = 0) -> MapData:
    """Build fixture ``MapData`` for three arms over epochs 1 to 4.

    Args:
        trial_index: Trial index passed on to
            ``MapData.from_map_evaluations``.

    Returns:
        ``MapData`` keyed on ``"epoch"`` holding ``"ax_test_metric"`` values
        (SEM 0.5) for the arms ``"status_quo"``, ``"0_0"`` and ``"0_1"``.
    """
    sem = 0.5
    # Metric means per arm, listed in epoch order (epoch 1 first).
    means_by_arm = {
        "status_quo": (1.0, 2.0, 3.0, 4.0),
        "0_0": (3.7, 3.8, 3.9, 4.0),
        "0_1": (3.0, 5.0, 6.0, 1.0),
    }
    evaluations = {
        arm_name: [
            ({"epoch": epoch}, {"ax_test_metric": (mean, sem)})
            for epoch, mean in enumerate(means, start=1)
        ]
        for arm_name, means in means_by_arm.items()
    }
    return MapData.from_map_evaluations(
        evaluations=evaluations,  # pyre-ignore [6]: Spurious param type mismatch.
        trial_index=trial_index,
        map_keys=["epoch"],
    )
Exemple #12
0
 def fetch_trial_data(self,
                      trial: BaseTrial,
                      noisy: bool = True,
                      **kwargs: Any) -> MapData:
     """Evaluate ``self.f`` for every arm at every map-key combination.

     Args:
         trial: Trial whose ``arms_by_name`` are evaluated.
         noisy: When true, ``self.noise_sd`` scales the Gaussian noise added
             to each mean and is reported as ``sem``; otherwise ``0.0`` is
             used for both.
         **kwargs: May contain ``map_keys`` (names of the map keys) plus one
             entry per map key holding that key's values.

     Returns:
         ``MapData`` with one row per arm and per row of map-key values.
     """
     noise_sd = self.noise_sd if noisy else 0.0
     # assume kwargs = {map_keys: [...], key=list(values) for key in map_keys}
     map_keys = kwargs.get("map_keys", [])
     # The map-key values do not depend on the arm, so build the lookup
     # table once instead of once per arm.
     map_keys_dict_of_lists = {
         k: v
         for k, v in kwargs.items() if k in map_keys
     }
     map_keys_df = pd.DataFrame.from_dict(map_keys_dict_of_lists,
                                          orient="index").transpose()

     arm_names = []
     mean = []
     map_keys_values = defaultdict(list)
     for name, arm in trial.arms_by_name.items():
         for _, row in map_keys_df.iterrows():
             x = self._merge_parameters_and_map_keys(
                 parameters=arm.parameters, map_key_series=row)
             # TODO(jej): Use hierarchical DF here for easier syntax?
             arm_names.append(name)
             mean.append(self.f(x) + np.random.randn() * noise_sd)
         # Repeat the map-key values once per arm so these columns line up
         # with ``arm_names`` and ``mean``.
         for map_key, values in map_keys_dict_of_lists.items():
             map_keys_values[map_key].extend(values)
     df = pd.DataFrame({
         "arm_name": arm_names,
         "metric_name": self.name,
         "mean": mean,
         "sem": noise_sd,
         "trial_index": trial.index,
         **map_keys_values,
     })
     return MapData(df=df, map_keys=map_keys)
Exemple #13
0
    def setUp(self):
        """Create the shared fixtures ``df``, ``map_key_infos`` and ``mmd``.

        ``df`` holds six rows for trial 1: arms ``"0_0"`` and ``"0_1"`` with
        metrics ``"a"`` and ``"b"`` at epoch 0, plus arm ``"0_1"`` at epoch 1
        with no SEM.
        """
        # (arm_name, epoch, mean, sem, metric_name)
        observations = (
            ("0_0", 0, 2.0, 0.2, "a"),
            ("0_0", 0, 1.8, 0.3, "b"),
            ("0_1", 0, 4.0, 0.6, "a"),
            ("0_1", 0, 3.7, 0.5, "b"),
            ("0_1", 1, 0.5, None, "a"),
            ("0_1", 1, 3.0, None, "b"),
        )
        records = [
            {
                "arm_name": arm_name,
                "epoch": epoch,
                "mean": mean,
                "sem": sem,
                "trial_index": 1,
                "metric_name": metric_name,
            }
            for arm_name, epoch, mean, sem, metric_name in observations
        ]
        self.df = pd.DataFrame(records)

        epoch_info = MapKeyInfo(key="epoch", default_value=0)
        self.map_key_infos = [epoch_info]

        self.mmd = MapData(df=self.df, map_key_infos=self.map_key_infos)
Exemple #14
0
    def testFromMapEvaluations(self):
        """Build ``MapData`` from evaluations and reject mixed map keys.

        Two evaluations sharing the map keys ``f1``/``f2`` give two rows;
        evaluations with differing map keys are expected to raise
        ``ValueError``.
        """

        def evaluation(map_values, mean):
            # One partial result: (map key values, {metric: (mean, sem)}).
            return (map_values, {"b": (mean, 0.5)})

        consistent = [
            evaluation({"f1": 1.0, "f2": 0.5}, 3.7),
            evaluation({"f1": 1.0, "f2": 0.75}, 3.8),
        ]
        map_data = MapData.from_map_evaluations(
            evaluations={"0_1": consistent},
            trial_index=0,
        )
        self.assertEqual(len(map_data.df), 2)
        self.assertEqual(map_data.map_keys, ["f1", "f2"])

        with self.assertRaises(ValueError):
            MapData.from_map_evaluations(
                evaluations={
                    "0_1": [
                        evaluation({"f1": 1.0, "f2": 0.5}, 3.7),
                        evaluation({"epoch": 1.0, "mc_samples": 0.75}, 3.8),
                    ]
                },
                trial_index=0,
            )
Exemple #15
0
def data_and_evaluations_from_raw_data(
    raw_data: Dict[str, TEvaluationOutcome],
    metric_names: List[str],
    trial_index: int,
    sample_sizes: Dict[str, int],
    start_time: Optional[int] = None,
    end_time: Optional[int] = None,
) -> Tuple[Dict[str, TEvaluationOutcome], AbstractDataFrameData]:
    """Convert per-arm raw data into evaluations and the matching Ax data.

    Every arm's raw data is first normalised with ``raw_data_to_evaluation``.
    When all resulting evaluations are dicts ({metric_name -> (mean, SEM)})
    they become ``Data``; when all are lists
    ([(fidelities, {metric_name -> (mean, SEM)})]) they become ``MapData``.

    Args:
        raw_data: Mapping from arm name to raw_data.
        metric_names: Names of metrics used to transform raw data to evaluations.
        trial_index: Index of the trial, for which the evaluations are.
        sample_sizes: Number of samples collected for each arm, may be empty
            if unavailable.
        start_time: Optional start time of run of the trial that produced this
            data, in milliseconds.
        end_time: Optional end time of run of the trial that produced this
            data, in milliseconds.

    Returns:
        A tuple of the evaluations by arm name and the data built from them.

    Raises:
        ValueError: If dict and list evaluations are mixed.
    """
    evaluations = {}
    for arm_name, arm_raw_data in raw_data.items():
        evaluations[arm_name] = raw_data_to_evaluation(
            raw_data=arm_raw_data,
            metric_names=metric_names,
            start_time=start_time,
            end_time=end_time,
        )

    outcomes = evaluations.values()
    if all(isinstance(outcome, dict) for outcome in outcomes):
        # All evaluations are no-fidelity evaluations.
        return evaluations, Data.from_evaluations(
            evaluations=cast(Dict[str, TTrialEvaluation], evaluations),
            trial_index=trial_index,
            sample_sizes=sample_sizes,
            start_time=start_time,
            end_time=end_time,
        )
    if all(isinstance(outcome, list) for outcome in outcomes):
        # All evaluations are map evaluations.
        return evaluations, MapData.from_map_evaluations(
            evaluations=cast(Dict[str, TMapTrialEvaluation], evaluations),
            trial_index=trial_index,
        )
    raise ValueError(  # pragma: no cover
        "Evaluations included a mixture of no-fidelity and with-fidelity "
        "evaluations, which is not currently supported."
    )
Exemple #16
0
    def testFromMultipleData(self):
        """Merge one-row and two-row ``MapData`` and expect three rows."""

        def row(epoch):
            # All rows are identical apart from the epoch they belong to.
            return {
                "arm_name": "0_1",
                "mean": 3.7,
                "sem": 0.5,
                "metric_name": "b",
                "epoch": epoch,
            }

        single_row = MapData(
            df=pd.DataFrame([row(0)]),
            map_keys=["epoch"],
        )
        two_rows = MapData(
            df=pd.DataFrame([row(0), row(1)]),
            map_keys=["epoch"],
        )

        merged_data = MapData.from_multiple_data([single_row, two_rows])

        self.assertIsInstance(merged_data, MapData)
        row_count = merged_data.df.shape[0]
        self.assertEqual(3, row_count)
Exemple #17
0
 def testCopyStructureWithDF(self):
     """``copy_structure_with_df`` keeps the map keys with a new frame."""
     map_data = MapData(df=self.df, map_keys=self.map_keys)

     shared_columns = {
         "arm_name": "0_1",
         "mean": 3.7,
         "sem": 0.5,
         "metric_name": "b",
     }
     # Two rows that differ only in their epoch.
     small_df = pd.DataFrame(
         [{**shared_columns, "epoch": epoch} for epoch in (0, 1)]
     )

     new_map_data = map_data.copy_structure_with_df(df=small_df)
     self.assertEqual(new_map_data.map_keys, ["epoch"])
Exemple #18
0
    def test_upcast(self):
        """``df`` drops the map key columns and is memoised on first access."""
        fresh = MapData(df=self.df, map_key_infos=self.map_key_infos)

        # Assert df is not cached before first call
        self.assertIsNone(fresh._memo_df)

        df_column_count = fresh.df.columns.size
        map_df_column_count = fresh.map_df.columns.size
        map_key_count = len(self.mmd.map_key_infos)
        self.assertEqual(
            df_column_count,
            map_df_column_count - map_key_count,
        )

        # Assert df is cached after first call
        self.assertIsNotNone(fresh._memo_df)
Exemple #19
0
    def test_from_map_evaluations(self):
        """Build ``MapData`` from evaluations and reject mismatched key infos.

        Two evaluations keyed on ``f1``/``f2`` give two rows; passing
        ``map_key_infos`` that only cover ``f1`` is expected to raise
        ``ValueError``.
        """

        def evaluation(f2, mean):
            # One partial result: (map key values, {metric: (mean, sem)}).
            return ({"f1": 1.0, "f2": f2}, {"b": (mean, 0.5)})

        map_data = MapData.from_map_evaluations(
            evaluations={
                "0_1": [evaluation(0.5, 3.7), evaluation(0.75, 3.8)],
            },
            trial_index=0,
        )

        self.assertEqual(len(map_data.map_df), 2)
        self.assertEqual(set(map_data.map_keys), {"f1", "f2"})

        expected_message = "Inconsistent map_key sets in evaluations"
        with self.assertRaisesRegex(ValueError, expected_message):
            MapData.from_map_evaluations(
                evaluations={"0_1": [evaluation(0.5, 3.7)]},
                map_key_infos=[MapKeyInfo(key="f1", default_value=0.0)],
                trial_index=0,
            )
Exemple #20
0
def _subsample_map_data(map_data: MapData,
                        keep_every_k_per_arm: int) -> MapData:
    """Keep every k-th row of ``map_data.map_df`` within each arm.

    Rows are numbered from zero inside each ``arm_name`` group; a row is kept
    when its number is a multiple of ``keep_every_k_per_arm``.

    Args:
        map_data: Data to subsample.
        keep_every_k_per_arm: Spacing between kept rows inside each arm.

    Returns:
        New ``MapData`` with the kept rows and the same ``map_key_infos`` and
        ``description`` as ``map_data``.
    """
    full_df = map_data.map_df
    # Zero-based position of every row within its arm's group.
    position_in_arm = full_df.groupby(["arm_name"]).cumcount()
    is_kept = (position_in_arm % keep_every_k_per_arm) == 0
    return MapData(
        df=full_df[is_kept],  # pyre-ignore[6]
        map_key_infos=map_data.map_key_infos,
        description=map_data.description,
    )
Exemple #21
0
    def testFetchTrialsData(self):
        """Exercise ``fetch_trials_data`` on a two-batch Branin experiment.

        Covers per-trial fetches, a combined fetch, an unknown trial index,
        fetching after swapping metric ``"b"`` for a plain ``MapMetric``, and
        fetching after data has been attached explicitly.
        """
        exp = self._setupBraninExperiment(n=5)
        batch_0, batch_1 = exp.trials[0], exp.trials[1]
        for batch in (batch_0, batch_1):
            batch.mark_completed()

        def assert_covers_only(data, trial_index, batch):
            # The data must reference just this trial and exactly its arms.
            self.assertEqual(
                set(data.df["trial_index"].values), {trial_index})
            self.assertEqual(
                set(data.df["arm_name"].values),
                {a.name for a in batch.arms},
            )

        batch_0_data = exp.fetch_trials_data(trial_indices=[0])
        assert_covers_only(batch_0_data, 0, batch_0)
        batch_1_data = exp.fetch_trials_data(trial_indices=[1])
        assert_covers_only(batch_1_data, 1, batch_1)

        both_batches_data = exp.fetch_trials_data(trial_indices=[0, 1])
        self.assertEqual(
            both_batches_data,
            MapData.from_multiple_data([batch_0_data, batch_1_data]),
        )

        # Since NoisyFunctionMap metric has overwrite_existing_data = True,
        # we should only have one df per trial now
        self.assertEqual(len(exp.data_by_trial[0]), 1)

        with self.assertRaisesRegex(ValueError, ".* not associated .*"):
            exp.fetch_trials_data(trial_indices=[2])

        # Try to fetch data when there are only metrics and no attached data.
        # Remove implemented metric.
        exp.remove_tracking_metric(metric_name="b")
        # Add unimplemented metric.
        exp.add_tracking_metric(MapMetric(name="b"))
        refetched = exp.fetch_trials_data(trial_indices=[0])
        self.assertEqual(len(refetched.map_df), 30)

        # Try fetching attached data.
        exp.attach_data(batch_0_data)
        exp.attach_data(batch_1_data)
        for trial_index, attached in ((0, batch_0_data), (1, batch_1_data)):
            self.assertEqual(
                exp.fetch_trials_data(trial_indices=[trial_index]),
                attached,
            )
        assert_covers_only(batch_0_data, 0, batch_0)
Exemple #22
0
    def fetch_trial_data(
        self, trial: BaseTrial, noisy: bool = True, **kwargs: Any
    ) -> MapData:
        """Fetch the parent's trial data three times and merge the results.

        Args:
            trial: Trial passed on to the parent implementation.
            noisy: Passed on to the parent implementation.
            **kwargs: Passed on to the parent implementation.

        Returns:
            The result of ``MapData.from_multiple_map_data`` over the three
            fetches.
        """
        # This timestamp parameter will be incremented each time f is called to
        # simulate a true timestamp.
        self._timestamp = -1

        # A plain loop (rather than a comprehension) lets zero-argument
        # super() be used directly.
        rows = []
        for _ in range(3):
            fetched = super().fetch_trial_data(
                trial=trial,
                noisy=noisy,
                **kwargs,
            )
            rows.append(fetched)

        return MapData.from_multiple_map_data(rows)
Exemple #23
0
 def testFetchTrialsData(self):
     """Exercise ``fetch_trials_data`` on a two-batch Branin experiment.

     Covers per-trial fetches, a combined fetch, an unknown trial index,
     fetching after swapping metric ``"b"`` for a plain ``MapMetric``, and
     fetching after data has been attached explicitly.
     """
     exp = self._setupBraninExperiment(n=5)
     batch_0, batch_1 = exp.trials[0], exp.trials[1]
     for batch in (batch_0, batch_1):
         batch.mark_completed()

     def assert_covers_only(data, trial_index, batch):
         # The data must reference just this trial and exactly its arms.
         self.assertEqual(
             set(data.df["trial_index"].values), {trial_index})
         self.assertEqual(
             set(data.df["arm_name"].values),
             {a.name for a in batch.arms},
         )

     batch_0_data = exp.fetch_trials_data(trial_indices=[0])
     assert_covers_only(batch_0_data, 0, batch_0)
     batch_1_data = exp.fetch_trials_data(trial_indices=[1])
     assert_covers_only(batch_1_data, 1, batch_1)

     both_batches_data = exp.fetch_trials_data(trial_indices=[0, 1])
     self.assertEqual(
         both_batches_data,
         MapData.from_multiple_data([batch_0_data, batch_1_data]),
     )

     with self.assertRaisesRegex(ValueError, ".* not associated .*"):
         exp.fetch_trials_data(trial_indices=[2])

     # Try to fetch data when there are only metrics and no attached data.
     # Remove implemented metric.
     exp.remove_tracking_metric(metric_name="b")
     # Add unimplemented metric.
     exp.add_tracking_metric(MapMetric(name="b"))
     refetched = exp.fetch_trials_data(trial_indices=[0])
     self.assertTrue(refetched.df.empty)

     # Try fetching attached data.
     exp.attach_data(batch_0_data)
     exp.attach_data(batch_1_data)
     for trial_index, attached in ((0, batch_0_data), (1, batch_1_data)):
         self.assertEqual(
             exp.fetch_trials_data(trial_indices=[trial_index]),
             attached,
         )
     assert_covers_only(batch_0_data, 0, batch_0)
Exemple #24
0
    def fetch_trial_data(self,
                         trial: BaseTrial,
                         noisy: bool = True,
                         **kwargs: Any) -> MapData:
        """Evaluate ``self.f`` once per arm and wrap the results as ``MapData``.

        Args:
            trial: Trial whose arms are evaluated.
            noisy: When true the ``sem`` column holds ``self.noise_sd``,
                otherwise ``0.0``.
            **kwargs: Accepted but not used by this method.

        Returns:
            ``MapData`` with one row per arm, including one column per entry
            of ``self.map_key_infos`` read from the evaluation results.
        """
        outputs = []
        for arm in trial.arms:
            params = np.fromiter(arm.parameters.values(), dtype=float)
            outputs.append(self.f(params))

        columns = {
            "arm_name": [arm.name for arm in trial.arms],
            "metric_name": self.name,
            "sem": self.noise_sd if noisy else 0.0,
            "trial_index": trial.index,
            "mean": [out["mean"] for out in outputs],
        }
        # One extra column per map key, read from each evaluation result.
        for info in self.map_key_infos:
            columns[info.key] = [out[info.key] for out in outputs]

        return MapData(df=pd.DataFrame(columns),
                       map_key_infos=self.map_key_infos)
Exemple #25
0
 def testFromMultipleDataValidation(self):
     """``from_multiple_data`` rejects plain ``Data`` and mixed map keys."""

     def row(**map_values):
         # Shared metric columns followed by any map key columns.
         return {
             "arm_name": "0_1",
             "mean": 3.7,
             "sem": 0.5,
             "metric_name": "b",
             **map_values,
         }

     # Non-MapData raises an error
     with self.assertRaisesRegex(ValueError, "Non-MapData in inputs."):
         plain_inputs = [
             Data(df=pd.DataFrame([row()])) for _ in range(2)
         ]
         MapData.from_multiple_data(plain_inputs)

     # Inconsistent keys raise an error
     expected_message = "Inconsistent map_keys found in data iterable."
     with self.assertRaisesRegex(ValueError, expected_message):
         epoch_keyed = MapData(
             df=pd.DataFrame([row(epoch=0)]),
             map_keys=["epoch"],
         )
         iteration_keyed = MapData(
             df=pd.DataFrame([row(iteration=1)]),
             map_keys=["iteration"],
         )
         MapData.from_multiple_data([epoch_keyed, iteration_keyed])
Exemple #26
0
    def testObservationsFromMapData(self):
        """Convert ``MapData`` with two map keys into observations.

        Three expected observations are indexed by their ``z`` value. The
        test checks that each produced observation carries the ``z`` value in
        its parameters and the ``timestamp`` in its metadata, alongside the
        expected trial index, metric name, mean, covariance and arm name.
        """

        def expected(arm_name, parameters, mean, sem, trial_index,
                     metric_name, z, timestamp):
            # ``updated_parameters`` are the arm parameters with ``z``
            # replaced by the map value; the covariance is the squared SEM.
            return {
                "arm_name": arm_name,
                "parameters": parameters,
                "mean": mean,
                "sem": sem,
                "trial_index": trial_index,
                "metric_name": metric_name,
                "updated_parameters": {**parameters, "z": z},
                "mean_t": np.array([mean]),
                "covariance_t": np.array([[sem ** 2]]),
                "z": z,
                "timestamp": timestamp,
            }

        truth = {
            0.5: expected(
                "0_0", {"x": 0, "y": "a", "z": 1}, 2.0, 2.0, 1, "a", 0.5, 50
            ),
            0.25: expected(
                "0_1", {"x": 1, "y": "b", "z": 0.5}, 3.0, 3.0, 2, "a", 0.25, 25
            ),
            1: expected(
                "0_0", {"x": 0, "y": "a", "z": 1}, 4.0, 4.0, 1, "b", 1, 100
            ),
        }

        arms = {}
        for obs in truth.values():
            arms[obs["arm_name"]] = Arm(
                name=obs["arm_name"], parameters=obs["parameters"]
            )

        experiment = Mock()
        experiment._trial_indices_by_status = {status: set() for status in TrialStatus}

        trials = {}
        for obs in truth.values():
            generator_run = GeneratorRun(arms=[arms[obs["arm_name"]]])
            trials[obs["trial_index"]] = Trial(experiment, generator_run)

        type(experiment).arms_by_name = PropertyMock(return_value=arms)
        type(experiment).trials = PropertyMock(return_value=trials)

        data_columns = [
            "arm_name", "trial_index", "mean", "sem", "metric_name", "z", "timestamp"
        ]
        df = pd.DataFrame(list(truth.values()))[data_columns]
        map_key_infos = [
            MapKeyInfo(key="z", default_value=0.0),
            MapKeyInfo(key="timestamp", default_value=0.0),
        ]
        data = MapData(df=df, map_key_infos=map_key_infos)
        observations = observations_from_map_data(experiment, data)

        self.assertEqual(len(observations), 3)

        for obs in observations:
            features = obs.features
            t = truth[features.parameters["z"]]
            self.assertEqual(features.parameters, t["updated_parameters"])
            self.assertEqual(features.trial_index, t["trial_index"])
            self.assertEqual(obs.data.metric_names, [t["metric_name"]])
            self.assertTrue(np.array_equal(obs.data.means, t["mean_t"]))
            self.assertTrue(np.array_equal(obs.data.covariance, t["covariance_t"]))
            self.assertEqual(obs.arm_name, t["arm_name"])
            self.assertEqual(features.metadata, {"timestamp": t["timestamp"]})
Exemple #27
0
    def test_combine(self):
        """Combine ``MapData`` with itself, with other keys, and with ``Data``.

        Checks sizes and ``map_key_infos`` after each combination, a metric
        subset, and that rows coming from plain ``Data`` get the map key's
        default value in the ``epoch`` column.
        """

        def metric_rows(arm_name, timestamps=None):
            # Two rows (metrics "a" and "b") for trial 1; a ``timestamp``
            # column is placed after ``arm_name`` when timestamps are given.
            rows = []
            metric_values = ((2.0, 0.2, "a"), (1.8, 0.3, "b"))
            for position, (mean, sem, metric_name) in enumerate(metric_values):
                row = {"arm_name": arm_name}
                if timestamps is not None:
                    row["timestamp"] = timestamps[position]
                row.update(
                    mean=mean, sem=sem, trial_index=1, metric_name=metric_name
                )
                rows.append(row)
            return rows

        mmd_double = MapData.from_multiple_map_data([self.mmd, self.mmd])
        self.assertEqual(mmd_double.map_df.size, 2 * self.mmd.map_df.size)
        self.assertEqual(mmd_double.map_key_infos, self.mmd.map_key_infos)

        different_map_df = pd.DataFrame(metric_rows("0_3", timestamps=(11, 18)))
        different_map_key_infos = [
            MapKeyInfo(key="timestamp", default_value=0.0)
        ]
        different_mmd = MapData(df=different_map_df,
                                map_key_infos=different_map_key_infos)

        combined = MapData.from_multiple_map_data([self.mmd, different_mmd])
        expected_row_count = len(self.mmd.map_df) + len(different_mmd.map_df)
        self.assertEqual(len(combined.map_df), expected_row_count)
        self.assertEqual(combined.map_df.columns.size,
                         self.mmd.map_df.columns.size + 1)
        self.assertEqual(combined.map_key_infos,
                         self.map_key_infos + different_map_key_infos)

        combined_subset = MapData.from_multiple_map_data(
            [self.mmd, different_mmd], ["a"])
        only_metric_a = combined_subset.map_df["metric_name"] == "a"
        self.assertTrue(only_metric_a.all())

        data = Data(df=pd.DataFrame(metric_rows("0_4")))

        downcast_combined = MapData.from_multiple_data([self.mmd, data])
        self.assertEqual(len(downcast_combined.map_df),
                         len(self.mmd.map_df) + len(data.df))
        self.assertEqual(downcast_combined.map_df.columns.size,
                         self.mmd.map_df.columns.size)
        self.assertEqual(downcast_combined.map_key_infos, self.map_key_infos)

        # Check that the Data's rows' epoch cell has the correct default value
        from_plain_data = downcast_combined.map_df[
            downcast_combined.map_df["arm_name"] == "0_4"]
        has_default_epoch = (from_plain_data["epoch"] ==
                             self.mmd.map_key_infos[0].default_value)
        self.assertTrue(has_default_epoch.all())