def testUpdateValidation(self):
    """update() must reject new data whose map_keys differ from the base data's."""
    common = {
        "arm_name": "0_1",
        "mean": 3.7,
        "sem": 0.5,
        "metric_name": "b",
    }
    base_data = MapData(
        df=pd.DataFrame([dict(common, epoch=0)]),
        map_keys=["epoch"],
    )
    mismatched_keys = MapData(
        df=pd.DataFrame([dict(common, iteration=0)]),
        map_keys=["iteration"],
    )
    with self.assertRaisesRegex(
        ValueError, "Inconsistent map_keys found in new data."
    ):
        base_data.update(new_data=mismatched_keys)
def fetch_trial_data(self, trial: BaseTrial, noisy: bool = True, **kwargs: Any) -> MapData:
    """Produce one MapData per simulated timestamp and merge them.

    Advances this metric's per-trial clock when the trial is new or still
    running, then evaluates ``self.f`` for every arm at every elapsed
    timestamp.
    """
    clock = self._trial_index_to_timestamp
    # New trials (count 0) and running trials gain one more timestamp.
    if clock[trial.index] == 0 or trial.status.is_running:
        clock[trial.index] += 1
    per_timestamp_data = []
    for ts in range(clock[trial.index]):
        results = []
        for arm in trial.arms:
            params = np.fromiter(arm.parameters.values(), dtype=float)
            results.append(self.f(params, timestamp=ts))
        columns = {
            "arm_name": [arm.name for arm in trial.arms],
            "metric_name": self.name,
            "sem": self.noise_sd if noisy else 0.0,
            "trial_index": trial.index,
            "mean": [r["mean"] for r in results],
        }
        for mki in self.map_key_infos:
            columns[mki.key] = [r[mki.key] for r in results]
        per_timestamp_data.append(
            MapData(df=pd.DataFrame(columns), map_key_infos=self.map_key_infos)
        )
    return MapData.from_multiple_map_data(per_timestamp_data)
def test_init(self):
    """A bare MapData starts empty; a df without map_key_infos is rejected."""
    self.assertTrue(MapData().map_df.empty)
    with self.assertRaisesRegex(ValueError, "map_key_infos may be `None` iff"):
        MapData(df=self.df, map_key_infos=None)
def testUpdate(self):
    """update() appends the new rows onto the existing map data."""

    def row(epoch):
        # All rows share arm/metric/values; only the epoch varies.
        return {
            "arm_name": "0_1",
            "mean": 3.7,
            "sem": 0.5,
            "metric_name": "b",
            "epoch": epoch,
        }

    base_data = MapData(df=pd.DataFrame([row(0)]), map_keys=["epoch"])
    new_data = MapData(df=pd.DataFrame([row(0), row(1)]), map_keys=["epoch"])
    base_data.update(new_data=new_data)
    # 1 original row + 2 new rows.
    self.assertEqual(base_data.df.shape[0], 3)
def testDeduplicateData(self):
    """deduplicate_data() collapses repeated (arm, metric) rows to one each."""
    map_data = MapData(df=self.df, map_keys=self.map_keys)
    dedup_data = map_data.deduplicate_data()
    df = dedup_data.df
    self.assertEqual(df.shape[0], 4)
    # Fix: the original used chained boolean indexing (df[m1][m2]), which
    # reindexes the second mask against the already-filtered frame — pandas
    # emits a "Boolean Series key will be reindexed" warning and rows can
    # silently misalign. A single combined mask avoids both problems, and
    # `.iloc[0]` replaces the deprecated float(Series) coercion.
    mask = (df["arm_name"] == "0_1") & (df["metric_name"] == "a")
    self.assertEqual(float(df.loc[mask, "mean"].iloc[0]), 0.5)
def fetch_experiment_data_multi(
    cls,
    experiment: Experiment,
    metrics: Iterable[Metric],
    trials: Optional[Iterable[BaseTrial]] = None,
    **kwargs: Any,
) -> AbstractDataFrameData:
    """Fetch multiple metrics data for an experiment.

    Args:
        experiment: Experiment whose trials' curves should be fetched.
        metrics: Metrics (with `curve_name`/`name`) to extract from the curves.
        trials: Optional subset of trials; defaults to all experiment trials.

    Returns:
        MapData keyed by ``cls.MAP_KEY``; empty MapData when no ids or no
        curves could be resolved.

    Raises:
        RuntimeError: If any trial is a batch trial (only `Trial` supported).
    """
    if trials is None:
        trials = list(experiment.trials.values())
    # Materialize so `metrics`/`trials` survive being iterated more than once
    # (the original broke if callers passed generators).
    metrics = list(metrics)
    if any(not isinstance(trial, Trial) for trial in trials):
        raise RuntimeError(
            f"Only (non-batch) Trials are supported by {cls.__name__}")
    ids = cls.get_ids_from_trials(trials=trials)
    trials_filtered, ids_filtered = [], []
    for trial, id_ in zip(trials, ids):
        if id_ is None:
            logger.info(
                f"Could not get id for Trial {trial.index}. Ignoring.")
        else:
            trials_filtered.append(trial)
            ids_filtered.append(id_)
    if len(ids_filtered) == 0:
        logger.info("Could not get ids from trials. Returning empty data.")
        return MapData(map_keys=[cls.MAP_KEY])
    # Bug fix: query only the filtered ids. The original passed the raw
    # `ids` list, which can contain `None` entries that were filtered above.
    all_curve_series = cls.get_curves_from_ids(ids=ids_filtered)  # pyre-ignore [6]
    if all(id_ not in all_curve_series for id_ in ids_filtered):
        logger.info("Could not get curves from ids. Returning empty data.")
        return MapData(map_keys=[cls.MAP_KEY])
    for id_, curve_series in all_curve_series.items():
        for m in metrics:
            if m.curve_name not in curve_series:  # pyre-ignore [16]
                logger.info(
                    f"{m.curve_name} not (yet) present in curves from {id_}. "
                    "Returning data without this metric.")
    dfs = []
    for trial, id_ in zip(trials_filtered, ids_filtered):
        if id_ not in all_curve_series:
            logger.info(
                f"Could not get curve data for id {id_}. Ignoring.")
            continue
        curve_series = all_curve_series[id_]
        for m in metrics:
            if m.curve_name not in curve_series:
                # Bug fix: the loop above only *logged* a missing curve; the
                # unconditional indexing below then raised KeyError. Skip the
                # metric instead, as the log message promises.
                continue
            cs = curve_series[m.curve_name].rename(
                "mean")  # pyre-ignore [6]
            dfi = cs.reset_index().rename(  # pyre-ignore [16]
                columns={"index": cls.MAP_KEY})
            dfi["trial_index"] = trial.index
            dfi["arm_name"] = trial.arm.name
            dfi["metric_name"] = m.name
            dfi["sem"] = float("nan")
            dfs.append(dfi.drop_duplicates())
    if not dfs:
        # Robustness: pd.concat raises ValueError on an empty list.
        return MapData(map_keys=[cls.MAP_KEY])
    df = pd.concat(dfs, axis=0, ignore_index=True)
    return MapData(df, map_keys=[cls.MAP_KEY])
def testBadMapData(self):
    """Malformed constructor arguments must raise ValueError."""
    bad_df = pd.DataFrame(
        [{"bad_field": "0_0", "bad_field_2": {"x": 0, "y": "a"}}]
    )
    # A df without map_keys is rejected.
    with self.assertRaisesRegex(
        ValueError, "map_keys may only be `None` when `df` is also None "
    ):
        MapData(df=bad_df)
    # A df missing the required Data columns is rejected.
    with self.assertRaisesRegex(
        ValueError, "Dataframe must contain required columns"
    ):
        MapData(map_keys=["bad_field"], df=bad_df)
def testFetchDataWithMapData(self):
    """lookup_data(keep_latest_map_values_only=False) returns the latest attached MapData."""
    evaluations = {
        "0_0": [
            ({"epoch": epoch}, {"no_fetch_impl_metric": (mean, 0.5)})
            for epoch, mean in [(1, 3.7), (2, 3.8), (3, 3.9), (4, 4.0)]
        ],
    }
    self.experiment.add_tracking_metric(
        metric=MapMetric(name="no_fetch_impl_metric")
    )
    self.experiment.new_trial()
    self.experiment.trials[0].mark_running(no_runner_required=True)
    # Attach epoch 1 first, then epochs 2-4 as a second attachment.
    first_epoch = MapData.from_map_evaluations(
        evaluations={name: results[0:1] for name, results in evaluations.items()},
        trial_index=0,
    )
    self.experiment.attach_data(first_epoch)
    remaining_epochs = MapData.from_map_evaluations(
        evaluations={name: results[1:4] for name, results in evaluations.items()},
        trial_index=0,
    )
    self.experiment.attach_data(remaining_epochs)
    self.experiment.trials[0].mark_completed()
    actual_data = self.experiment.lookup_data(keep_latest_map_values_only=False)
    # Only the most recently attached data should be returned.
    self.assertEqual(remaining_epochs, actual_data)
def testFetchDataWithMapData(self):
    """fetch_data() merges separately attached partial map evaluations."""
    evaluations = {
        "0_0": [
            ({"epoch": epoch}, {"tracking": (mean, 0.5)})
            for epoch, mean in [(1, 3.7), (2, 3.8), (3, 3.9), (4, 4.0)]
        ],
    }
    self.experiment.new_trial()
    self.experiment.trials[0].mark_running(no_runner_required=True)
    # Attach epoch 1 first, then epochs 2-4 as a second attachment.
    first_epoch = MapData.from_map_evaluations(
        evaluations={name: results[0:1] for name, results in evaluations.items()},
        trial_index=0,
    )
    self.experiment.attach_data(first_epoch)
    remaining_epochs = MapData.from_map_evaluations(
        evaluations={name: results[1:4] for name, results in evaluations.items()},
        trial_index=0,
    )
    self.experiment.attach_data(remaining_epochs)
    self.experiment.trials[0].mark_completed()
    expected_data = MapData.from_map_evaluations(
        evaluations=evaluations, trial_index=0
    )
    self.assertEqual(expected_data, self.experiment.fetch_data())
def testMapData(self):
    """Equality, hashing, and cell lookup on a populated MapData."""
    self.assertEqual(MapData(), MapData())
    map_data = MapData(df=self.df, map_keys=self.map_keys)
    self.assertEqual(map_data, map_data)
    self.assertEqual(map_data.df_hash, self.df_hash)
    df = map_data.df
    # Fix: the original chained boolean indexing (df[m1][m2][m3]) reindexes
    # each subsequent mask against the already-filtered frame — pandas warns
    # and rows can misalign. Combine the conditions into a single mask and
    # select the scalar with `.iloc[0]` instead of the deprecated
    # float(Series) coercion.
    mask_a = (df["arm_name"] == "0_0") & (df["metric_name"] == "a")
    self.assertEqual(float(df.loc[mask_a, "mean"].iloc[0]), 2.0)
    mask_b = (
        (df["arm_name"] == "0_1")
        & (df["metric_name"] == "b")
        & (df["epoch"] == 0)
    )
    self.assertEqual(float(df.loc[mask_b, "sem"].iloc[0]), 0.5)
def get_map_data(trial_index: int = 0) -> MapData:
    """Build a 3-arm, 4-epoch MapData fixture for `ax_test_metric`."""
    # Per-arm mean trajectories over epochs 1..4 (SEM fixed at 0.5).
    means_by_arm = {
        "status_quo": [1.0, 2.0, 3.0, 4.0],
        "0_0": [3.7, 3.8, 3.9, 4.0],
        "0_1": [3.0, 5.0, 6.0, 1.0],
    }
    evaluations = {
        arm: [
            ({"epoch": epoch}, {"ax_test_metric": (mean, 0.5)})
            for epoch, mean in enumerate(arm_means, start=1)
        ]
        for arm, arm_means in means_by_arm.items()
    }
    return MapData.from_map_evaluations(
        evaluations=evaluations,  # pyre-ignore [6]: Spurious param type mismatch.
        trial_index=trial_index,
        map_keys=["epoch"],
    )
def fetch_trial_data(self, trial: BaseTrial, noisy: bool = True, **kwargs: Any) -> MapData:
    """Evaluate ``self.f`` for every arm at every map-key row.

    Args:
        trial: Trial whose arms should be evaluated.
        noisy: When True, add Gaussian noise with sd ``self.noise_sd``.

    Returns:
        MapData with one row per (arm, map-key combination).
    """
    noise_sd = self.noise_sd if noisy else 0.0
    # assume kwargs = {map_keys: [...], key=list(values) for key in map_keys}
    map_keys = kwargs.get("map_keys", [])
    # Perf fix: these are invariant across arms — the original rebuilt the
    # dict and the DataFrame on every iteration of the arm loop. Hoisting
    # them preserves behavior (including the np.random call order) exactly.
    map_keys_dict_of_lists = {k: v for k, v in kwargs.items() if k in map_keys}
    map_keys_df = pd.DataFrame.from_dict(
        map_keys_dict_of_lists, orient="index"
    ).transpose()
    arm_names = []
    mean = []
    map_keys_values = defaultdict(list)
    for name, arm in trial.arms_by_name.items():
        for _, row in map_keys_df.iterrows():
            x = self._merge_parameters_and_map_keys(
                parameters=arm.parameters, map_key_series=row
            )
            # TODO(jej): Use hierarchical DF here for easier syntax?
            arm_names.append(name)
            mean.append(self.f(x) + np.random.randn() * noise_sd)
        for map_key, values in map_keys_dict_of_lists.items():
            map_keys_values[map_key].extend(values)
    df = pd.DataFrame({
        "arm_name": arm_names,
        "metric_name": self.name,
        "mean": mean,
        "sem": noise_sd,
        "trial_index": trial.index,
        **map_keys_values,
    })
    return MapData(df=df, map_keys=map_keys)
def setUp(self):
    """Create the shared fixture: a 6-row map df (2 arms x 2 metrics, epochs 0/1)."""
    # Column-wise construction; column order matches the row-dict fixture it
    # replaces, and the None SEMs become NaN in a float column either way.
    self.df = pd.DataFrame({
        "arm_name": ["0_0", "0_0", "0_1", "0_1", "0_1", "0_1"],
        "epoch": [0, 0, 0, 0, 1, 1],
        "mean": [2.0, 1.8, 4.0, 3.7, 0.5, 3.0],
        "sem": [0.2, 0.3, 0.6, 0.5, None, None],
        "trial_index": [1, 1, 1, 1, 1, 1],
        "metric_name": ["a", "b", "a", "b", "a", "b"],
    })
    self.map_key_infos = [MapKeyInfo(key="epoch", default_value=0)]
    self.mmd = MapData(df=self.df, map_key_infos=self.map_key_infos)
def testFromMapEvaluations(self):
    """from_map_evaluations infers map_keys and rejects mixed key sets."""
    map_data = MapData.from_map_evaluations(
        evaluations={
            "0_1": [
                ({"f1": 1.0, "f2": 0.5}, {"b": (3.7, 0.5)}),
                ({"f1": 1.0, "f2": 0.75}, {"b": (3.8, 0.5)}),
            ]
        },
        trial_index=0,
    )
    self.assertEqual(len(map_data.df), 2)
    self.assertEqual(map_data.map_keys, ["f1", "f2"])
    # Evaluations whose map-key sets disagree must be rejected.
    with self.assertRaises(ValueError):
        MapData.from_map_evaluations(
            evaluations={
                "0_1": [
                    ({"f1": 1.0, "f2": 0.5}, {"b": (3.7, 0.5)}),
                    ({"epoch": 1.0, "mc_samples": 0.75}, {"b": (3.8, 0.5)}),
                ]
            },
            trial_index=0,
        )
def data_and_evaluations_from_raw_data(
    raw_data: Dict[str, TEvaluationOutcome],
    metric_names: List[str],
    trial_index: int,
    sample_sizes: Dict[str, int],
    start_time: Optional[int] = None,
    end_time: Optional[int] = None,
) -> Tuple[Dict[str, TEvaluationOutcome], AbstractDataFrameData]:
    """Transforms evaluations into Ax Data.

    Each evaluation is either a trial evaluation: {metric_name -> (mean, SEM)}
    or a fidelity trial evaluation for multi-fidelity optimizations:
    [(fidelities, {metric_name -> (mean, SEM)})].

    Args:
        raw_data: Mapping from arm name to raw_data.
        metric_names: Names of metrics used to transform raw data to evaluations.
        trial_index: Index of the trial, for which the evaluations are.
        sample_sizes: Number of samples collected for each arm, may be empty
            if unavailable.
        start_time: Optional start time of run of the trial that produced this
            data, in milliseconds.
        end_time: Optional end time of run of the trial that produced this
            data, in milliseconds.
    """
    evaluations = {
        arm_name: raw_data_to_evaluation(
            raw_data=arm_raw_data,
            metric_names=metric_names,
            start_time=start_time,
            end_time=end_time,
        )
        for arm_name, arm_raw_data in raw_data.items()
    }
    outcomes = list(evaluations.values())
    if all(isinstance(outcome, dict) for outcome in outcomes):
        # Every arm produced a plain (no-fidelity) trial evaluation.
        data = Data.from_evaluations(
            evaluations=cast(Dict[str, TTrialEvaluation], evaluations),
            trial_index=trial_index,
            sample_sizes=sample_sizes,
            start_time=start_time,
            end_time=end_time,
        )
    elif all(isinstance(outcome, list) for outcome in outcomes):
        # Every arm produced a fidelity (map) evaluation.
        data = MapData.from_map_evaluations(
            evaluations=cast(Dict[str, TMapTrialEvaluation], evaluations),
            trial_index=trial_index,
        )
    else:
        raise ValueError(  # pragma: no cover
            "Evaluations included a mixture of no-fidelity and with-fidelity "
            "evaluations, which is not currently supported."
        )
    return evaluations, data
def testFromMultipleData(self):
    """from_multiple_data concatenates rows across MapData inputs."""

    def build(epochs):
        # One row per epoch for arm 0_1 / metric b.
        return MapData(
            df=pd.DataFrame([
                {
                    "arm_name": "0_1",
                    "mean": 3.7,
                    "sem": 0.5,
                    "metric_name": "b",
                    "epoch": epoch,
                }
                for epoch in epochs
            ]),
            map_keys=["epoch"],
        )

    merged_data = MapData.from_multiple_data([build([0]), build([0, 1])])
    self.assertIsInstance(merged_data, MapData)
    self.assertEqual(merged_data.df.shape[0], 3)
def testCopyStructureWithDF(self):
    """copy_structure_with_df keeps the original's map_keys on the new df."""
    map_data = MapData(df=self.df, map_keys=self.map_keys)
    small_df = pd.DataFrame([
        {
            "arm_name": "0_1",
            "mean": 3.7,
            "sem": 0.5,
            "metric_name": "b",
            "epoch": epoch,
        }
        for epoch in (0, 1)
    ])
    new_map_data = map_data.copy_structure_with_df(df=small_df)
    self.assertEqual(new_map_data.map_keys, ["epoch"])
def test_upcast(self):
    """`.df` is derived from `.map_df` lazily and memoized on first access."""
    fresh = MapData(df=self.df, map_key_infos=self.map_key_infos)
    # No cache before the first `.df` access.
    self.assertIsNone(fresh._memo_df)
    # The upcast df drops exactly the map-key columns.
    expected_width = fresh.map_df.columns.size - len(self.mmd.map_key_infos)
    self.assertEqual(fresh.df.columns.size, expected_width)
    # The accessor populated the cache.
    self.assertIsNotNone(fresh._memo_df)
def test_from_map_evaluations(self):
    """from_map_evaluations infers keys; mismatched map_key_infos are rejected."""
    map_data = MapData.from_map_evaluations(
        evaluations={
            "0_1": [
                ({"f1": 1.0, "f2": 0.5}, {"b": (3.7, 0.5)}),
                ({"f1": 1.0, "f2": 0.75}, {"b": (3.8, 0.5)}),
            ]
        },
        trial_index=0,
    )
    self.assertEqual(len(map_data.map_df), 2)
    self.assertEqual(set(map_data.map_keys), {"f1", "f2"})
    # Supplying map_key_infos that do not cover the evaluation keys fails.
    with self.assertRaisesRegex(
        ValueError, "Inconsistent map_key sets in evaluations"
    ):
        MapData.from_map_evaluations(
            evaluations={
                "0_1": [({"f1": 1.0, "f2": 0.5}, {"b": (3.7, 0.5)})]
            },
            map_key_infos=[MapKeyInfo(key="f1", default_value=0.0)],
            trial_index=0,
        )
def _subsample_map_data(map_data: MapData, keep_every_k_per_arm: int) -> MapData:
    """Helper function for keeping every kth row for each arm."""
    map_df = map_data.map_df
    # Position of each row within its arm's group; retain positions
    # 0, k, 2k, ... per arm.
    within_arm_position = map_df.groupby(["arm_name"]).cumcount()
    keep_mask = within_arm_position.mod(keep_every_k_per_arm).eq(0)
    return MapData(
        df=map_df[keep_mask],  # pyre-ignore[6]
        map_key_infos=map_data.map_key_infos,
        description=map_data.description,
    )
def testFetchTrialsData(self):
    """fetch_trials_data filters, merges, overwrites, and falls back correctly."""
    exp = self._setupBraninExperiment(n=5)
    batches = [exp.trials[0], exp.trials[1]]
    for batch in batches:
        batch.mark_completed()
    # Fetch each batch individually and verify its rows belong to it.
    per_batch_data = []
    for index, batch in enumerate(batches):
        fetched = exp.fetch_trials_data(trial_indices=[index])
        self.assertEqual(set(fetched.df["trial_index"].values), {index})
        self.assertEqual(
            set(fetched.df["arm_name"].values), {a.name for a in batch.arms}
        )
        per_batch_data.append(fetched)
    batch_0_data, batch_1_data = per_batch_data
    self.assertEqual(
        exp.fetch_trials_data(trial_indices=[0, 1]),
        MapData.from_multiple_data([batch_0_data, batch_1_data]),
    )
    # Since NoisyFunctionMap metric has overwrite_existing_data = True,
    # we should only have one df per trial now
    self.assertEqual(len(exp.data_by_trial[0]), 1)
    with self.assertRaisesRegex(ValueError, ".* not associated .*"):
        exp.fetch_trials_data(trial_indices=[2])
    # Try to fetch data when there are only metrics and no attached data.
    exp.remove_tracking_metric(metric_name="b")  # Remove implemented metric.
    exp.add_tracking_metric(MapMetric(name="b"))  # Add unimplemented metric.
    self.assertEqual(len(exp.fetch_trials_data(trial_indices=[0]).map_df), 30)
    # Try fetching attached data.
    exp.attach_data(batch_0_data)
    exp.attach_data(batch_1_data)
    self.assertEqual(exp.fetch_trials_data(trial_indices=[0]), batch_0_data)
    self.assertEqual(exp.fetch_trials_data(trial_indices=[1]), batch_1_data)
    self.assertEqual(set(batch_0_data.df["trial_index"].values), {0})
    self.assertEqual(
        set(batch_0_data.df["arm_name"].values),
        {a.name for a in batches[0].arms},
    )
def fetch_trial_data(
    self, trial: BaseTrial, noisy: bool = True, **kwargs: Any
) -> MapData:
    """Fetch the trial three times via the parent metric and merge the results."""
    # This timestamp parameter will be incremented each time f is called to
    # simulate a true timestamp.
    self._timestamp = -1
    fetched = []
    # A plain loop lets us call zero-arg super() directly, so the
    # `s = super()` comprehension-scope workaround is unnecessary.
    for _ in range(3):
        fetched.append(
            super().fetch_trial_data(trial=trial, noisy=noisy, **kwargs)
        )
    return MapData.from_multiple_map_data(fetched)
def testFetchTrialsData(self):
    """fetch_trials_data filters by trial, merges, and prefers attached data."""
    exp = self._setupBraninExperiment(n=5)
    batches = [exp.trials[0], exp.trials[1]]
    for batch in batches:
        batch.mark_completed()
    # Fetch each batch individually and verify its rows belong to it.
    per_batch_data = []
    for index, batch in enumerate(batches):
        fetched = exp.fetch_trials_data(trial_indices=[index])
        self.assertEqual(set(fetched.df["trial_index"].values), {index})
        self.assertEqual(
            set(fetched.df["arm_name"].values), {a.name for a in batch.arms}
        )
        per_batch_data.append(fetched)
    batch_0_data, batch_1_data = per_batch_data
    self.assertEqual(
        exp.fetch_trials_data(trial_indices=[0, 1]),
        MapData.from_multiple_data([batch_0_data, batch_1_data]),
    )
    with self.assertRaisesRegex(ValueError, ".* not associated .*"):
        exp.fetch_trials_data(trial_indices=[2])
    # Try to fetch data when there are only metrics and no attached data.
    exp.remove_tracking_metric(metric_name="b")  # Remove implemented metric.
    exp.add_tracking_metric(MapMetric(name="b"))  # Add unimplemented metric.
    self.assertTrue(exp.fetch_trials_data(trial_indices=[0]).df.empty)
    # Try fetching attached data.
    exp.attach_data(batch_0_data)
    exp.attach_data(batch_1_data)
    self.assertEqual(exp.fetch_trials_data(trial_indices=[0]), batch_0_data)
    self.assertEqual(exp.fetch_trials_data(trial_indices=[1]), batch_1_data)
    self.assertEqual(set(batch_0_data.df["trial_index"].values), {0})
    self.assertEqual(
        set(batch_0_data.df["arm_name"].values),
        {a.name for a in batches[0].arms},
    )
def fetch_trial_data(self, trial: BaseTrial, noisy: bool = True, **kwargs: Any) -> MapData:
    """Evaluate ``self.f`` once per arm and package the results as MapData."""
    results = [
        self.f(np.fromiter(arm.parameters.values(), dtype=float))
        for arm in trial.arms
    ]
    columns = {
        "arm_name": [arm.name for arm in trial.arms],
        "metric_name": self.name,
        "sem": self.noise_sd if noisy else 0.0,
        "trial_index": trial.index,
        "mean": [item["mean"] for item in results],
    }
    # One extra column per configured map key, pulled from each result.
    for mki in self.map_key_infos:
        columns[mki.key] = [item[mki.key] for item in results]
    return MapData(df=pd.DataFrame(columns), map_key_infos=self.map_key_infos)
def testFromMultipleDataValidation(self):
    """from_multiple_data rejects non-MapData inputs and mixed map_keys."""

    def base_row(extra=None):
        row = {"arm_name": "0_1", "mean": 3.7, "sem": 0.5, "metric_name": "b"}
        if extra:
            row.update(extra)
        return row

    # Non-MapData raises an error
    with self.assertRaisesRegex(ValueError, "Non-MapData in inputs."):
        MapData.from_multiple_data([
            Data(df=pd.DataFrame([base_row()])),
            Data(df=pd.DataFrame([base_row()])),
        ])
    # Inconsistent keys raise an error
    with self.assertRaisesRegex(
        ValueError, "Inconsistent map_keys found in data iterable."
    ):
        MapData.from_multiple_data([
            MapData(
                df=pd.DataFrame([base_row({"epoch": 0})]),
                map_keys=["epoch"],
            ),
            MapData(
                df=pd.DataFrame([base_row({"iteration": 1})]),
                map_keys=["iteration"],
            ),
        ])
def testObservationsFromMapData(self):
    """observations_from_map_data should yield one Observation per map_df row,
    folding each row's map-key values into the observation features/metadata.
    """
    # Expected per-row values, keyed by the row's `z` map-key value so the
    # verification loop below can look each observation up by its `z`.
    # NOTE(review): `updated_parameters` appears to be the arm parameters with
    # `z` overwritten by the map-key value — confirm against
    # observations_from_map_data's merge semantics.
    truth = {
        0.5: {
            "arm_name": "0_0",
            "parameters": {"x": 0, "y": "a", "z": 1},
            "mean": 2.0,
            "sem": 2.0,
            "trial_index": 1,
            "metric_name": "a",
            "updated_parameters": {"x": 0, "y": "a", "z": 0.5},
            "mean_t": np.array([2.0]),
            "covariance_t": np.array([[4.0]]),
            "z": 0.5,
            "timestamp": 50,
        },
        0.25: {
            "arm_name": "0_1",
            "parameters": {"x": 1, "y": "b", "z": 0.5},
            "mean": 3.0,
            "sem": 3.0,
            "trial_index": 2,
            "metric_name": "a",
            "updated_parameters": {"x": 1, "y": "b", "z": 0.25},
            "mean_t": np.array([3.0]),
            "covariance_t": np.array([[9.0]]),
            "z": 0.25,
            "timestamp": 25,
        },
        1: {
            "arm_name": "0_0",
            "parameters": {"x": 0, "y": "a", "z": 1},
            "mean": 4.0,
            "sem": 4.0,
            "trial_index": 1,
            "metric_name": "b",
            "updated_parameters": {"x": 0, "y": "a", "z": 1},
            "mean_t": np.array([4.0]),
            "covariance_t": np.array([[16.0]]),
            "z": 1,
            "timestamp": 100,
        },
    }
    # Build the arms and a mocked experiment exposing them.
    arms = {
        obs["arm_name"]: Arm(name=obs["arm_name"], parameters=obs["parameters"])
        for _, obs in truth.items()
    }
    experiment = Mock()
    # observations_from_map_data reads the status index; provide empty sets.
    experiment._trial_indices_by_status = {status: set() for status in TrialStatus}
    trials = {
        obs["trial_index"]: Trial(
            experiment, GeneratorRun(arms=[arms[obs["arm_name"]]])
        )
        for _, obs in truth.items()
    }
    # Mock attribute access for properties (plain Mock attrs won't do).
    type(experiment).arms_by_name = PropertyMock(return_value=arms)
    type(experiment).trials = PropertyMock(return_value=trials)
    # Assemble the MapData input with `z` and `timestamp` as map keys.
    df = pd.DataFrame(list(truth.values()))[
        ["arm_name", "trial_index", "mean", "sem", "metric_name", "z", "timestamp"]
    ]
    data = MapData(
        df=df,
        map_key_infos=[
            MapKeyInfo(key="z", default_value=0.0),
            MapKeyInfo(key="timestamp", default_value=0.0),
        ],
    )
    observations = observations_from_map_data(experiment, data)
    # One observation per map_df row; verify each against its truth entry.
    self.assertEqual(len(observations), 3)
    for obs in observations:
        t = truth[obs.features.parameters["z"]]
        self.assertEqual(obs.features.parameters, t["updated_parameters"])
        self.assertEqual(obs.features.trial_index, t["trial_index"])
        self.assertEqual(obs.data.metric_names, [t["metric_name"]])
        self.assertTrue(np.array_equal(obs.data.means, t["mean_t"]))
        self.assertTrue(np.array_equal(obs.data.covariance, t["covariance_t"]))
        self.assertEqual(obs.arm_name, t["arm_name"])
        self.assertEqual(obs.features.metadata, {"timestamp": t["timestamp"]})
def test_combine(self):
    """from_multiple_map_data / from_multiple_data must concatenate rows,
    union map_key_infos, and fill missing map-key cells with defaults.
    """
    # Self-combination: rows double, key infos unchanged.
    mmd_double = MapData.from_multiple_map_data([self.mmd, self.mmd])
    self.assertEqual(mmd_double.map_df.size, 2 * self.mmd.map_df.size)
    self.assertEqual(mmd_double.map_key_infos, self.mmd.map_key_infos)
    # A second MapData with a *different* map key ("timestamp" vs "epoch").
    different_map_df = pd.DataFrame([
        {
            "arm_name": "0_3",
            "timestamp": 11,
            "mean": 2.0,
            "sem": 0.2,
            "trial_index": 1,
            "metric_name": "a",
        },
        {
            "arm_name": "0_3",
            "timestamp": 18,
            "mean": 1.8,
            "sem": 0.3,
            "trial_index": 1,
            "metric_name": "b",
        },
    ])
    different_map_key_infos = [
        MapKeyInfo(key="timestamp", default_value=0.0)
    ]
    different_mmd = MapData(df=different_map_df,
                            map_key_infos=different_map_key_infos)
    combined = MapData.from_multiple_map_data([self.mmd, different_mmd])
    # Rows concatenate; columns gain exactly one (the new "timestamp" key);
    # key infos are the union of both inputs'.
    self.assertEqual(len(combined.map_df),
                     len(self.mmd.map_df) + len(different_mmd.map_df))
    self.assertEqual(combined.map_df.columns.size,
                     self.mmd.map_df.columns.size + 1)
    self.assertEqual(combined.map_key_infos,
                     self.map_key_infos + different_map_key_infos)
    # The optional subset argument filters to the named metrics only.
    combined_subset = MapData.from_multiple_map_data(
        [self.mmd, different_mmd], ["a"])
    self.assertTrue((combined_subset.map_df["metric_name"] == "a").all())
    # Combining with a plain (non-map) Data: rows concatenate, no new
    # columns are added, and key infos stay those of the MapData input.
    data_df = pd.DataFrame([
        {
            "arm_name": "0_4",
            "mean": 2.0,
            "sem": 0.2,
            "trial_index": 1,
            "metric_name": "a",
        },
        {
            "arm_name": "0_4",
            "mean": 1.8,
            "sem": 0.3,
            "trial_index": 1,
            "metric_name": "b",
        },
    ])
    data = Data(df=data_df)
    downcast_combined = MapData.from_multiple_data([self.mmd, data])
    self.assertEqual(len(downcast_combined.map_df),
                     len(self.mmd.map_df) + len(data.df))
    self.assertEqual(downcast_combined.map_df.columns.size,
                     self.mmd.map_df.columns.size)
    self.assertEqual(downcast_combined.map_key_infos, self.map_key_infos)
    # Check that the Data's rows' epoch cell has the correct default value
    self.assertTrue((downcast_combined.map_df[
        downcast_combined.map_df["arm_name"] == "0_4"]["epoch"] ==
                     self.mmd.map_key_infos[0].default_value).all())