def testFromMultipleDataValidation(self):
    """Verify that ``MapData.from_multiple_data`` rejects invalid inputs.

    Two failure modes are exercised, each expected to raise ``ValueError``:

    * the iterable holds plain ``Data`` objects rather than ``MapData``;
    * the ``MapData`` objects were built with different ``map_keys``.

    The fixtures are constructed outside the ``assertRaisesRegex`` blocks so
    that only the ``from_multiple_data`` call itself can satisfy the
    expectation; an error raised while building a fixture fails the test
    instead of being mistaken for the error under test.
    """
    # Columns shared by every fixture row in this test.
    base_row = {
        "arm_name": "0_1",
        "mean": 3.7,
        "sem": 0.5,
        "metric_name": "b",
    }

    # Non-MapData raises an error
    plain_data = [
        Data(df=pd.DataFrame([dict(base_row)])),
        Data(df=pd.DataFrame([dict(base_row)])),
    ]
    with self.assertRaisesRegex(ValueError, "Non-MapData in inputs."):
        MapData.from_multiple_data(plain_data)

    # Inconsistent keys raise an error
    mismatched_keys = [
        MapData(
            df=pd.DataFrame([dict(base_row, epoch=0)]),
            map_keys=["epoch"],
        ),
        MapData(
            df=pd.DataFrame([dict(base_row, iteration=1)]),
            map_keys=["iteration"],
        ),
    ]
    with self.assertRaisesRegex(
        ValueError, "Inconsistent map_keys found in data iterable."
    ):
        MapData.from_multiple_data(mismatched_keys)
def testFetchTrialsData(self):
    """Exercise ``fetch_trials_data`` on trials 0 and 1 of the test experiment.

    The experiment comes from ``self._setupBraninExperiment(n=5)``. The test
    checks, in order:

    * data fetched for a single trial only mentions that trial's index and
      that trial's arm names;
    * fetching both trials at once equals merging the two single-trial
      results with ``MapData.from_multiple_data``;
    * ``data_by_trial[0]`` holds exactly one entry;
    * asking for trial index 2 raises ``ValueError``;
    * after replacing metric ``"b"`` with a bare ``MapMetric``, the fetched
      ``map_df`` for trial 0 has 30 rows;
    * after attaching the earlier results, fetching returns them unchanged.
    """
    # NOTE(review): another ``testFetchTrialsData`` definition is visible in
    # this file; if both live in the same class only the later one is kept
    # by Python - confirm and rename one of them if so.
    experiment = self._setupBraninExperiment(n=5)
    trials = [experiment.trials[0], experiment.trials[1]]
    for trial in trials:
        trial.mark_completed()

    fetched = []
    for index, trial in enumerate(trials):
        trial_data = experiment.fetch_trials_data(trial_indices=[index])
        self.assertEqual(set(trial_data.df["trial_index"].values), {index})
        self.assertEqual(
            set(trial_data.df["arm_name"].values),
            {arm.name for arm in trial.arms},
        )
        fetched.append(trial_data)
    first_data, second_data = fetched

    fetched_together = experiment.fetch_trials_data(trial_indices=[0, 1])
    merged_separately = MapData.from_multiple_data([first_data, second_data])
    self.assertEqual(fetched_together, merged_separately)

    # Since NoisyFunctionMap metric has overwrite_existing_data = True,
    # we should only have one df per trial now
    self.assertEqual(len(experiment.data_by_trial[0]), 1)

    with self.assertRaisesRegex(ValueError, ".* not associated .*"):
        experiment.fetch_trials_data(trial_indices=[2])

    # Try to fetch data when there are only metrics and no attached data.
    experiment.remove_tracking_metric(metric_name="b")  # Remove implemented metric.
    experiment.add_tracking_metric(MapMetric(name="b"))  # Add unimplemented metric.
    refetched = experiment.fetch_trials_data(trial_indices=[0])
    self.assertEqual(len(refetched.map_df), 30)

    # Try fetching attached data. Both results are attached before either
    # one is fetched back.
    for trial_data in fetched:
        experiment.attach_data(trial_data)
    for index, trial_data in enumerate(fetched):
        self.assertEqual(
            experiment.fetch_trials_data(trial_indices=[index]), trial_data
        )

    # The originally fetched data for trial 0 still describes only trial 0.
    self.assertEqual(set(first_data.df["trial_index"].values), {0})
    self.assertEqual(
        set(first_data.df["arm_name"].values),
        {arm.name for arm in trials[0].arms},
    )
def testFromMultipleData(self):
    """Merge two ``MapData`` objects keyed on ``"epoch"`` and check the result.

    The first input carries one row (epoch 0) and the second carries two
    rows (epochs 0 and 1). The merged object must be a ``MapData`` whose
    ``df`` has three rows.
    """

    def _map_data_for(epochs):
        # One row per epoch; every other column is identical across rows.
        rows = [
            {
                "arm_name": "0_1",
                "mean": 3.7,
                "sem": 0.5,
                "metric_name": "b",
                "epoch": epoch,
            }
            for epoch in epochs
        ]
        return MapData(df=pd.DataFrame(rows), map_keys=["epoch"])

    inputs = [_map_data_for([0]), _map_data_for([0, 1])]
    merged = MapData.from_multiple_data(inputs)

    self.assertIsInstance(merged, MapData)
    self.assertEqual(3, merged.df.shape[0])
def testFetchTrialsData(self):
    """Exercise ``fetch_trials_data`` on trials 0 and 1 of the test experiment.

    The experiment comes from ``self._setupBraninExperiment(n=5)``. The test
    checks, in order:

    * data fetched for a single trial only mentions that trial's index and
      that trial's arm names;
    * fetching both trials at once equals merging the two single-trial
      results with ``MapData.from_multiple_data``;
    * asking for trial index 2 raises ``ValueError``;
    * after replacing metric ``"b"`` with a bare ``MapMetric``, the fetched
      ``df`` for trial 0 is empty;
    * after attaching the earlier results, fetching returns them unchanged.
    """
    # NOTE(review): another ``testFetchTrialsData`` definition is visible in
    # this file; if both live in the same class only the later one is kept
    # by Python - confirm and rename one of them if so.
    experiment = self._setupBraninExperiment(n=5)
    trial_by_index = {0: experiment.trials[0], 1: experiment.trials[1]}
    for trial in trial_by_index.values():
        trial.mark_completed()

    data_by_index = {}
    for index, trial in trial_by_index.items():
        fetched = experiment.fetch_trials_data(trial_indices=[index])
        seen_trial_indices = set(fetched.df["trial_index"].values)
        self.assertEqual(seen_trial_indices, {index})
        seen_arm_names = set(fetched.df["arm_name"].values)
        self.assertEqual(seen_arm_names, {arm.name for arm in trial.arms})
        data_by_index[index] = fetched

    self.assertEqual(
        experiment.fetch_trials_data(trial_indices=[0, 1]),
        MapData.from_multiple_data([data_by_index[0], data_by_index[1]]),
    )

    with self.assertRaisesRegex(ValueError, ".* not associated .*"):
        experiment.fetch_trials_data(trial_indices=[2])

    # Try to fetch data when there are only metrics and no attached data.
    experiment.remove_tracking_metric(metric_name="b")  # Remove implemented metric.
    experiment.add_tracking_metric(MapMetric(name="b"))  # Add unimplemented metric.
    refetched = experiment.fetch_trials_data(trial_indices=[0])
    self.assertTrue(refetched.df.empty)

    # Try fetching attached data. Both results are attached before either
    # one is fetched back.
    for fetched in data_by_index.values():
        experiment.attach_data(fetched)
    for index, fetched in data_by_index.items():
        self.assertEqual(
            experiment.fetch_trials_data(trial_indices=[index]), fetched
        )

    # The originally fetched data for trial 0 still describes only trial 0.
    self.assertEqual(set(data_by_index[0].df["trial_index"].values), {0})
    self.assertEqual(
        set(data_by_index[0].df["arm_name"].values),
        {arm.name for arm in trial_by_index[0].arms},
    )
def test_combine(self):
    """Check combining ``self.mmd`` with other data via ``MapData`` constructors.

    Covers, in order:

    * ``self.mmd`` combined with itself through ``from_multiple_map_data``:
      ``map_df.size`` doubles and ``map_key_infos`` is unchanged;
    * ``self.mmd`` combined with a ``MapData`` keyed on ``"timestamp"``: row
      counts add up, one column is gained, and the key infos are
      concatenated;
    * the same combination restricted to ``["a"]``: every row has
      ``metric_name == "a"``;
    * ``self.mmd`` combined with a plain ``Data`` through
      ``from_multiple_data``: row counts add up, the columns and key infos
      are unchanged, and the rows for arm ``"0_4"`` hold the default value
      of ``self.mmd``'s first map key in the ``"epoch"`` column.
    """
    # --- self.mmd combined with itself ---
    doubled = MapData.from_multiple_map_data([self.mmd, self.mmd])
    self.assertEqual(doubled.map_df.size, 2 * self.mmd.map_df.size)
    self.assertEqual(doubled.map_key_infos, self.mmd.map_key_infos)

    # --- self.mmd combined with a MapData that uses a different map key ---
    timestamp_rows = [
        {
            "arm_name": "0_3",
            "timestamp": timestamp,
            "mean": mean,
            "sem": sem,
            "trial_index": 1,
            "metric_name": metric_name,
        }
        for timestamp, mean, sem, metric_name in (
            (11, 2.0, 0.2, "a"),
            (18, 1.8, 0.3, "b"),
        )
    ]
    timestamp_df = pd.DataFrame(timestamp_rows)
    timestamp_key_infos = [MapKeyInfo(key="timestamp", default_value=0.0)]
    timestamp_mmd = MapData(df=timestamp_df, map_key_infos=timestamp_key_infos)

    merged = MapData.from_multiple_map_data([self.mmd, timestamp_mmd])
    self.assertEqual(
        len(merged.map_df),
        len(self.mmd.map_df) + len(timestamp_mmd.map_df),
    )
    self.assertEqual(
        merged.map_df.columns.size, self.mmd.map_df.columns.size + 1
    )
    self.assertEqual(
        merged.map_key_infos, self.map_key_infos + timestamp_key_infos
    )

    # --- same combination, restricted to metric "a" ---
    merged_only_a = MapData.from_multiple_map_data(
        [self.mmd, timestamp_mmd], ["a"]
    )
    self.assertTrue((merged_only_a.map_df["metric_name"] == "a").all())

    # --- self.mmd combined with a plain Data object ---
    plain_rows = [
        {
            "arm_name": "0_4",
            "mean": mean,
            "sem": sem,
            "trial_index": 1,
            "metric_name": metric_name,
        }
        for mean, sem, metric_name in ((2.0, 0.2, "a"), (1.8, 0.3, "b"))
    ]
    plain_data = Data(df=pd.DataFrame(plain_rows))

    with_plain = MapData.from_multiple_data([self.mmd, plain_data])
    self.assertEqual(
        len(with_plain.map_df), len(self.mmd.map_df) + len(plain_data.df)
    )
    self.assertEqual(
        with_plain.map_df.columns.size, self.mmd.map_df.columns.size
    )
    self.assertEqual(with_plain.map_key_infos, self.map_key_infos)

    # Check that the Data's rows' epoch cell has the correct default value
    combined_df = with_plain.map_df
    rows_from_plain = combined_df[combined_df["arm_name"] == "0_4"]
    expected_default = self.mmd.map_key_infos[0].default_value
    self.assertTrue((rows_from_plain["epoch"] == expected_default).all())