Exemple #1
0
 def testFromMultipleDataValidation(self):
     """Check that ``MapData.from_multiple_data`` rejects invalid inputs.

     Two cases are exercised: a list of plain ``Data`` objects, and a list
     of ``MapData`` objects that declare different ``map_keys``. Each call
     is expected to raise ``ValueError`` with a specific message.
     """
     base_row = {
         "arm_name": "0_1",
         "mean": 3.7,
         "sem": 0.5,
         "metric_name": "b",
     }

     # Fixtures are built outside the ``assertRaisesRegex`` blocks so that
     # only the ``from_multiple_data`` call can satisfy the expectation; a
     # ``ValueError`` raised while constructing inputs now fails the test
     # instead of being mistaken for the error under test.
     plain_inputs = [Data(df=pd.DataFrame([base_row])) for _ in range(2)]

     # Non-MapData raises an error
     with self.assertRaisesRegex(ValueError, "Non-MapData in inputs."):
         MapData.from_multiple_data(plain_inputs)

     epoch_data = MapData(
         df=pd.DataFrame([{**base_row, "epoch": 0}]),
         map_keys=["epoch"],
     )
     iteration_data = MapData(
         df=pd.DataFrame([{**base_row, "iteration": 1}]),
         map_keys=["iteration"],
     )

     # Inconsistent keys raise an error
     with self.assertRaisesRegex(
             ValueError, "Inconsistent map_keys found in data iterable."):
         MapData.from_multiple_data([epoch_data, iteration_data])
Exemple #2
0
    def testFetchTrialsData(self):
        """Exercise ``fetch_trials_data`` on a Branin experiment.

        Covers fetching a single trial, fetching two trials at once, a trial
        index that is expected to raise ``ValueError``, fetching after the
        tracking metric "b" is swapped for a bare ``MapMetric``, and fetching
        after the earlier results are attached back to the experiment.
        """

        def assert_only_trial(data, trial_index, batch):
            # The fetched frame must reference just this trial index and
            # exactly the arm names of the given batch.
            self.assertEqual(
                set(data.df["trial_index"].values), {trial_index})
            self.assertEqual(
                set(data.df["arm_name"].values),
                {arm.name for arm in batch.arms},
            )

        exp = self._setupBraninExperiment(n=5)
        first_batch = exp.trials[0]
        second_batch = exp.trials[1]
        for batch in (first_batch, second_batch):
            batch.mark_completed()

        first_data = exp.fetch_trials_data(trial_indices=[0])
        assert_only_trial(first_data, 0, first_batch)
        second_data = exp.fetch_trials_data(trial_indices=[1])
        assert_only_trial(second_data, 1, second_batch)

        # Fetching both trials together must equal merging the two fetches.
        both_fetched = exp.fetch_trials_data(trial_indices=[0, 1])
        both_merged = MapData.from_multiple_data([first_data, second_data])
        self.assertEqual(both_fetched, both_merged)

        # Since NoisyFunctionMap metric has overwrite_existing_data = True,
        # we should only have one df per trial now
        self.assertEqual(len(exp.data_by_trial[0]), 1)

        with self.assertRaisesRegex(ValueError, ".* not associated .*"):
            exp.fetch_trials_data(trial_indices=[2])

        # Try to fetch data when there are only metrics and no attached data.
        exp.remove_tracking_metric(metric_name="b")  # Remove implemented metric.
        exp.add_tracking_metric(MapMetric(name="b"))  # Add unimplemented metric.
        refetched = exp.fetch_trials_data(trial_indices=[0])
        self.assertEqual(len(refetched.map_df), 30)

        # Try fetching attached data.
        exp.attach_data(first_data)
        exp.attach_data(second_data)
        self.assertEqual(
            exp.fetch_trials_data(trial_indices=[0]), first_data)
        self.assertEqual(
            exp.fetch_trials_data(trial_indices=[1]), second_data)
        assert_only_trial(first_data, 0, first_batch)
Exemple #3
0
    def testFromMultipleData(self):
        """Merge two ``MapData`` objects keyed on "epoch" and check the result.

        The first input holds one row (epoch 0) and the second holds two rows
        (epochs 0 and 1); the merged object must be a ``MapData`` whose ``df``
        has three rows.
        """
        shared_fields = {
            "arm_name": "0_1",
            "mean": 3.7,
            "sem": 0.5,
            "metric_name": "b",
        }

        def epoch_map_data(epochs):
            # One row per requested epoch; all other fields are identical.
            rows = [{**shared_fields, "epoch": epoch} for epoch in epochs]
            return MapData(df=pd.DataFrame(rows), map_keys=["epoch"])

        data = [epoch_map_data([0]), epoch_map_data([0, 1])]

        merged_data = MapData.from_multiple_data(data)
        self.assertIsInstance(merged_data, MapData)
        merged_row_count = merged_data.df.shape[0]
        self.assertEqual(3, merged_row_count)
Exemple #4
0
 def testFetchTrialsData(self):
     """Exercise ``fetch_trials_data`` on a Branin experiment.

     Covers fetching a single trial, fetching two trials at once, a trial
     index that is expected to raise ``ValueError``, fetching after the
     tracking metric "b" is swapped for a bare ``MapMetric`` (expected to
     give an empty ``df``), and fetching after the earlier results are
     attached back to the experiment.
     """

     def assert_only_trial(data, trial_index, batch):
         # The fetched frame must reference just this trial index and
         # exactly the arm names of the given batch.
         self.assertEqual(
             set(data.df["trial_index"].values), {trial_index})
         self.assertEqual(
             set(data.df["arm_name"].values),
             {arm.name for arm in batch.arms},
         )

     exp = self._setupBraninExperiment(n=5)
     first_batch = exp.trials[0]
     second_batch = exp.trials[1]
     for batch in (first_batch, second_batch):
         batch.mark_completed()

     first_data = exp.fetch_trials_data(trial_indices=[0])
     assert_only_trial(first_data, 0, first_batch)
     second_data = exp.fetch_trials_data(trial_indices=[1])
     assert_only_trial(second_data, 1, second_batch)

     # Fetching both trials together must equal merging the two fetches.
     both_fetched = exp.fetch_trials_data(trial_indices=[0, 1])
     both_merged = MapData.from_multiple_data([first_data, second_data])
     self.assertEqual(both_fetched, both_merged)

     with self.assertRaisesRegex(ValueError, ".* not associated .*"):
         exp.fetch_trials_data(trial_indices=[2])

     # Try to fetch data when there are only metrics and no attached data.
     exp.remove_tracking_metric(metric_name="b")  # Remove implemented metric.
     exp.add_tracking_metric(MapMetric(name="b"))  # Add unimplemented metric.
     refetched = exp.fetch_trials_data(trial_indices=[0])
     self.assertTrue(refetched.df.empty)

     # Try fetching attached data.
     exp.attach_data(first_data)
     exp.attach_data(second_data)
     self.assertEqual(
         exp.fetch_trials_data(trial_indices=[0]), first_data)
     self.assertEqual(
         exp.fetch_trials_data(trial_indices=[1]), second_data)
     assert_only_trial(first_data, 0, first_batch)
Exemple #5
0
    def test_combine(self):
        """Combine ``MapData`` objects with each other and with plain ``Data``.

        Checks row counts, column counts and ``map_key_infos`` of the combined
        results, the effect of passing a metric-name subset, and that rows
        coming from a plain ``Data`` receive the default "epoch" value.
        """
        base_mmd = self.mmd

        # Combining the fixture with itself.
        doubled = MapData.from_multiple_map_data([base_mmd, base_mmd])
        self.assertEqual(doubled.map_df.size, 2 * base_mmd.map_df.size)
        self.assertEqual(doubled.map_key_infos, base_mmd.map_key_infos)

        # A second MapData keyed on "timestamp" rather than the fixture's key.
        timestamp_rows = [
            {
                "arm_name": "0_3",
                "timestamp": 11,
                "mean": 2.0,
                "sem": 0.2,
                "trial_index": 1,
                "metric_name": "a",
            },
            {
                "arm_name": "0_3",
                "timestamp": 18,
                "mean": 1.8,
                "sem": 0.3,
                "trial_index": 1,
                "metric_name": "b",
            },
        ]
        timestamp_key_infos = [MapKeyInfo(key="timestamp", default_value=0.0)]
        timestamp_mmd = MapData(
            df=pd.DataFrame(timestamp_rows),
            map_key_infos=timestamp_key_infos,
        )

        combined = MapData.from_multiple_map_data([base_mmd, timestamp_mmd])
        self.assertEqual(
            len(combined.map_df),
            len(base_mmd.map_df) + len(timestamp_mmd.map_df),
        )
        # The combined frame is expected to have one more column than the
        # fixture's frame.
        self.assertEqual(
            combined.map_df.columns.size,
            base_mmd.map_df.columns.size + 1,
        )
        self.assertEqual(
            combined.map_key_infos,
            self.map_key_infos + timestamp_key_infos,
        )

        # Passing ["a"] as the second argument must leave only metric "a".
        combined_subset = MapData.from_multiple_map_data(
            [base_mmd, timestamp_mmd], ["a"])
        only_metric_a = combined_subset.map_df["metric_name"] == "a"
        self.assertTrue(only_metric_a.all())

        # Plain Data rows carry no map-key column.
        plain_rows = [
            {
                "arm_name": "0_4",
                "mean": 2.0,
                "sem": 0.2,
                "trial_index": 1,
                "metric_name": "a",
            },
            {
                "arm_name": "0_4",
                "mean": 1.8,
                "sem": 0.3,
                "trial_index": 1,
                "metric_name": "b",
            },
        ]
        data = Data(df=pd.DataFrame(plain_rows))

        downcast_combined = MapData.from_multiple_data([base_mmd, data])
        self.assertEqual(
            len(downcast_combined.map_df),
            len(base_mmd.map_df) + len(data.df),
        )
        self.assertEqual(
            downcast_combined.map_df.columns.size,
            base_mmd.map_df.columns.size,
        )
        self.assertEqual(downcast_combined.map_key_infos, self.map_key_infos)

        # Check that the Data's rows' epoch cell has the correct default value
        combined_df = downcast_combined.map_df
        from_plain_data = combined_df[combined_df["arm_name"] == "0_4"]
        default_epoch = base_mmd.map_key_infos[0].default_value
        self.assertTrue((from_plain_data["epoch"] == default_epoch).all())