def test_atomic_read(self, name):
    """Round-trips a small DataFrame through atomic CSV write then read."""
    expected = pd.DataFrame(dict(a=[1, 2], b=[4.0, 5.0]))
    csv_path = os.path.join(absltest.get_default_test_tmpdir(), name)
    utils_impl.atomic_write_to_csv(expected, csv_path)
    restored = utils_impl.atomic_read_from_csv(csv_path)
    # The frame read back must be identical (values, dtypes, index) to the
    # frame written out.
    pd.testing.assert_frame_equal(expected, restored)
def test_rows_are_cleared_is_reflected_in_saved_file(self):
    """Checks clear_rounds_after() also prunes rows from the on-disk CSV."""
    temp_dir = self.get_temp_dir()
    metrics_mngr = metrics_manager.ScalarMetricsManager(temp_dir, prefix='foo')

    # Record metrics for three rounds: 0, 5, and 10.
    for round_num in (0, 5, 10):
        metrics_mngr.update_metrics(round_num, _create_dummy_metrics())

    csv_path = os.path.join(temp_dir, 'foo.metrics.csv.bz2')
    before = utils_impl.atomic_read_from_csv(csv_path)
    self.assertEqual(3, len(before.index))

    # Clearing after round 7 should drop only the round-10 row, leaving two
    # rows persisted in the file.
    metrics_mngr.clear_rounds_after(last_valid_round_num=7)
    after = utils_impl.atomic_read_from_csv(csv_path)
    self.assertEqual(2, len(after.index))
def __init__(self,
             root_metrics_dir: str = '/tmp',
             prefix: str = 'experiment',
             use_bz2: bool = True):
    """Returns an initialized `ScalarMetricsManager`.

    This class will maintain metrics in a CSV file in the filesystem. The path
    of the file is {`root_metrics_dir`}/{`prefix`}.metrics.csv (if `use_bz2` is
    set to False) or {`root_metrics_dir`}/{`prefix`}.metrics.csv.bz2 (if
    `use_bz2` is set to True).

    To use this class upon restart of an experiment at an earlier round number,
    you can initialize and then call the clear_rounds_after() method to remove
    all rows for round numbers later than the restart round number. This
    ensures that no duplicate rows of data exist in the CSV.

    Args:
      root_metrics_dir: A path on the filesystem to store CSVs.
      prefix: A string to use as the prefix of filename. Usually the name of a
        specific run in a larger grid of experiments sharing a common
        `root_metrics_dir`.
      use_bz2: A boolean indicating whether to zip the result metrics csv using
        bz2.

    Raises:
      ValueError: If `root_metrics_dir` is empty string.
      ValueError: If `prefix` is empty string.
      ValueError: If the specified metrics csv file already exists but does not
        contain a `round_num` column.
    """
    super().__init__()
    if not root_metrics_dir:
        raise ValueError('Empty string passed for root_metrics_dir argument.')
    if not prefix:
        raise ValueError('Empty string passed for prefix argument.')

    if use_bz2:
        # Using .bz2 rather than .zip due to
        # https://github.com/pandas-dev/pandas/issues/26023
        self._metrics_filename = os.path.join(root_metrics_dir,
                                              f'{prefix}.metrics.csv.bz2')
    else:
        self._metrics_filename = os.path.join(root_metrics_dir,
                                              f'{prefix}.metrics.csv')
    # Seed an empty CSV on first use so the read below always succeeds.
    if not tf.io.gfile.exists(self._metrics_filename):
        utils_impl.atomic_write_to_csv(pd.DataFrame(), self._metrics_filename)

    self._metrics = utils_impl.atomic_read_from_csv(self._metrics_filename)
    if not self._metrics.empty and 'round_num' not in self._metrics.columns:
        # BUG FIX: the message previously opened a parenthesis at
        # '(it does not contain ...' that was never closed.
        raise ValueError(
            f'The specified csv file ({self._metrics_filename}) already exists '
            'but was not created by ScalarMetricsManager (it does not contain '
            'a `round_num` column).')

    # Highest round number recorded so far, or None when no metrics exist yet.
    self._latest_round_num = (None if self._metrics.empty
                              else self._metrics.round_num.max(axis=0))
def on_epoch_end(self, epoch: int, logs: Dict[Any, Any] = None):
    """Persists `logs` for `epoch` to the metric-results CSV.

    If a results file already exists, rows recorded at or after `epoch` are
    discarded first (their presence indicates the job restarted), then the new
    epoch's metrics are appended as a fresh row.

    Args:
      epoch: The (0-based) index of the epoch that just finished.
      logs: A dict of metric name to scalar value for this epoch.
        NOTE(review): callers are presumed to always pass a dict; a `None`
        value would produce an empty row — confirm against the caller.
    """
    results_path = os.path.join(self._path, 'metric_results.csv')
    if tf.io.gfile.exists(results_path):
        # Read the results until now.
        results_df = utils_impl.atomic_read_from_csv(results_path)
        # Slice off results after the current epoch, this indicates the job
        # restarted.
        results_df = results_df[:epoch]
        # Add the new epoch. (`DataFrame.append` was removed in pandas 2.0;
        # `pd.concat` with a single-row frame is the equivalent replacement.)
        results_df = pd.concat([results_df, pd.DataFrame([logs])],
                               ignore_index=True)
    else:
        results_df = pd.DataFrame(logs, index=[epoch])
    utils_impl.atomic_write_to_csv(results_df, results_path)