Example #1
    def test_atomic_read(self):
        for name in ['foo.csv', 'baz.csv.bz2']:
            dataframe = pd.DataFrame(dict(a=[1, 2], b=[4.0, 5.0]))
            csv_file = os.path.join(absltest.get_default_test_tmpdir(), name)
            utils_impl.atomic_write_to_csv(dataframe, csv_file)

            dataframe2 = utils_impl.atomic_read_from_csv(csv_file)
            pd.testing.assert_frame_equal(dataframe, dataframe2)
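The roundtrip works for both the plain and the .bz2 path because pandas infers the compression from the file extension. For context, the usual way to make such a write atomic is to write to a temporary file and then rename it over the destination; the following is a minimal sketch of that general pattern, not the actual utils_impl implementation:

import os
import tempfile

import pandas as pd


def atomic_write_sketch(dataframe: pd.DataFrame, path: str) -> None:
    """Writes `dataframe` to `path` so readers never observe a partial file."""
    # Create the temporary file in the destination directory and keep the
    # destination name as a suffix so pandas still infers the compression
    # (e.g. a name ending in '.csv.bz2' is written bz2-compressed).
    fd, tmp_path = tempfile.mkstemp(
        dir=os.path.dirname(path) or '.', suffix=os.path.basename(path))
    os.close(fd)
    dataframe.to_csv(tmp_path, index=False)
    # os.replace is an atomic rename on POSIX filesystems, so concurrent
    # readers see either the old file or the new one, never a partial CSV.
    os.replace(tmp_path, path)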
Example #2
  def test_rows_are_cleared_is_reflected_in_saved_file(self):
    temp_dir = self.get_temp_dir()
    metrics_mngr = metrics_manager.ScalarMetricsManager(temp_dir, prefix='foo')

    metrics_mngr.update_metrics(0, _create_dummy_metrics())
    metrics_mngr.update_metrics(5, _create_dummy_metrics())
    metrics_mngr.update_metrics(10, _create_dummy_metrics())

    file_contents_before = utils_impl.atomic_read_from_csv(
        os.path.join(temp_dir, 'foo.metrics.csv.bz2'))
    self.assertEqual(3, len(file_contents_before.index))

    metrics_mngr.clear_rounds_after(last_valid_round_num=7)

    file_contents_after = utils_impl.atomic_read_from_csv(
        os.path.join(temp_dir, 'foo.metrics.csv.bz2'))
    self.assertEqual(2, len(file_contents_after.index))
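Example #2 relies on clear_rounds_after() dropping every row whose round number exceeds the cut-off: rounds 0 and 5 survive, round 10 is removed, leaving two rows. A hedged sketch of how such truncation can be expressed with pandas, assuming a `round_num` column as described in the constructor docstring in Example #3 below (the `loss` values are placeholder data, and this is an illustration, not the manager's actual implementation):

import pandas as pd


def clear_rounds_after_sketch(metrics: pd.DataFrame,
                              last_valid_round_num: int) -> pd.DataFrame:
    """Keeps only the rows whose round_num is <= last_valid_round_num."""
    if metrics.empty:
        return metrics
    return metrics[metrics.round_num <= last_valid_round_num]


# With rounds 0, 5 and 10 recorded, a cut-off of 7 leaves two rows, matching
# the assertions in the test above.
df = pd.DataFrame({'round_num': [0, 5, 10], 'loss': [0.9, 0.7, 0.5]})
assert len(clear_rounds_after_sketch(df, last_valid_round_num=7)) == 2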
Example #3
    def __init__(self,
                 root_metrics_dir: str = '/tmp',
                 prefix: str = 'experiment',
                 use_bz2: bool = True):
        """Returns an initialized `ScalarMetricsManager`.

        This class will maintain metrics in a CSV file in the filesystem. The
        path of the file is {`root_metrics_dir`}/{`prefix`}.metrics.csv (if
        `use_bz2` is set to False) or
        {`root_metrics_dir`}/{`prefix`}.metrics.csv.bz2 (if `use_bz2` is set to
        True). To restart an experiment at an earlier round number, initialize
        this class and then call the clear_rounds_after() method to remove all
        rows for round numbers later than the restart round number. This
        ensures that no duplicate rows of data exist in the CSV.

        Args:
          root_metrics_dir: A path on the filesystem to store CSVs.
          prefix: A string to use as the prefix of the filename. Usually the
            name of a specific run in a larger grid of experiments sharing a
            common `root_metrics_dir`.
          use_bz2: A boolean indicating whether to compress the resulting
            metrics CSV using bz2.

        Raises:
          ValueError: If `root_metrics_dir` is an empty string.
          ValueError: If `prefix` is an empty string.
          ValueError: If the specified metrics CSV file already exists but does
            not contain a `round_num` column.
        """
        super().__init__()
        if not root_metrics_dir:
            raise ValueError(
                'Empty string passed for root_metrics_dir argument.')
        if not prefix:
            raise ValueError('Empty string passed for prefix argument.')

        if use_bz2:
            # Using .bz2 rather than .zip due to
            # https://github.com/pandas-dev/pandas/issues/26023
            self._metrics_filename = os.path.join(root_metrics_dir,
                                                  f'{prefix}.metrics.csv.bz2')
        else:
            self._metrics_filename = os.path.join(root_metrics_dir,
                                                  f'{prefix}.metrics.csv')
        if not tf.io.gfile.exists(self._metrics_filename):
            utils_impl.atomic_write_to_csv(pd.DataFrame(),
                                           self._metrics_filename)

        self._metrics = utils_impl.atomic_read_from_csv(self._metrics_filename)
        if not self._metrics.empty and 'round_num' not in self._metrics.columns:
            raise ValueError(
                f'The specified csv file ({self._metrics_filename}) already exists '
                'but was not created by ScalarMetricsManager (it does not contain a '
                '`round_num` column).')

        self._latest_round_num = (None if self._metrics.empty else
                                  self._metrics.round_num.max(axis=0))
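The docstring above describes the restart workflow: point a new manager at the same directory and prefix, then call clear_rounds_after() before resuming updates so the CSV never accumulates duplicate round numbers. A usage sketch of that flow; the directory, prefix, and metric values are hypothetical, and it assumes update_metrics() accepts a plain dict of scalars, as the _create_dummy_metrics() calls in Example #2 suggest:

# Resume an experiment at round 7 after a preemption.
metrics_mngr = metrics_manager.ScalarMetricsManager(
    root_metrics_dir='/tmp/my_experiment', prefix='run_3')

# Drop any rows written for rounds later than the restart point.
metrics_mngr.clear_rounds_after(last_valid_round_num=7)

# Continue recording metrics from the restart round onward.
for round_num in range(8, 11):
  metrics_mngr.update_metrics(round_num, {'loss': 0.1 * round_num})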
Example #4
 def on_epoch_end(self, epoch: int, logs: Dict[Any, Any] = None):
     results_path = os.path.join(self._path, 'metric_results.csv')
     if tf.io.gfile.exists(results_path):
         # Read the results until now.
         results_df = utils_impl.atomic_read_from_csv(results_path)
         # Slice off any results recorded after the current epoch; their
         # presence indicates the job restarted.
         results_df = results_df[:epoch]
         # Add the new epoch.
         results_df = results_df.append(logs, ignore_index=True)
     else:
         results_df = pd.DataFrame(logs, index=[epoch])
     utils_impl.atomic_write_to_csv(results_df, results_path)
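Because on_epoch_end() truncates the saved results to the current epoch before appending, a restarted job overwrites stale rows instead of duplicating them. A small self-contained sketch of that truncate-then-append step with made-up column values; it uses pd.concat, the pandas >= 2.0 replacement for the DataFrame.append call above:

import pandas as pd

# Three epochs were saved before the job was preempted.
saved = pd.DataFrame({'loss': [0.9, 0.7, 0.5]})

# The job restarts and re-runs epoch 2: keep rows [0, epoch), then append
# the freshly computed metrics for that epoch.
epoch, logs = 2, {'loss': 0.48}
resumed = pd.concat([saved[:epoch], pd.DataFrame([logs])], ignore_index=True)

assert list(resumed['loss']) == [0.9, 0.7, 0.48]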