Beispiel #1
0
    def testFilecmpSameSize(self):
        """Check filecmp on files whose contents have the same length.

        file1 and file2 hold equally long text that differs in a single
        character, so they must compare unequal. file3 holds the same text as
        file2 (written from a u-prefixed literal), so file2 and file3 must
        compare equal.
        """
        base_dir = self._base_dir
        text_a = "This is a sentence\n" * 100
        text_b = "This is b sentence\n" * 100
        text_b_unicode = u"This is b sentence\n" * 100

        path_a = os.path.join(base_dir, "file1")
        file_io.write_string_to_file(path_a, text_a)
        path_b = os.path.join(base_dir, "file2")
        file_io.write_string_to_file(path_b, text_b)
        path_b_unicode = os.path.join(base_dir, "file3")
        file_io.write_string_to_file(path_b_unicode, text_b_unicode)

        # Same size, different bytes.
        self.assertFalse(file_io.filecmp(path_a, path_b))
        # Same size, same bytes.
        self.assertTrue(file_io.filecmp(path_b, path_b_unicode))
Beispiel #2
0
  def testFilecmpSameSize(self):
    """Check filecmp on three files of identical length.

    The first two files differ in exactly one character per line and must not
    compare equal; the second and third carry the same text (the third is
    written from a u-prefixed literal) and must compare equal.
    """
    fixtures = (
        ("file1", "This is a sentence\n" * 100),
        ("file2", "This is b sentence\n" * 100),
        ("file3", u"This is b sentence\n" * 100),
    )

    written = []
    for basename, text in fixtures:
      path = os.path.join(self._base_dir, basename)
      file_io.write_string_to_file(path, text)
      written.append(path)
    first, second, third = written

    self.assertFalse(file_io.filecmp(first, second))
    self.assertTrue(file_io.filecmp(second, third))
Beispiel #3
0
  def testFilecmp(self):
    """Check filecmp on files with different and with identical contents.

    file1 holds a different (and shorter) text than file2, so they must
    compare unequal. file3 holds the same text as file2 (written from a
    u-prefixed literal), so file2 and file3 must compare equal.
    """

    def write_fixture(basename, text):
      # Create the file under the test's base directory and hand back its path.
      path = file_io.join(self._base_dir, basename)
      file_io.write_string_to_file(path, text)
      return path

    path_short = write_fixture("file1", "This is a sentence\n" * 100)
    path_long = write_fixture("file2", "This is another sentence\n" * 100)
    path_long_unicode = write_fixture(
        "file3", u"This is another sentence\n" * 100)

    self.assertFalse(file_io.filecmp(path_short, path_long))
    self.assertTrue(file_io.filecmp(path_long, path_long_unicode))
Beispiel #4
0
    def testFilecmpBinary(self):
        """Check filecmp on files written through FileIO in "wb" mode.

        file1 and file2 have the same length but differ in the last character,
        file4 has a different length than file1, and file3 has the same
        content as file2. Only file2/file3 are expected to compare equal.
        """
        # Each writer is used as a context manager so the handle is closed
        # before the comparisons run, instead of depending on the temporary
        # FileIO object being garbage-collected.
        file1 = os.path.join(self._base_dir, "file1")
        with file_io.FileIO(file1, "wb") as f:
            f.write("testing\n\na")

        file2 = os.path.join(self._base_dir, "file2")
        with file_io.FileIO(file2, "wb") as f:
            f.write("testing\n\nb")

        file3 = os.path.join(self._base_dir, "file3")
        with file_io.FileIO(file3, "wb") as f:
            f.write("testing\n\nb")

        file4 = os.path.join(self._base_dir, "file4")
        with file_io.FileIO(file4, "wb") as f:
            f.write("testing\n\ntesting")

        self.assertFalse(file_io.filecmp(file1, file2))
        self.assertFalse(file_io.filecmp(file1, file4))
        self.assertTrue(file_io.filecmp(file2, file3))
Beispiel #5
0
  def testFilecmpBinary(self):
    """Check filecmp on files written through FileIO in "wb" mode.

    file1 and file2 have the same length but differ in the last character,
    file4 has a different length than file1, and file3 has the same content as
    file2. Only file2/file3 are expected to compare equal.
    """

    def write_binary(basename, payload):
      # Write via a context manager so the handle is closed before any
      # comparison, rather than relying on garbage collection of a temporary
      # FileIO object.
      path = os.path.join(self._base_dir, basename)
      with file_io.FileIO(path, "wb") as f:
        f.write(payload)
      return path

    file1 = write_binary("file1", "testing\n\na")
    file2 = write_binary("file2", "testing\n\nb")
    file3 = write_binary("file3", "testing\n\nb")
    file4 = write_binary("file4", "testing\n\ntesting")

    self.assertFalse(file_io.filecmp(file1, file2))
    self.assertFalse(file_io.filecmp(file1, file4))
    self.assertTrue(file_io.filecmp(file2, file3))
 def test_file_format_equivalence(self):
     """Test data conversion and assert the converted outputs match.

     Invokes the "main" helper with ``self.train_textproto`` as ``--inputs``
     and ``self.train_binproto`` as ``--outputs``, then the "convert_data"
     helper to write the train/dev/test TFRecord outputs, and finally
     compares the output files pairwise.
     """
     self.run_helper(
         "main",
         arguments=("--inputs", self.train_textproto, "--outputs",
                    self.train_binproto, "--addresses_path",
                    self.augmenter_replacement_input, "--phones_path",
                    self.augmenter_replacement_input, "--num_total", "0"))
     self.run_helper(
         "convert_data",
         arguments=("--train_data_input_path", self.train_binproto,
                    "--train_data_output_path", self.train_tfrecord,
                    "--dev_data_input_path", self.test_lftxt,
                    "--dev_data_output_path", self.dev_tfrecord,
                    "--test_data_input_paths", self.test2_lftxt,
                    "--test_data_output_paths", self.test2_tfrecord,
                    "--test_data_input_paths", self.test3_txt,
                    "--test_data_output_paths", self.test3_tfrecord))
     # The comparison results used to be discarded, so this test could never
     # fail on mismatching outputs. Assert them instead.
     # NOTE(review): assumes filecmp returns a truthy value for matching files,
     # that each pair is expected to match, and that self provides
     # unittest-style assertTrue -- confirm against the enclosing test class.
     self.assertTrue(filecmp(self.train_tfrecord, self.dev_tfrecord))
     self.assertTrue(filecmp(self.test2_tfrecord, self.test3_tfrecord))
Beispiel #7
0
def get_asset_filename_to_add(asset_filepath, asset_filename_map):
  """Pick the basename under which an asset should be stored in the SavedModel.

  Assets are supplied as full paths but saved by basename, so two distinct
  assets may collide on the same basename. The basename is reused when it is
  unseen, when it maps to the very same path, or when the file it maps to has
  the same content. Only when the contents differ is a new, unique basename
  generated.

  Args:
    asset_filepath: the full path to the asset that is being saved
    asset_filename_map: a dict of filenames used for saving the asset in
      the SavedModel to full paths from which the filenames were derived.

  Returns:
    The plain basename of `asset_filepath` if it can be used as is, otherwise
    the uniquified filename produced by `_get_unique_asset_filename`.
  """
  basename = os.path.basename(asset_filepath)

  if basename in asset_filename_map:
    registered_filepath = asset_filename_map[basename]
    # An identical path means the same asset was listed twice; the content
    # comparison is only needed (and only performed) for differing paths.
    if registered_filepath != asset_filepath and not file_io.filecmp(
        asset_filepath, registered_filepath):
      # Same basename, different content: a fresh name is required.
      return _get_unique_asset_filename(basename, asset_filename_map)

  # Unseen basename, same path, or same content: keep the basename.
  return basename
Beispiel #8
0
def _get_asset_filename_to_add(asset_filepath, asset_filename_map):
  """Resolve the basename to use when adding an asset to the SavedModel.

  Assets arrive as full paths and are written out by basename, which means
  basenames of different assets can clash. A clash is resolved by comparing
  the two files: identical files share the basename, different files get an
  index-suffixed basename.

  Args:
    asset_filepath: the full path to the asset that is being saved
    asset_filename_map: a dict of filenames used for saving the asset in
      the SavedModel to full paths from which the filenames were derived.

  Returns:
    The original basename when it is unseen or already refers to the same
    file, otherwise a uniquified filename string.
  """
  candidate = os.path.basename(asset_filepath)

  # Nothing saved under this basename yet.
  seen_before = candidate in asset_filename_map
  if not seen_before:
    return candidate

  # The asset appears twice in the collection list under the same path.
  previous_filepath = asset_filename_map[candidate]
  if previous_filepath == asset_filepath:
    return candidate

  # Different paths sharing a basename: identical content can share the name,
  # anything else needs a unique one.
  if file_io.filecmp(asset_filepath, previous_filepath):
    return candidate
  return _get_unique_asset_filename(candidate, asset_filename_map)