def _finish_writer(self):
    """Close the active writer (if any) and publish its temp file.

    The temp file is renamed to the merged-sort-run name encoded from
    (partition_id, process_index); the final path is recorded in
    self._merged_fpaths. The writer slot is cleared and the process
    index advanced unconditionally.
    """
    if self._writer is not None:
        self._writer.close()
        run_meta = MergedSortRunMeta(self._partition_id, self._process_index)
        final_fpath = os.path.join(self._merged_dir,
                                   run_meta.encode_merged_sort_run_fname())
        # Publish atomically: overwrite-rename the temp file into place.
        gfile.Rename(self._tmp_fpath, final_fpath, True)
        self._merged_fpaths.append(final_fpath)
    self._writer = None
    self._process_index += 1
def finish(self):
    """Finalize the output file for this partition.

    Does nothing unless both begin and end indices were recorded;
    otherwise closes the writer and renames the temp file to its
    metadata-encoded destination under the partition directory.
    """
    if self._begin_index is None or self._end_index is None:
        return
    self._writer.close()
    file_meta = Merge.FileMeta(self._partition_id,
                               self._begin_index,
                               self._end_index)
    dest_fpath = os.path.join(self._options.output_dir,
                              common.partition_repr(self._partition_id),
                              file_meta.encode_meta_to_fname())
    # Overwrite-rename publishes the finished file atomically.
    gfile.Rename(self.get_tmp_fpath(), dest_fpath, True)
    self._writer = None
def finish_example_id_dumper(self):
    """Close the TFRecord writer and finalize the example-id dump.

    Returns (IndexMeta, end_index) when at least one example id was
    dumped; otherwise removes the temp file and returns (None, None).
    """
    self._tf_record_writer.close()
    if self.dumped_example_id_count() <= 0:
        # Nothing dumped: the index range must not have advanced.
        assert self._start_index == self._end_index, "no example id dumped"
        gfile.Remove(self._tmp_fpath)
        return None, None
    dumped_fpath = self._get_dumped_fpath()
    gfile.Rename(self._tmp_fpath, dumped_fpath, True)
    dump_meta = visitor.IndexMeta(self._process_index,
                                  self._start_index,
                                  dumped_fpath)
    return dump_meta, self._end_index
def finish_dumper(self):
    """Close the writer and publish the sort run, if one was produced.

    Returns the SortRunMeta of the published file, or None when no
    index range was recorded (empty run).
    """
    self._writer.close()
    if self._start_index is None or self._end_index is None:
        # NOTE(review): this removes self._fpath rather than
        # self._tmp_fpath — confirm _fpath refers to the unpublished
        # file at this point.
        gfile.Remove(self._fpath)
        return None
    run_meta = SortRunMeta(self._process_index,
                           self._start_index,
                           self._end_index)
    self._fpath = path.join(self._output_dir,
                            run_meta.encode_sort_run_fname())
    gfile.Rename(self._tmp_fpath, self._fpath, True)
    return run_meta
def finish(self):
    """Close any open writer and publish its output file.

    Returns the FileMeta describing the published file, or None when
    no writer was open.
    """
    if self._writer is None:
        return None
    self._writer.close()
    self._writer = None
    file_meta = RawDataPartitioner.FileMeta(
            self._options.partitioner_rank_id,
            self._process_index,
            self._begin_index,
            self._end_index)
    dest_fpath = os.path.join(self._options.output_dir,
                              common.partition_repr(self._partition_id),
                              file_meta.encode_meta_to_fname())
    # Overwrite-rename the temp file into its final, meta-encoded name.
    gfile.Rename(self.get_tmp_fpath(), dest_fpath, True)
    return file_meta
def finish(self):
    """Close the TFRecord writer and finalize the portal output file.

    If no records were written, the temp file is discarded; otherwise
    it is overwrite-renamed into its final path. A warning is logged
    either way.
    """
    self._tf_record_writer.close()
    if self._record_count == 0:
        # Fixed log-message typos: "potroal" -> "portal", "reomve" -> "remove".
        logging.warning("no record in portal output file "
                        "%s at partition %d. remove the tmp "
                        "file %s", self._fpath, self._partition_id,
                        self._tmp_fpath)
        gfile.Remove(self._tmp_fpath)
    else:
        gfile.Rename(self._tmp_fpath, self._fpath, True)
        # Fixed log-message typo: "potral" -> "portal".
        logging.warning("dump %d record in portal output file"
                        " %s at partition %d", self._record_count,
                        self._fpath, self._partition_id)
def _build_data_block_meta(self):
    """Serialize the data block meta to a temp TFRecord file and commit it.

    When a data block manager is configured, committing is delegated to
    it; otherwise the temp file is renamed directly to the encoded meta
    file name inside the data block directory.
    """
    staging_fpath = self._get_tmp_fpath()
    block_meta = self._data_block_meta
    with tf.io.TFRecordWriter(staging_fpath) as meta_writer:
        meta_writer.write(text_format.MessageToString(block_meta).encode())
    if self._data_block_manager is not None:
        self._data_block_manager.commit_data_block_meta(staging_fpath,
                                                        block_meta)
        return
    meta_fpath = os.path.join(
            self._get_data_block_dir(),
            encode_data_block_meta_fname(self._data_source_name,
                                         self._partition_id,
                                         block_meta.data_block_index))
    gfile.Rename(staging_fpath, meta_fpath)
def _make_data_block_meta(self):
    """Write the data block meta to a temp TFRecord file and publish it.

    Publication goes through the data block manager when one is set,
    otherwise via a direct rename into the data block directory.
    Returns the final meta file path.
    """
    staging_path = self._make_tmp_file_path()
    with tf.io.TFRecordWriter(staging_path) as meta_writer:
        serialized = text_format.MessageToString(self._data_block_meta)
        meta_writer.write(serialized.encode())
    if self._data_block_manager is not None:
        meta_file_path = self._data_block_manager.update_data_block_meta(
            staging_path, self._data_block_meta
        )
    else:
        meta_file_path = os.path.join(
            self._obtain_data_block_dir(),
            data_block_meta_file_name_wrap(
                self._data_source_name,
                self._partition_id,
                self._data_block_meta.data_block_index))
        gfile.Rename(staging_path, meta_file_path)
    return meta_file_path
def finish_data_block(self):
    """Close the writer and publish the data block, if non-empty.

    Returns the data block meta (with block_id filled in) when the
    block contains example ids; otherwise discards the temp file and
    returns None.
    """
    assert self._example_num == len(self._data_block_meta.example_ids)
    self._writer.close()
    if not self._data_block_meta.example_ids:
        gfile.Remove(self._tmp_fpath)
        return None
    self._data_block_meta.block_id = \
        encode_block_id(self._data_source_name, self._data_block_meta)
    block_fname = encode_data_block_fname(self._data_source_name,
                                          self._data_block_meta)
    block_fpath = os.path.join(self._get_data_block_dir(), block_fname)
    gfile.Rename(self._tmp_fpath, block_fpath, True)
    # Publish the companion meta file after the data itself.
    self._build_data_block_meta()
    return self._data_block_meta
import contextlib  # required by the decorator below; hoist to file imports if preferred


@contextlib.contextmanager
def atomic_file(path, mode='w+b'):
    """Atomically saves data to a target path.

    Any existing data at the target path will be overwritten.

    Args:
      path: target path at which to save file
      mode: optional mode string

    Yields:
      file-like object
    """
    # BUG FIX: the function yields and is documented/used as a context
    # manager ("with atomic_file(p) as f"), but the
    # @contextlib.contextmanager decorator was missing, so calling it
    # returned a bare generator.
    with tempfile.NamedTemporaryFile(mode=mode) as tmp:
        yield tmp
        tmp.flush()  # Necessary when the destination is on CNS.
        gfile.Copy(tmp.name, '%s.tmp' % path, overwrite=True)
        gfile.Rename('%s.tmp' % path, path, overwrite=True)
def test_rename_dir(self):
    """Renaming a directory: the old name vanishes, the new one is a dir."""
    src = "igfs:///test_rename_dir/1"
    dst = "igfs:///test_rename_dir/2"
    # Precondition: source directory exists.
    gfile.MkDir(src)
    # Exercise: rename.
    gfile.Rename(src, dst)
    # Verify: only the destination name remains, and it is a directory.
    self.assertFalse(gfile.Exists(src))
    self.assertTrue(gfile.Exists(dst))
    self.assertTrue(gfile.IsDirectory(dst))
    # Cleanup and verify removal.
    gfile.Remove(dst)
    self.assertFalse(gfile.Exists(dst))
def finish(self):
    """Close the CSV writer and finalize the sort-run-merger output.

    If no records were written, the temp file is removed and an empty
    '_SUCCESS' finish tag is created; otherwise the temp file is
    overwrite-renamed to its final path. A warning is logged either way.
    """
    self._csv_dict_writer.close()
    written = self._csv_dict_writer.write_raw_num()
    if written == 0:
        # Fixed message defects: missing spaces between the
        # concatenated fragments ("at""partition", "%s""create") and
        # the typo "reomve" -> "remove".
        logging.warning("no record in sort run merger %s at "
                        "partition %d. remove the tmp file %s, "
                        "create finish tag", self._fpath,
                        self._partition_id, self._tmp_fpath)
        gfile.Remove(self._tmp_fpath)
        finish_tag_fpath = os.path.join(self._get_output_dir(), '_SUCCESS')
        with gfile.GFile(finish_tag_fpath, 'w') as fh:
            fh.write('')
    else:
        gfile.Rename(self._tmp_fpath, self._fpath, True)
        logging.warning("dump %d record in sort run merger: "
                        "%s at partition %d", written,
                        self._fpath, self._partition_id)
def finish(self):
    """Sort and flush all buffered items into one output file.

    Returns the FileMeta of the published file, or None when the
    buffer is empty. On success the buffer and index range are reset.
    """
    if not self._buffer:
        return None
    out_writer = self._get_output_writer()
    self._sort_buffer()
    for buffered_item in self._buffer:
        out_writer.write_item(buffered_item)
    out_writer.close()
    file_meta = RawDataPartitioner.FileMeta(
            self._options.partitioner_rank_id,
            self._process_index,
            self._begin_index,
            self._end_index)
    dest_fpath = os.path.join(self._options.output_dir,
                              common.partition_repr(self._partition_id),
                              file_meta.encode_meta_to_fname())
    gfile.Rename(self.get_tmp_fpath(), dest_fpath, True)
    # Reset buffered state for the next batch.
    self._buffer = []
    self._begin_index = None
    self._end_index = None
    return file_meta
def commit_data_block_meta(self, tmp_meta_fpath, data_block_meta):
    """Atomically commit a data block meta file and cache the meta.

    Args:
      tmp_meta_fpath: path of the serialized meta's temp file; must exist.
      data_block_meta: meta whose data_block_index must be exactly one
        past the last committed index.

    Raises:
      RuntimeError: if the temp file is missing or another commit is
        in flight.
      IndexError: if the index is not consecutive.
    """
    if not gfile.Exists(tmp_meta_fpath):
        # Fixed error-message grammar: "is not existed" -> "does not exist".
        raise RuntimeError("the tmp file does not exist {}"\
                           .format(tmp_meta_fpath))
    with self._lock:
        if self._dumping_index is not None:
            raise RuntimeError(
                    "data block with index {} is " \
                    "dumping".format(self._dumping_index)
                )
        data_block_index = data_block_meta.data_block_index
        if data_block_index != self._dumped_index + 1:
            # Fixed error-message typo: "shoud" -> "should".
            raise IndexError("the data block index should be consecutive")
        self._dumping_index = data_block_index
        meta_fpath = self._get_data_block_meta_path(data_block_index)
        gfile.Rename(tmp_meta_fpath, meta_fpath)
        self._dumping_index = None
        self._dumped_index = data_block_index
        self._evict_data_block_cache_if_full()
        self._data_block_meta_cache[data_block_index] = data_block_meta
def data_block_finalizer(self):
    """Close the record writer and publish the data block, if non-empty.

    Returns the data block meta when example ids were saved, otherwise
    removes the temp file and returns None.
    """
    # Invariant: every saved example must be accounted for in the meta.
    assert self._saved_example_num == len(self._data_block_meta.example_ids)
    self._tf_record_writer.close()
    if len(self._data_block_meta.example_ids) > 0:
        self._data_block_meta.block_id = block_id_wrap(self._data_source_name,
                                                       self._data_block_meta)
        data_block_path = os.path.join(
                self._obtain_data_block_dir(),
                data_block_file_name_wrap(
                    self._data_source_name,
                    self._data_block_meta
                )
            )
        # Publish data first, then its companion meta file.
        gfile.Rename(self._tmp_file_path, data_block_path, True)
        meta_path = self._make_data_block_meta()
        # NOTE(review): `mode` is not defined in this method or any
        # visible scope — this branch will raise NameError when
        # reached. Probably intended as self._mode or a parameter;
        # TODO confirm against the enclosing class.
        if mode == "distribute":
            save_data_block_info(meta_path, data_block_path)
        return self._data_block_meta
    gfile.Remove(self._tmp_file_path)
    return None
def update_data_block_meta(self, meta_file_path_tmp, data_block_meta):
    """Atomically publish a data block meta file and buffer the meta.

    Args:
      meta_file_path_tmp: temp file holding the serialized meta; must exist.
      data_block_meta: meta whose data_block_index must be exactly one
        past the last saved index.

    Returns:
      The final meta file path.

    Raises:
      RuntimeError: if the temp file is missing or another save is
        in flight.
      IndexError: if the index is not consecutive.
    """
    if not gfile.Exists(meta_file_path_tmp):
        # Fixed error-message grammar: "does not existed" -> "does not exist".
        raise RuntimeError(
            "the tmp file does not exist {}".format(meta_file_path_tmp))
    with self._lock:
        if self._saving_data_block_index is not None:
            raise RuntimeError("data block of index {} is saving".format(
                self._saving_data_block_index))
        data_block_index = data_block_meta.data_block_index
        if data_block_index != self._saved_data_block_index + 1:
            raise IndexError("the data_block_index must be consecutive")
        self._saving_data_block_index = data_block_index
        data_block_meta_path = self._acquire_data_block_meta_path(
            data_block_index)
        gfile.Rename(meta_file_path_tmp, data_block_meta_path)
        self._saving_data_block_index = None
        self._saved_data_block_index = data_block_index
        self._remove_item_from_data_block_memory_buffer()
        self._data_block_meta_memory_buffer[
            data_block_index] = data_block_meta
        return data_block_meta_path
def append_item(self, item, index):
    """Append one item's tf_record to the current output writer.

    Tracks the covered [begin, end] index range and accumulated byte
    size; when the size crosses options.output_item_threshold, the
    current file is closed, published under its meta-encoded name, and
    the size/index/writer state is reset.
    """
    writer = self._get_output_writer()
    tf_item = item.tf_record
    writer.write(tf_item)
    if self._begin_index is None:
        self._begin_index = index
    self._end_index = index
    self._size_bytes += len(tf_item)
    if self._size_bytes >= self._options.output_item_threshold:
        writer.close()
        meta = Merge.FileMeta(self._partition_id,
                              self._begin_index,
                              self._end_index)
        fpath = os.path.join(self._options.output_dir,
                             common.partition_repr(self._partition_id),
                             meta.encode_meta_to_fname())
        gfile.Rename(self.get_tmp_fpath(), fpath, True)
        self._size_bytes = 0
        self._begin_index = None
        self._end_index = None
        # BUG FIX: the original also had `self.writer = None` (missing
        # underscore), which created a stray attribute instead of
        # clearing the real writer slot; only self._writer is cleared.
        self._writer = None
def test_rename_file(self):
    """Renaming a file: old name vanishes, contents survive at new name."""
    src = "igfs:///test_rename_file/1"
    dst = "igfs:///test_rename_file/2"
    # Precondition: source file exists with known content.
    with gfile.Open(src, mode="w") as w:
        w.write("42")
    self.assertTrue(gfile.Exists(src))
    # Exercise: rename.
    gfile.Rename(src, dst)
    # Verify: only the destination exists and holds the same content.
    self.assertFalse(gfile.Exists(src))
    self.assertTrue(gfile.Exists(dst))
    with gfile.Open(dst, mode="r") as r:
        self.assertEqual("42", r.read())
    # Cleanup and verify removal.
    gfile.Remove(dst)
    self.assertFalse(gfile.Exists(dst))
def dump_rsa_key_as_pem(output_dir, key, fname):
    """Write `key` in PKCS#1 PEM form to output_dir/fname via a temp file.

    The key is first written to a generated temp path and then renamed
    into place, so readers never observe a partially written key file.
    """
    staging_fpath = common.gen_tmp_fpath(output_dir)
    with gfile.GFile(staging_fpath, 'w') as out_file:
        out_file.write(key.save_pkcs1())
    gfile.Rename(staging_fpath, os.path.join(output_dir, fname))
def dump_rsa_key_as_pem(output_dir, key, fname):
    """Write `key` in PKCS#1 PEM form to output_dir/fname via a temp file.

    A uuid1-named temp file in the same directory is written first and
    then renamed, so readers never see a partially written key.

    NOTE(review): key.save_pkcs1() typically returns bytes while the
    file is opened in text mode 'w' — confirm gfile accepts this.
    """
    staging_fpath = os.path.join(output_dir, '{}.tmp'.format(uuid.uuid1()))
    with gfile.GFile(staging_fpath, 'w') as out_file:
        out_file.write(key.save_pkcs1())
    gfile.Rename(staging_fpath, os.path.join(output_dir, fname))