def _inner_iter(self, fpath):
    """Yield ExampleIdItem objects decoded from one dumped example-id file.

    Every record in *fpath* is a serialized ``dj_pb.LiteExampleIds``
    message; its feature block is re-wrapped as a ``tf.train.Example``,
    expanded into per-column lists, and then emitted row by row together
    with any optional fields allowed for syncing.
    """
    with make_tf_record_iter(fpath) as record_iter:
        for serialized in record_iter:
            lite_ids = dj_pb.LiteExampleIds()
            lite_ids.ParseFromString(serialized)
            example = tf.train.Example(features=lite_ids.features)
            columns = convert_tf_example_to_dict(example)
            # one ExampleIdItem per position in the example_id column
            for offset in range(len(columns['example_id'])):
                # pick up only the optional fields that are present
                # and non-empty for this record
                optional_fields = {
                    fn: columns[fn][offset]
                    for fn in SYNC_ALLOWED_OPTIONAL_FIELDS
                    if fn in columns and len(columns[fn]) > 0
                }
                yield ExampleIdVisitor.ExampleIdItem(
                        lite_ids.begin_index + offset,
                        optional_fields
                    )
def _inner_iter(self, fpath):
    """Yield a TfExampleItem for every raw record stored in *fpath*.

    The reader is configured with the compression type carried by this
    visitor's options, so compressed record files are handled too.
    """
    tfr_options = tf.io.TFRecordOptions(
            compression_type=self._options.compressed_type
        )
    with common.make_tf_record_iter(fpath, tfr_options) as record_iter:
        yield from map(TfExampleItem, record_iter)
def _sync_dumped_data_block_meta(self):
    """Rescan this partition's data-block directory and rebuild the
    in-memory list of data-block metas from what is actually on disk.

    Files are bucketed by suffix into data blocks and metas; anything
    else is deleted. A meta file without a matching data block file is
    treated as an orphan and removed. The surviving metas are sorted by
    ``data_block_index`` and sanity-checked (consecutive indices,
    monotonic follower_restart/leader_start/leader_end indices); any
    violation is fatal and aborts the process. The in-memory list is
    replaced under the lock only if the rescan found more metas than
    are currently cached.
    """
    dumped_data_block_path = {}
    dumped_data_block_meta_path = {}
    dumped_data_block_meta = []
    data_block_dir = self._data_block_dir()
    if not gfile.Exists(data_block_dir):
        gfile.MakeDirs(data_block_dir)
    elif not gfile.IsDirectory(data_block_dir):
        logging.fatal("%s must be the directory of data block for "\
                      "partition %d", data_block_dir, self._partition_id)
        os._exit(-1) # pylint: disable=protected-access
    for fpath in self._list_data_block_dir():
        fname = ntpath.basename(fpath)
        if fname.endswith(DataBlockSuffix):
            ftag = fname[:-len(DataBlockSuffix)]
            dumped_data_block_path[ftag] = fpath
        elif fname.endswith(DataBlockMetaSuffix):
            ftag = fname[:-len(DataBlockMetaSuffix)]
            dumped_data_block_meta_path[ftag] = fpath
        else:
            # neither a data block nor a meta: stray file, drop it
            gfile.Remove(fpath)
    for (ftag, fpath) in dumped_data_block_meta_path.items():
        if ftag not in dumped_data_block_path:
            # BUGFIX: the original also called
            # gfile.Remove(dumped_data_block_path[ftag]) in this branch,
            # which always raises KeyError because ftag is known to be
            # absent from dumped_data_block_path here. An orphan meta has
            # no matching data block file, so only the meta is removed.
            gfile.Remove(fpath)
        else:
            # meta and data block both exist: load the meta record
            with make_tf_record_iter(fpath) as record_iter:
                dbm = dj_pb.DataBlockMeta()
                dbm.ParseFromString(next(record_iter))
                dumped_data_block_meta.append(dbm)
    # NOTE(review): data block files that lack a meta are intentionally
    # left on disk here, matching the original behavior — confirm.
    dumped_data_block_meta = sorted(dumped_data_block_meta,
                                    key=lambda meta: meta.data_block_index)
    for (idx, meta) in enumerate(dumped_data_block_meta):
        if meta.data_block_index != idx:
            logging.fatal("data_block_index is not consecutive")
            os._exit(-1) # pylint: disable=protected-access
        if idx == 0:
            continue
        prev_meta = dumped_data_block_meta[idx - 1]
        if prev_meta.follower_restart_index > meta.follower_restart_index:
            logging.fatal("follower_restart_index is not Incremental")
            os._exit(-1) # pylint: disable=protected-access
        if prev_meta.leader_start_index >= meta.leader_start_index:
            logging.fatal("leader_start_index is not Incremental")
            os._exit(-1) # pylint: disable=protected-access
        if prev_meta.leader_end_index >= meta.leader_end_index:
            logging.fatal("leader_end_index is not Incremental")
            os._exit(-1) # pylint: disable=protected-access
    with self._lock:
        # only adopt the rescan result if it found more blocks than the
        # currently cached list
        if len(dumped_data_block_meta) > len(self._dumped_data_block_meta):
            self._dumped_data_block_meta = dumped_data_block_meta
def _inner_iter(self, fpath):
    """Yield ExampleIdItem(example_id, event_time, global_index) for
    every entry of every LiteExampleIds record stored in *fpath*.

    The global index of an entry is its offset within the record plus
    the record's begin_index.
    """
    with make_tf_record_iter(fpath) as record_iter:
        for serialized in record_iter:
            lite_ids = dj_pb.LiteExampleIds()
            lite_ids.ParseFromString(serialized)
            id_cnt = len(lite_ids.example_id)
            et_cnt = len(lite_ids.event_time)
            # the two repeated fields must stay aligned one-to-one
            assert id_cnt == et_cnt, \
                "the size of example id and event time must the "\
                "same. {} != {}".format(id_cnt, et_cnt)
            pairs = zip(lite_ids.example_id, lite_ids.event_time)
            for offset, (eid, etime) in enumerate(pairs):
                yield ExampleIdVisitor.ExampleIdItem(
                        eid, etime, lite_ids.begin_index + offset
                    )
def _inner_iter(self, fpath):
    """Yield each SyncedExampleId proto deserialized from *fpath*."""
    with make_tf_record_iter(fpath) as record_iter:
        for raw_record in record_iter:
            synced_id = dj_pb.SyncedExampleId()
            synced_id.ParseFromString(raw_record)
            yield synced_id
def _inner_iter(self, fpath):
    """Wrap every raw record of *fpath* in a TfExampleItem and yield it."""
    with common.make_tf_record_iter(fpath) as record_iter:
        yield from (TfExampleItem(raw) for raw in record_iter)