Esempio n. 1
0
 def _init_file_record_count(self, recordio_files):
     """Rebuild ``self._data_blocks`` from a sequence of RecordIO files.

     Each file's record count is read from its ``recordio.Index`` and the
     file is registered as a ``RecordBlock(path, start, end)``, where
     ``start`` is the running total of records in the files visited so far
     and ``end`` is ``start`` plus this file's record count.

     Args:
         recordio_files: Iterable of RecordIO file paths, processed in
             iteration order.
     """
     self._data_blocks = []
     offset = 0  # records accumulated over the files already visited
     for path in recordio_files:
         # closing() ensures the index is closed once the count is read.
         with closing(recordio.Index(path)) as index:
             count = index.num_records()
             self._data_blocks.append(
                 RecordBlock(path, offset, offset + count)
             )
             offset += count
Esempio n. 2
0
 def _collect_file_records_from_dir(data_dir):
     """Map every entry of ``data_dir`` to its RecordIO record count.

     Args:
         data_dir: Directory to scan. A falsy value (``None``, ``""``)
             yields an empty dict without touching the filesystem.

     Returns:
         A dict whose keys are ``os.path.join(data_dir, entry)`` for each
         entry reported by ``os.listdir`` and whose values are the result
         of ``num_records()`` on that path's ``recordio.Index``.
     """
     record_counts = {}
     if data_dir:
         for entry in os.listdir(data_dir):
             full_path = os.path.join(data_dir, entry)
             # closing() ensures the index is closed after the count is read.
             with closing(recordio.Index(full_path)) as index:
                 record_counts[full_path] = index.num_records()
     return record_counts
Esempio n. 3
0
 def create_shards(self):
     """Describe each file under the configured data directory as a shard.

     The directory is taken from ``self._kwargs["data_dir"]``.

     Returns:
         A dict mapping ``os.path.join(data_dir, entry)`` to a tuple
         ``(0, num_records)``, where ``num_records`` comes from that
         path's ``recordio.Index``. The first element is always 0 here
         (presumably the shard's starting record index within the file;
         confirm against the consumer of this dict).
     """
     root = self._kwargs["data_dir"]
     first_record = 0
     shard_paths = (os.path.join(root, name) for name in os.listdir(root))
     shards = {}
     for shard_path in shard_paths:
         # closing() ensures the index is closed after the count is read.
         with closing(recordio.Index(shard_path)) as index:
             shards[shard_path] = (first_record, index.num_records())
     return shards