Example #1
0
 def _gen(self):
     """
     Generator that streams RecordIO records for successive worker tasks.

     Each iteration asks ``self._worker`` for a task. A task with no
     ``shard_file_name`` ends the generator; when such a task has type
     ``WAIT``, ``self._pending_dataset`` is set to True beforehand.
     Evaluation tasks received while ``self._training_with_evaluation``
     is truthy are queued in ``self._pending_eval_tasks`` and not read
     here. Every other task is recorded, together with the running record
     count, in ``self._pending_tasks_with_counts`` and its records are
     yielded one by one.
     """
     while True:
         task = self._worker.get_task()

         if not task.shard_file_name:
             # Nothing to read for this task: log why and stop generating.
             if task.type != elasticdl_pb2.WAIT:
                 logger.info("No more task, stopping")
             else:
                 self._pending_dataset = True
                 logger.info(
                     "Finish current dataset, maybe more data later")
             return

         # Bookkeeping on shared state is done while holding the lock.
         with self._lock:
             defer_for_eval = (
                 self._training_with_evaluation
                 and task.type == elasticdl_pb2.EVALUATION
             )
             if defer_for_eval:
                 self._pending_eval_tasks.append(task)
                 continue
             self._record_count += task.end - task.start
             pending = self._pending_tasks_with_counts
             pending.append((task, self._record_count))
             # The first pending task becomes the current one.
             if len(pending) == 1:
                 self._current_task = task

         scanner = recordio.Scanner(
             task.shard_file_name, task.start, task.end - task.start)
         with closing(scanner) as reader:
             # A falsy result from record() ends this task's stream.
             record = reader.record()
             while record:
                 yield record
                 record = reader.record()
Example #2
0
 def read_records(self, start, end):
     """
     Yield the records covered by the range ``start`` .. ``end``.

     ``self._get_record_file(start, end)`` provides an iterable of
     ``(file_path, start, count)`` triples. Each triple is opened with a
     ``recordio.Scanner`` (closed again once drained) and its records are
     yielded in order; a falsy value from ``reader.record()`` ends that
     scanner's stream.
     """
     for path, offset, num_records in self._get_record_file(start, end):
         scanner = recordio.Scanner(path, offset, num_records)
         with closing(scanner) as reader:
             record = reader.record()
             while record:
                 yield record
                 record = reader.record()
Example #3
0
 def read_records(self, task):
     """
     Yield the records of a single task.

     A ``recordio.Scanner`` is opened on ``task.shard_name`` starting at
     ``task.start`` for ``task.end - task.start`` records, and is closed
     when the generator finishes. A falsy value from ``reader.record()``
     ends the stream.
     """
     scanner = recordio.Scanner(
         task.shard_name, task.start, task.end - task.start)
     with closing(scanner) as reader:
         record = reader.record()
         while record:
             yield record
             record = reader.record()
Example #4
0
 def gen(self):
     """
     Yield every record from each shard in ``self._shards``.

     Each shard is indexed as ``shard[0]`` (passed to the scanner as its
     first argument), ``shard[1]`` (start) and ``shard[2]`` (end); the
     scanner is asked for ``end - start`` records and closed once it has
     been drained. A falsy value from ``reader.record()`` ends a shard.
     """
     for shard in self._shards:
         begin = shard[1]
         scanner = recordio.Scanner(shard[0], begin, shard[2] - begin)
         with closing(scanner) as reader:
             record = reader.record()
             while record:
                 yield record
                 record = reader.record()