def read_index_files(
    self, start_after_key: str, range_steps=None
) -> Tuple[List[bytes], list, str, List[str]]:
    """
    Read index files of the form `trial_{datetime}/index/000/{step}_{worker}.json`.

    :param start_after_key: path of the last index file already read; None reads from the beginning
    :param range_steps: optional (start, end) tuple restricting which steps are read
    :return: Tuple(responses, steps, start_after_key, workers)
    """
    index_files = self.list_index_files()
    steps = []
    workers = []
    responses = []
    if start_after_key is not None:
        # Skip index files that sort before start_after_key (already read).
        start_after_index = bisect_left(index_files, start_after_key)
    else:
        start_after_index = 0
    index_files = index_files[start_after_index:]  # ignore files we have already read
    for index_file in index_files:
        if self.index_file_cache.has_not_read(index_file):
            step = IndexFileLocationUtils.parse_step_from_index_file_name(index_file)
            if range_steps is None or step_in_range(range_steps, step):
                steps.append(step)
                workers.append(parse_worker_name_from_file(index_file))
                self.logger.debug(
                    f"Sagemaker-Debugger: Read {os.path.getsize(index_file)} bytes from file {index_file}"
                )
                with open(index_file) as f:
                    responses.append(f.read().encode())
            self.index_file_cache.add(index_file, start_after_key)
    if len(index_files) > 0:
        start_after_key = index_files[-1]  # Last file that we have read
    return responses, steps, start_after_key, workers
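
# The reader above filters steps through a step_in_range(range_steps, step)
# helper imported from the library's utilities. The stand-in below is only a
# sketch of that contract, assuming range_steps is a (start, end) pair where
# None on either side means "unbounded"; the real helper may differ.
def step_in_range(range_steps, step):
    # Lower bound: satisfied when no start is given or step is at/after it.
    after_start = range_steps[0] is None or step >= range_steps[0]
    # Upper bound: satisfied when no end is given or step is before it.
    before_end = range_steps[1] is None or step < range_steps[1]
    return after_start and before_end
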
def read_index_files(
    self, start_after_key: str, range_steps=None
) -> Tuple[List[bytes], list, str, List[str]]:
    """
    Read index files of the form `trial_{datetime}/index/000/{step}_{worker}.json`.

    :param start_after_key: S3 key after which to resume listing index files
    :param range_steps: optional (start, end) tuple restricting which steps are read
    :return: Tuple(responses, steps, start_after_key, workers)
    """
    object_requests = []
    steps = []
    workers = []
    index_files, start_after_key = self.list_index_files(start_after_key)
    self.logger.debug(f'Loaded Index Files: {",".join(index_files)}')
    for index_file in index_files:
        if self.index_file_cache.has_not_read(index_file):
            step = IndexFileLocationUtils.parse_step_from_index_file_name(index_file)
            if range_steps is None or step_in_range(range_steps, step):
                steps.append(step)
                workers.append(parse_worker_name_from_file(index_file))
                object_requests.append(
                    ReadObjectRequest(f"s3://{self.bucket_name}/{index_file}")
                )
            self.index_file_cache.add(index_file, start_after_key)

    # Fetch the selected index files from S3.
    responses = self.s3_handler.get_objects(object_requests)
    return responses, steps, start_after_key, workers
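
# Both readers above consult an index_file_cache that exposes
# has_not_read(index_file) and add(index_file, start_after_key). The class
# below is a hypothetical minimal sketch of that interface, not the library's
# implementation; it simply remembers which index files were already handed out.
class ReadIndexFileCacheSketch:
    def __init__(self):
        self._seen = set()

    def has_not_read(self, index_file: str) -> bool:
        # True if this index file has not been returned to a caller yet.
        return index_file not in self._seen

    def add(self, index_file: str, start_after_key=None):
        # Record the file as read. start_after_key mirrors the call sites
        # above; a real cache could use it to evict entries older than the key.
        self._seen.add(index_file)
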
Example 3
def should_save_step(self, step_num: int):
    """Return True if step_num is explicitly listed in save_steps, or if it
    lies in the (start_step, end_step) range and is a multiple of save_interval."""
    rval = False
    if self.save_steps and step_num in self.save_steps:
        rval = True
    elif (
        step_in_range((self.start_step, self.end_step), step_num)
        and step_num % self.save_interval == 0
    ):
        rval = True
    return rval
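
# Usage sketch for should_save_step (hypothetical config values, relying on the
# step_in_range stand-in sketched earlier): steps listed in save_steps are
# always saved; otherwise a step is saved when it is in range and falls on the
# save_interval.
if __name__ == "__main__":
    from types import SimpleNamespace

    save_config = SimpleNamespace(save_steps=[3], start_step=0, end_step=100, save_interval=10)
    for step in (0, 3, 7, 20, 150):
        # should_save_step is shown as a plain function here, so the config
        # object stands in for `self`.
        print(step, should_save_step(save_config, step))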