def event_file_present_loop(self, tensor_location: TensorLocation):
    event_file_name = tensor_location.event_file_name
    event_file_present = self._is_event_file_present(event_file_name)
    num_retry = 0
    # Poll for the event file until it appears or the retry limit is hit.
    while not event_file_present and num_retry < self.event_file_retry_limit:
        if self._has_event_file_been_skipped(event_file_name):
            raise TensorUnavailableForStep(
                tname=tensor_location.tensorname,
                mode=tensor_location.mode,
                step=tensor_location.mode_step,
            )
        elif has_training_ended(self.path):
            # Training finished without the file ever appearing.
            self.logger.warning(
                "IndexReader: Training has ended."
                f"\nIndexReader: {event_file_name} was written but not found."
            )
            raise TensorUnavailableForStep(
                tname=tensor_location.tensorname,
                mode=tensor_location.mode,
                step=tensor_location.mode_step,
            )
        event_file_present = self._is_event_file_present(event_file_name)
        num_retry += 1
        time.sleep(2)
    if not event_file_present:
        # Retry limit exhausted and the file never showed up.
        self.logger.warning(
            f"IndexReader: {event_file_name} was written but not found "
            f"after {num_retry} retries."
        )
        raise TensorUnavailableForStep(
            tname=tensor_location.tensorname,
            mode=tensor_location.mode,
            step=tensor_location.mode_step,
        )
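The loop above only needs a boolean answer from `_is_event_file_present`. A minimal sketch of such a helper, assuming a local filesystem layout (the real reader may equally check an S3 prefix):

import os

def _is_event_file_present(self, event_file_name):
    # Illustrative only: report whether the event file exists yet.
    return os.path.exists(event_file_name)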
Example #2
def _step(self, step_num, mode=ModeKeys.GLOBAL, worker=None):
    s = self._get_step_currently(step_num, mode, worker=worker)
    if s is not None:
        return s
    else:
        # Step not loaded yet; refresh the trial and re-check its state.
        self.trial.maybe_refresh(self.name)
        ss = self.trial.has_passed_step(step_num, mode)
        if ss == StepState.AVAILABLE:
            s = self._get_step_currently(step_num, mode, worker=worker)
            if s is not None:
                return s
            # The step passed, but this tensor was not saved for it.
            raise TensorUnavailableForStep(self.name, step_num, mode)
        elif ss == StepState.UNAVAILABLE:
            raise StepUnavailable(step_num, mode)
        elif ss == StepState.NOT_YET_AVAILABLE:
            if self.trial.loaded_all_steps:
                # Training is over, so this step will never arrive.
                last_step = -1
                avail_steps = self.trial.steps(mode=mode)
                if len(avail_steps) > 0:
                    last_step = avail_steps[-1]
                raise NoMoreData(
                    "Looking for step:{} for mode {} and reached end of "
                    "training. Max step available is {}".format(
                        step_num, mode, last_step
                    )
                )
            raise StepNotYetAvailable(step_num, mode)
    assert False, "Should not happen"
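`_step` branches on a `StepState` enum; a minimal sketch of the three states it assumes (the member values here are illustrative):

from enum import Enum

class StepState(Enum):
    UNAVAILABLE = 0        # the step was skipped and will never appear
    AVAILABLE = 1          # the step has been written and can be read
    NOT_YET_AVAILABLE = 2  # the step may still arrive; keep waiting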
Example #3
def value(self, step_num, mode=ModeKeys.GLOBAL, worker=None):
    # _step refreshes the trial if the step is not loaded yet.
    s = self._step(step_num=step_num, mode=mode, worker=worker)
    if s.value is not None:
        return s.value
    elif s.location is not None:
        # Value not in memory; fetch it from the event file on demand.
        value = self.trial.index_reader.fetch_tensor_value(s.location)
        if self.cache:
            s.value = value
        return value
    else:
        # Only reductions were saved for this step, not the full tensor.
        has_reduction_values = len(s.reduction_values()) > 0
        has_reduction_locations = len(s.reduction_locations()) > 0
        has_reductions = has_reduction_locations or has_reduction_values
        raise TensorUnavailableForStep(self.name, step_num, mode, has_reductions)
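A hedged usage sketch for this method through smdebug's trial API; the output path and tensor name below are placeholders:

from smdebug.trials import create_trial

trial = create_trial("s3://my-bucket/debug-output")  # path is illustrative
t = trial.tensor("gradients/dense_1")                # tensor name is illustrative
val = t.value(step_num=0)  # triggers _step, then fetches from the event file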
Example #4
def reduction_value(self,
                    step_num,
                    reduction_name,
                    mode=ModeKeys.GLOBAL,
                    worker=None,
                    abs=False):
    """
    Returns the value of the requested reduction.
    If the tensor was saved as a reduction, fetches that directly.
    Otherwise, tries to compute the reduction from the full tensor value.
    If neither the reduction nor the tensor value is available, raises
    TensorUnavailableForStep.
    Reductions are not cached. #TODO do we want to?
    :param step_num: step number
    :param reduction_name: name of the reduction
    :param mode: mode of job (train, eval, predict, etc.).
                 If this is None, assumes step number is global
    :param worker: name of the worker
    :param abs: whether the reduction should be applied to the
                absolute value of the tensor
    :return: the requested reduction value as a float
    """

    s = self._step(step_num=step_num, mode=mode, worker=worker)
    rv = s.reduction_value(reduction_name, abs)
    rl = s.reduction_location(reduction_name, abs)
    if rv is not None:
        # The reduction was saved and is already loaded.
        return rv
    elif rl is not None:
        # The reduction was saved; fetch it from the event file.
        return self.trial.index_reader.fetch_tensor_value(rl)
    else:
        # Fall back to computing the reduction from the full tensor.
        if s.value is None and s.location is None:
            raise TensorUnavailableForStep(tname=reduction_name,
                                           step=step_num,
                                           mode=mode)
        elif s.value is None and s.location is not None:
            step_value = self.trial.index_reader.fetch_tensor_value(
                s.location)
            if self.cache:
                s.value = step_value  # save value if cache is set to True
        else:
            step_value = s.value

        return get_numpy_reduction(reduction_name, step_value, abs)
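The final fallback relies on `get_numpy_reduction` to compute the reduction on the fly. A minimal sketch of what such a helper could look like; the name-to-function mapping is an assumption, not the library's exact implementation:

import numpy as np

def get_numpy_reduction(reduction_name, tensor_value, abs=False):
    # Illustrative only: map a reduction name onto the matching numpy call.
    data = np.abs(tensor_value) if abs else tensor_value
    ops = {"mean": np.mean, "max": np.max, "min": np.min, "sum": np.sum,
           "std": np.std, "variance": np.var, "prod": np.prod}
    if reduction_name not in ops:
        raise ValueError("Unsupported reduction: {}".format(reduction_name))
    return float(ops[reduction_name](data))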
Example #5
def workers(self, step_num, mode=ModeKeys.GLOBAL) -> list:
    # Returns the names of all workers that wrote this tensor at the step.
    step_dict = self._get_step_dict(step_num, mode)
    if step_dict is None:
        raise TensorUnavailableForStep(self.name, step_num, mode)
    return list(step_dict.keys())
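A hedged usage sketch for distributed jobs, reusing the trial object from the sketch above (the tensor name is a placeholder): list which workers wrote a step, then fetch each worker's value:

t = trial.tensor("loss")  # tensor name is illustrative
for w in t.workers(step_num=0):
    print(w, t.value(step_num=0, worker=w))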