Code Example #1
 def reset(self) -> Tuple[Experience, np.ndarray]:
     """
     Reset gazebo, reset the FSM and wait until the FSM is in the 'running' state.
     Return an experience without reward or action.
     """
     cprint('resetting', self._logger)
     self._reset_filters()
     self._step = 0
     self._return = 0
     if self._config.ros_config.ros_launch_config.gazebo:
         self._reset_gazebo()
     self._reset_publisher.publish(Empty())
     self._clear_experience_values()
     while self.fsm_state != FsmState.Running \
             or self.observation is None \
             or self.terminal_state is None \
             or self.terminal_state is TerminationType.Unknown:
         self._run_shortly()
     self.observation = self._filter_observation(self.observation)
     self._current_experience = Experience(
         done=deepcopy(self.terminal_state),
         observation=deepcopy(self.observation),
         time_stamp=int(rospy.get_time() * 10**3),
         info={})
     self._previous_observation = deepcopy(self.observation)
     return self._current_experience, deepcopy(self.observation)
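For context, the Experience objects constructed in these examples are plain records of a single environment step. A minimal sketch consistent with the fields used throughout this page (the actual definition in the codebase may differ) could look like:

from dataclasses import dataclass, field
from typing import Any, Dict


@dataclass
class Experience:
    # all fields are optional so partially filled experiences (e.g. right after reset) are valid
    observation: Any = None
    action: Any = None
    reward: Any = None
    done: Any = None
    time_stamp: int = None
    info: Dict[str, Any] = field(default_factory=dict)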
Code Example #2
 def reset(self) -> Tuple[Experience, np.ndarray]:
     self._reset_filters()
     observation = self._gym.reset()
     observation = self._filter_observation(observation)
     self._step_count = 0
     self._return = 0
     self.previous_observation = observation.copy()
     return Experience(done=TerminationType.NotDone), observation
Code Example #3
 def setUp(self) -> None:
     self.output_dir = f'{os.environ["PWD"]}/test_dir/{get_filename_without_extension(__file__)}'
     os.makedirs(self.output_dir, exist_ok=True)
     self.batch = Dataset()
     self.durations = [10, 1, 5]
     self.step_reward = torch.as_tensor(1)
     self.end_reward = torch.as_tensor(10)
     # three episodes of lengths 10, 1 and 5: step_reward on every step, end_reward on the terminal one
     for episode in range(3):
         for experience in range(self.durations[episode] - 1):
             self.batch.append(
                 Experience(observation=torch.as_tensor(5),
                            action=torch.as_tensor(5),
                            reward=self.step_reward,
                            done=torch.as_tensor(0)))
         self.batch.append(
             Experience(observation=torch.as_tensor(5),
                        action=torch.as_tensor(5),
                        reward=self.end_reward,
                        done=torch.as_tensor(2)))
Code Example #4
 def test_dataset_size(self):
     dataset = Dataset()
     dataset.append(
         Experience(observation=torch.as_tensor([0] * 10),
                    action=torch.as_tensor([1] * 3),
                    reward=torch.as_tensor(0),
                    done=torch.as_tensor(2)))
     first_size = dataset.get_memory_size()
     dataset.append(
         Experience(observation=torch.as_tensor([0] * 10),
                    action=torch.as_tensor([1] * 3),
                    reward=torch.as_tensor(0),
                    done=torch.as_tensor(2)))
     self.assertEqual(2 * first_size, dataset.get_memory_size())
     # the same experiences stored with an explicit float32 dtype should take
     # half the memory of the default int64 tensors used above
     dataset = Dataset()
     dataset.append(
         Experience(observation=torch.as_tensor([0] * 10,
                                                dtype=torch.float32),
                    action=torch.as_tensor([1] * 3, dtype=torch.float32),
                    reward=torch.as_tensor(0, dtype=torch.float32),
                    done=torch.as_tensor(2, dtype=torch.float32)))
     second_size = dataset.get_memory_size()
     self.assertEqual(first_size, 2 * second_size)
Code Example #5
 def test_dataset_shuffle(self):
     run_length = 10
     dataset = Dataset()
     for run_index in range(3):
         for step_index in range(run_length + run_index):
             dataset.append(
                 Experience(
                     observation=torch.as_tensor((len(dataset), )),
                     action=torch.as_tensor((len(dataset), )),
                     reward=torch.as_tensor((0, )),
                     done=torch.as_tensor(
                         (0, )) if step_index != run_length + run_index - 1
                     else torch.as_tensor((1, ))))
     self.assertEqual(dataset.observations[0].item(), 0)
     dataset.shuffle()
     # shuffling must keep observation/action pairs aligned
     self.assertEqual(dataset.observations[0], dataset.actions[0])
     self.assertNotEqual(dataset.observations[0].item(), 0)
Code Example #6
 def test_dataset_subsample(self):
     run_length = 10
     subsample = 3
     dataset = Dataset()
     for run_index in range(3):
         for step_index in range(run_length + run_index):
             dataset.append(
                 Experience(
                     observation=torch.as_tensor((step_index, )),
                     action=torch.as_tensor((0, )),
                     reward=torch.as_tensor((0, )),
                     done=torch.as_tensor(
                         (0, )) if step_index != run_length + run_index - 1
                     else torch.as_tensor((1, ))))
     dataset.subsample(subsample)
     # only every `subsample`-th step of each run and the terminal steps should remain
     for exp_index in range(len(dataset)):
         self.assertTrue(
             dataset.observations[exp_index].item() % subsample == 0
             or dataset.done[exp_index].item() == 1)
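The tests above only exercise a handful of Dataset methods (append, len, get_memory_size, shuffle, subsample and the per-field lists). A minimal sketch consistent with that usage, assuming plain list-of-tensor storage per field (the real implementation in the codebase may differ), could look like:

import random
from typing import List

import torch


class Dataset:
    def __init__(self):
        self.observations: List[torch.Tensor] = []
        self.actions: List[torch.Tensor] = []
        self.rewards: List[torch.Tensor] = []
        self.done: List[torch.Tensor] = []

    def __len__(self) -> int:
        return len(self.observations)

    def append(self, experience: Experience) -> None:
        self.observations.append(experience.observation)
        self.actions.append(experience.action)
        self.rewards.append(experience.reward)
        self.done.append(experience.done)

    def get_memory_size(self) -> int:
        # raw tensor storage in bytes, so float32 fields weigh half as much as the default int64
        return sum(t.element_size() * t.nelement()
                   for values in (self.observations, self.actions, self.rewards, self.done)
                   for t in values)

    def _reorder(self, indices: List[int]) -> None:
        self.observations = [self.observations[i] for i in indices]
        self.actions = [self.actions[i] for i in indices]
        self.rewards = [self.rewards[i] for i in indices]
        self.done = [self.done[i] for i in indices]

    def shuffle(self) -> None:
        # one shared permutation keeps (observation, action, reward, done) tuples aligned
        order = list(range(len(self)))
        random.shuffle(order)
        self._reorder(order)

    def subsample(self, rate: int) -> None:
        # keep every `rate`-th step of each episode plus all terminal steps;
        # the per-episode counter restarts after a terminal flag
        keep, step = [], 0
        for index in range(len(self)):
            terminal = bool(self.done[index].item())
            if step % rate == 0 or terminal:
                keep.append(index)
            step = 0 if terminal else step + 1
        self._reorder(keep)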
Code Example #7
def load_run(directory: str, arrange_according_to_timestamp: bool = False, input_size: List[int] = None,
             scope: str = 'default') -> List[Experience]:
    run = {}
    time_stamps = {}
    for x in os.listdir(directory):
        # strip the '.data' extension to get the field name ('observation', 'action', ...)
        k = x if not x.endswith('.data') else x[:-5]
        time_stamps[x], run[k] = load_data(x, directory, size=input_size if k == 'observation' else None,
                                           scope=scope if k == 'observation' else None)
    if arrange_according_to_timestamp:
        run = arrange_run_according_timestamps(run, time_stamps)
    if len(run.keys()) == 0:
        return []
    else:
        return [Experience(
            observation=run['observation'][index] if 'observation' in run.keys() else None,
            action=run['action'][index] if 'action' in run.keys() else None,
            reward=run['reward'][index] if 'reward' in run.keys() else None,
            done=run['done'][index] if 'done' in run.keys() else None
        ) for index in range(len(run['observation']))]
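An illustrative call, assuming a recorded run directory that contains 'observation', 'action', 'reward' and 'done' data files; the path and input size below are made up for the example:

experiences = load_run('/tmp/recordings/run_00001',
                       arrange_according_to_timestamp=True,
                       input_size=[3, 128, 128])
print(f'loaded {len(experiences)} experiences')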
Code Example #8
 def step(self, action: Action) -> Tuple[Experience, np.ndarray]:
     self._step_count += 1
     observation, unfiltered_reward, done, info = self._gym.step(
         action.value)
     observation = self._filter_observation(observation)
     reward = self._filter_reward(unfiltered_reward)
     info['unfiltered_reward'] = unfiltered_reward
     self._return += unfiltered_reward
     # chained comparison: the step limit only applies when max_number_of_steps is not the -1 sentinel
     terminal = TerminationType.Done if done or self._step_count >= self._config.max_number_of_steps != -1 \
         else TerminationType.NotDone
     if terminal == TerminationType.Done:
         info['return'] = self._return
     experience = Experience(done=terminal,
                             observation=self.previous_observation.copy(),
                             action=action,
                             reward=reward,
                             time_stamp=self._step_count,
                             info=info)
     if self._config.gym_config.render:
         self._gym.render()
     self.previous_observation = observation.copy()
     return experience, observation.copy()
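Putting the gym-based reset() and step() above together, a typical rollout could look as follows; `environment` and `policy` are placeholders for an object exposing those two methods and for any callable that maps an observation to an Action:

experience, observation = environment.reset()
while experience.done == TerminationType.NotDone:
    action = policy(observation)  # hypothetical callable returning an Action
    experience, observation = environment.step(action)
print(f"episode return: {experience.info['return']}")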
Code Example #9
def experience_generator(input_size: tuple = (3, 100, 100),
                         output_size: tuple = (1, ),
                         continuous: bool = True,
                         fixed_input_value: float = None,
                         fixed_output_value: float = None):
    # episode layout: `starting` warm-up steps (Unknown), `running` steps (NotDone)
    # and a single terminal step (Success)
    starting = 5
    running = np.random.randint(10, 12)
    ending = 1
    for step in range(starting + running + ending):
        experience = Experience(info={})
        if step < starting:
            experience.done = TerminationType.Unknown
        elif starting <= step < starting + running:
            experience.done = TerminationType.NotDone
        else:
            experience.done = TerminationType.Success
        experience.time_stamp = step
        experience.observation = np.random.randint(0, 255, size=input_size, dtype=np.uint8) \
            if fixed_input_value is None else fixed_input_value
        if fixed_output_value is not None:
            experience.action = np.asarray(fixed_output_value)
        else:
            if continuous:
                experience.action = np.random.random(output_size)
            else:
                assert len(output_size) == 1
                probabilities = [8]
                probabilities += [1] * (output_size[0] - 1)
                probabilities = [p / sum(probabilities) for p in probabilities]
                experience.action = np.asarray(
                    [np.argmax(np.random.multinomial(1, probabilities))])
        experience.reward = np.random.normal()
        yield experience
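A quick illustrative use of the generator: materialise one synthetic episode and check its termination flags (the sizes passed in are arbitrary):

episode = list(experience_generator(input_size=(1, 8, 8),
                                    output_size=(3, ),
                                    continuous=False))
assert episode[0].done == TerminationType.Unknown   # warm-up steps come first
assert episode[-1].done == TerminationType.Success  # single terminal step
assert len(episode) in (16, 17)                     # 5 warm-up + 10 or 11 running + 1 terminal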
Code Example #10
    def _update_current_experience(self) -> bool:
        """
        If all experience fields are updated,
        store all experience fields in _current_experience fields end return True
        else False.
        :return: Bool whether all fields are updated
        """
        self._internal_update_terminal_state(
        )  # check count_steps for termination
        if self._config.ros_config.observation != '' and self.observation is None:
            cprint("waiting for observation",
                   self._logger,
                   msg_type=MessageType.debug)
            return False
        if self.reward is None:
            cprint("waiting for reward",
                   self._logger,
                   msg_type=MessageType.debug)
            return False
        if self.terminal_state is None:
            cprint("waiting for terminal state",
                   self._logger,
                   msg_type=MessageType.debug)
            return False
        if self.action is None and self.terminal_state == TerminationType.NotDone:
            # Don't wait for next action if episode is finished
            cprint("waiting for action",
                   self._logger,
                   msg_type=MessageType.debug)
            return False
        if None in [v for v in self.info.values() if not isinstance(v, Iterable)] and \
                self.terminal_state == TerminationType.NotDone:  # Don't wait for next info if episode is finished:
            cprint("waiting for info",
                   self._logger,
                   msg_type=MessageType.debug)
            return False
        self.observation = self._filter_observation(self.observation)
        self.info['unfiltered_reward'] = deepcopy(self.reward)
        self._return += self.reward
        self.reward = self._filter_reward(self.reward)
        if self.terminal_state in [
                TerminationType.Done, TerminationType.Success,
                TerminationType.Failure
        ]:
            self.info['return'] = self._return

        self._current_experience = Experience(
            done=deepcopy(self.terminal_state),
            observation=deepcopy(self._previous_observation),
            action=deepcopy(self.action),
            reward=deepcopy(self.reward),
            time_stamp=int(rospy.get_time() * 10**3),
            info={
                field_name: deepcopy(self.info[field_name])
                for field_name in self.info.keys()
            })
        cprint(
            f"update current experience: "
            f"done {self._current_experience.done}, "
            f"reward {self._current_experience.reward}, "
            f"time_stamp {self._current_experience.time_stamp}, "
            f"info: {[k for k in self._current_experience.info.keys()]}",
            self._logger,
            msg_type=MessageType.debug)
        self._previous_observation = deepcopy(self.observation)
        return True