Example #1
    def _publish_reward_topic(self, reward, episode_number=1):
        """
        Publishes the given reward on the reward topic so that it can be
        accessed easily from the ROS infrastructure.
        :param reward: accumulated reward of the finished episode
        :param episode_number: index of the episode the reward belongs to
        :return: None
        """
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)
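The snippet assumes that self.reward_pub is a rospy.Publisher for the RLExperimentInfo message of the openai_ros package. As a rough sketch of what "easy access from ROS infrastructure" looks like on the other side, a separate node can subscribe to the same topic and log every episode result; the topic name /openai/reward below is an assumption and must match whatever name was used when the publisher was created.

import rospy
from openai_ros.msg import RLExperimentInfo


def on_reward(msg):
    # Each message carries one finished episode: its index and total reward.
    rospy.loginfo("episode %d finished with reward %.3f",
                  msg.episode_number, msg.episode_reward)


if __name__ == '__main__':
    rospy.init_node('reward_listener')
    # Topic name is an assumption; use the one the environment publishes on.
    rospy.Subscriber('/openai/reward', RLExperimentInfo, on_reward)
    rospy.spin()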
Example #2
    def _publish_reward_topic(self, reward, steps, episode_number=1):
        """
        Publishes the given reward on the reward topic so that it can be
        accessed easily from the ROS infrastructure, and appends the episode
        result to an in-memory history and a semicolon-separated CSV log.
        :param reward: accumulated reward of the finished episode
        :param steps: number of steps the episode lasted
        :param episode_number: index of the episode the reward belongs to
        :return: None
        """
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)

        self.reward_list.append(reward)
        self.episode_list.append(episode_number)
        self.step_list.append(steps)

        # One CSV row per episode: reward;episode_number;steps
        liste = str(reward) + ";" + str(episode_number) + ";" + str(steps) + "\n"
        with open(self.csv_name + '.csv', 'a') as csv_file:
            csv_file.write(liste)
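Because each call appends a "reward;episode_number;steps" line to <self.csv_name>.csv, the log can be analysed offline after training. A minimal sketch, assuming the file was written exactly as above; the file name 'rewards.csv' is a placeholder for the actual value of self.csv_name plus the '.csv' suffix.

import csv

rewards, episodes, steps = [], [], []
# Placeholder file name; use self.csv_name + '.csv'.
with open('rewards.csv') as f:
    for row in csv.reader(f, delimiter=';'):
        rewards.append(float(row[0]))
        episodes.append(int(row[1]))
        steps.append(int(row[2]))

if rewards:
    print('episodes: %d, mean reward: %.2f, mean steps: %.1f'
          % (len(rewards), sum(rewards) / len(rewards),
             float(sum(steps)) / len(steps)))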
Example #3
    def run(self, agent, num_episodes, do_train=False):
        scores = deque(maxlen=100)
        for episode in range(num_episodes):
            state = self.env.reset().reshape(
                1, self.env.observation_space.shape[0])
            total_reward = 0
            for t in range(self.max_timesteps):
                action = agent.select_action(state, do_train)

                next_state, reward, done, _ = self.env.step(action)
                next_state = next_state.reshape(
                    1, self.env.observation_space.shape[0])
                reward = self.calc_reward(state, action, reward, next_state,
                                          done)

                if do_train:
                    agent.record(state, action, reward, next_state, done)

                total_reward += reward
                state = next_state
                if done:
                    reward_msg = RLExperimentInfo()
                    reward_msg.episode_number = episode
                    reward_msg.episode_reward = total_reward
                    self.reward_pub.publish(reward_msg)
                    break
            scores.append(total_reward)
            mean_score = np.mean(scores)
            if do_train:
                agent.replay()

            print(
                '[Episode {}] - Mean survival time over last 100 episodes was {} ticks.'
                .format(episode, mean_score))
        done_msg = Bool()
        done_msg.data = True
        self.done_pub.publish(done_msg)
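run() only relies on three methods of the agent object: select_action(state, do_train), record(state, action, reward, next_state, done), and replay(). The actual agent implementation is not part of this example; the stub below merely illustrates that interface with a random policy so the loop could be exercised end to end.

class RandomAgentStub(object):
    """Illustrative stand-in for the agent interface used by run()."""

    def __init__(self, action_space):
        self.action_space = action_space

    def select_action(self, state, do_train):
        # A real agent would act greedily on its value estimates when
        # do_train is False; this stub just samples a random action.
        return self.action_space.sample()

    def record(self, state, action, reward, next_state, done):
        # A real agent would push the transition into a replay buffer.
        pass

    def replay(self):
        # A real agent would sample the buffer and update its model here.
        pass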
Example #4
    def _publish_reward_topic(self, reward, episode_number=1):
        reward_msg = RLExperimentInfo()
        reward_msg.episode_number = episode_number
        reward_msg.episode_reward = reward
        self.reward_pub.publish(reward_msg)
                if next_state_[j] < 0:
                    next_state_[j] = 0
                elif next_state_[j] > env.shape[j] - 1:
                    next_state_[j] = env.shape[j] - 1
            print("Go into state: ", next_state_, " from state: ", state_,
                  " by action: ", stringify(action))
            # Make the algorithm learn based on the results
            qlearn.learn(tuple(state_), action, reward, tuple(next_state_))

            if not done:
                state = next_state
            else:
                rospy.loginfo("DONE")
                last_time_steps = np.append(last_time_steps, [int(t + 1)])
                reward_msg = RLExperimentInfo()
                reward_msg.episode_number = x
                reward_msg.episode_reward = cumulated_reward
                reward_pub.publish(reward_msg)
                break

        m, s = divmod(int(time.time() - start_time), 60)
        h, m = divmod(m, 60)
        rospy.loginfo(
            ("EP: " + str(x + 1) + " - [alpha: " +
             str(round(qlearn.alpha, 2)) + " - gamma: " +
             str(round(qlearn.gamma, 2)) + " - epsilon: " +
             str(round(qlearn.epsilon, 2)) + "] - Reward: " +
             str(cumulated_reward) + "     Time: %d:%02d:%02d" % (h, m, s)))

    done_msg = Bool()
    done_msg.data = True
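The internals of the qlearn object are not shown in this example; from the call qlearn.learn(state, action, reward, next_state) and the logged alpha, gamma and epsilon parameters, it appears to be a standard tabular Q-learning agent. For reference, a minimal sketch of that update rule, Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)), assuming a dictionary-backed Q-table; this is not the implementation used above.

import random


class QLearnSketch(object):
    """Minimal tabular Q-learning sketch (illustrative only)."""

    def __init__(self, actions, alpha=0.2, gamma=0.9, epsilon=0.1):
        self.q = {}              # (state, action) -> estimated value
        self.actions = actions
        self.alpha = alpha       # learning rate
        self.gamma = gamma       # discount factor
        self.epsilon = epsilon   # exploration probability

    def choose_action(self, state):
        # Epsilon-greedy action selection.
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        values = [self.q.get((state, a), 0.0) for a in self.actions]
        return self.actions[values.index(max(values))]

    def learn(self, state, action, reward, next_state):
        # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        best_next = max(self.q.get((next_state, a), 0.0) for a in self.actions)
        old = self.q.get((state, action), 0.0)
        self.q[(state, action)] = old + self.alpha * (
            reward + self.gamma * best_next - old)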