def _publish_reward_topic(self, reward, episode_number=1): """ This function publishes the given reward in the reward topic for easy access from ROS infrastructure. :param reward: :param episode_number: :return: """ reward_msg = RLExperimentInfo() reward_msg.episode_number = episode_number reward_msg.episode_reward = reward self.reward_pub.publish(reward_msg)
def _publish_reward_topic(self, reward, steps, episode_number=1):
    """
    This function publishes the given reward in the reward topic for easy
    access from ROS infrastructure, and additionally logs reward, episode
    number and step count to a CSV file.
    :param reward: cumulated reward of the finished episode
    :param steps: number of steps taken in the episode
    :param episode_number: index of the finished episode
    :return:
    """
    reward_msg = RLExperimentInfo()
    reward_msg.episode_number = episode_number
    reward_msg.episode_reward = reward
    self.reward_pub.publish(reward_msg)

    self.reward_list.append(reward)
    self.episode_list.append(episode_number)
    self.step_list.append(steps)

    # Append one semicolon-separated record per episode.
    liste = str(reward) + ";" + str(episode_number) + ";" + str(steps) + "\n"
    with open(self.csv_name + '.csv', 'a') as csv_file:
        csv_file.write(liste)
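# A hedged sketch of reading the CSV written above back for analysis. The
# column order (reward;episode;steps) mirrors the write call above; the file
# name 'experiment.csv' is a placeholder for whatever self.csv_name holds.
import csv

rewards, episodes, steps = [], [], []
with open('experiment.csv') as f:
    for row in csv.reader(f, delimiter=';'):
        rewards.append(float(row[0]))
        episodes.append(int(row[1]))
        steps.append(int(row[2]))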
def run(self, agent, num_episodes, do_train=False):
    # Requires at module level: from collections import deque,
    # import numpy as np, and from std_msgs.msg import Bool.
    scores = deque(maxlen=100)
    for episode in range(num_episodes):
        state = self.env.reset().reshape(
            1, self.env.observation_space.shape[0])
        total_reward = 0
        for t in range(self.max_timesteps):
            action = agent.select_action(state, do_train)
            next_state, reward, done, _ = self.env.step(action)
            next_state = next_state.reshape(
                1, self.env.observation_space.shape[0])
            reward = self.calc_reward(state, action, reward, next_state,
                                      done)
            if do_train:
                agent.record(state, action, reward, next_state, done)
            total_reward += reward
            state = next_state
            if done:
                # Publish the episode result on the reward topic.
                reward_msg = RLExperimentInfo()
                reward_msg.episode_number = episode
                reward_msg.episode_reward = total_reward
                self.reward_pub.publish(reward_msg)
                break
        scores.append(total_reward)
        mean_score = np.mean(scores)
        if do_train:
            agent.replay()
        print('[Episode {}] - Mean survival time over last 100 episodes '
              'was {} ticks.'.format(episode, mean_score))
    # Signal that the whole run has finished.
    done_msg = Bool()
    done_msg.data = True
    self.done_pub.publish(done_msg)
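# The run() loop above only relies on a duck-typed agent exposing
# select_action(), record() and replay(). The stub below is an illustrative
# assumption (a random agent), not the project's actual agent implementation.
import random


class RandomAgent(object):
    """Minimal stand-in agent satisfying the interface run() expects."""

    def __init__(self, n_actions):
        self.n_actions = n_actions

    def select_action(self, state, do_train):
        # Pick a random discrete action regardless of state.
        return random.randrange(self.n_actions)

    def record(self, state, action, reward, next_state, done):
        # A real agent would store the transition in a replay buffer.
        pass

    def replay(self):
        # A real agent would sample the buffer and update its model.
        pass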
def _publish_reward_topic(self, reward, episode_number=1):
    reward_msg = RLExperimentInfo()
    reward_msg.episode_number = episode_number
    reward_msg.episode_reward = reward
    self.reward_pub.publish(reward_msg)
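# A hedged sketch of consuming these messages from the ROS side, e.g. for
# live monitoring. The '/openai/reward' topic name is an assumption taken
# from the openai_ros convention; adjust it to whatever reward_pub uses.
import rospy
from openai_ros.msg import RLExperimentInfo


def _on_reward(msg):
    rospy.loginfo("Episode %d finished with reward %.2f",
                  msg.episode_number, msg.episode_reward)


if __name__ == '__main__':
    rospy.init_node('reward_listener')
    rospy.Subscriber('/openai/reward', RLExperimentInfo, _on_reward)
    rospy.spin()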
# Clamp each coordinate of the discretized next state to the grid bounds.
if next_state_[j] < 0:
    next_state_[j] = 0
elif next_state_[j] > env.shape[j] - 1:
    next_state_[j] = env.shape[j] - 1

print("Go into state: ", next_state_, " from state: ", state_,
      " by action: ", stringify(action))

# Make the algorithm learn based on the results
qlearn.learn(tuple(state_), action, reward, tuple(next_state_))

if not done:
    state = next_state
else:
    rospy.loginfo("DONE")
    last_time_steps = np.append(last_time_steps, [int(t + 1)])
    reward_msg = RLExperimentInfo()
    reward_msg.episode_number = x
    reward_msg.episode_reward = cumulated_reward
    reward_pub.publish(reward_msg)
    break

m, s = divmod(int(time.time() - start_time), 60)
h, m = divmod(m, 60)
rospy.loginfo("EP: " + str(x + 1) +
              " - [alpha: " + str(round(qlearn.alpha, 2)) +
              " - gamma: " + str(round(qlearn.gamma, 2)) +
              " - epsilon: " + str(round(qlearn.epsilon, 2)) +
              "] - Reward: " + str(cumulated_reward) +
              " Time: %d:%02d:%02d" % (h, m, s))

done_msg = Bool()
done_msg.data = True
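# For reference, a minimal tabular Q-learning class exposing the interface
# the loop above relies on (learn(), plus alpha/gamma/epsilon attributes).
# This is a generic textbook sketch, not the project's actual qlearn module.
class QLearn(object):
    def __init__(self, actions, alpha=0.2, gamma=0.9, epsilon=0.1):
        self.q = {}             # (state, action) -> estimated value
        self.actions = actions  # list of discrete actions
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration rate, used by action selection

    def learn(self, state, action, reward, next_state):
        # Standard Q-learning update:
        # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        old = self.q.get((state, action), 0.0)
        best_next = max(self.q.get((next_state, a), 0.0)
                        for a in self.actions)
        self.q[(state, action)] = old + self.alpha * (
            reward + self.gamma * best_next - old)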