import os
import time

import numpy as np
import rospy
import std_msgs.msg
from statistics import mean, stdev  # assumed: source of the mean/stdev used below
from tqdm import tqdm

# Assumed to be provided elsewhere in the package (not shown in this snippet):
# Game, plot, package_path, the custom messages action_agent /
# reward_observation / action_msg, and the hyperparameters MAX_STEPS,
# BATCH_SIZE, UPDATE_INTERVAL, action_duration, offline_updates_num, test_num.


class controller:
    def __init__(self):
        print("init")
        self.experiments_num = 10
        # self.human_sub = rospy.Subscriber("/RW_x_direction", Int16, self.simulate)
        self.game = Game()
        self.action_human = 0.0
        self.action_agent = 0.0
        #
        # self.human_sub = rospy.Subscriber("/rl/hand_action_x", action_msg, self.set_action_human)
        # self.human_sub = rospy.Subscriber("/rl/action_x", action_msg, self.set_action_human)
        self.agent_sub = rospy.Subscriber("/rl/total_action", action_agent,
                                          self.set_action_agent)
        # self.reward_pub = rospy.Publisher('/rl/reward', Int16, queue_size = 10)
        self.obs_robot_pub = rospy.Publisher('/rl/game_response',
                                             reward_observation,
                                             queue_size=10,
                                             latch=True)
        self.transmit_time_list = []
        self.rewards_list = []
        self.turn_list = []
        self.interaction_time_list = []
        self.interaction_training_time_list = []
        self.mean_list = []
        self.stdev_list = []
        self.alpha_values = []
        self.policy_loss_list = []
        self.value_loss_list = []
        self.critics_lr_list = []
        self.value_critic_lr_list = []
        self.actor_lr_list = []

        # NOTE: self.agent (the learning agent used in game_loop) is not
        # constructed in this snippet; it is assumed to be attached elsewhere
        # before game_loop() runs.

        self.plot_directory = package_path + "/src/plots/"
        if not os.path.exists(self.plot_directory):
            print("Dir %s was not found. Creating it..." % (self.plot_directory))
            os.makedirs(self.plot_directory)

    def set_action_agent(self, action_agent):
        self.prev_state = self.game.getState()
        self.action_agent = action_agent.action[1]
        self.action_human = action_agent.action[0]
        self.transmit_time_list.append(rospy.get_rostime().to_sec() -
                                       action_agent.header.stamp.to_sec())

    def set_human_agent(self, action_agent):
        if action_agent.action != 0.0:
            self.action_human = action_agent.action
            self.transmit_time_list.append(rospy.get_rostime().to_sec() -
                                           action_agent.header.stamp.to_sec())

    def publish_reward_and_observations(self):
        h = std_msgs.msg.Header()
        h.stamp = rospy.Time.now()
        new_obs = reward_observation()
        new_obs.header = h
        new_obs.observations = self.game.getState()
        new_obs.prev_state = self.prev_state
        new_obs.final_state = self.game.finished
        new_obs.reward = self.game.getReward()
        self.obs_robot_pub.publish(new_obs)

    def game_loop(self):
        first_update = True
        global_time = rospy.get_rostime().to_sec()
        self.resetGame()
        for exp in range(MAX_STEPS + 1):
            if self.game.running:
                start_interaction_time = time.time()
                self.game.experiment = exp
                self.turns += 1
                state = self.game.getState()
                agent_act = self.agent.next_action(state)

                # Apply the chosen pair of actions for "action_duration" seconds.
                tmp_time = time.time()
                act_human = self.action_human
                while time.time() - tmp_time < action_duration:
                    exec_time = self.game.play([act_human, agent_act.item()])

                reward = self.game.getReward()
                next_state = self.game.getState()
                done = self.game.finished
                episode = [state, reward, agent_act, next_state, done]
                self.agent.update_rw_state(episode)
                self.total_reward_per_game += reward
                self.interaction_time_list.append(time.time() - start_interaction_time)

                # When the replay buffer has enough samples, run one gradient
                # update at every turn.
                if len(self.agent.D) >= BATCH_SIZE:
                    if first_update:
                        print("\nStarting updates")
                        first_update = False
                    [alpha, policy_loss, value_loss, critics_lr,
                     value_critic_lr, actor_lr] = self.agent.train(sample=episode)
                    self.alpha_values.append(alpha.item())
                    self.policy_loss_list.append(policy_loss.item())
                    self.value_loss_list.append(value_loss.item())
                    self.critics_lr_list.append(critics_lr)
                    self.value_critic_lr_list.append(value_critic_lr)
                    self.actor_lr_list.append(actor_lr)
                    self.interaction_training_time_list.append(
                        time.time() - start_interaction_time)

                # Run "offline_updates_num" offline gradient updates every
                # "UPDATE_INTERVAL" steps.
                if len(self.agent.D) >= BATCH_SIZE and exp % UPDATE_INTERVAL == 0:
                    print(str(offline_updates_num) + " Gradient updates")
                    self.game.waitScreen("Training... Please Wait.")
                    pbar = tqdm(range(1, offline_updates_num + 1),
                                unit_scale=1,
                                smoothing=0)
                    for _ in pbar:
                        [alpha, policy_loss, value_loss, critics_lr,
                         value_critic_lr, actor_lr] = self.agent.train(verbose=False)
                        self.alpha_values.append(alpha.item())
                        self.policy_loss_list.append(policy_loss.item())
                        self.value_loss_list.append(value_loss.item())
                        self.critics_lr_list.append(critics_lr)
                        self.value_critic_lr_list.append(value_critic_lr)
                        self.actor_lr_list.append(actor_lr)

                    # Run evaluation trials.
                    mean_score, stdev_score = self.test()
                    self.mean_list.append(mean_score)
                    self.stdev_list.append(stdev_score)
                    self.resetGame()
            else:
                self.turn_list.append(self.turns)
                self.rewards_list.append(self.total_reward_per_game)
                # Reset game.
                self.resetGame()

        self.save_and_plot_stats()
        print("Average Execution time per interaction: %f milliseconds (stdev: %f).\n"
              % (mean(self.interaction_time_list) * 1e3,
                 stdev(self.interaction_time_list) * 1e3))
        print("Average Execution time per interaction and online update: %f milliseconds (stdev: %f).\n"
              % (mean(self.interaction_training_time_list) * 1e3,
                 stdev(self.interaction_training_time_list) * 1e3))
        print("Total time of experiments is: %d minutes and %d seconds.\n"
              % ((rospy.get_rostime().to_sec() - global_time) / 60,
                 (rospy.get_rostime().to_sec() - global_time) % 60))
        self.game.endGame()

    def test(self):
        score_list = []
        for game in range(test_num):
            score = 200
            self.resetGame("Testing Model. Trial %d of %d." % (game + 1, test_num))
            while self.game.running:
                self.game.experiment = "Test: " + str(game + 1)
                state = self.game.getState()
                # Take only the mean of the policy (deterministic action).
                agent_act = self.agent.next_action(state, stochastic=False)
                # print(agent_act)
                tmp_time = time.time()
                while time.time() - tmp_time < 0.2:
                    exec_time = self.game.play(
                        [self.action_human, agent_act.item()],
                        total_games=test_num)
                score -= 1
            score_list.append(score)
        return [mean(score_list), stdev(score_list)]

    def resetGame(self, msg=None):
        wait_time = 3
        self.game.waitScreen(msg1="Put Right Wrist on starting point.",
                             msg2=msg,
                             duration=wait_time)
        self.game = Game()
        self.action_human = 0.0
        self.action_agent = 0.0
        self.game.start_time = time.time()
        self.total_reward_per_game = 0
        self.turns = 0

    def save_and_plot_stats(self):
        np.savetxt(self.plot_directory + 'alpha_values.csv', self.alpha_values, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'policy_loss.csv', self.policy_loss_list, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'value_loss.csv', self.value_loss_list, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'rewards_list.csv', self.rewards_list, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'turn_list.csv', self.turn_list, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'means.csv', self.mean_list, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'stdev.csv', self.stdev_list, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'critics_lr_list.csv', self.critics_lr_list, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'value_critic_lr_list.csv', self.value_critic_lr_list, delimiter=',', fmt='%f')
        np.savetxt(self.plot_directory + 'actor_lr_list.csv', self.actor_lr_list, delimiter=',', fmt='%f')

        plot(range(len(self.alpha_values)), self.alpha_values, "alpha_values",
             'Alpha Value', 'Number of Gradient Updates', self.plot_directory,
             save=True)
        plot(range(len(self.policy_loss_list)), self.policy_loss_list,
             "policy_loss", 'Policy loss', 'Number of Gradient Updates',
             self.plot_directory, save=True)
        plot(range(len(self.value_loss_list)), self.value_loss_list,
             "value_loss_list", 'Value loss', 'Number of Gradient Updates',
             self.plot_directory, save=True)
        plot(range(len(self.rewards_list)), self.rewards_list,
             "Rewards_per_game", 'Total Rewards per Game', 'Number of Games',
             self.plot_directory, save=True)
        plot(range(len(self.turn_list)), self.turn_list, "Steps_per_game",
             'Turns per Game', 'Number of Games', self.plot_directory,
             save=True)
        plot(range(len(self.critics_lr_list)), self.critics_lr_list,
             "critics_lr_list", 'Critic lr', 'Number of Gradient Updates',
             self.plot_directory, save=True)
        plot(range(len(self.value_critic_lr_list)), self.value_critic_lr_list,
             "value_critic_lr_list", 'Value lr', 'Number of Gradient Updates',
             self.plot_directory, save=True)
        plot(range(len(self.actor_lr_list)), self.actor_lr_list,
             "actor_lr_list", 'Actor lr', 'Number of Gradient Updates',
             self.plot_directory, save=True)
        plot(range(UPDATE_INTERVAL, MAX_STEPS + UPDATE_INTERVAL, UPDATE_INTERVAL),
             self.mean_list, "trials", 'Tests Score', 'Number of Interactions',
             self.plot_directory, save=True, variance=True,
             stdev=self.stdev_list)
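# --- Hedged sketch (assumption, not taken from the source) -----------------
# A reward_observation.msg definition consistent with the fields both
# controllers populate. Field types are inferred from usage and may differ
# from the real message in the package:
#
#     std_msgs/Header header
#     float64[] observations   # current state from Game.getState()
#     float64[] prev_state     # state captured when the last action arrived
#     bool      final_state    # Game.finished flag
#     float64   reward         # scalar from Game.getReward()
# ----------------------------------------------------------------------------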
# A second controller variant (different topic names, no in-process training):
# it only plays the game and publishes rewards/observations, while the combined
# human/agent action arrives on /rl/final_action from an external node.
class controller:
    def __init__(self):
        print("init")
        self.experiments_num = 10
        # self.human_sub = rospy.Subscriber("/RW_x_direction", Int16, self.simulate)
        self.game = Game()
        self.action_human = 0.0
        self.action_agent = 0.0
        #
        # self.human_sub = rospy.Subscriber("/rl/hand_action_x", action_msg, self.set_action_human)
        # self.human_sub = rospy.Subscriber("/rl/action_x", action_msg, self.set_action_human)
        self.agent_sub = rospy.Subscriber("/rl/final_action", action_agent,
                                          self.set_action_agent)
        # self.reward_pub = rospy.Publisher('/rl/reward', Int16, queue_size = 10)
        self.obs_robot_pub = rospy.Publisher('/rl/reward_and_observation_game',
                                             reward_observation,
                                             queue_size=10,
                                             latch=True)
        self.transmit_time_list = []
        self.prev_state = self.game.getState()
        self.publish_reward_and_observations()

    # def set_action_human(self, action_human):
    #     self.action_human = action_human.action
    #     self.transmit_time_list.append(rospy.get_rostime().to_sec() - action_human.header.stamp.to_sec())

    def set_action_agent(self, action_agent):
        self.prev_state = self.game.getState()
        self.action_agent = action_agent.action[1]
        self.action_human = action_agent.action[0]
        self.transmit_time_list.append(rospy.get_rostime().to_sec() -
                                       action_agent.header.stamp.to_sec())

    def publish_reward_and_observations(self):
        h = std_msgs.msg.Header()
        h.stamp = rospy.Time.now()
        new_obs = reward_observation()
        new_obs.header = h
        new_obs.observations = self.game.getState()
        new_obs.prev_state = self.prev_state
        new_obs.final_state = self.game.finished
        new_obs.reward = self.game.getReward()
        self.obs_robot_pub.publish(new_obs)

    def game_loop(self):
        total_time = []
        # print(self.action_human)
        for exp in range(self.experiments_num):
            print("Experiment %d" % (exp + 1))
            while self.game.running:
                exec_time = self.game.play([self.action_human, self.action_agent])
                total_time.append(exec_time)
                self.publish_reward_and_observations()
            # self.game.endGame()
            # Publish last reward and state.
            self.publish_reward_and_observations()
            # Reset game.
            self.game = Game()
            self.game.start_time = time.time()
        self.game.endGame()
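# Hedged usage sketch (not in the original source): how one of these
# controllers might be launched as a ROS node. The node name is an
# illustrative assumption, and this assumes the second (ROS-driven) variant,
# whose game_loop() needs no in-process agent.
if __name__ == "__main__":
    rospy.init_node("rl_game_controller", anonymous=True)
    try:
        ctrl = controller()  # subscribes to actions, publishes reward/observation
        ctrl.game_loop()     # plays experiments_num games, publishing after each turn
    except rospy.ROSInterruptException:
        pass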